# Finetune TinyLlama Colorist 

Train a TinyLlama model to return a single hex color code based on a user's description.

### Environment Setup

In [1]:
#!pip install accelerate peft bitsandbytes transformers trl

In [2]:
# from huggingface_hub import notebook_login
# notebook_login()

### Model Training

In [3]:
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, AutoPeftModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer
import os

# Hugging Face Hub identifiers used by the training cells below.
# Dataset to train on; its "description" and "color" columns are read in prepare_train_data.
dataset_id="burkelibbey/colors"
# Base checkpoint that gets quantized and fine-tuned.
base_model_id="PY007/TinyLlama-1.1B-Chat-v0.3"
# Used as the trainer's output_dir (with push_to_hub=True) and later as the adapter id to merge.
model_id_colorist_lora="mychen76/tinyllama-colorist-lora"

def formatted_train(question, answer) -> str:
    """Render one question/answer pair as a two-turn chat training sample.

    The user turn and the assistant turn are each wrapped in
    `<|im_start|>` ... `<|im_end|>` markers and terminated by a newline.

    Note: prepare_train_data in this notebook does not call this helper; it
    builds its strings inline and keeps a space before the first `<|im_end|>`,
    so the two templates are not byte-identical.

    Args:
        question: Text placed in the user turn.
        answer: Text placed in the assistant turn.

    Returns:
        The formatted sample as a single string.
    """
    user_turn = f"<|im_start|>user\n{question}<|im_end|>\n"
    assistant_turn = f"<|im_start|>assistant\n{answer}<|im_end|>\n"
    return user_turn + assistant_turn

def prepare_train_data(data_id):
    """Load the "train" split of `data_id` and add a chat-formatted "text" column.

    Each row's "description" becomes the user turn and its "color" becomes the
    assistant turn. The result is converted back into a `Dataset`.

    Args:
        data_id: Dataset identifier passed to `load_dataset`.

    Returns:
        A `Dataset` built from the augmented pandas frame.
    """
    def _row_to_text(row):
        # The space before the first <|im_end|> is part of the training
        # template as written; it is kept verbatim here.
        return "<|im_start|>user\n" + row["description"] + " <|im_end|>\n<|im_start|>assistant\n" + row["color"] + "<|im_end|>\n"

    frame = load_dataset(data_id, split="train").to_pandas()
    frame["text"] = frame[["description", "color"]].apply(_row_to_text, axis=1)
    return Dataset.from_pandas(frame)

def get_model_and_tokenizer(mode_id):
    """Load the tokenizer and a 4-bit quantized causal LM for `mode_id`.

    The tokenizer's pad token is set to its EOS token. The model is loaded
    with an NF4, double-quantized BitsAndBytes config (float16 compute) and
    `device_map="auto"`; afterwards `use_cache` is turned off and
    `pretraining_tp` is set to 1 on the model config.

    Args:
        mode_id: Model identifier passed to both `from_pretrained` calls.

    Returns:
        A `(model, tokenizer)` tuple.
    """
    tok = AutoTokenizer.from_pretrained(mode_id)
    tok.pad_token = tok.eos_token

    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype="float16",
        bnb_4bit_use_double_quant=True,
    )
    llm = AutoModelForCausalLM.from_pretrained(
        mode_id,
        quantization_config=quant_config,
        device_map="auto",
    )

    llm.config.use_cache = False
    llm.config.pretraining_tp = 1
    return llm, tok

def finetune_tinyllama(data_id,base_model_id,model_id_colorist_lora):
    """Fine-tune the base model with a LoRA adapter and push the result to the Hub.

    Steps: build the training set, load the quantized model and tokenizer,
    configure LoRA and the training arguments, run `SFTTrainer.train()`, then
    call `push_to_hub()` on the trainer.

    Args:
        data_id: Dataset identifier handed to `prepare_train_data`.
        base_model_id: Model identifier handed to `get_model_and_tokenizer`.
        model_id_colorist_lora: Used as the trainer's `output_dir`.

    Returns:
        None.
    """
    train_set = prepare_train_data(data_id)
    base_model, base_tokenizer = get_model_and_tokenizer(base_model_id)

    lora_settings = LoraConfig(
        r=8,
        lora_alpha=16,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
    )

    # NOTE(review): both num_train_epochs and max_steps are set. Per the
    # TrainingArguments documentation a positive max_steps takes precedence,
    # so the epoch count is likely not what bounds this run - confirm.
    sft_args = TrainingArguments(
        output_dir=model_id_colorist_lora,
        per_device_train_batch_size=8,
        gradient_accumulation_steps=4,
        optim="paged_adamw_32bit",
        learning_rate=2e-4,
        lr_scheduler_type="cosine",
        save_strategy="epoch",
        logging_steps=10,
        num_train_epochs=3,
        max_steps=200,
        fp16=True,
        push_to_hub=True,
    )

    sft_trainer = SFTTrainer(
        model=base_model,
        train_dataset=train_set,
        peft_config=lora_settings,
        dataset_text_field="text",  # column created by prepare_train_data
        args=sft_args,
        tokenizer=base_tokenizer,
        packing=False,
        max_seq_length=1024,
    )
    sft_trainer.train()
    sft_trainer.push_to_hub()

# Kick off fine-tuning with the identifiers defined at the top of this cell.
if __name__ == "__main__":
    finetune_tinyllama(
        data_id=dataset_id,
        base_model_id=base_model_id,
        model_id_colorist_lora=model_id_colorist_lora,
    )

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Map:   0%|          | 0/33887 [00:00<?, ? examples/s]

[34m[1mwandb[0m: Currently logged in as: [33mmychen76[0m. Use [1m`wandb login --relogin`[0m to force relogin


You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
10,2.7146
20,2.3435
30,2.015
40,1.8451
50,1.698
60,1.6069
70,1.5212
80,1.4468
90,1.4213
100,1.4015


### Push Model to Hub

In [4]:
# Hub repo id that the merged model and the tokenizer are pushed to below.
model_id_colorist_final="mychen76/tinyllama-colorist-v2"

In [5]:
from peft import AutoPeftModelForCausalLM, PeftModel
from transformers import AutoModelForCausalLM
import torch
import os

# Reload the base model in float16 (not 8-bit) so the adapter can be merged into it.
model = AutoModelForCausalLM.from_pretrained(base_model_id, torch_dtype=torch.float16, load_in_8bit=False, 
                                             device_map="auto", 
                                             trust_remote_code=True)
# Attach the LoRA adapter identified by model_id_colorist_lora to the base model.
# NOTE(review): from_transformers is not among the documented parameters of
# PeftModel.from_pretrained - confirm it is needed / has any effect.
peft_model = PeftModel.from_pretrained(model, model_id_colorist_lora, from_transformers=True, device_map={"":0})
# merge_and_unload() returns the model that is pushed to the Hub in the next cell;
# note that `model` is rebound here from the base model to the merged one.
model = peft_model.merge_and_unload()

In [6]:
# Upload the merged model to the Hub repo named by model_id_colorist_final.
model.push_to_hub(model_id_colorist_final)

pytorch_model.bin:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/mychen76/tinyllama-colorist-v2/commit/6a224c3483e4d8ba0c8386436077a3d780f8fa23', commit_message='Upload LlamaForCausalLM', commit_description='', oid='6a224c3483e4d8ba0c8386436077a3d780f8fa23', pr_url=None, pr_revision=None, pr_num=None)

In [7]:
#model.config

In [8]:
from transformers import AutoTokenizer
# Load the tokenizer from the base model and push it to the same repo as the
# merged model. Unlike get_model_and_tokenizer, pad_token is not assigned here.
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
tokenizer.push_to_hub(model_id_colorist_final)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Using pad_token, but it is not set yet.
Using pad_token, but it is not set yet.


CommitInfo(commit_url='https://huggingface.co/mychen76/tinyllama-colorist-v2/commit/8d90af0bec9ff02996dbe9741900b1a0c274d553', commit_message='Upload tokenizer', commit_description='', oid='8d90af0bec9ff02996dbe9741900b1a0c274d553', pr_url=None, pr_revision=None, pr_num=None)

### Model Inferencing

In [9]:
# Same value as in the "Push Model to Hub" section; presumably repeated so the
# inference cells can be run without executing the training/push cells first.
model_id_colorist_final="mychen76/tinyllama-colorist-v2"

In [10]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
from transformers import pipeline

In [11]:
def print_color_space(hex_color):
    """Print `hex_color` followed by a swatch of that color (ANSI 24-bit background).

    Args:
        hex_color: Color string as "#RRGGBB" or "RRGGBB". The 3-digit
            shorthand ("#RGB") is also accepted and expanded to "RRGGBB".
            Surrounding whitespace is ignored when parsing.

    Raises:
        ValueError: If the string is not a 3- or 6-digit hexadecimal color.
            Inputs such as "#+1+2+3", " #807070" or a code followed by extra
            characters used to be parsed silently into a wrong/partial color;
            they are now either parsed correctly (whitespace) or rejected.
    """
    def hex_to_rgb(hex_color):
        digits = hex_color.strip().lstrip('#')
        if len(digits) == 3:
            # Expand shorthand: "0af" -> "00aaff".
            digits = ''.join(ch * 2 for ch in digits)
        # int(..., 16) tolerates signs and whitespace, so validate explicitly.
        if len(digits) != 6 or any(ch not in '0123456789abcdefABCDEF' for ch in digits):
            raise ValueError(f"invalid hex color: {hex_color!r}")
        return tuple(int(digits[i:i+2], 16) for i in (0, 2, 4))
    r, g, b = hex_to_rgb(hex_color)
    # The label is the caller's original string; the swatch is a run of spaces
    # drawn on an RGB background and followed by a reset sequence.
    print(f'{hex_color}: \033[48;2;{r};{g};{b}m           \033[0m')

In [12]:
def formatted_prompt(question) -> str:
    """Build the inference prompt for `question`.

    The user turn is wrapped in `<|im_start|>` / `<|im_end|>` markers and is
    followed by an opened assistant turn for the model to complete.

    NOTE(review): the assistant turn is opened with "assistant:" here, whereas
    the training text built in prepare_train_data uses "assistant" followed by
    a newline (and a space before the first `<|im_end|>`). Confirm whether the
    inference prompt is meant to match the training template.

    Args:
        question: The user's request text.

    Returns:
        The prompt string.
    """
    user_turn = f"<|im_start|>user\n{question}<|im_end|>\n"
    return user_turn + "<|im_start|>assistant:"

#### run pipeline

In [16]:
# Tokenizer from the published repo; used below only to look up eos_token_id.
tokenizer = AutoTokenizer.from_pretrained(model_id_colorist_final)
# Text-generation pipeline that loads the published model by id, in float16.
pipe = pipeline(
    "text-generation",
    model=model_id_colorist_final,
    torch_dtype=torch.float16,
    device_map="auto",
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [17]:
from time import perf_counter
# The timer starts before the prompt is built, so the reported time covers
# prompt formatting, generation and printing of the result.
start_time = perf_counter()

prompt = formatted_prompt('give me a pure brown color')
# Sample one completion of at most 12 new tokens.
sequences = pipe(
    prompt,
    do_sample=True,
    temperature=0.1,
    top_p=0.9,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    max_new_tokens=12
)
# generated_text contains the prompt followed by the completion (see output below).
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

output_time = perf_counter() - start_time
print(f"Time taken for inference: {round(output_time,2)} seconds")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Result: <|im_start|>user
give me a pure brown color<|im_end|>
<|im_start|>assistant: #807070<|im_end
Time taken for inference: 0.21 seconds


In [18]:
# Show a swatch for the color code from the pipeline output above.
print_color_space('#807070')

#807070: [48;2;128;112;112m           [0m


#### load model

In [23]:
# Load the published tokenizer and model directly (no pipeline) for the
# generate()-based cells that follow. No dtype or device_map is passed here.
tokenizer = AutoTokenizer.from_pretrained(model_id_colorist_final)
model = AutoModelForCausalLM.from_pretrained(model_id_colorist_final)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


#### run streaming

In [24]:
prompt = formatted_prompt('give me a deep blue color')
inputs = tokenizer([prompt], return_tensors="pt")
# TextStreamer prints the text as it is generated; the return value of
# generate() is discarded. Generation is capped at 10 new tokens.
streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, eos_token_id=[tokenizer.eos_token_id],streamer=streamer, max_new_tokens=10)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<s><|im_start|>user
give me a deep blue color<|im_end|>
<|im_start|>assistant: #000088<|im


In [28]:
# Show a swatch for the color code from the streamed output above.
print_color_space('#000088')

#000088: [48;2;0;0;136m           [0m


#### run inference

In [26]:
from transformers import GenerationConfig
from time import perf_counter

prompt = formatted_prompt('give me a sky blue color')

# Sampling settings for this run; pad_token_id is set to the EOS id explicitly.
# NOTE(review): penalty_alpha is combined with do_sample=True here - confirm
# against the GenerationConfig docs that it has the intended effect.
generation_config = GenerationConfig(penalty_alpha=0.6,do_sample = True,
    top_k=5,temperature=0.5,repetition_penalty=1.2,
    max_new_tokens=12,pad_token_id=tokenizer.eos_token_id
)
# Timing covers tokenization, generation, decoding and printing.
# (An earlier tokenizer([prompt], ...) call whose result was overwritten
# before use has been removed; the prompt is tokenized once, below.)
start_time = perf_counter()
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, generation_config=generation_config)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
output_time = perf_counter() - start_time
print(f"Time taken for inference: {round(output_time,2)} seconds")

<|im_start|>user
give me a sky blue color<|im_end|>
<|im_start|>assistant: #6092ff<|im_end|
Time taken for inference: 2.0 seconds


In [29]:
# Show a swatch for the color code from the generation output above.
print_color_space('#6092ff')

#6092ff: [48;2;96;146;255m           [0m
