In [None]:
!pip install accelerate peft bitsandbytes transformers trl

In [None]:
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, AutoPeftModelForCausalLM, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer
import os

### Load Dataset
Load the dataset from https://huggingface.co/datasets/burkelibbey/colors. Each row pairs a color `description` with its `color` code.

In [None]:
# Hugging Face Hub id of the dataset: https://huggingface.co/datasets/burkelibbey/colors?row=0
dataset = "burkelibbey/colors"

# Load the "train" split and convert it to a pandas DataFrame for inspection.
data = load_dataset(dataset, split="train")
data_df = data.to_pandas()

In [None]:
# Preview the first rows of the raw DataFrame.
data_df.head()

In [None]:
# Report the frame's dimensions, then the longest and shortest description
# measured in characters.
print(f"shape:{data_df.shape}")
description_lengths = [len(text) for text in data_df['description']]
print(f"max:{max(description_lengths)}")
print(f"min:{min(description_lengths)}")

In [None]:
# Reformat the data in the ChatML format

def formatted_train(input: str, response: str) -> str:
  """Render one prompt/answer pair as a ChatML training sample.

  Args:
    input: Text of the user turn. The parameter name is kept for backward
      compatibility; it shadows the builtin only inside this function.
    response: Text of the assistant turn.

  Returns:
    The user turn followed by the assistant turn, each closed with
    `<|im_end|>` and a newline.
  """
  return f"<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n{response}<|im_end|>\n"

# Example
# Named example_* so the kernel-wide builtin `input` is not overwritten.
example_input = 'Pure Black: A shade that completely absorbs light and does not reflect any colors. It is the darkest possible shade.'
example_response = '#000000'

print(formatted_train(example_input, example_response))

In [None]:
def prepare_train_data(data_id):
    """Load a dataset and add a ChatML-formatted "text" column for fine-tuning.

    Args:
        data_id: Dataset identifier passed to `load_dataset`. Its "train"
            split must provide "description" and "color" columns.

    Returns:
        A `Dataset` built from the DataFrame, including the new "text" column.
    """
    raw_split = load_dataset(data_id, split="train")
    frame = raw_split.to_pandas()
    # Build every sample with formatted_train so the training text uses the
    # same template as formatted_prompt at inference time. The previous inline
    # template inserted a stray space before the user turn's <|im_end|>.
    frame["text"] = frame.apply(
        lambda row: formatted_train(row["description"], row["color"]), axis=1
    )
    return Dataset.from_pandas(frame)

In [None]:
# Build the training set (adds the ChatML "text" column); this rebinds `data`,
# which previously held the raw split.
data = prepare_train_data(dataset)

In [None]:
# Show the dataset summary and the formatted text of the first sample.
print(data)
print(data[0]['text'])

### Fetch Model

We will use the pretrained TinyLlama chat model from https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0

It has 1.1B parameters, but we won't fine-tune all of them. Instead, we will use LoRA for fine-tuning.

In [None]:
# Fetch TinyLlama pretrained model and tokenizer from
# https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0

def get_model_and_tokenizer(model_id):
  """Load the tokenizer and a 4-bit quantized causal LM for `model_id`.

  Args:
    model_id: Model identifier passed to both `from_pretrained` calls.

  Returns:
    A `(model, tokenizer)` tuple.
  """
  tok = AutoTokenizer.from_pretrained(model_id)
  # Reuse the end-of-sequence token for padding.
  tok.pad_token = tok.eos_token

  # Quantization settings: 4-bit NF4 weights with double quantization,
  # computing in float16.
  quant_kwargs = dict(
      load_in_4bit=True,
      bnb_4bit_quant_type="nf4",
      bnb_4bit_compute_dtype="float16",
      bnb_4bit_use_double_quant=True,
  )
  quantized_lm = AutoModelForCausalLM.from_pretrained(
      model_id,
      quantization_config=BitsAndBytesConfig(**quant_kwargs),
      device_map="auto",
  )

  lm_config = quantized_lm.config
  lm_config.use_cache = False
  lm_config.pretraining_tp = 1
  return quantized_lm, tok

# Hub id of the base checkpoint; reused later when the base model is reloaded
# for merging.
model_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0"
model, tokenizer = get_model_and_tokenizer(model_id)

In [None]:
# Display the loaded model's repr.
model

### Set up LoRA

In [None]:
# LoRA adapter settings handed to SFTTrainer through its `peft_config` argument.
peft_config = LoraConfig(
    r=8,  # adapter rank
    lora_alpha=16,  # LoRA scaling factor
    lora_dropout=0.05,  # dropout applied inside the LoRA layers
    bias="none",  # bias terms are not trained
    task_type="CAUSAL_LM"
)

In [None]:
# Directory the trainer writes checkpoints to.
output_model="sparsh-tinyllama-colorist-v1"
training_args = TrainingArguments(
        output_dir=output_model,
        per_device_train_batch_size=16,
        gradient_accumulation_steps=4,  # 16 * 4 = 64 samples per optimizer step per device
        optim="paged_adamw_32bit",
        learning_rate=2e-3,
        lr_scheduler_type="cosine",
        save_strategy="epoch",
        logging_steps=10,
        num_train_epochs=3,
        # NOTE(review): a positive max_steps takes precedence over
        # num_train_epochs in TrainingArguments, so training stops after 250
        # optimizer steps. The merge cell loads "checkpoint-250" accordingly.
        max_steps=250,
        fp16=True,
    )

In [None]:
# Pass the model as loaded: it is 4-bit bitsandbytes-quantized and already
# placed by device_map="auto". Moving it with `.to('cpu')` is not supported for
# such models and contradicts fp16=True and the paged optimizer configured in
# `training_args`.
trainer = SFTTrainer(
    model=model,
    train_dataset=data,
    peft_config=peft_config,
    dataset_text_field="text",
    args=training_args,
    tokenizer=tokenizer,
    packing=False,
    max_seq_length=2048
)

In [None]:
# Run fine-tuning; checkpoints are written under `output_model`.
trainer.train()

### Merge LoRA with the base TinyLlama model

In [None]:
# Reload the base model in float16 (without the 4-bit quantization used for
# training) so the LoRA weights can be merged into it.
tinyLlama_model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, load_in_8bit=False,
                                             device_map="auto",
                                             trust_remote_code=True)

# Derive the checkpoint location from the training configuration instead of a
# hard-coded absolute Colab path, so it follows output_dir and max_steps.
lora_model_path = os.path.join(training_args.output_dir, f"checkpoint-{training_args.max_steps}")

# `from_transformers` is not a documented PeftModel.from_pretrained parameter,
# so it is no longer passed.
peft_model = PeftModel.from_pretrained(tinyLlama_model, lora_model_path, device_map="auto")

# Fold the adapter weights into the base weights and drop the PEFT wrappers.
merged_model = peft_model.merge_and_unload()

merged_model

In [None]:
def formatted_prompt(question)-> str:
    """Wrap `question` as a ChatML user turn and open the assistant turn.

    The returned prompt ends right after the assistant header so the model
    continues with the answer.
    """
    template = "<|im_start|>user\n{question}<|im_end|>\n<|im_start|>assistant\n"
    return template.format(question=question)

In [None]:
from transformers import GenerationConfig
from time import perf_counter

def generate_response(user_input):
  """Generate and print the merged model's answer for `user_input`, with timing.

  Args:
    user_input: Text placed in the user turn of the ChatML prompt.

  Returns:
    None. The decoded output and the elapsed time are printed.
  """
  prompt = formatted_prompt(user_input)

  # NOTE(review): penalty_alpha is documented as a contrastive-search setting;
  # confirm it has any effect alongside do_sample=True.
  generation_config = GenerationConfig(penalty_alpha=0.6,do_sample = True,
      top_k=5,temperature=0.5,repetition_penalty=1.2,
      max_new_tokens=12,pad_token_id=tokenizer.eos_token_id
  )
  start_time = perf_counter()

  # Tokenize once and place the tensors on the model's own device instead of a
  # hard-coded 'cuda'.
  inputs = tokenizer(prompt, return_tensors="pt").to(merged_model.device)

  # Generate with the merged fine-tuned model built in the merge step, not the
  # quantized training-time `model`.
  outputs = merged_model.generate(**inputs, generation_config=generation_config)
  print(tokenizer.decode(outputs[0], skip_special_tokens=True))
  output_time = perf_counter() - start_time
  print(f"Time taken for inference: {round(output_time,2)} seconds")

In [None]:
def print_color_space(hex_color):
    """Print `hex_color` followed by a terminal swatch rendered in that color.

    Args:
        hex_color: Color as "#RRGGBB" or "RRGGBB"; the 3-digit shorthand
            "#RGB" is also accepted.

    Raises:
        ValueError: If `hex_color` is not a 3- or 6-digit hexadecimal color.
    """
    def hex_to_rgb(hex_color):
        digits = hex_color.lstrip('#')
        if len(digits) == 3:
            # Expand shorthand: "abc" -> "aabbcc".
            digits = ''.join(ch * 2 for ch in digits)
        # Reject malformed values up front; otherwise a 5-digit string would
        # silently produce a wrong color and an empty one an opaque int() error.
        if len(digits) != 6 or any(ch not in '0123456789abcdefABCDEF' for ch in digits):
            raise ValueError(f"expected a 3- or 6-digit hex color, got {hex_color!r}")
        return tuple(int(digits[i:i+2], 16) for i in (0, 2, 4))
    r, g, b = hex_to_rgb(hex_color)
    # ANSI 24-bit background-color escape, a run of spaces as the swatch, then reset.
    print(f'{hex_color}: \033[48;2;{r};{g};{b}m           \033[0m')

In [None]:
# Try the model on a sample color description.
generate_response(user_input='Sunflower yellow')

In [None]:
# Try the model on a second color description.
generate_response(user_input='Light orange color')

In [None]:
# Render a swatch for a hex code (presumably copied from a generated answer; verify).
print_color_space('#205088')

In [None]:
# Render a swatch for another hex code (presumably copied from a generated answer; verify).
print_color_space('#e0b022')