<a href="https://colab.research.google.com/github/rafaelpivetta/tech-challenge-fase3/blob/main/LoRA_for_Fine_Tuning_Llama3_LLMs_Revisado.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install datasets peft trl bitsandbytes

Collecting datasets
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting peft
  Downloading peft-0.12.0-py3-none-any.whl.metadata (13 kB)
Collecting trl
  Downloading trl-0.10.1-py3-none-any.whl.metadata (12 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting tyro>=0.5.11 (from trl)
  Downloading tyro-0.8.10-py3-none-any.whl.metadata (8.4 kB)
Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl)
  Downloading shtab-1.7.1-py

In [2]:
import gc
import os

import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, pipeline, logging
from trl import SFTConfig, SFTTrainer

# Hub identifiers for the base checkpoint and the training corpus, plus the
# local directory name the fine-tuned adapter and tokenizer are saved under.
base_model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
amazon_products_dataset = "ckandemir/amazon-products"
new_model = "llama-1.1B-chat-amazon-products"

# Only the "train" split is used for fine-tuning.
dataset = load_dataset(amazon_products_dataset, split="train")

# device_map="auto" lets the library decide where to place the weights.
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
)

# The KV cache is only useful for generation; turn it off in the model config
# for the training run that follows.
model.config.use_cache = False
# NOTE(review): presumably selects the regular (non tensor-parallel-sliced)
# linear-layer path in the Llama implementation -- confirm against the docs.
model.config.pretraining_tp = 1

# `trust_remote_code=True` was dropped: the download log lists only stock
# tokenizer files for this checkpoint, so permission to execute Hub code
# appears unnecessary.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
tokenizer.pad_token = tokenizer.eos_token  # reuse EOS as the padding token
tokenizer.padding_side = "right"  # pad on the right-hand side of each sequence

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/3.04k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/4.59M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.28M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/514k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/23993 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/6665 [00:00<?, ? examples/s]

Generating eval split:   0%|          | 0/2666 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.40G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [3]:
# Baseline: sample from the base model *before* fine-tuning so the result can
# be compared with the post-training generation.
logging.set_verbosity(logging.CRITICAL)  # silence transformers warnings

# Sampling is stochastic (do_sample=True); fix the torch RNG so that
# Restart & Run All reproduces the same baseline text.
torch.manual_seed(42)

prompt = "Describe the product Hot Wheels 2019 Advent Calendar Vehicles."
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=100,  # upper bound on prompt + generated tokens
    do_sample=True,
    top_k=10,
    top_p=0.5,
    repetition_penalty=1.1,
)
result = pipe(prompt)
print(result[0]["generated_text"])

Describe the product Hot Wheels 2019 Advent Calendar Vehicles.
Description: 2019 Toyota Camry Hybrid AWD 4dr Sedan - Review, Specs, Price - WIKI!


In [5]:
# LoRA adapter configuration.
peft_params = LoraConfig(
    lora_alpha=16,  # LoRA scaling factor applied to the adapter output
    lora_dropout=0.1,  # 10% dropout on the LoRA path during training
    r=32,  # rank of the low-rank update matrices
    bias="none",  # do not train bias terms
    task_type="CAUSAL_LM",
)

# NOTE(review): training uses only the raw "Description" column, so the model
# never sees the "Describe the product <name>." prompt format used in the
# inference cells. Consider building prompt + description training texts.

# SFTConfig extends TrainingArguments with the SFT-specific options
# (dataset_text_field / max_seq_length / packing). Passing those directly to
# SFTTrainer triggers the "Deprecated positional argument(s)" warning.
training_params = SFTConfig(
    output_dir="./results",
    num_train_epochs=2,  # two passes over the dataset
    per_device_train_batch_size=4,  # micro-batch size per device
    gradient_accumulation_steps=16,  # effective batch size = 4 * 16 = 64 per device
    optim="adamw_torch",
    save_steps=100,  # checkpoint every 100 optimizer steps
    logging_steps=100,
    learning_rate=3e-4,  # peak learning rate
    weight_decay=0.001,
    fp16=True,  # 16-bit mixed precision
    bf16=False,  # not supported on V100
    max_grad_norm=0.3,  # gradient clipping threshold
    max_steps=-1,  # no step cap; num_train_epochs controls the run length
    warmup_ratio=0.04,  # fraction of steps used for learning-rate warmup
    group_by_length=True,  # batch samples of similar length together
    lr_scheduler_type="cosine",  # cosine learning-rate schedule
    dataset_text_field="Description",  # dataset column used as training text
    max_seq_length=256,  # maximum tokenized sequence length
    packing=False,  # one example per sequence, no packing
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_params,  # parameter-efficient fine-tuning (LoRA)
    tokenizer=tokenizer,
    args=training_params,
)

# Free whatever memory can be reclaimed before the training run starts.
gc.collect()
torch.cuda.empty_cache()

trainer.train()

# Persist the trained adapter and the tokenizer side by side.
trainer.model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/23993 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


{'loss': 3.3655, 'grad_norm': 5.416680335998535, 'learning_rate': 0.0002930191153429835, 'epoch': 0.26671111851975327}
{'loss': 3.083, 'grad_norm': 3.958380937576294, 'learning_rate': 0.0002603819791752244, 'epoch': 0.5334222370395065}
{'loss': 3.0195, 'grad_norm': 3.4534194469451904, 'learning_rate': 0.00020694743502949983, 'epoch': 0.8001333555592599}
{'loss': 2.9121, 'grad_norm': 0.5289844870567322, 'learning_rate': 0.00014278324961029256, 'epoch': 1.066844474079013}
{'loss': 2.8534, 'grad_norm': 0.5807531476020813, 'learning_rate': 7.997879431416655e-05, 'epoch': 1.3335555925987665}
{'loss': 2.8344, 'grad_norm': 0.6115025877952576, 'learning_rate': 3.0367249605787884e-05, 'epoch': 1.6002667111185196}
{'loss': 2.7935, 'grad_norm': 0.5259997844696045, 'learning_rate': 3.2960791148792743e-06, 'epoch': 1.866977829638273}
{'train_runtime': 1313.7845, 'train_samples_per_second': 36.525, 'train_steps_per_second': 0.569, 'train_loss': 2.9739384370691635, 'epoch': 1.9949991665277547}


('llama-1.1B-chat-amazon-products/tokenizer_config.json',
 'llama-1.1B-chat-amazon-products/special_tokens_map.json',
 'llama-1.1B-chat-amazon-products/tokenizer.model',
 'llama-1.1B-chat-amazon-products/added_tokens.json',
 'llama-1.1B-chat-amazon-products/tokenizer.json')

In [23]:
# Post-training check: sample from the fine-tuned model with the same prompt
# and decoding settings as the baseline cell.

# Nothing here switches the model back after trainer.train(), so without
# this call LoRA dropout could still be active and perturb generation.
model.eval()

# Sampling is stochastic (do_sample=True); fix the torch RNG so that the
# generated text is reproducible across runs.
torch.manual_seed(42)

prompt = "Describe the product Hot Wheels 2019 Advent Calendar Vehicles."
pipe = pipeline(
    task="text-generation",
    model=model,  # base model object, now carrying the trained LoRA layers
    tokenizer=tokenizer,
    max_length=100,  # upper bound on prompt + generated tokens
    do_sample=True,
    top_k=10,
    top_p=0.5,
    repetition_penalty=1.1,
)
result_fine_tuning = pipe(prompt)
print(result_fine_tuning[0]["generated_text"])

Describe the product Once Upon a Time. The classic fairy tale of Snow White and Prince Charming.
"Once Upon a Time." 10-Inch action figure with articulated body, head, and arms. Comes with Snow White and Prince Charming figures.
