<a href="https://colab.research.google.com/github/rafaelpivetta/tech-challenge-fase3/blob/main/LoRA%20for%20Fine-Tuning%20Llama3%20LLMs-Revisado.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install datasets peft trl bitsandbytes

Collecting datasets
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting peft
  Downloading peft-0.12.0-py3-none-any.whl.metadata (13 kB)
Collecting trl
  Downloading trl-0.10.1-py3-none-any.whl.metadata (12 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.43.3-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting tyro>=0.5.11 (from trl)
  Downloading tyro-0.8.10-py3-none-any.whl.metadata (8.4 kB)
Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl)
  Downloading shtab-1.7.1-py

In [2]:
import os
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, pipeline, logging
from peft import LoraConfig
from trl import SFTTrainer

base_model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
amazon_products_dataset = "ckandemir/amazon-products"
new_model ="llama-1.1B-chat-amazon-products"

dataset = load_dataset(amazon_products_dataset, split="train")
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
)

model.config.use_cache = False
model.config.pretraining_tp = 1


tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token # pad sequences
tokenizer.padding_side = "right" # right pad sequences

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/3.04k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/4.59M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.28M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/514k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/23993 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/6665 [00:00<?, ? examples/s]

Generating eval split:   0%|          | 0/2666 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.40G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [3]:
# run inference
logging.set_verbosity(logging.CRITICAL)
prompt = "Describe the product Hot Wheels 2019 Advent Calendar Vehicles."
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=100
)
result = pipe(f"{prompt}")
print(result[0]["generated_text"])
#

Describe the product Hot Wheels 2019 Advent Calendar Vehicles.
The 2019 Hot Wheels Advent Calendar is a 36-piece set of 1:64 scale die-cast cars. The set includes 12 cars, each with a different theme. The cars are all based on the 2019 Hot Wheels lineup, and include cars from the 2019 Monster Jam, Monster


In [10]:
#LoRA
peft_params = LoraConfig(
    lora_alpha=16, # multiplier of Lora outpul when its added to the full forward output
    lora_dropout=0.1, # with a probability of 10$ it will set random Lora output to 0
    r=32, # rank of Lora so matrices will have either LHS or RHS dimension of 64
    bias="none", # no bias term
    task_type="CAUSAL_LM"
)

training_params = TrainingArguments(output_dir='./results',
    num_train_epochs=2, # One pass over the dataset
    per_device_train_batch_size=2, # mbs=2
    gradient_accumulation_steps=16, # effective batch size 16*2
    optim="adamw_torch",
    save_steps=100, # checkpoint every 100 steps
    logging_steps=100,
    learning_rate=3e-4, # step size in the optimizer update
    weight_decay=0.001,
    fp16=True, # 16 bit
    bf16=False, # not supported on V100
    max_grad_norm=0.3, # gradient clipping improves convergence
    max_steps=-1,
    warmup_ratio=0.03, # learning rate warmup
    group_by_length=True,
    lr_scheduler_type="cosine", #cosine linear scheduler
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_params, # parameter efficient fine tuning AKA Lora
    dataset_text_field="Description",
    max_seq_length=128,
    tokenizer=tokenizer,
    args=training_params,
    packing=False
)

import gc # garbage collection
gc.collect()
torch.cuda.empty_cache() #clean cache

trainer.train() #train the model
trainer.model.save_pretrained(new_model)
trainer.tokenizer.save_pretrained(new_model)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/23993 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


{'loss': 3.4467, 'grad_norm': 4.4685797691345215, 'learning_rate': 0.00029897876923586135, 'epoch': 0.13336667500208385}
{'loss': 3.094, 'grad_norm': 3.7227742671966553, 'learning_rate': 0.0002917613694713241, 'epoch': 0.2667333500041677}
{'loss': 3.0412, 'grad_norm': 3.6198885440826416, 'learning_rate': 0.000277942602836073, 'epoch': 0.4001000250062516}
{'loss': 2.9686, 'grad_norm': 3.3106021881103516, 'learning_rate': 0.0002581659644416485, 'epoch': 0.5334667000083354}
{'loss': 2.9245, 'grad_norm': 3.149962902069092, 'learning_rate': 0.00023335238815034264, 'epoch': 0.6668333750104193}
{'loss': 2.8889, 'grad_norm': 2.8424782752990723, 'learning_rate': 0.00020465736167424683, 'epoch': 0.8002000500125032}
{'loss': 2.8389, 'grad_norm': 2.7636446952819824, 'learning_rate': 0.00017341711926279008, 'epoch': 0.9335667250145869}
{'loss': 2.7647, 'grad_norm': 2.2772634029388428, 'learning_rate': 0.00014108641761067113, 'epoch': 1.0669334000166708}
{'loss': 2.7266, 'grad_norm': 2.3185920715332

('llama-1.1B-chat-amazon-products/tokenizer_config.json',
 'llama-1.1B-chat-amazon-products/special_tokens_map.json',
 'llama-1.1B-chat-amazon-products/tokenizer.model',
 'llama-1.1B-chat-amazon-products/added_tokens.json',
 'llama-1.1B-chat-amazon-products/tokenizer.json')

In [12]:
prompt = "Describe the product Hot Wheels 2019 Advent Calendar Vehicles."
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=100
)
result_fine_tuning = pipe(f'{prompt}')
print(result_fine_tuning[0]['generated_text'])

Describe the product Hot Wheels 2019 Advent Calendar Vehicles.
The 2019 Hot Wheels Advent Calendar Vehicles are a set of 12 collectible vehicles that are hidden inside a 12-inch Advent Calendar. Each vehicle is a Hot Wheels branded replica of a vehicle that is featured in the 2019 Hot Wheels calendar. The vehicles are packaged in a 12-inch Advent Calendar
