<a href="https://colab.research.google.com/github/rafaelpivetta/tech-challenge-fase3/blob/main/LoRA_for_Fine_Tuning_Llama3_LLMs_Revisado.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install datasets peft trl bitsandbytes huggingface_hub



In [None]:
import os
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, pipeline, logging
from peft import LoraConfig
from trl import SFTTrainer

# --- Configuration -----------------------------------------------------------
# Hub ids of the base checkpoint and the dataset, plus the local directory name
# under which the fine-tuned adapter and tokenizer are saved later on.
base_model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
amazon_products_dataset = "ckandemir/amazon-products"
new_model = "llama-1.1B-chat-amazon-products"

# Seed torch's random number generators so that the sampled generations
# (do_sample=True in the inference cells) can be repeated from a fresh kernel.
torch.manual_seed(42)

# --- Data and base model -----------------------------------------------------
# Only the "train" split is used for fine-tuning.
dataset = load_dataset(amazon_products_dataset, split="train")
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",  # let the library decide where to place the weights
)

# The key/value cache is switched off on the model config ahead of training.
model.config.use_cache = False
# NOTE(review): presumably selects the plain (non tensor-parallel) linear path
# in the Llama implementation — confirm against the installed transformers.
model.config.pretraining_tp = 1

# --- Tokenizer ---------------------------------------------------------------
# The checkpoint ships a stock Llama tokenizer, so there is no need to opt in
# to executing repository code via trust_remote_code.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
tokenizer.pad_token = tokenizer.eos_token  # reuse EOS as the padding token
tokenizer.padding_side = "right"  # pad sequences on the right

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/3.04k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/4.59M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.28M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/514k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/23993 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/6665 [00:00<?, ? examples/s]

Generating eval split:   0%|          | 0/2666 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.40G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/129 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [None]:
# Baseline: sample a product description from the model before fine-tuning.
# Library log output below CRITICAL is silenced for the rest of the session.
logging.set_verbosity(logging.CRITICAL)

prompt = "Describe the product Hot Wheels 2019 Advent Calendar Vehicles."

# Decoding settings, gathered in one place so they are easy to compare with
# the post-training cell.
baseline_generation_options = dict(
    max_length=100,
    do_sample=True,
    top_k=10,
    top_p=0.5,
    repetition_penalty=1.1,
)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    **baseline_generation_options,
)

result = pipe(prompt)
print(result[0]["generated_text"])

Describe the product Hot Wheels 2019 Advent Calendar Vehicles.
The 2018-2019 Hot Wheels Advent Calendar is a great way to see what's in store for you and your family this year!
This calendar features 36 days of awesome cars, trucks, motorcycles, and more from 2018 and 2019. Each day, you will find a new vehicle that


In [None]:
import gc  # garbage collection

# --- LoRA adapter configuration ----------------------------------------------
peft_params = LoraConfig(
    lora_alpha=16,  # LoRA scaling factor applied to the adapter output
    lora_dropout=0.1,  # 10% dropout on the adapter path during training
    r=64,  # rank of the low-rank update matrices
    bias="none",  # no bias terms are trained
    task_type="CAUSAL_LM",
)

# --- Optimisation settings ---------------------------------------------------
training_params = TrainingArguments(
    output_dir="./results",
    num_train_epochs=3,  # three passes over the dataset
    per_device_train_batch_size=2,  # micro-batch size of 2
    gradient_accumulation_steps=16,  # effective batch size 16 * 2 = 32
    optim="adamw_torch",
    save_steps=100,  # checkpoint every 100 steps
    logging_steps=100,  # log every 100 steps
    learning_rate=2e-4,  # step size in the optimizer update
    weight_decay=0.001,
    fp16=True,  # 16-bit mixed precision
    bf16=False,  # not supported on V100
    max_grad_norm=0.3,  # gradient clipping
    max_steps=-1,  # no step cap; num_train_epochs decides the run length
    warmup_ratio=0.03,  # learning rate warmup
    group_by_length=True,
    lr_scheduler_type="cosine",  # cosine learning-rate schedule
)

# --- Trainer -----------------------------------------------------------------
# NOTE(review): the recorded run warns that the SFT-specific arguments passed
# here are deprecated in favour of SFTConfig, so this call style needs a TRL
# release that still accepts them.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_params,  # parameter-efficient fine-tuning (LoRA)
    dataset_text_field="Description",  # dataset column used as training text
    max_seq_length=128,
    tokenizer=tokenizer,
    args=training_params,
    packing=False,
)

# Release unreferenced Python objects and cached CUDA blocks before training.
gc.collect()
torch.cuda.empty_cache()

trainer.train()

# Persist the trained model and the tokenizer side by side in `new_model`.
# The tokenizer is saved through the `tokenizer` object handed to the trainer
# rather than through `trainer.tokenizer`, which is a deprecated attribute.
trainer.model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/23993 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


{'loss': 3.6285, 'grad_norm': 5.709380149841309, 'learning_rate': 0.00019990647448872326, 'epoch': 0.13336667500208385}
{'loss': 3.1413, 'grad_norm': 3.4931588172912598, 'learning_rate': 0.0001982486609054725, 'epoch': 0.2667333500041677}
{'loss': 3.0608, 'grad_norm': 3.163858652114868, 'learning_rate': 0.00019455211832955743, 'epoch': 0.4001000250062516}
{'loss': 3.0015, 'grad_norm': 2.967740535736084, 'learning_rate': 0.0001888935526260013, 'epoch': 0.5334667000083354}
{'loss': 2.9667, 'grad_norm': 2.8244435787200928, 'learning_rate': 0.00018139038302322335, 'epoch': 0.6668333750104193}
{'loss': 2.9433, 'grad_norm': 2.5361852645874023, 'learning_rate': 0.00017219830558125204, 'epoch': 0.8002000500125032}
{'loss': 2.9021, 'grad_norm': 2.4335687160491943, 'learning_rate': 0.0001615080623886925, 'epoch': 0.9335667250145869}
{'loss': 2.8468, 'grad_norm': 1.9681146144866943, 'learning_rate': 0.00014954148352988827, 'epoch': 1.0669334000166708}
{'loss': 2.8159, 'grad_norm': 2.0033307075500

('llama-1.1B-chat-amazon-products/tokenizer_config.json',
 'llama-1.1B-chat-amazon-products/special_tokens_map.json',
 'llama-1.1B-chat-amazon-products/tokenizer.model',
 'llama-1.1B-chat-amazon-products/added_tokens.json',
 'llama-1.1B-chat-amazon-products/tokenizer.json')

In [None]:
# Authenticate this session with the Hugging Face Hub; the cell that follows
# creates a repository and uploads files to it.
# NOTE(review): login() is called without arguments, so it presumably asks for
# an access token interactively — confirm before relying on "Run All".
from huggingface_hub import login
login()


In [None]:
from huggingface_hub import HfApi

api = HfApi()
# Target repository, in the form "<profile name>/<model repo name>".
model_repo_name = "rafaelpivetta/llama-1.1B-chat-amazon-products"

# Create the repository on the Hub. exist_ok=True keeps this cell re-runnable:
# without it the call raises once the repository already exists.
api.create_repo(repo_id=model_repo_name, exist_ok=True)

# Upload the local folder written by the training cell. `new_model` is the
# same directory name the model and tokenizer were saved under.
api.upload_folder(
    folder_path=new_model,
    repo_id=model_repo_name,
)

# Publish the tokenizer to the same repository.
tokenizer.push_to_hub(model_repo_name)

In [None]:
# Re-run the baseline prompt after fine-tuning to compare the generations.
prompt = "Describe the product Hot Wheels 2019 Advent Calendar Vehicles."

# Training leaves the network in training mode, which would keep the LoRA
# dropout (lora_dropout=0.1) active while generating. Switch to evaluation
# mode so inference is not perturbed by dropout.
# NOTE(review): this relies on the trainer having attached the LoRA layers to
# `model` in place — confirm, or pass `trainer.model` to the pipeline instead.
model.eval()

# NOTE(review): repetition_penalty is 1.0 here but 1.1 in the baseline cell,
# so the two outputs are not sampled under identical decoding settings.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=100,
    do_sample=True,
    top_k=10,
    top_p=0.5,
    repetition_penalty=1.0,
)
result_fine_tuning = pipe(prompt)
print(result_fine_tuning[0]["generated_text"])

Describe the product Hot Wheels 2019 Advent Calendar Vehicles. The item "Hot Wheels 2019 Advent Calendar Vehicles 12-Inch Die-Cast Action Figures" is in sale since Tuesday, January 22, 2019. This item is a property of Hot Wheels.
