In [1]:
## Ambiente configurado para treinamento local em um PC com Placa de Vídeo Nvidia RTX-3060 12GB

## Utilizando miniconda, instalado em um Linux Ubuntu conforme orientações do link: https://docs.anaconda.com/miniconda/
## Utilizando miniconda para criação do ambiente do unsloth conforme orientação no link: https://docs.unsloth.ai/get-started/installation/conda-install

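## >> To set up the environment, uncomment the commands below (remove the leading "#") and run them. Install miniconda first.
## >> Note: `conda create` / `conda activate` are best run in a terminal; `!conda activate` inside a notebook cell runs in a subshell and does not switch the running kernel's environment.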

#!pip install nbformat
#!conda install -c conda-forge ipywidgets
#!conda create --name unsloth_env python=3.10 pytorch-cuda=12.1 pytorch cudatoolkit xformers -c pytorch -c nvidia -c xformers -y
#!conda activate unsloth_env
#!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
#!pip install --no-deps "trl<0.9.0" peft accelerate bitsandbytes

In [None]:
import helper
import torch; 

import datasets

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Environment sanity check, one value per line:
# PyTorch version, the CUDA version PyTorch was built with, and bf16 availability.
print(torch.__version__, torch.version.cuda, is_bfloat16_supported(), sep="\n")

In [3]:
# Model-loading configuration; all three values are forwarded to
# helper.get_model_by_id, and max_seq_length is also given to SFTTrainer.

# Maximum sequence length. Per the upstream note, any value may be chosen
# because RoPE scaling is handled internally by the library.
max_seq_length = 2048
# Tensor dtype. None requests auto-detection; the upstream note suggests
# Float16 for Tesla T4 / V100 and Bfloat16 for Ampere or newer GPUs.
dtype = None
# Load the weights 4-bit quantized to reduce memory usage. May be set to False.
load_in_4bit = True

In [None]:
# Load the base model and its tokenizer through the project-local helper module.
# The original note on this line associates id 0 with
# "unsloth/Meta-Llama-3.1-8B-bnb-4bit" (Llama-3.1, 15 trillion tokens, "2x faster")
# NOTE(review): the id -> model mapping lives in helper.py; confirm it there.
model_name, raw_model, tokenizer = helper.get_model_by_id(0, max_seq_length, dtype, load_in_4bit)

In [5]:
# End-of-sequence token taken from the tokenizer. formatting_prompts_func_train
# appends it to every training text.
EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func_train(examples):
    """Build the ``text`` column for a batch of training examples.

    Every title/content pair is substituted into ``helper.alpaca_prompt``
    (title first, content second) and the module-level ``EOS_TOKEN`` is
    appended to the result.

    Args:
        examples: Batch mapping with parallel ``'title'`` and ``'content'``
            sequences (the shape produced by ``Dataset.map(..., batched=True)``).

    Returns:
        A dict with the single key ``"text"`` holding the formatted prompts,
        one per title/content pair.
    """
    title_content_pairs = zip(examples['title'], examples['content'])
    # EOS_TOKEN must terminate each sample, otherwise generation goes on forever.
    return {
        "text": [
            helper.alpaca_prompt.format(title, content) + EOS_TOKEN
            for title, content in title_content_pairs
        ],
    }

In [None]:
# Derive the trainable model from the loaded base model via the project helper.
# NOTE(review): presumably this attaches LoRA/PEFT adapters; confirm in helper.py.
model = helper.get_fast_language_model(raw_model)

In [None]:
# Read the ';'-separated training sample and, in batched mode, add the "text"
# column produced by formatting_prompts_func_train.
dataset = (
    datasets.Dataset
    .from_csv('../data/trn_sample.csv', sep=';')
    .map(formatting_prompts_func_train, batched=True)
)
# Display the dataset summary as the cell output.
dataset

In [None]:
# Mixed precision: bf16 when the GPU supports it, otherwise fall back to fp16.
use_bf16 = is_bfloat16_supported()

# Optimisation hyper-parameters, kept separate from the trainer wiring.
training_args = TrainingArguments(
    output_dir = "outputs",
    seed = 3407,
    # 2 samples per step x 4 accumulation steps = 8 samples per optimizer update per device.
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 4,
    # One full pass over the data (an earlier experiment capped this with max_steps = 60).
    num_train_epochs = 1,
    # 3e-4 is the value in use (2e-4 was the previous setting).
    learning_rate = 3e-4,
    lr_scheduler_type = "linear",
    warmup_steps = 5,
    optim = "adamw_8bit",
    weight_decay = 0.01,
    bf16 = use_bf16,
    fp16 = not use_bf16,
    logging_steps = 1,
)

# Supervised fine-tuning on the pre-formatted "text" column.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    # Packing is off; per the original note, enabling it can make training
    # up to 5x faster for short sequences.
    packing = False,
    args = training_args,
)

In [None]:
# Report memory usage before training and keep the two returned values;
# they are passed to helper.print_final_memory_usage once training finishes.
start_gpu_memory, max_memory = helper.print_start_memory_usage()

In [None]:
# Run the fine-tuning loop. The returned stats object is kept because the
# final memory report takes it as an argument.
trainer_stats = trainer.train()

In [None]:
# Report memory usage after training, using the pre-training baseline values
# and the stats returned by trainer.train().
helper.print_final_memory_usage(start_gpu_memory, max_memory, trainer_stats)

In [None]:
# Post-training smoke test: stream a prediction for a few training titles.
#
# The sample is drawn with a fixed random_state so a "Restart & Run All"
# shows the same titles every time, and n is capped at the dataset size so
# the cell also works when fewer than SAMPLE_SIZE rows are available.
SAMPLE_SIZE = 5
SAMPLE_SEED = 3407  # same seed value the trainer is configured with

train_df = dataset.to_pandas()
df = train_df.sample(n=min(SAMPLE_SIZE, len(train_df)), random_state=SAMPLE_SEED)

# Only the title is needed as the prompt, so iterate that column directly.
for title in df['title']:
    print(f"Resultado da predição para o título: [{title}]\n")
    helper.predict_text_streamer(model, tokenizer, title)
  

In [None]:
# Local saving: write the trained model and its tokenizer into the same directory.
save_dir = 'Meta-Llama-3.1-8B'
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)