In [1]:
## montar o google driver
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%%capture
import os
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
    !pip install --no-deps unsloth

In [3]:
import json
import torch
from datasets import load_dataset
from transformers import TrainingArguments
from unsloth import FastLanguageModel
from trl import SFTTrainer


Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [17]:
# Configurações
DATA_DIR = "/content/drive/MyDrive/FIAP/tech_challange"
RAW_JSONL = f"{DATA_DIR}/dataset_preparado.jsonl"
CLEAN_JSONL = f"{DATA_DIR}/arquivo_corrigido.jsonl"
FINAL_JSONL = f"{DATA_DIR}/arquivo_corrigido2.jsonl"
MODEL_NAME_INIT = "unsloth/Meta-Llama-3.1-8B"
MODEL_NAME = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
MAX_SEQ_LENGTH = 2048

In [18]:
# 1. Limpeza do dataset JSONL
def clean_jsonl(input_path, output_path):
    with open(input_path, 'r', encoding='utf-8') as fin, \
         open(output_path, 'w', encoding='utf-8') as fout:
        for i, line in enumerate(fin, 1):
            try:
                json.loads(line)
                fout.write(line)
            except Exception as e:
                print(f"Linha {i} removida: {e}")

clean_jsonl(RAW_JSONL, CLEAN_JSONL)
clean_jsonl(CLEAN_JSONL, FINAL_JSONL)


In [19]:
# 2. Carregar modelo e tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=MODEL_NAME,
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=None,
    load_in_4bit=True,
)

==((====))==  Unsloth 2025.4.7: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/55.5k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

In [20]:
# 3. Preparação do dataset
def format_examples(examples):
    return {
        "text": [
            f"{prompt} {completion}"
            for prompt, completion in zip(examples["prompt"], examples["completion"])
        ]
    }

dataset = load_dataset("json", data_files={"train": FINAL_JSONL}, split="train")
dataset = dataset.map(format_examples, batched=True)

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/1390403 [00:00<?, ? examples/s]

In [21]:
# 4. Aplicar LoRA
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

In [22]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

In [None]:
# 5. Configuração e treinamento
training_args = TrainingArguments(
    per_device_train_batch_size = 8, ##troquei de 2
    gradient_accumulation_steps = 4,
    warmup_steps = 5,
    # num_train_epochs = 1, # Set this for 1 full training run.
    max_steps = 600, ##troquei de 60
    learning_rate = 1e-4,  ## troquei de 2e-4,
    fp16 = not is_bfloat16_supported(),
    bf16 = is_bfloat16_supported(),
    logging_steps = 1,
    optim = "adamw_8bit",
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = 3407,
    output_dir = "outputs",
    report_to = "none", # Use this for WandB etc,
    save_steps=100,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=MAX_SEQ_LENGTH,
    dataset_num_proc=2,
    packing=False, ## Unsloth: Hugging Face's packing is currently buggy - we're disabling it for now!

    args=training_args,
)

trainer_stats = trainer.train()


Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/1390403 [00:00<?, ? examples/s]

In [None]:
# 6. Estatísticas de GPU
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
print(f"GPU: {gpu_stats.name} | Memória total: {max_memory} GB | Máx. usada: {used_memory} GB")
print(f"Tempo de treinamento: {trainer_stats.metrics['train_runtime']} segundos")

In [None]:
# 7. Salvar modelo e tokenizer
model.save_pretrained(f"{DATA_DIR}/fine_tuned_model")
tokenizer.save_pretrained(f"{DATA_DIR}/fine_tuned_model")

In [None]:
# 8. Função de inferência
def responder(pergunta, max_tokens=128):
    inputs = tokenizer([pergunta], return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_tokens)
    resposta = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return resposta

In [31]:
# Exemplos de uso
print(responder("O que é Windflower Wedding (Suttons of Yorkshire)?"))
print(responder("O que é The Prophet?"))

O que é Windflower Wedding (Suttons of Yorkshire)? Windflower Wedding (Suttons of Yorkshire) is a beautiful, delicate flower, with soft pink petals and a yellow centre.  This lovely flower is ideal for cutting and arrangements, and is also a popular choice for wedding bouquets and floral decorations.  The flowers are also a popular choice for cottage gardens and wildflower meadows.  Windflower Wedding (Suttons of Yorkshire) is easy to grow and can be sown in either autumn or spring.  The seeds are best sown in a cold frame or in a cold greenhouse in the autumn.  If you are sowing in the spring, sow the seeds in
O que é The Prophet? The Prophet is a book that will be read for centuries to come. It is a book that will be read and re-read, and will be passed from generation to generation. It is a book that will be read by the young and the old, and will be cherished by all. It is a book that will be read in every corner of the world, and will be loved by all. It is a book that will be rea