In [1]:
## ler as propriedades title e content do arquivo trn.json para o finetuning do modelo
import json
import os
import warnings

# Unsloth asks to be imported before transformers/trl so that its patches are
# applied to those libraries (it emits a warning otherwise).
from unsloth import FastLanguageModel

import torch
from datasets import Dataset
from transformers import TrainingArguments
from trl import SFTTrainer

# Settings shared by model loading (below) and by the trainer further down.
max_seq_length = 2048
# NOTE(review): None presumably lets Unsloth choose the dtype automatically —
# confirm against the Unsloth documentation.
dtype = None
load_in_4bit = True

# Candidate checkpoints kept for reference; only the uncommented entry is
# active, and the list itself is not referenced again in the code shown here
# (the model name is repeated literally in from_pretrained below).
fourbit_models = [
    #"unsloth/Meta-Llama-3.1-8B-bnb-4bit",      # Llama-3.1 15 trillion tokens model 2x faster!
    #"unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Phi-3.5-mini-instruct",           # Phi-3.5 2x faster!
    #"unsloth/Phi-3-medium-4k-instruct",
    #"unsloth/gemma-2-9b-bnb-4bit"
]

# Load the base model together with its tokenizer using the settings above.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Phi-3.5-mini-instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

# Wrap the loaded model with LoRA adapters on the listed projection modules;
# `model` is rebound to the wrapped model that is trained later.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

def solve_relative_path(file_path: str) -> str:
    """Resolve *file_path* against the parent of the current working directory.

    The parent directory of ``os.getcwd()`` is treated as the project root.

    Args:
        file_path: Path to resolve. An absolute path is returned normalized,
            because ``os.path.join`` discards the root when the second
            component is absolute.

    Returns:
        The absolute, normalized path.
    """
    parent_of_cwd = os.path.dirname(os.getcwd())
    return os.path.abspath(os.path.join(parent_of_cwd, file_path))

def read_json_file(file_path: str) -> list[tuple[str, str]]:
    """Collect ``(title, content)`` pairs from a file of concatenated JSON objects.

    Lines are stripped and accumulated in a buffer until the buffer parses as
    one JSON document, so both JSON Lines files and objects spread over
    several lines are accepted.

    Args:
        file_path: Path of the file to read. It is decoded as UTF-8 and a
            leading byte-order mark, if present, is ignored.

    Returns:
        One ``(title, content)`` tuple, in file order, for every parsed JSON
        object whose ``title`` and ``content`` values are both truthy. Parsed
        documents that are not JSON objects (lists, numbers, ...) are skipped.

    Raises:
        OSError: If the file cannot be opened.

    Warns:
        UserWarning: If the file ends while the buffer still holds text that
            never parsed, i.e. input was dropped.
    """
    training_data: list[tuple[str, str]] = []
    buffer = ""

    # Decode explicitly instead of relying on the locale's default encoding;
    # "utf-8-sig" also strips a BOM, which would otherwise keep the very first
    # buffer from ever parsing.
    with open(file_path, "r", encoding="utf-8-sig") as file:
        for line in file:
            buffer += line.strip()
            if not buffer:
                continue  # blank line between records
            try:
                item = json.loads(buffer)
            except json.JSONDecodeError:
                # Not a complete document yet: keep accumulating lines.
                continue
            buffer = ""

            # Only JSON objects carry the fields we need; anything else
            # (list, number, string, ...) has no .get() and is ignored.
            if not isinstance(item, dict):
                continue
            title = item.get("title", "")
            content = item.get("content", "")
            if title and content:
                training_data.append((title, content))

    if buffer:
        # Surface silent data loss: everything from the first line that never
        # completed a valid document up to end-of-file was discarded.
        warnings.warn(
            f"{file_path}: {len(buffer)} trailing characters could not be "
            "parsed as JSON and were ignored",
            stacklevel=2,
        )
    return training_data

def verify_gpu_availability() -> None:
    """Print whether CUDA is available and, if it is, the name of each GPU."""
    cuda_ok = torch.cuda.is_available()
    print(f"CUDA disponível? {cuda_ok}")
    if not cuda_ok:
        return

    n_devices = torch.cuda.device_count()
    print(f"Número de GPUs disponíveis: {n_devices}")
    for index in range(n_devices):
        print(f"GPU {index}: {torch.cuda.get_device_name(index)}")

def format_data_for_training_with_phi35(data: list[tuple[str, str]]) -> list[dict[str, str]]:
    """Render ``(title, content)`` pairs as chat-formatted training records.

    Each pair becomes a two-turn conversation: a user prompt asking about the
    title and an assistant reply holding the content. The conversation is
    rendered (not tokenized) with the chat template of the module-level
    ``tokenizer``.

    Args:
        data: ``(title, content)`` pairs.

    Returns:
        One ``{"text": rendered_conversation}`` dict per input pair, in input
        order. The return annotation reflects this; it previously claimed a
        list of plain strings.
    """
    def build_messages(title: str, content: str) -> list[dict[str, str]]:
        # Conversation layout consumed by tokenizer.apply_chat_template.
        return [
            {"role": "user", "content": f"Tell me about the following title in 10 words: {title}"},
            {"role": "assistant", "content": content},
        ]

    return [
        {"text": tokenizer.apply_chat_template(build_messages(title, content), tokenize=False)}
        for title, content in data
    ]

# Report the CUDA devices visible to this process before the heavy work starts.
verify_gpu_availability()

# Load the (title, content) pairs, render each one through the chat template,
# and wrap the rendered strings in a dataset with a single "text" column.
dataset_path = solve_relative_path("datasets/trn.json")
raw_data = read_json_file(dataset_path)
formatted_data = format_data_for_training_with_phi35(raw_data)
texts = [record["text"] for record in formatted_data]
dataset = Dataset.from_dict({"text": texts})

# Hyperparameters for a short fine-tuning run.
training_args = TrainingArguments(
    # Folder where results are saved. Resolved with the same helper used for
    # the dataset path (i.e. <parent of the working directory>/results) instead
    # of an absolute path that only exists on one machine.
    output_dir = solve_relative_path("results"),
    per_device_train_batch_size = 2, # Batch size per device. Use 1 or 2 when VRAM is scarce.
    gradient_accumulation_steps = 4, # Accumulate gradients to simulate a larger batch (2 * 4 = effective batch of 8)
    warmup_steps = 5, # Warm-up steps for the learning-rate schedule
    max_steps = 60, # Total number of training steps. Adjust as needed.
    learning_rate = 2e-4,
    # Use exactly one 16-bit mode: bf16 when the GPU supports it, fp16 otherwise.
    fp16 = not torch.cuda.is_bf16_supported(),
    bf16 = torch.cuda.is_bf16_supported(),
    logging_steps = 1, # Log progress (loss) every step
    optim = "adamw_8bit", # Memory-efficient optimizer
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = 42,
)

# Supervised fine-tuning trainer over the "text" column of `dataset`, using the
# LoRA-wrapped `model` and the hyperparameters in `training_args`.
# NOTE(review): `tokenizer`, `dataset_text_field` and `max_seq_length` are
# passed directly to SFTTrainer here; some TRL releases may expect these on the
# config object instead — confirm against the installed TRL version.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    args = training_args,
)

# Run the training loop; the surrounding prints mark its start and end.
print("Iniciando o fine-tuning...")
trainer.train()
print("Fine-tuning concluído!")


# Smoke test: ask the freshly trained model about one title and print the
# decoded generation.

# Switch the Unsloth-patched model to inference mode before calling generate();
# this is the step Unsloth documents for generating right after training.
FastLanguageModel.for_inference(model)

title_for_testing = "A Day in the Life of China"
# Same prompt wording as the training examples, with the assistant turn left
# open so the model produces it.
message_for_testing = [
    {"role": "user", "content": f"Tell me about the following title in 10 words: {title_for_testing}"},
]
input_ids = tokenizer.apply_chat_template(
    message_for_testing,
    tokenize=True,
    add_generation_prompt=True,  # append the assistant header to the prompt
    return_tensors="pt",
).to("cuda")
outputs = model.generate(input_ids=input_ids, max_new_tokens=256, use_cache=True)
results = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

print("\n--- TESTE DE INFERÊNCIA ---")
print(results)

  from .autonotebook import tqdm as notebook_tqdm

Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.9.5: Fast Llama patching. Transformers: 4.56.1.
   \\   /|    NVIDIA GeForce RTX 3060 Laptop GPU. Num GPUs = 1. Max memory: 6.0 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 8.6. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2025.9.5 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


CUDA disponível? True
Número de GPUs disponíveis: 1
GPU 0: NVIDIA GeForce RTX 3060 Laptop GPU


: 