In [1]:
# Instalação simples para Google Colab
!pip install --upgrade pip

# Instalar todas as dependências de uma vez
!pip install "transformers>=4.46,<4.47" "accelerate>=0.34,<0.35" "peft>=0.11.1" "bitsandbytes>=0.43.1" "datasets>=2.20.0" "evaluate>=0.4.2" "rouge-score>=0.1.2" "bert-score>=0.3.13" "sentencepiece>=0.2.0" "einops>=0.8.0" "torch>=2.3.0"

# Verificar instalação
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

import transformers
print(f"Transformers version: {transformers.__version__}")

print("Instalação concluída!")

Collecting pip
  Downloading pip-25.2-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.2-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m67.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.2
Collecting transformers<4.47,>=4.46
  Downloading transformers-4.46.3-py3-none-any.whl.metadata (44 kB)
Collecting accelerate<0.35,>=0.34
  Downloading accelerate-0.34.2-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes>=0.43.1
  Downloading bitsandbytes-0.48.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting evaluate>=0.4.2
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Collecting rouge-score>=0.1.2
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Coll

In [2]:
# === Preparação do dataset ===
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import os, json, random
from datasets import Dataset, DatasetDict

DS_DIR = "/content/drive/MyDrive/amazon_ft/cache/prepared_descfmt"

# Localiza o arquivo dataset
if os.path.exists(DS_DIR):
    print("Dataset já existe no Drive:", DS_DIR)
else:
    print("Criando dataset com formato 'Description: ...' em:", DS_DIR)
    candidates = [
        "/content/data/trn.json",
        "/content/drive/MyDrive/projeto_fase_3/trn.json",
        "/content/drive/MyDrive/trn.json",
    ]
    json_path = next((p for p in candidates if os.path.exists(p)), None)
    if json_path is None:
        raise FileNotFoundError(
            "Não encontrou o trn.json. Faça upload em /content/data/trn.json "
            "ou copie para /content/drive/MyDrive/projeto_fase_3/trn.json"
        )

    # Em ingles porque obteve melhores resultados
    QUESTION = "What is the complete product description? Answer strictly in the format: 'Description: ...' and nothing else."
    N_TRAIN, N_VAL, N_TEST = 10000, 1000, 1000

    def build_example(title, content):
        user = (
            "Based on the product title below, answer the question.\n"
            f"Question: {QUESTION}\n"
            f"Title: {title.strip()}"
        )
        target = "Description: " + content.strip()
        return {"input_text": user, "target_text": target}

    # Cria pares de entrada e alvo (input e target)
    pairs = []
    with open(json_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                ex = json.loads(line)
            except Exception:
                continue
            title = (ex.get("title") or "").strip()
            content = (ex.get("content") or "").strip()
            if len(title) < 3 or len(content) < 20:
                continue
            pairs.append(build_example(title, content))

    random.seed(42)
    random.shuffle(pairs)
    need = N_TRAIN + N_VAL + N_TEST
    pairs = pairs[:need]

    # Separa em train/val/test
    train_pairs = pairs[:N_TRAIN]
    val_pairs   = pairs[N_TRAIN:N_TRAIN+N_VAL]
    test_pairs  = pairs[N_TRAIN+N_VAL:N_TRAIN+N_VAL+N_TEST]

    # Salva no Drive
    ds = DatasetDict({
        "train": Dataset.from_list(train_pairs),
        "validation": Dataset.from_list(val_pairs),
        "test": Dataset.from_list(test_pairs),
    })
    os.makedirs(DS_DIR, exist_ok=True)
    ds.save_to_disk(DS_DIR)

    print("Dataset salvo no Drive:", DS_DIR)

Mounted at /content/drive
Dataset já existe no Drive: /content/drive/MyDrive/amazon_ft/cache/prepared_descfmt


In [4]:
# === Fine-tuning QLoRA no TinyLlama ===
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import os, types, torch
from datasets import load_from_disk
from transformers import (AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,
                          DataCollatorForLanguageModeling, TrainingArguments, Trainer)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# ===== CONFIG BÁSICA =====
MODEL_NAME    = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
PREP_DIR      = "/content/drive/MyDrive/amazon_ft/cache/prepared_descfmt"
OUT_DIR       = "/content/drive/MyDrive/amazon_ft/outputs/tinyllama11b_descfmt_lora"
ADAPTER_DIR   = f"{OUT_DIR}/adapter"
os.makedirs(OUT_DIR, exist_ok=True)

ds = load_from_disk(PREP_DIR)

# Carrego o tiny já compactado em 4bit
bnb = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True, bnb_4bit_compute_dtype=torch.float16
)

tok = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True, trust_remote_code=True)
if tok.pad_token is None: tok.pad_token = tok.eos_token

base = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, quantization_config=bnb, device_map="auto", trust_remote_code=True
)
base = prepare_model_for_kbit_training(base)

# LoRA abrangente (garante que o modelo "escute" o adapter)
lora_cfg = LoraConfig(
    r=32, lora_alpha=64, lora_dropout=0.05, task_type="CAUSAL_LM",
    target_modules="all-linear", modules_to_save=["lm_head","embed_tokens"]
)
model = get_peft_model(base, lora_cfg)
model.print_trainable_parameters()

# Formatação e tokenização
def format_chat(example):
    messages = [
        {"role":"user", "content": example["input_text"]},
        {"role":"assistant", "content": example["target_text"]},
    ]
    text = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
    return {"text": text}

# ===== PREPARAÇÃO DO DATASET =====
train_txt = ds["train"].map(format_chat, remove_columns=ds["train"].column_names)
def tok_fn(e): return tok(e["text"], truncation=True, max_length=512)
train_tok = train_txt.map(tok_fn, batched=True, remove_columns=["text"])
collator = DataCollatorForLanguageModeling(tok, mlm=False)

# ===== ARGS DE TREINO =====
args = TrainingArguments(
    output_dir=OUT_DIR,
    max_steps=5000,
    num_train_epochs=1,
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    gradient_accumulation_steps=8,
    per_device_train_batch_size=2,
    logging_steps=20,
    eval_strategy="no",
    save_steps=5000,
    save_total_limit=1,
    bf16=False, fp16=True,
    report_to="none",
    optim="adamw_torch",
)

trainer = Trainer(model=model, args=args, train_dataset=train_tok, data_collator=collator)

# Patch accelerate (evita erro "optimizer.train()")
try:
    from accelerate.optimizer import AcceleratedOptimizer
    _noop=lambda self,*a,**k: None
    AcceleratedOptimizer.train = types.MethodType(_noop, AcceleratedOptimizer)
    AcceleratedOptimizer.eval  = types.MethodType(_noop,  AcceleratedOptimizer)
except: pass

trainer.train()

# ===== SALVANDO O ADAPTER =====
os.makedirs(ADAPTER_DIR, exist_ok=True)
model.save_pretrained(ADAPTER_DIR)
print("Adapter salvo em:", ADAPTER_DIR)

Mounted at /content/drive
trainable params: 156,303,360 || all params: 1,256,351,744 || trainable%: 12.4411


Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
max_steps is given, it will override any value given in num_train_epochs
  return fn(*args, **kwargs)


Step,Training Loss
20,2.2452
40,1.7814
60,1.7983
80,1.7807
100,1.7735
120,1.7921
140,1.7633
160,1.7319
180,1.7318
200,1.7368


Adapter salvo em: /content/drive/MyDrive/amazon_ft/outputs/tinyllama11b_descfmt_lora/adapter


In [5]:
# === Merge do LoRA em fp16 (modelo único para inferência) ===
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import os, torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# ===== CONFIG BÁSICA =====
MODEL_NAME  = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
ADAPTER_DIR = "/content/drive/MyDrive/amazon_ft/outputs/tinyllama11b_descfmt_lora/adapter"
SAVE_DIR    = "/content/drive/MyDrive/amazon_ft/outputs/tinyllama11b_descfmt_merged_fp16"
os.makedirs(SAVE_DIR, exist_ok=True)

# ===== PREPARAÇÃO DO TOKENIZADOR =====
tok = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True, trust_remote_code=True)
if tok.pad_token is None: tok.pad_token = tok.eos_token

# Tenta GPU; se OOM, cai para CPU automaticamente
def try_gpu_then_cpu():
    try:
        base = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME, device_map=None, torch_dtype=torch.float16, trust_remote_code=True
        ).to("cuda")
        ft = PeftModel.from_pretrained(base, ADAPTER_DIR, is_trainable=False)
        merged = ft.merge_and_unload()
        merged.save_pretrained(SAVE_DIR); tok.save_pretrained(SAVE_DIR)
        print("Merge em GPU concluído:", SAVE_DIR)
    except Exception as e:
        print("Aviso: GPU falhou, tentando em CPU (pode demorar). Detalhe:", e)
        base = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME, device_map=None, torch_dtype=torch.float32, trust_remote_code=True
        )  # CPU em fp32 para segurança
        ft = PeftModel.from_pretrained(base, ADAPTER_DIR, is_trainable=False)
        merged = ft.merge_and_unload()
        merged.save_pretrained(SAVE_DIR); tok.save_pretrained(SAVE_DIR)
        print("Merge em CPU concluído:", SAVE_DIR)

try_gpu_then_cpu()

Mounted at /content/drive
Merge em GPU concluído: /content/drive/MyDrive/amazon_ft/outputs/tinyllama11b_descfmt_merged_fp16


In [None]:
# === Avaliação rápida: BASE (4-bit) vs MERGED (4-bit) ===
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import os, json, random, pandas as pd, torch, shutil
from datasets import load_from_disk
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# ===== CONFIG BÁSICA =====
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
MERGED_DIR = "/content/drive/MyDrive/amazon_ft/outputs/tinyllama11b_descfmt_merged_fp16"
PREP_DIR   = "/content/drive/MyDrive/amazon_ft/cache/prepared_descfmt"
OUT_CSV    = "/content/drive/MyDrive/amazon_ft/outputs/side_by_side_tinyllama_descfmt_merged_quick.csv"
OUT_JSON   = "/content/drive/MyDrive/amazon_ft/outputs/metrics_tinyllama_descfmt_merged_quick.json"

K        = 30
MAX_NEW  = 150

# Métricas
try:
    import evaluate
except Exception:
    import sys, subprocess
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "evaluate", "rouge-score"])
    import evaluate

# Limpa cache da GPU
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Carrega dataset
ds = load_from_disk(PREP_DIR)
tok = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True, trust_remote_code=True)
if tok.pad_token is None: tok.pad_token = tok.eos_token

# Configuração de 4-bit
bnb_cfg = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True, bnb_4bit_compute_dtype=torch.float16
)

# Diretório de offload
offload_dir = "/content/offload_eval"; shutil.rmtree(offload_dir, ignore_errors=True); os.makedirs(offload_dir, exist_ok=True)

# Carrega modelos
base = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, device_map="auto", trust_remote_code=True,
    quantization_config=bnb_cfg, offload_buffers=True, offload_folder=offload_dir
)
merged = AutoModelForCausalLM.from_pretrained(
    MERGED_DIR, device_map="auto", trust_remote_code=True,
    quantization_config=bnb_cfg, offload_buffers=True, offload_folder=offload_dir
)

# Função de geração de chat
def chat_generate(model, user_text, max_new_tokens=MAX_NEW):
    prompt = tok.apply_chat_template([{"role":"user","content":user_text}],
                                     tokenize=False, add_generation_prompt=True)
    inputs = tok(prompt, return_tensors="pt")  # fica no CPU; accelerate despacha
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False,
                             pad_token_id=tok.eos_token_id, use_cache=True)
    gen_ids = out[0][inputs["input_ids"].shape[-1]:]
    return tok.decode(gen_ids, skip_special_tokens=True).strip()

# Amostra aleatória de índices
random.seed(42)
idxs = random.sample(range(len(ds["test"])), k=min(K, len(ds["test"])))

rows, preds_base, preds_ft, refs = [], [], [], []
for i in idxs:
    ex  = ds["test"][i]
    inp = ex["input_text"]; ref = ex["target_text"]
    yb = chat_generate(base,   inp)
    ym = chat_generate(merged, inp)
    rows.append({
        "id": i,
        "input_preview": inp[:140].replace("\n"," ") + "...",
        "reference_preview": ref[:200].replace("\n"," ") + "...",
        "baseline": yb,
        "fine_tuned_merged": ym,
    })
    preds_base.append(yb); preds_ft.append(ym); refs.append(ref)

# Salva CSV
os.makedirs(os.path.dirname(OUT_CSV), exist_ok=True)
pd.DataFrame(rows).to_csv(OUT_CSV, index=False)
print("CSV salvo em:", OUT_CSV)

rouge = evaluate.load("rouge")
r_base = rouge.compute(predictions=preds_base, references=refs)
r_ft   = rouge.compute(predictions=preds_ft,   references=refs)

metrics = {"rouge_base": {k: float(v) for k,v in r_base.items()},
           "rouge_ft":   {k: float(v) for k,v in r_ft.items()}}
with open(OUT_JSON, "w") as f:
    json.dump(metrics, f, indent=2)
print("Métricas (ROUGE) salvas em:", OUT_JSON)
print("Resumo:", metrics)
