<a href="https://colab.research.google.com/github/rodrigogrigo/analise-requisitos/blob/main/MESTRADO_Modelo_LLM_Few_Shot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Atualizado para testar dois modelos (Gemma 2B e LLaMA 3 8B), manter o RAG + few-shot
# e calcular/exportar o MAE de cada um separadamente.

!pip install -q -U unsloth faiss-cpu sentence-transformers

# Unsloth asks to be imported before torch/transformers so that its patches
# are applied (it emits a warning otherwise), hence it precedes every other import.
from unsloth import FastLanguageModel

import gc
import glob
import os
import re

import faiss
import pandas as pd
import torch
from google.colab import drive
from sentence_transformers import SentenceTransformer

# Dataset locations on the mounted Google Drive.
train_dir = "/content/drive/MyDrive/projetoMestrado/datasets_all_processados/treinamento_model"
eval_dir = "/content/drive/MyDrive/projetoMestrado/datasets_all_processados"

# Mount Google Drive so the paths above become readable.
drive.mount(mountpoint='/content/drive')

# Sentence embedder used for the retrieval (RAG) step.
embedder = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

# FUNÇÃO: Carregar todos os datasets de um diretório
def carregar_datasets(caminho: str) -> pd.DataFrame:
    """Load every usable CSV directly under *caminho* into one DataFrame.

    A CSV is usable when it has both a ``description`` and a ``storypoint``
    column; any other file is skipped. Rows missing either value are dropped.
    Files are read in sorted path order so that row positions in the result
    are reproducible across runs (``glob`` gives no ordering guarantee).

    Args:
        caminho: Directory searched (non-recursively) for ``*.csv`` files.

    Returns:
        A DataFrame with exactly the columns ``description`` and
        ``storypoint`` and a fresh ``0..n-1`` index.

    Raises:
        ValueError: If no CSV under *caminho* has both required columns.
    """
    colunas = ["description", "storypoint"]
    datasets = []
    for arq in sorted(glob.glob(os.path.join(caminho, "*.csv"))):
        df = pd.read_csv(arq)
        if set(colunas).issubset(df.columns):
            datasets.append(df[colunas].dropna())
    if not datasets:
        # pd.concat([]) would raise a ValueError that does not say which
        # directory was empty; keep the exception type, improve the message.
        raise ValueError(
            f"Nenhum CSV com as colunas {colunas} encontrado em: {caminho}"
        )
    return pd.concat(datasets, ignore_index=True)

# Models to evaluate, as (alias, model identifier) pairs. The alias shows up in
# log messages and in the name of the exported results file; the identifier is
# what gets handed to the model loader.
modelos_para_testar = [
    (
        "gemma_2b",
        "unsloth/gemma-2b-it-bnb-4bit",
    ),
    (
        "llama3_8b",
        "unsloth/llama-3-8b-Instruct-bnb-4bit",
    ),
]

# Base instruction placed at the start of every prompt. It is assembled line by
# line; the trailing empty entry yields the final newline.
instrucoes = "\n".join(
    [
        "Você é um engenheiro de software sênior responsável por estimar o esforço necessário para concluir tarefas, "
        "com base em suas descrições. Utilize a métrica de story points, que representa o esforço relativo.",
        "",
        "Os valores válidos são baseados na sequência de Fibonacci: 1, 2, 3, 5 e 8.",
        "- 1: tarefa muito simples",
        "- 2 ou 3: tarefa simples ou moderada",
        "- 5: tarefa complexa",
        "- 8: tarefa de alto esforço",
        "",
        "IMPORTANTE:",
        "- Responda com apenas um número: 1, 2, 3, 5 ou 8",
        "- Não adicione frases ou símbolos",
        "- A resposta deve vir após o marcador ###",
        "",
    ]
)

# ---------------------------------------------------------------------------
# Evaluation: retrieval-augmented few-shot inference with each candidate model.
# For every task in the evaluation CSVs, the most similar training tasks are
# retrieved and shown as worked examples; each model's answers are exported to
# a CSV and scored with the mean absolute error (MAE).
# ---------------------------------------------------------------------------

MAX_SEQ_LENGTH = 1024  # context length requested when loading each model
MAX_NEW_TOKENS = 3     # number of tokens generated per answer
NUM_EXEMPLOS = 3       # few-shot examples retrieved per task
# Prompt budget: leave room for the generated tokens inside the context.
LIMITE_PROMPT = MAX_SEQ_LENGTH - MAX_NEW_TOKENS
COLUNAS_RESULTADO = [
    "dataset",
    "id",
    "descricao",
    "storypoint_previsto",
    "storypoint_real",
    "modelo_usado",
]


def _extrair_storypoint(texto):
    """Return the first number found in *texto* as a float, or None if there is none."""
    achado = re.search(r"\d+(?:\.\d+)?", str(texto))
    return float(achado.group()) if achado else None


def _montar_prompt(tokenizer, exemplos, descricao, limite_tokens):
    """Build the few-shot prompt, dropping examples until it fits the budget.

    Args:
        tokenizer: Tokenizer of the model being evaluated, used to count tokens.
        exemplos: DataFrame rows (``description``, ``storypoint``) ordered from
            most to least similar.
        descricao: Description of the task to estimate.
        limite_tokens: Maximum number of prompt tokens.

    Returns:
        The prompt text, always ending with the target task and the ``###``
        marker. Examples are removed from the end (least similar first) while
        the prompt is over budget; the prompt may still exceed the budget when
        no example is left.
    """
    blocos = [
        f"Descrição da tarefa:\n{ex['description']}\n###\n{ex['storypoint']}\n\n"
        for _, ex in exemplos.iterrows()
    ]
    alvo = f"Descrição da tarefa:\n{descricao}\n###"
    while True:
        prompt = instrucoes + "".join(blocos) + alvo
        if not blocos or len(tokenizer(prompt)["input_ids"]) <= limite_tokens:
            return prompt
        blocos.pop()


def _gerar_resposta(model, tokenizer, prompt, device):
    """Run the model on *prompt* and return only the newly generated text, stripped."""
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=LIMITE_PROMPT,
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs.get("attention_mask"),
        max_new_tokens=MAX_NEW_TOKENS,
        # Greedy decoding: the benchmark should give the same numbers on every run.
        do_sample=False,
    )
    # The returned sequence starts with the prompt tokens; slicing them off
    # avoids having to locate the answer by searching for "###" in the text.
    novos_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(novos_tokens, skip_special_tokens=True).strip()


# RAG: load the training data and build the FAISS index. None of this depends
# on the language model, so it is done once rather than once per model.
train_df = carregar_datasets(train_dir)
train_descriptions = train_df["description"].tolist()
train_embeddings = embedder.encode(train_descriptions, show_progress_bar=True, convert_to_numpy=True)

index = faiss.IndexFlatL2(train_embeddings.shape[1])
index.add(train_embeddings)
# Never ask for more neighbours than the index holds: FAISS pads missing
# results with -1, which iloc would silently read as "last row".
k_vizinhos = min(NUM_EXEMPLOS, index.ntotal)

# Retrieve the similar tasks (dynamic few-shot) for every evaluation file, one
# batch per file. Sorted so the output row order is reproducible.
avaliacoes = []
for arq in sorted(glob.glob(os.path.join(eval_dir, "*.csv"))):
    eval_df = pd.read_csv(arq)
    if "description" not in eval_df.columns:
        print(f"⚠️ Ignorando {os.path.basename(arq)}: coluna 'description' ausente")
        continue
    # Rows without a description cannot be embedded or estimated.
    eval_df = eval_df.dropna(subset=["description"]).reset_index(drop=True)
    if eval_df.empty:
        continue
    eval_embeddings = embedder.encode(
        eval_df["description"].astype(str).tolist(), convert_to_numpy=True
    )
    _, vizinhos = index.search(eval_embeddings, k_vizinhos)
    avaliacoes.append((os.path.basename(arq), eval_df, vizinhos))

# Loop over each model
for apelido_modelo, modelo_id in modelos_para_testar:
    print(f"\n\U0001f504 Carregando modelo: {apelido_modelo}")

    # Load the model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=modelo_id,
        max_seq_length=MAX_SEQ_LENGTH,
        load_in_4bit=True,
        dtype=None,
    )
    FastLanguageModel.for_inference(model)
    # Fallback for prompts that are over budget even without examples: cut from
    # the start so the task being estimated and the "###" marker are kept.
    tokenizer.truncation_side = "left"

    resultados = []
    for nome_arquivo, eval_df, vizinhos in avaliacoes:
        for (_, row), idx_exemplos in zip(eval_df.iterrows(), vizinhos):
            descricao = row["description"]
            prompt = _montar_prompt(
                tokenizer, train_df.iloc[idx_exemplos], descricao, LIMITE_PROMPT
            )
            raw_output = _gerar_resposta(model, tokenizer, prompt, device)

            resultados.append({
                "dataset": nome_arquivo,
                "id": row.get("issuekey", "SEM_ID"),
                "descricao": descricao,
                "storypoint_previsto": raw_output,
                "storypoint_real": row.get("storypoint", None),
                "modelo_usado": apelido_modelo,
            })

    # Export results + MAE. Explicit columns keep this working when there were
    # no evaluation rows at all.
    df_result = pd.DataFrame(resultados, columns=COLUNAS_RESULTADO)
    df_result["storypoint_previsto_int"] = pd.to_numeric(
        df_result["storypoint_previsto"].map(_extrair_storypoint), errors="coerce"
    )
    df_result["storypoint_real_int"] = pd.to_numeric(df_result["storypoint_real"], errors="coerce")
    # Only rows with both a parsed prediction and a ground-truth value are scored.
    df_validos = df_result.dropna(subset=["storypoint_previsto_int", "storypoint_real_int"])
    mae = (df_validos["storypoint_previsto_int"] - df_validos["storypoint_real_int"]).abs().mean()

    output_path = f"/content/drive/MyDrive/projetoMestrado/storypoints_inferidos_fewshot_{apelido_modelo}.csv"
    df_result.to_csv(output_path, index=False)
    print(f"✅ Resultados exportados: {output_path}")
    print(f"ℹ️ Previsões válidas para o MAE: {len(df_validos)}/{len(df_result)}")
    print(f"📊 MAE para o modelo {apelido_modelo}: {mae:.2f}")

    # Free GPU memory before loading the next model
    del model
    del tokenizer
    gc.collect()
    torch.cuda.empty_cache()


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.8/46.8 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.6/265.6 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m31.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.9/318.9 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.1/138.1 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.5/31.5 MB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━


Please restructure your imports with 'import unsloth' at the top of your file.
  from unsloth import FastLanguageModel


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
Mounted at /content/drive


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


🔄 Carregando modelo: gemma_2b
==((====))==  Unsloth 2025.5.6: Fast Gemma patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.07G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/154 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/40.6k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

Batches:   0%|          | 0/143 [00:00<?, ?it/s]

✅ Resultados exportados: /content/drive/MyDrive/projetoMestrado/storypoints_inferidos_fewshot_gemma_2b.csv
📊 MAE para o modelo gemma_2b: 2.80

🔄 Carregando modelo: llama3_8b
==((====))==  Unsloth 2025.5.6: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/220 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.1k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

Batches:   0%|          | 0/143 [00:00<?, ?it/s]

✅ Resultados exportados: /content/drive/MyDrive/projetoMestrado/storypoints_inferidos_fewshot_llama3_8b.csv
📊 MAE para o modelo llama3_8b: 2.27
