In [None]:
import pandas as pd
import requests
import time
from pathlib import Path

In [None]:
# === Config ===
# Model name sent in the "model" field of each Ollama chat request.
MODEL = "model-1"
# CSV read as the source dataset.
INPUT_FILE = "../../data/processed/exp003.csv"
# CSV the sampled evaluation set is written to (relative to the working directory).
EVAL_FILE = "eval.csv"
# CSV the evaluation set plus the model predictions is written to.
RESULT_FILE = "../../results/exp003/results.csv"
# Temperature value included in each request payload.
TEMPERATURE = 0.0
# Number of rows sampled per "Direção" class for the evaluation set.
N_PER_CLASS = 50

In [None]:
# === Step 1: Load data ===
# Read the input CSV, then discard rows that have no headline text.
df = pd.read_csv(INPUT_FILE)
df = df.dropna(subset=["Manchete"])

In [None]:
# === Step 2: Sample few-shot examples (3) ===
# Two "Aumento" rows and one "Diminuição" row, drawn with a fixed seed so the
# same examples are picked on every run.
is_up = df["Direção"] == "Aumento"
is_down = df["Direção"] == "Diminuição"
fewshot_up = df.loc[is_up].sample(n=2, random_state=1)
fewshot_down = df.loc[is_down].sample(n=1, random_state=1)
fewshot_df = pd.concat([fewshot_up, fewshot_down])
fewshot_indices = fewshot_df.index

In [None]:
# === Step 3: Exclude few-shot examples from eval set ===
# Rows used as prompt examples must not also be scored.
df_eval = df.drop(labels=fewshot_indices, axis="index")

In [None]:
# === Step 4: Sample balanced evaluation set ===
# Draw N_PER_CLASS rows per label, combine them, shuffle, and renumber the
# index; every random step uses seed 42.
up_pool = df_eval.loc[df_eval["Direção"] == "Aumento"]
down_pool = df_eval.loc[df_eval["Direção"] == "Diminuição"]
df_up = up_pool.sample(n=N_PER_CLASS, random_state=42)
df_down = down_pool.sample(n=N_PER_CLASS, random_state=42)

balanced = pd.concat([df_up, df_down])
shuffled = balanced.sample(frac=1.0, random_state=42)
eval_df = shuffled.reset_index(drop=True)
eval_df.to_csv(EVAL_FILE, index=False)

In [None]:
# === Step 5: Format few-shot block ===
def format_example(row):
    """Render one labelled row as a three-line few-shot example.

    Args:
        row: Mapping-like object with the keys "Manchete", "DataHora" and
            "Direção".

    Returns:
        A string with the headline, timestamp and answer on separate lines.
    """
    parts = (
        f"Manchete: {row['Manchete']}",
        f"DataHora: {row['DataHora']}",
        f"Resposta: {row['Direção']}",
    )
    return "\n".join(parts)

# Render every few-shot row once and separate the examples with a blank line.
_fewshot_examples = [format_example(example) for _, example in fewshot_df.iterrows()]
FEWSHOT_BLOCK = "\n\n".join(_fewshot_examples)

In [None]:
# === Step 6: Build prompt for each headline ===
def build_fewshot_prompt(headline, timestamp):
    """Assemble the full prompt for one headline.

    The prompt is the module-level FEWSHOT_BLOCK, followed by the
    classification instruction, the headline with its timestamp, and the
    one-word answer constraint, each separated by a blank line.

    Args:
        headline: Headline text to classify.
        timestamp: Timestamp shown alongside the headline.

    Returns:
        The prompt string.
    """
    sections = [
        FEWSHOT_BLOCK,
        "Agora classifique a seguinte manchete:",
        f"Manchete: {headline}\nDataHora: {timestamp}",
        "Responda apenas com uma palavra: Aumento ou Diminuição.",
    ]
    return "\n\n".join(sections)

In [None]:
# === Step 7: Query Ollama ===
def query_ollama(model, prompt, timeout=300):
    """Send a single-turn chat request to the local Ollama server.

    Args:
        model: Name of the Ollama model to query.
        prompt: Text sent as the only user message.
        timeout: Seconds to wait for the server to connect/respond before
            giving up, so a stalled server cannot block the caller forever.

    Returns:
        The assistant message content with surrounding whitespace stripped.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    payload = {
        "model": model,
        "stream": False,
        "messages": [{"role": "user", "content": prompt}],
        # Ollama reads sampling parameters from "options"; a top-level
        # "temperature" key is not part of the /api/chat request schema and
        # would leave the model running at its default temperature.
        "options": {"temperature": TEMPERATURE},
    }
    res = requests.post(
        "http://localhost:11434/api/chat",
        json=payload,
        timeout=timeout,
    )
    res.raise_for_status()
    return res.json()["message"]["content"].strip()

In [None]:
# === Step 8: Run inference ===
# One request per evaluation row. A failed request is still recorded as "Erro"
# so the run continues, but the failure is reported instead of being dropped
# silently (e.g. an unreachable server would otherwise yield only "Erro" rows
# with no hint as to why).
predictions = []
n_failed = 0
for i, row in eval_df.iterrows():
    prompt = build_fewshot_prompt(row["Manchete"], row["DataHora"])
    try:
        out = query_ollama(MODEL, prompt)
    except Exception as exc:
        n_failed += 1
        out = "Erro"
        print(f"⚠️ Row {i}: request failed ({type(exc).__name__}: {exc})")
    predictions.append(out)
    time.sleep(0.25)  # short pause between consecutive requests

if n_failed:
    print(f"⚠️ {n_failed} of {len(predictions)} requests failed and were recorded as 'Erro'")

In [None]:
# Attach the predictions to the evaluation rows and persist them.
eval_df["Previsão"] = predictions
# DataFrame.to_csv does not create missing directories; make sure the target
# folder exists so the collected predictions are not lost to an OSError.
Path(RESULT_FILE).parent.mkdir(parents=True, exist_ok=True)
eval_df.to_csv(RESULT_FILE, index=False)
print(f"✅ Saved predictions to {RESULT_FILE}")

In [None]:
from sklearn.metrics import classification_report

# Reload the predictions from the same path they were saved to, rather than a
# hard-coded file name that only matches for one particular working directory.
df = pd.read_csv(RESULT_FILE)

# Normalize labels before comparing. An empty model reply is read back from
# CSV as NaN, so missing predictions are replaced with "" to keep the column
# all-string; "" matches neither label and therefore counts as a wrong answer.
df["Previsão"] = df["Previsão"].fillna("").astype(str).str.strip().str.capitalize()
df["Direção"] = df["Direção"].str.strip().str.capitalize()

# Per-class precision/recall/F1 restricted to the two expected labels.
report = classification_report(
    df["Direção"], df["Previsão"],
    labels=["Aumento", "Diminuição"],
    target_names=["Aumento", "Diminuição"],
    digits=3
)

print("\nClassification Report:\n")
print(report)