# Datasets

In [None]:
import os
import json
import pandas as pd
from google.colab import drive
drive.mount("/content/drive/")

# Folder on the mounted Drive that holds the input corpora.
DATASET_DIR = "/content/drive/MyDrive/TCC/workspace/datasets"
# Folder on the mounted Drive where raw model replies are written and re-read.
RESULTS_DIR = "/content/drive/MyDrive/TCC/workspace/experiments/results"


In [None]:
# Load both corpora from the datasets folder.
# NOTE(review): later cells read a "text" column (run_experiments) and a "label"
# column (evaluate_assistant_answer) from these frames — confirm both CSVs
# provide them.
fakebrcorpus_full = pd.read_csv(f"{DATASET_DIR}/Fakebr/full_text_fakebrcorpus.csv")
liar_full = pd.read_csv(f'{DATASET_DIR}/LIAR/liar_train.csv')

# Setup
- Install packages
- Load model

In [None]:
!python --version

In [None]:
!pip install unsloth==2025.1.1
!pip install pyarrow==17.0.0
!pip install transformers==4.47.1
!pip install torch==2.5.1

In [None]:
import torch
torch.cuda.is_available()

True

In [None]:
from unsloth import FastLanguageModel

# Checkpoint and loading options forwarded to FastLanguageModel.from_pretrained.
base_model_id = "meta-llama/Llama-3.1-8B-Instruct"
max_seq_length = 4096
dtype = None  # presumably lets Unsloth choose the dtype automatically — TODO confirm
load_in_4bit = True
device_map = "auto"

# The returned model and tokenizer are the objects wrapped by `Model(...)` in
# the experiment sections further down.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=base_model_id,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    device_map=device_map,
)


# Functions

In [None]:
import re
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import ConfusionMatrixDisplay


# Patterns used by get_results_df to parse the raw model replies.
# NOTE(review): a later cell assigns all six names again, and its
# REGEX_ANSWER_PT and REGEX_0_EXCEPTION differ from the ones below. After a
# top-to-bottom run the later values are the ones in effect — confirm which
# variants are intended.
REGEX_EXPLANATION = r"(Explanation|Explicação):\s*\**(.*)"  # group(2): explanation text
REGEX_ANSWER_EN = r"\**(Answer)\**:\**\s*\[?(\d+)\]?"  # group(2): numeric answer
REGEX_ANSWER_PT = r"\**(Resposta|Avaliação)\**:\**\s*\[?(\d+)\]?"  # group(2): numeric answer
REGEX_0_EXCEPTION = r"N/A|0/1|[?]"  # replies matching this are labelled 0
REGEX_1_EXCEPTION = r"1/1"  # replies matching this are labelled 1
REGEX_ANSWER_EXCEPTION = r"(Answer|Resposta):"  # answer marker present but unparsed

def plot(confusion_matrix, title):
    """Draw a confusion matrix with "Fake News"/"True News" tick labels.

    Args:
        confusion_matrix: Matrix to display. Inside this function the name
            shadows sklearn's ``confusion_matrix`` function.
        title: Text shown above the figure.
    """
    class_names = ["Fake News", "True News"]
    ConfusionMatrixDisplay(
        confusion_matrix=confusion_matrix,
        display_labels=class_names,
    ).plot()
    plt.title(title)
    plt.show()


def evaluate_assistant_answer(true_labels, predicted_labels):
    """Score predicted labels against reference labels.

    Both arguments are indexed with ``["label"]`` and the two resulting
    sequences are compared position by position.

    Args:
        true_labels: Container whose ``"label"`` entry holds the reference labels.
        predicted_labels: Container whose ``"label"`` entry holds the predictions.

    Returns:
        tuple: ``(cm, metrics)``. ``cm`` is the confusion matrix over labels
        ``[0, 1]``, normalised over the true classes. ``metrics`` is a dict of
        scores rounded to 4 decimals: accuracy, per-class precision/recall/F1
        (suffix ``_f`` is class 0, fake news; suffix ``_t`` is class 1, true
        news), and micro- and macro-averaged F1.
    """
    expected = list(true_labels['label'])
    predicted = list(predicted_labels['label'])

    cm = confusion_matrix(expected, predicted, labels=[0, 1], normalize='true')

    scores = {"accuracy": accuracy_score(expected, predicted)}

    # Each class is scored in turn as the "positive" one through pos_label:
    # 0 stands for fake news, 1 for true news.
    for suffix, positive in (("f", 0), ("t", 1)):
        scores[f"precision_{suffix}"] = precision_score(expected, predicted, pos_label=positive)
        scores[f"recall_{suffix}"] = recall_score(expected, predicted, pos_label=positive)
        scores[f"f1_{suffix}"] = f1_score(expected, predicted, pos_label=positive)

    for averaging in ("micro", "macro"):
        scores[f"f1_{averaging}"] = f1_score(expected, predicted, average=averaging)

    metrics = {name: round(value, 4) for name, value in scores.items()}

    return cm, metrics

def get_results_df(outputs):
    """Parse raw model replies into labels and explanations.

    Note: a later cell of this notebook defines ``get_results_df`` again; after
    a top-to-bottom run that later definition is the one in effect.

    Args:
        outputs: Container whose ``"output"`` entry holds one raw model reply
            per claim.

    Returns:
        pandas.DataFrame with columns ``label`` (parsed integer answer),
        ``explanation`` (text captured after "Explanation:"/"Explicação:", or a
        placeholder sentence when absent) and ``output`` (the reply as received).

    Raises:
        Exception: when a reply contains an "Answer:"/"Resposta:" marker but no
            pattern can extract a label from it.
    """
    rows = []
    for i, out in enumerate(outputs["output"]):
        # An empty reply comes back from read_csv as NaN (a float). Parse it as
        # an empty string instead of letting re.search raise TypeError.
        text = out if isinstance(out, str) else ""

        explanation_out = re.search(REGEX_EXPLANATION, text)
        explanation = explanation_out.group(2) if explanation_out else "Model couln't give a explanation or answer."

        answer_out = re.search(REGEX_ANSWER_EN, text) or re.search(REGEX_ANSWER_PT, text)
        if answer_out:
            answer = int(answer_out.group(2))
        elif re.search(REGEX_0_EXCEPTION, text):
            answer = 0
        elif re.search(REGEX_1_EXCEPTION, text):
            answer = 1
        elif not re.search(REGEX_ANSWER_EXCEPTION, text):
            # No answer marker at all: the reply is counted as label 0.
            answer = 0
        else:
            # NOTE(review): i+2 presumably maps the 0-based position to the row
            # number of the CSV file (header line + 1-based rows) — confirm.
            raise Exception(f"ANSWER PROBLEM FOUND IN CLAIM {i+2}:\n {out} ")

        rows.append((answer, explanation, out))

    # Build the frame once; enlarging it through .loc for every row re-allocates
    # the data on each insertion.
    return pd.DataFrame(rows, columns=["label", "explanation", "output"])

In [None]:
import pandas as pd
import re

import numpy as np
from transformers import TextStreamer, TextIteratorStreamer
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import ConfusionMatrixDisplay


# Patterns used by get_results_df to parse the raw model replies.
# NOTE(review): these assignments replace the same six names set in an earlier
# cell. Here REGEX_ANSWER_PT does not accept surrounding brackets and
# REGEX_0_EXCEPTION has no "[?]" alternative — confirm the difference is intended.
REGEX_EXPLANATION = r"(Explanation|Explicação):\s*\**(.*)"  # group(2): explanation text
REGEX_ANSWER_EN = r"\**(Answer)\**:\**\s*\[?(\d+)\]?"  # group(2): numeric answer
REGEX_ANSWER_PT = r"\**(Resposta|Avaliação)\**:\**\s*(\d+)"  # group(2): numeric answer
REGEX_0_EXCEPTION = r"N/A|0/1"  # replies matching this are labelled 0
REGEX_1_EXCEPTION = r"1/1"  # replies matching this are labelled 1
REGEX_ANSWER_EXCEPTION = r"(Answer|Resposta):"  # answer marker present but unparsed

# Prompt templates keyed by language code. "{search_text}" is filled with the
# claim/news text through str.format in run_experiments. The reply layout
# requested here ("Explanation:"/"Answer:" and their Portuguese counterparts)
# is what the REGEX_* patterns are written to parse.
VANILLA_PROMPT = {
"en_US": '''Instructions: Analyze and judge if the claim is true or false.
Claim: {search_text}
Please provide your evaluation of the claim based on the following format:
Explanation: [Explain why you make this judgement.]
Answer: [0/1] (0 if this claim is false or 1 if this claim is true)
Remember to fill in all required fields based on your judgement. You must and can only choose one answer from 1 or 0.''',
"pt_BR": '''Instruções: Analise e julgue se a notícia é verdadeira ou falsa.
Notícia: {search_text}
Forneça sua avaliação da notícia com base no seguinte formato:
Explicação: [Explique por que você fez esse julgamento.]
Resposta: [0/1] (0 se esta notícia for falsa ou 1 se esta notícia for verdadeira)
Lembre-se de preencher todos os campos obrigatórios com base no seu julgamento. Você deve e só pode escolher uma resposta entre 1 ou 0.'''
}


class Model:
    """Thin chat wrapper around a causal language model and its tokenizer.

    Every request is sent as a two-message chat: the fixed system message
    followed by the single user message supplied by the caller.
    """

    def __init__(self, model, tokenizer, system_prompt="You are a helpful assistant",
                 max_new_tokens=4096):
        """Store the model, tokenizer and generation settings.

        Args:
            model: Object exposing ``generate`` and ``device``.
            tokenizer: Object exposing ``apply_chat_template``, ``decode``,
                ``eos_token_id`` and ``convert_tokens_to_ids``.
            system_prompt: Content of the system message prepended to every chat.
            max_new_tokens: Upper bound on the number of generated tokens per reply.
        """
        self.model = model
        self.tokenizer = tokenizer
        self.max_new_tokens = max_new_tokens
        self.system_prompt = system_prompt
        self.messages_base = {"role": "system", "content": system_prompt}
        # Generation stops at either the tokenizer's EOS token or "<|eot_id|>".
        self.terminators = [
            self.tokenizer.eos_token_id,
            self.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
        ]

    def generate_message(self, messages):
        """Generate the assistant reply for one user message.

        Args:
            messages: A single chat message dict (``{"role": ..., "content": ...}``).
                Despite the plural name it is appended as one entry after the
                system message.

        Returns:
            str: Decoded text of the newly generated tokens only, with special
            tokens skipped.
        """
        chat = [self.messages_base, messages]

        input_ids = self.tokenizer.apply_chat_template(
            chat,
            add_generation_prompt=True,
            return_tensors="pt",
        ).to(self.model.device)

        # Greedy decoding. Sampling-only settings (temperature, top_p) are not
        # passed because they have no effect with do_sample=False, and no
        # streamer is attached because the whole reply is decoded below.
        outputs = self.model.generate(
            input_ids,
            max_new_tokens=self.max_new_tokens,
            eos_token_id=self.terminators,
            pad_token_id=self.tokenizer.eos_token_id,
            do_sample=False,
            use_cache=True,
        )

        # Drop the prompt tokens so only the completion is decoded.
        response = outputs[0][input_ids.shape[-1]:]
        return self.tokenizer.decode(response, skip_special_tokens=True)

    def set_for_inference(self):
        """Pass the wrapped model to ``FastLanguageModel.for_inference``."""
        FastLanguageModel.for_inference(self.model)


def run_experiments(agent, dataset, language):
    """Ask the agent to judge every text in ``dataset`` and collect the replies.

    Args:
        agent: Object exposing ``generate_message(message_dict)``.
        dataset: Container whose ``"text"`` entry holds the claims to classify.
        language: Key into ``VANILLA_PROMPT`` selecting the prompt template.

    Returns:
        list[dict]: One ``{"output": reply}`` record per claim, in input order.
    """
    texts = list(dataset["text"])

    print(f"Total of claims to classify: {len(texts)}")
    template = VANILLA_PROMPT[language]
    print(f"{template}\n")

    collected = []
    for position, text in enumerate(texts, start=1):
        print(f"Running claim number [{position}]: {text}")
        user_message = {"role": "user", "content": template.format(search_text=text)}
        collected.append({"output": agent.generate_message(user_message)})

    return collected

def get_results_df(outputs):
    """Parse raw model replies into labels and explanations.

    Note: this definition replaces the identically named function from an
    earlier cell once this cell runs.

    Args:
        outputs: Container whose ``"output"`` entry holds one raw model reply
            per claim.

    Returns:
        pandas.DataFrame with columns ``label`` (parsed integer answer),
        ``explanation`` (text captured after "Explanation:"/"Explicação:", or a
        placeholder sentence when absent) and ``output`` (the reply as received).

    Raises:
        Exception: when a reply contains an "Answer:"/"Resposta:" marker but no
            pattern can extract a label from it.
    """
    rows = []
    for i, out in enumerate(outputs["output"]):
        # An empty reply comes back from read_csv as NaN (a float). Parse it as
        # an empty string instead of letting re.search raise TypeError.
        text = out if isinstance(out, str) else ""

        explanation_out = re.search(REGEX_EXPLANATION, text)
        explanation = explanation_out.group(2) if explanation_out else "Model couln't give a explanation or answer."

        answer_out = re.search(REGEX_ANSWER_EN, text) or re.search(REGEX_ANSWER_PT, text)
        if answer_out:
            answer = int(answer_out.group(2))
        elif re.search(REGEX_0_EXCEPTION, text):
            answer = 0
        elif re.search(REGEX_1_EXCEPTION, text):
            answer = 1
        elif not re.search(REGEX_ANSWER_EXCEPTION, text):
            # No answer marker at all: the reply is counted as label 0.
            answer = 0
        else:
            # NOTE(review): i+2 presumably maps the 0-based position to the row
            # number of the CSV file (header line + 1-based rows) — confirm.
            raise Exception(f"ANSWER PROBLEM FOUND IN CLAIM {i+2}:\n {out} ")

        rows.append((answer, explanation, out))

    # Build the frame once; enlarging it through .loc for every row re-allocates
    # the data on each insertion.
    return pd.DataFrame(rows, columns=["label", "explanation", "output"])

# Experiments

## en_US

In [None]:
# English run: wrap the loaded model with an English system prompt and switch
# it to inference mode before generating.
SYSTEM_PROMPT = "You are an assistant specialized in analyzing and identifying fake or true news."
agent = Model(model, tokenizer, SYSTEM_PROMPT)
agent.set_for_inference()
agent.max_new_tokens  # displayed to check the generation budget in use

### 0 - 1000 (DONE)

In [None]:
# LIAR rows [0, 1000): run the en_US prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, liar_full[:1000], "en_US")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_enUS_LLaMA_0_1000.csv"))

### 1000 - 2000 (DONE)

In [None]:
# LIAR rows [1000, 2000): run the en_US prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, liar_full[1000:2000], "en_US")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_enUS_LLaMA_1000_2000.csv"))

### 2000 - 3000 (DONE)

In [None]:
# LIAR rows [2000, 2500): run the en_US prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, liar_full[2000:2500], "en_US")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_enUS_LLaMA_2000_2500.csv"))

In [None]:
# LIAR rows [2500, 3000): run the en_US prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, liar_full[2500:3000], "en_US")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_enUS_LLaMA_2500_3000.csv"))

### 3000 - 4000 (DONE)

In [None]:
# LIAR rows [3000, 3500): run the en_US prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, liar_full[3000:3500], "en_US")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_enUS_LLaMA_3000_3500.csv"))

In [None]:
# LIAR rows [3500, 4000): run the en_US prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, liar_full[3500:4000], "en_US")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_enUS_LLaMA_3500_4000.csv"))

### 4000 - 5000 (DONE)

In [None]:
# LIAR rows [4000, 4500): run the en_US prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, liar_full[4000:4500], "en_US")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_enUS_LLaMA_4000_4500.csv"))

In [None]:
# LIAR rows [4500, 5000): run the en_US prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, liar_full[4500:5000], "en_US")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_enUS_LLaMA_4500_5000.csv"))

### 5000 - 6000 (DONE)

In [None]:
# LIAR rows [5000, 5500): run the en_US prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, liar_full[5000:5500], "en_US")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_enUS_LLaMA_5000_5500.csv"))

In [None]:
# LIAR rows [5500, 6000): run the en_US prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, liar_full[5500:6000], "en_US")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_enUS_LLaMA_5500_6000.csv"))

### 6000 - 7342 (DONE)

In [None]:
# LIAR rows [6000, 6500): run the en_US prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, liar_full[6000:6500], "en_US")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_enUS_LLaMA_6000_6500.csv"))

In [None]:
# LIAR rows from 6500 to the end of the frame (the file name records 7342 as the
# end bound): run the en_US prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, liar_full[6500:], "en_US")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_enUS_LLaMA_6500_7342.csv"))

### full results

In [None]:
# (start, end) bounds that appear in the names of the per-chunk result files,
# listed in claim order.
ENUS_CHUNK_BOUNDS = [
    (0, 1000), (1000, 2000), (2000, 2500), (2500, 3000),
    (3000, 3500), (3500, 4000), (4000, 4500), (4500, 5000),
    (5000, 5500), (5500, 6000), (6000, 6500), (6500, 7342),
]

# Keep only the "output" column of every chunk file.
enUS_chunks = [
    pd.read_csv(
        os.path.join(RESULTS_DIR, f"raw_enUS_LLaMA_{start}_{end}.csv"),
        index_col=False,
    )["output"]
    for start, end in ENUS_CHUNK_BOUNDS
]

# ignore_index gives the merged series one continuous 0..n-1 index instead of
# repeating each chunk's own 0-based labels.
enUS_full = pd.concat(enUS_chunks, ignore_index=True)

enUS_full.to_csv(os.path.join(RESULTS_DIR, "raw_enUS_LLaMA_full.csv"), index=False)

In [None]:
# Reload the merged replies from disk so the parsing step works on the saved
# file, read back as a DataFrame, rather than on the in-memory Series.
enUS_full = pd.read_csv(os.path.join(RESULTS_DIR, "raw_enUS_LLaMA_full.csv"))
enUS_full

In [None]:
results_enUS_df = get_results_df(enUS_full)

In [None]:
results_enUS_df

In [None]:
# Compare the parsed predictions with the labels of the LIAR frame.
# NOTE(review): the comparison is positional, so this assumes results_enUS_df
# has one row per liar_full row, in the same order — confirm.
cm, metrics = evaluate_assistant_answer(liar_full, results_enUS_df)
cm, metrics

In [None]:
plot(cm, "LLaMA-3.1-8B-Instruct")

## pt_BR

In [None]:
# Portuguese run: rebuild the agent with a Portuguese system prompt (this
# rebinds SYSTEM_PROMPT and agent from the English section) and switch the
# model to inference mode.
SYSTEM_PROMPT = "Você é um assistente especializado em analisar e identificar notícias falsas ou verdadeiras."
agent = Model(model, tokenizer, SYSTEM_PROMPT)
agent.set_for_inference()
agent.max_new_tokens  # displayed to check the generation budget in use

2048

In [None]:
# Single-example smoke test of the pt_BR prompt.
# NOTE(review): `claim` is assigned but never used — the prompt below is built
# from the first row of fakebrcorpus_full instead. Confirm which text was meant
# to be sent.
claim = "O presidente dos Estados Unidos, Donald Trump, anunciou nesta terça-feira (21) um investimento de até US$ 500 bilhões em inteligência artificial por meio de uma parceria com as empresas OpenAI, criadora do ChatGPT, Oracle, de gerenciamento de servidores e bancos de dados, e o banco SoftBank."
prompt = VANILLA_PROMPT["pt_BR"].format(search_text=fakebrcorpus_full["text"][0])
messages = {"role": "user", "content": prompt}
output = agent.generate_message(messages)
output

### 0 - 1000 (done)

In [None]:
# Fake.br rows [0, 500): run the pt_BR prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, fakebrcorpus_full[:500], "pt_BR")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_ptBR_LLaMA_0_500.csv"))

In [None]:
# Fake.br rows [500, 1000): run the pt_BR prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, fakebrcorpus_full[500:1000], "pt_BR")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_ptBR_LLaMA_500_1000.csv"))

### 1000 - 2000 (done)

In [None]:
# Fake.br rows [1000, 1500): run the pt_BR prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, fakebrcorpus_full[1000:1500], "pt_BR")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_ptBR_LLaMA_1000_1500.csv"))

In [None]:
# Fake.br rows [1500, 2000): run the pt_BR prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, fakebrcorpus_full[1500:2000], "pt_BR")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_ptBR_LLaMA_1500_2000.csv"))

### 2000 - 3000 (done)

In [None]:
# Fake.br rows [2000, 2500): run the pt_BR prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, fakebrcorpus_full[2000:2500], "pt_BR")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_ptBR_LLaMA_2000_2500.csv"))

In [None]:
# Fake.br rows [2500, 3000): run the pt_BR prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, fakebrcorpus_full[2500:3000], "pt_BR")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_ptBR_LLaMA_2500_3000.csv"))

### 3000 - 4000 (done)

In [None]:
# Fake.br rows [3000, 3500): run the pt_BR prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, fakebrcorpus_full[3000:3500], "pt_BR")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_ptBR_LLaMA_3000_3500.csv"))

In [None]:
# Fake.br rows [3500, 4000): run the pt_BR prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, fakebrcorpus_full[3500:4000], "pt_BR")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_ptBR_LLaMA_3500_4000.csv"))

### 4000 - 5000 (done)

In [None]:
# Fake.br rows [4000, 4500): run the pt_BR prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, fakebrcorpus_full[4000:4500], "pt_BR")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_ptBR_LLaMA_4000_4500.csv"))

In [None]:
# Fake.br rows [4500, 5000): run the pt_BR prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, fakebrcorpus_full[4500:5000], "pt_BR")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_ptBR_LLaMA_4500_5000.csv"))

### 5000 - 6000 (done)

In [None]:
# Fake.br rows [5000, 5500): run the pt_BR prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, fakebrcorpus_full[5000:5500], "pt_BR")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_ptBR_LLaMA_5000_5500.csv"))

In [None]:
# Fake.br rows [5500, 6000): run the pt_BR prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, fakebrcorpus_full[5500:6000], "pt_BR")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_ptBR_LLaMA_5500_6000.csv"))

### 6000 - 7200 (done)

In [None]:
# Fake.br rows [6000, 6500): run the pt_BR prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, fakebrcorpus_full[6000:6500], "pt_BR")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_ptBR_LLaMA_6000_6500.csv"))

In [None]:
# Fake.br rows from 6500 to the end of the frame (the file name records 7200 as
# the end bound): run the pt_BR prompt and persist the raw replies to Drive.
outputs = run_experiments(agent, fakebrcorpus_full[6500:], "pt_BR")
raw_df = pd.DataFrame.from_records(outputs, columns=["output"])
raw_df.to_csv(os.path.join(RESULTS_DIR, "raw_ptBR_LLaMA_6500_7200.csv"))

### full results

In [None]:
# (start, end) bounds that appear in the names of the per-chunk result files,
# listed in claim order.
PTBR_CHUNK_BOUNDS = [
    (0, 500), (500, 1000), (1000, 1500), (1500, 2000), (2000, 2500),
    (2500, 3000), (3000, 3500), (3500, 4000), (4000, 4500), (4500, 5000),
    (5000, 5500), (5500, 6000), (6000, 6500), (6500, 7200),
]

# Keep only the "output" column of every chunk file.
ptBR_chunks = [
    pd.read_csv(
        os.path.join(RESULTS_DIR, f"raw_ptBR_LLaMA_{start}_{end}.csv"),
        index_col=False,
    )["output"]
    for start, end in PTBR_CHUNK_BOUNDS
]

# ignore_index gives the merged series one continuous 0..n-1 index instead of
# repeating each chunk's own 0-based labels.
ptBR_full = pd.concat(ptBR_chunks, ignore_index=True)

ptBR_full.to_csv(os.path.join(RESULTS_DIR, "raw_ptBR_LLaMA_full.csv"), index=False)

In [None]:
# Reload the merged replies from disk so the parsing step works on the saved
# file, read back as a DataFrame, rather than on the in-memory Series.
ptBR_full = pd.read_csv(os.path.join(RESULTS_DIR, "raw_ptBR_LLaMA_full.csv"))
ptBR_full

In [None]:
results_ptBR_df = get_results_df(ptBR_full)

In [None]:
results_ptBR_df

In [None]:
# Compare the parsed predictions with the labels of the Fake.br frame.
# NOTE(review): the comparison is positional, so this assumes results_ptBR_df
# has one row per fakebrcorpus_full row, in the same order — confirm.
cm, metrics = evaluate_assistant_answer(fakebrcorpus_full, results_ptBR_df)
cm, metrics

In [None]:
plot(cm, "LLaMA-3.1-8B-Instruct")