# QA evaluation

In [8]:
import os
import sys
import json
import time
import pandas as pd
from pathlib import Path
import torch

from transformers import AutoModelForCausalLM, AutoTokenizer

# Project root = parent of the current working directory; put it on sys.path
# (once) so that project-local packages can be imported below.
PROJECT_ROOT = Path(".").resolve().parent
_project_root_entry = str(PROJECT_ROOT)
if _project_root_entry not in sys.path:
    sys.path.insert(0, _project_root_entry)

# Import the project helper that loads the model and tokenizer
from src.evaluation.common import load_model_and_tokenizer

In [12]:
# Read the article question/answer CSV and preview the first rows.
qa_csv_path = "../data/QA/article_qa.csv"
df = pd.read_csv(qa_csv_path)
df.head()

Unnamed: 0,article_title,article_url,question,answer
0,“Dunks for Tomorrow” Creating Real Opportuniti...,https://www.press.bmwgroup.com/global/article/...,How much money did BMW Munich donate for the d...,"€125,000"
1,BMW celebrates 25 years of the Preis der Natio...,https://www.press.bmwgroup.com/global/article/...,Who designed the 19th BMW Art Car?,John Baldessari
2,Indianapolis 8 Hour: Kelvin van der Linde wins...,https://www.press.bmwgroup.com/global/article/...,Who won the Drivers’ Championship of the Inter...,Kelvin van der Linde
3,Nelson Piquet tribute: BMW Group Classic broug...,https://www.press.bmwgroup.com/global/article/...,In which year did Nelson Piquet become the fir...,1983
4,Iconic BMW Art Cars by Andy Warhol and Julie M...,https://www.press.bmwgroup.com/global/article/...,Which two American artists' BMW Art Cars are e...,"Andy Warhol, Julie Mehretu"


In [None]:
# Load model and tokenizer
# Model description passed to the project loader.
model_cfg = dict(
    base_model="HuggingFaceTB/SmolLM2-135M",
    variant="base",
)
# Checkpoint directory of the experiment to evaluate -- change appropriately.
checkpoint_path = PROJECT_ROOT.joinpath(
    "experiments", "exp_-02_SmolLM2-135M_base", "checkpoints", "final"
)
model, tokenizer = load_model_and_tokenizer(model_cfg, checkpoint_path)

In [21]:
# For every question: build a short-answer prompt, greedy-decode a few tokens,
# and print the model's first answer line next to the reference answer.
for index, row in df.iterrows():
    question = row['question']
    input_text = f"Answer with maximum three words.\nQuestion: {question}\nAnswer:"

    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(model.device)
    # Single unpadded sequence, so every position is a real token.
    attention_mask = torch.ones_like(input_ids)
    # Number of prompt tokens; generate() returns prompt + continuation for
    # causal LMs, so this is where the newly generated tokens start.
    prompt_length = input_ids.shape[-1]

    with torch.no_grad():
        output_ids = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=10,
            do_sample=False,  # greedy decoding -> deterministic output
            num_beams=1,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode ONLY the generated tokens. Slicing the decoded full sequence by
    # len(input_text) is fragile: decoding is not guaranteed to reproduce the
    # prompt character-for-character (whitespace clean-up, Unicode
    # normalisation), which would shift the cut and corrupt the answer.
    generated_ids = output_ids[0, prompt_length:]
    generated_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
    answer = generated_text.strip().split('\n')[0]  # Get only the first line of the answer

    print(f"Q: {question}\nModel A: {answer}\nTrue A:  {row['answer']}")
    print("-" * 50)

Q: How much money did BMW Munich donate for the dunks?
Model A: 1,000,000
True A:  €125,000
--------------------------------------------------
Q: Who designed the 19th BMW Art Car?
Model A: The BMW Art Car was designed by the designer
True A:  John Baldessari
--------------------------------------------------
Q: Who won the Drivers’ Championship of the Intercontinental GT Challenge?
Model A: The BMW M4 GT3 EVO.
True A:  Kelvin van der Linde
--------------------------------------------------
Q: In which year did Nelson Piquet become the first turbo world champion in Formula 1?
Model A: 2018.
True A:  1983
--------------------------------------------------
Q: Which two American artists' BMW Art Cars are exhibited at the Pebble Beach Concours d'Elegance?
Model A: The BMW Art Cars of the 20
True A:  Andy Warhol, Julie Mehretu
--------------------------------------------------
Q: Who are the three drivers selected for the BMW M Racing Academy Class of 2026?
Model A: The three drivers select