In [None]:
import os
import random

import pandas as pd

from groq import Groq
from tqdm import tqdm

In [13]:
from datasets import load_dataset

# Load the "qa" configuration of the pminervini/HaluEval dataset.
ds = load_dataset("pminervini/HaluEval", "qa")

# Build a three-column frame, renaming the dataset fields to the shorter
# names used by the rest of the notebook (target name <- source field).
df = pd.DataFrame({
    target: ds["data"][source]
    for target, source in (
        ("input", "question"),
        ("truth", "right_answer"),
        ("hallucination", "hallucinated_answer"),
    )
})

df

Unnamed: 0,input,truth,hallucination
0,Which magazine was started first Arthur's Maga...,Arthur's Magazine,First for Women was started first.
1,The Oberoi family is part of a hotel company t...,Delhi,The Oberoi family's hotel company is based in ...
2,Musician and satirist Allie Goertz wrote a son...,President Richard Nixon,"Allie Goertz wrote a song about Milhouse, a po..."
3,What nationality was James Henry Miller's wife?,American,James Henry Miller's wife was British.
4,Cadmium Chloride is slightly soluble in this c...,alcohol,water with a hint of alcohol
...,...,...,...
9995,Are James Norman Hall and Amiri Baraka from th...,yes,James Norman Hall was French.
9996,The actress who appeared in the 2002 film Love...,1979,The actress who appeared in the 2002 film Love...
9997,how is Ape Escape and Nicktoons Film Festival ...,shorts,Ape Escape and Nicktoons Film Festival are con...
9998,What position did both Warwick Capper and John...,full forward,Warwick Capper played midfield.


In [None]:
# Read the Groq API key from the GROQ_API_KEY environment variable rather than
# hardcoding it in the notebook; falls back to "" when the variable is unset.
groq_API_key = os.environ.get("GROQ_API_KEY", "")

def query_llm_on_subset(dataframe, percentage=0.01, model="qwen-2.5-32b", seed=None):
    """Send a random sample of questions from ``dataframe`` to a Groq chat model.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must provide the columns ``"input"`` (the question sent to the model),
        ``"truth"`` and ``"hallucination"`` (copied through to the result).
    percentage : float, default 0.01
        Fraction of rows to sample, in ``[0, 1]`` (despite the name, 0.01
        means 1% of the rows). The sample size is
        ``int(len(dataframe) * percentage)``, i.e. rounded down.
    model : str, default "qwen-2.5-32b"
        Model identifier passed to the chat-completions request.
    seed : int, optional
        When given, rows are sampled with a dedicated ``random.Random(seed)``
        so the same rows are picked on every run. When ``None`` the global
        ``random`` module state is used.

    Returns
    -------
    pandas.DataFrame
        One row per sampled question with the columns ``index`` (positional
        row number in ``dataframe``), ``question``, ``model_answer``,
        ``right_answer`` and ``hallucinated_answer``. The columns are present
        even when the sample is empty. A failed request does not abort the
        run; its ``model_answer`` is ``"ERROR: <message>"``.

    Raises
    ------
    ValueError
        If ``percentage`` is outside ``[0, 1]``.
    """
    # Validate before creating the client so a bad argument fails immediately
    # with a clear message.
    if not 0 <= percentage <= 1:
        raise ValueError(
            f"percentage must be a fraction between 0 and 1, got {percentage!r}"
        )

    # An empty key can never authenticate; pass None instead so the Groq client
    # falls back to the GROQ_API_KEY environment variable.
    client = Groq(api_key=groq_API_key or None)

    subset_size = int(len(dataframe) * percentage)
    rng = random if seed is None else random.Random(seed)
    sampled_indices = rng.sample(range(len(dataframe)), subset_size)

    result_columns = [
        "index",
        "question",
        "model_answer",
        "right_answer",
        "hallucinated_answer",
    ]
    llm_outputs = []

    for idx in sampled_indices:
        row = dataframe.iloc[idx]
        question = row["input"]

        try:
            response = client.chat.completions.create(
                messages=[{"role": "user", "content": question}],
                model=model,
            )
            answer = response.choices[0].message.content
        except Exception as e:
            # Best effort: keep going and record the failure in place of the answer.
            answer = f"ERROR: {e}"

        llm_outputs.append({
            "index": idx,
            "question": question,
            "model_answer": answer,
            "right_answer": row["truth"],
            "hallucinated_answer": row["hallucination"],
        })

    # Passing the columns explicitly keeps the schema stable for an empty sample.
    return pd.DataFrame(llm_outputs, columns=result_columns)
    
# Query the model on a 0.1% random sample of df; each sampled row triggers one
# chat-completion request.
output_frame = query_llm_on_subset(df, percentage=0.001)

In [23]:
# Show each sampled question with the model's answer next to the reference
# answer and the hallucinated answer.
output_frame

Unnamed: 0,index,question,model_answer,right_answer,hallucinated_answer
0,9424,Rectify was created by which American actor an...,"""Rectify"" was created by American actor and sc...","Raymond ""Ray"" Wilkes McKinnon",Rectify was created by an American actor.
1,8588,James Wolk is an American actor in a drama ser...,"James Wolk starred in the drama series ""Hannib...",James Patterson and Michael Ledwidge,James Wolk stars in a series about animal atta...
2,9936,What ages are the students at the Boy's School...,David Allen Conway was a British Army officer ...,5–18,The students at the Boy's School where David A...
3,1185,"What Teaneck, NJ headquartered the multination...","The Teaneck, NJ headquartered multinational co...",Cognizant,Betsy Atkins is on the Board of Directors of f...
4,8083,What university houses the speech pathology an...,"The ""Monster Study"" refers to a controversial ...",University of Iowa,The Monster Study was unethical.
5,2202,What Scottish novelist created a character who...,The Scottish novelist you're referring to is I...,J. M. Barrie,The character created by a Scottish novelist t...
6,809,What nationality are Ralph Tresvant and Chad S...,Ralph Tresvant is American. He was born in Pet...,American,Ralph Tresvant and Chad Smith are both America...
7,689,Are the bands Tool and Capital Cities both fro...,"Tool is indeed from Los Angeles, California. T...",yes,"No, Tool is from Seattle."
8,8682,The tomb of the Unknown confederate soldier is...,The tomb of the Unknown Confederate Soldier is...,Jefferson Davis,Robert E. Lee also lived at Beauvoir estate.
9,8326,The Church of St. Martin in Landshut is a medi...,The Anaconda Smelter Stack was once part of th...,Anaconda Copper Mining Company,The Anaconda Smelter Stack was a part of an ol...
