# Claim-based Question-Answer Hallucination Detection

Import necessary packages.

In [1]:
import time
from uqlm.longform.black_box import ClaimQAScorer
from uqlm import BlackBoxUQ

#### Load LLM and device

In [2]:
# from langchain_google_vertexai import ChatVertexAI
# llm = ChatVertexAI(model="gemini-1.5-flash")

from dotenv import load_dotenv, find_dotenv
from langchain_openai import AzureChatOpenAI

# Load environment variables from the .env file located by find_dotenv().
# NOTE(review): the Azure OpenAI endpoint and API key are presumably supplied
# through these variables, since none are passed explicitly below — confirm.
load_dotenv(find_dotenv())
# Chat model instance; it is passed as `llm` to the scorers constructed in later cells.
llm = AzureChatOpenAI(
    deployment_name="gpt-4o-mini",
    openai_api_type="azure",
    openai_api_version="2024-02-15-preview",
    temperature=1,  # User to set temperature
)

In [3]:
import torch

# Choose the torch device in priority order: CUDA (NVIDIA GPU), then MPS (macOS),
# and finally the CPU when neither accelerator reports as available.
device = torch.device(
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device.type} device")

Using mps device


#### Setup Prompts and Black Box Scorer

In [4]:
# Long-form generation prompts; each one is scored separately by the cells below.
prompts = ["write a paragraph about Paul McCartney", "write a paragraph about John Lennon"]

In [5]:
# Stand-alone black-box UQ scorer that uses only the "exact_match" scorer.
# NOTE(review): `bb_scorer` is not referenced by any later cell visible in this
# notebook — confirm whether it is still needed.
bb_scorer = BlackBoxUQ(
    llm=llm,
    scorers=["exact_match"],
    device=device,
    max_calls_per_min=500,  # throttle LLM calls to stay under the provider's rate limit
)

#### Claim-QA class

There are three methods that can be used to compute Claim-QA scores.
- `generate_and_score`: If you only have prompts, call this method to generate a long response for each prompt, decompose each response into factoids, generate questions for each factoid, and compute question-level, factoid-level, and response-level scores.
- `score`: If you have already generated the long responses, call this method.
- `evaluate`: If you have already decomposed the long responses into factoids, call this method.

##### 1. `generate_and_score` method

In [6]:
claim_qa = ClaimQAScorer(llm=llm, black_box_scorers=["exact_match"], response_template="atomic", max_calls_per_min=500, num_questions=2)
start_time = time.time()
result = await claim_qa.generate_and_score(prompts=prompts)

print(f"Computation time: {time.time() - start_time} seconds")

Number of factoids per response:  [28, 24]


Output()

Number of total questions:  104


Length of BB result:  104
Computation time: 81.9830710887909 seconds


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [7]:
# Extract the exact-match factoid scores from the result dictionary; the value is
# indexed by response position (0 = first prompt, 1 = second prompt).
factoid_scores = result.to_dict()["data"]["factoid_scores_exact_match"]
# In the recorded run the lengths are 56 and 48, i.e. 28 and 24 factoids times
# num_questions=2, consistent with one score per factoid-question pair.
print(" Number of factoids*questions for first response: ", len(factoid_scores[0]))
print(" Number of factoids*questions for second response: ", len(factoid_scores[1]))

 Number of factoids*questions for first response:  56
 Number of factoids*questions for second response:  48


In [8]:
# Display the `generate_and_score` result as a DataFrame (one row per prompt in the recorded output).
result.to_df()

Unnamed: 0,prompt,response,response_scores_exact_match,factoid_scores_exact_match,factoid,response_fact_question,response_fact_questions_response,response_fact_questions_sampled_response
0,write a paragraph about Paul McCartney,write a paragraph about Paul McCartney,,"[0.8, 0.9, 0.30000000000000004, 0.7, 0.5, 1.0,...",[Paul McCartney is a legendary British musicia...,"[[ Who is a legendary British musician? , Wha...","[[David Bowie., British.], [Beethoven., Musici...","[[[David Bowie., David Bowie., David Bowie., D..."
1,write a paragraph about John Lennon,write a paragraph about John Lennon,,"[0.9, 1.0, 0.9, 0.9, 0.5, 1.0, 0.9, 0.9, 1.0, ...","[John Lennon was an iconic English musician., ...","[[ Who was an iconic English musician? , What...","[[David Bowie., British.], [Elvis Presley., Mu...","[[[David Bowie, David Bowie., David Bowie., Da..."


##### 2. `score` method

In [9]:
claim_qa2 = ClaimQAScorer(llm=llm, black_box_scorers=["exact_match"], response_template="atomic", max_calls_per_min=500)
start_time = time.time()
result2 = await claim_qa2.score(prompts=claim_qa.prompts, responses=claim_qa.responses)
print(f"Computation time: {time.time() - start_time} seconds")

Number of factoids per response:  [30, 28]


Output()

Number of total questions:  58


Length of BB result:  58
Computation time: 25.98950719833374 seconds


In [10]:
# Display the `score` result as a DataFrame (one row per prompt in the recorded output).
result2.to_df()

Unnamed: 0,prompt,response,response_scores_exact_match,factoid_scores_exact_match,factoid,response_fact_question,response_fact_questions_response,response_fact_questions_sampled_response
0,write a paragraph about Paul McCartney,write a paragraph about Paul McCartney,0.506667,"[0.8, 0.2, 1.0, 1.0, 0.0, 1.0, 0.8, 0.6, 0.8, ...","[Paul McCartney is a legendary musician., Paul...",[[Who is considered a legendary musician known...,"[[Paul McCartney.], [British musician.], [Sing...","[[[Paul McCartney., Paul McCartney, Paul McCar..."
1,write a paragraph about John Lennon,write a paragraph about John Lennon,0.507143,"[0.4, 0.0, 0.8, 0.4, 1.0, 1.0, 1.0, 0.8, 0.4, ...","[John Lennon was an iconic musician., John Len...",[[Who is considered an iconic musician known f...,"[[John Lennon.], [British; legendary musician....","[[[John Lennon., John Lennon., John Lennon, Jo..."


##### 3. `evaluate` method

In [11]:
claim_qa3 = ClaimQAScorer(llm=llm, black_box_scorers=["exact_match"], response_template="atomic", max_calls_per_min=500, num_questions=2)
start = time.time()
result3 = await claim_qa3.evaluate(prompts=claim_qa.prompts, responses=claim_qa.responses, factoids=claim_qa.factoids)
stop = time.time()
print(f"Computation time: {stop - start} seconds")

Number of factoids per response:  [28, 24]


Output()

Number of total questions:  104


Length of BB result:  104
Computation time: 76.93748188018799 seconds


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [12]:
# Inspect the generated questions; the recorded output is nested as
# response -> factoid -> list of questions (two per factoid in this run).
claim_qa3.response_fact_questions

[[[' Who is a legendary British musician? ',
   ' What nationality is Paul McCartney?'],
  [' Who is a famous composer? ', " What is Paul McCartney's profession?"],
  [' Who is a producer? ',
   ' What role does Paul McCartney fulfill in the music industry?'],
  [' Who is Paul McCartney best known as a co-founder of? ',
   ' What famous band did Paul McCartney help to create?'],
  [' What band is considered one of the most influential in music history? ',
   ' How have The Beatles impacted the music industry?'],
  [' When was Paul McCartney born? ', ' What year was Paul McCartney born?'],
  [' Where was Paul McCartney born? ', ' What city is Paul McCartney from?'],
  [' At what age did Paul McCartney start showcasing his musical talent? ',
   " What does the factoid suggest about Paul McCartney's early interest in music?"],
  [' Who teamed up with John Lennon? ',
   ' Which famous musicians collaborated together?'],
  [' Who did Paul McCartney form a songwriting duo with? ',
   ' What 

In [13]:
# Inspect the answers recorded for the generated questions; the recorded output
# uses the same response -> factoid -> question nesting as the questions themselves.
claim_qa3.response_fact_questions_responses

[[['David Bowie.', 'British.'],
  ['Beethoven.', 'Musician.'],
  ['A producer is someone who creates or oversees the production of goods, services, or content.',
   'Singer, songwriter, musician.'],
  ['The Beatles.', 'The Beatles.'],
  ['The Beatles.',
   'Revolutionized songwriting, studio practices, and global popularity.'],
  ['June 18, 1942', '1942'],
  ['Liverpool.', 'Liverpool.'],
  ['At 14.', 'It indicates a strong passion for music.'],
  ['Paul McCartney.', 'Freddie Mercury and David Bowie.'],
  ['John Lennon.', 'Lennon and McCartney.'],
  ['Lennon and McCartney.', 'Revolutionized genres, shaped pop culture.'],
  ['Pursued a solo career and formed Wings.', 'Yes.'],
  ['Paul McCartney.', 'Wings'],
  ['Max Martin.', 'Co-founding The Beatles.'],
  ['Revolutionized pop, inspired generations, set songwriting standards.',
   'Highly significant.'],
  ['Songwriting, singing, and bass guitar.', 'Melodic and inventive.'],
  ['Music, especially as a member of The Beatles.', 'Melodic son

In [14]:
# Inspect the factoids held by the scorer: one list of factoid strings per response
# in the recorded output (`evaluate` was given `claim_qa.factoids` above).
claim_qa3.factoids

[['Paul McCartney is a legendary British musician.',
  'Paul McCartney is a composer.',
  'Paul McCartney is a producer.',
  'Paul McCartney is best known as a co-founder of The Beatles.',
  'The Beatles is one of the most influential bands in music history.',
  'Paul McCartney was born on June 18, 1942.',
  'Paul McCartney was born in Liverpool, England.',
  'Paul McCartney showcased his musical talent from a young age.',
  'Paul McCartney teamed up with John Lennon.',
  'Paul McCartney formed a dynamic songwriting duo with John Lennon.',
  'The dynamic songwriting duo would define a generation.',
  'Paul McCartney embarked on a successful solo career.',
  'Paul McCartney formed the band Wings.',
  'Paul McCartney produced numerous hits.',
  "Paul McCartney's hits have left an indelible mark on the music industry.",
  'Paul McCartney is renowned for his melodic bass playing.',
  'Paul McCartney is renowned for his distinctive vocals.',
  'Paul McCartney is renowned for his innovative 

In [15]:
# NOTE(review): this displays the same attribute as an earlier cell in this section
# and the recorded output is identical — likely redundant; confirm before removing.
claim_qa3.response_fact_questions

[[[' Who is a legendary British musician? ',
   ' What nationality is Paul McCartney?'],
  [' Who is a famous composer? ', " What is Paul McCartney's profession?"],
  [' Who is a producer? ',
   ' What role does Paul McCartney fulfill in the music industry?'],
  [' Who is Paul McCartney best known as a co-founder of? ',
   ' What famous band did Paul McCartney help to create?'],
  [' What band is considered one of the most influential in music history? ',
   ' How have The Beatles impacted the music industry?'],
  [' When was Paul McCartney born? ', ' What year was Paul McCartney born?'],
  [' Where was Paul McCartney born? ', ' What city is Paul McCartney from?'],
  [' At what age did Paul McCartney start showcasing his musical talent? ',
   " What does the factoid suggest about Paul McCartney's early interest in music?"],
  [' Who teamed up with John Lennon? ',
   ' Which famous musicians collaborated together?'],
  [' Who did Paul McCartney form a songwriting duo with? ',
   ' What 

In [16]:
# Display the `evaluate` result as a DataFrame (one row per prompt in the recorded output).
result3.to_df()

Unnamed: 0,prompt,response,response_scores_exact_match,factoid_scores_exact_match,factoid,response_fact_question,response_fact_questions_response,response_fact_questions_sampled_response
0,write a paragraph about Paul McCartney,write a paragraph about Paul McCartney,,"[0.8, 0.9, 0.2, 0.9, 0.4, 0.5, 0.7, 0.2, 0.5, ...",[Paul McCartney is a legendary British musicia...,"[[ Who is a legendary British musician? , Wha...","[[David Bowie., British.], [Beethoven., Musici...","[[[David Bowie., David Bowie, David Bowie., Da..."
1,write a paragraph about John Lennon,write a paragraph about John Lennon,,"[0.4, 0.6, 0.5, 0.5, 0.6, 1.0, 0.6, 0.0, 0.5, ...","[John Lennon was an iconic English musician., ...","[[ Who was an iconic English musician? , What...","[[David Bowie., British], [Musician., Paul McC...","[[[David Bowie., David Bowie., David Bowie., D..."
