# Claim-based Question-Answer Hallucination Detection

Import necessary packages.

In [1]:
import time
from uqlm.longform.black_box import ClaimQAScorer
from uqlm import BlackBoxUQ

#### Load LLM and device

In [2]:
# from langchain_google_vertexai import ChatVertexAI
# llm = ChatVertexAI(model="gemini-1.5-flash")

from dotenv import load_dotenv, find_dotenv
from langchain_openai import AzureChatOpenAI

# Load environment variables from the .env file located by find_dotenv().
# NOTE(review): presumably the Azure OpenAI endpoint and API key are supplied
# this way, since none are passed explicitly below — confirm.
load_dotenv(find_dotenv())
# Chat model shared by every scorer constructed later in this notebook.
llm = AzureChatOpenAI(
    deployment_name="gpt-4o-mini",
    openai_api_type="azure",
    openai_api_version="2024-02-15-preview",
    temperature=1,  # User to set temperature
)

In [3]:
import torch

# Choose the torch device in priority order:
# CUDA (NVIDIA GPU) first, then MPS (macOS), and CPU as the fallback.
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device.type} device")

Using mps device


#### Setup Prompts and Black Box Scorer

In [4]:
# Long-form generation prompts; each one yields a separate response that is
# scored independently in the cells below.
prompts = [
    "write a paragraph about Paul McCartney",
    "write a paragraph about John Lennon",
]

In [5]:
# Build a BlackBoxUQ scorer around the same LLM, restricted to the
# "exact_match" scorer and given the torch device selected earlier.
# NOTE(review): `bb_scorer` is not referenced by any later cell visible in
# this notebook — confirm it is still needed.
bb_scorer = BlackBoxUQ(
    llm=llm,
    max_calls_per_min=500,  # set value to avoid rate limit error
    device=device,
    scorers=["exact_match"],
)

#### Claim-QA class

There are three methods that can be used to compute the Claim-QA score:
- `generate_and_score`: If you only have prompts, call this method to generate long-form responses, decompose each response into factoids, generate questions for each factoid, and compute question-level, factoid-level, and response-level scores.
- `score`: If you have already generated the long-form responses, call this method to decompose them into factoids, generate questions, and compute the scores.
- `evaluate`: If you have already decomposed the long-form responses into factoids, call this method to generate questions and compute the scores from those factoids.

##### 1. `generate_and_score` method

In [6]:
claim_qa = ClaimQAScorer(llm=llm, black_box_scorers=["exact_match"], response_template="atomic", max_calls_per_min=500, num_questions=2)
start_time = time.time()
result = await claim_qa.generate_and_score(prompts=prompts)

print(f"Computation time: {time.time() - start_time} seconds")

Number of factoids per response:  [27, 30]
Number of total questions:  57
BB result:  [0.6, 0.4, 1.0, 0.6, 0.4, 1.0, 0.2, 0.8, 0.0, 1.0, 0.0, 1.0, 0.6, 1.0, 0.4, 0.6, 0.0, 0.0, 0.8, 0.4, 0.0, 0.2, 0.0, 0.0, 1.0, 0.0, 0.2, 0.6, 1.0, 0.6, 1.0, 1.0, 0.0, 0.0, 1.0, 0.8, 0.2, 1.0, 1.0, 1.0, 1.0, 0.2, 0.6, 0.6, 1.0, 0.6, 0.0, 0.2, 0.0, 1.0, 0.6, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0]
Length of BB result:  57
Computation time: 29.642112016677856 seconds


In [7]:
# Pull the per-factoid exact-match scores out of the result's dict form
# (indexed per prompt below) and report how many factoids each of the two
# responses was decomposed into.
factoid_scores = result.to_dict()["data"]["factoid_scores_exact_match"]
print(" Number of factoids for first response: ", len(factoid_scores[0]))
print(" Number of factoids for second response: ", len(factoid_scores[1]))

 Number of factoids for first response:  27
 Number of factoids for second response:  30


In [8]:
# Tabular view of the `generate_and_score` result; the rendered output has one
# row per prompt with the response, its response-level exact-match score, and
# the list of per-factoid scores.
result.to_df()

Unnamed: 0,prompt,response,response_scores_exact_match,factoid_scores_exact_match
0,write a paragraph about Paul McCartney,"Paul McCartney is a renowned British musician,...",0.451852,"[0.6, 0.4, 1.0, 0.6, 0.4, 1.0, 0.2, 0.8, 0.0, ..."
1,write a paragraph about John Lennon,"John Lennon was a British musician, singer, an...",0.566667,"[0.6, 1.0, 0.6, 1.0, 1.0, 0.0, 0.0, 1.0, 0.8, ..."


##### 2. `score` method

In [9]:
claim_qa2 = ClaimQAScorer(llm=llm, black_box_scorers=["exact_match"], response_template="atomic", max_calls_per_min=500)
start_time = time.time()
result2 = await claim_qa2.score(prompts=claim_qa.prompts, responses=claim_qa.responses)
print(f"Computation time: {time.time() - start_time} seconds")

Number of factoids per response:  [26, 29]
Number of total questions:  55
BB result:  [1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.4, 0.0, 1.0, 0.0, 0.4, 0.8, 0.6, 0.8, 0.0, 0.4, 0.4, 0.0, 0.0, 0.4, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.8, 1.0, 1.0, 1.0, 1.0, 0.8, 0.8, 1.0, 1.0, 0.0, 1.0, 0.8, 0.0, 0.4, 0.0, 0.8, 0.8, 0.4, 0.8, 1.0, 0.4, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.4]
Length of BB result:  55
Computation time: 20.06681513786316 seconds


In [10]:
# Tabular view of the `score` result; the rendered output has one row per
# prompt with the response, its response-level exact-match score, and the list
# of per-factoid scores.
result2.to_df()

Unnamed: 0,prompt,response,response_scores_exact_match,factoid_scores_exact_match
0,write a paragraph about Paul McCartney,"Paul McCartney is a renowned British musician,...",0.476923,"[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.4, 0.0, ..."
1,write a paragraph about John Lennon,"John Lennon was a British musician, singer, an...",0.627586,"[1.0, 0.8, 1.0, 1.0, 1.0, 1.0, 0.8, 0.8, 1.0, ..."


##### 3. `evaluate` method

In [11]:
claim_qa3 = ClaimQAScorer(llm=llm, black_box_scorers=["exact_match"], response_template="atomic", max_calls_per_min=500)
start = time.time()
result3 = await claim_qa3.evaluate(prompts=claim_qa.prompts, responses=claim_qa.responses, factoids=claim_qa.factoids)
stop = time.time()
print(f"Computation time: {stop - start} seconds")

Number of factoids per response:  [27, 30]
Number of total questions:  57
BB result:  [1.0, 0.0, 0.8, 0.4, 0.8, 0.0, 1.0, 0.6, 0.0, 0.8, 0.0, 0.4, 0.8, 1.0, 1.0, 0.0, 0.6, 0.6, 0.8, 1.0, 0.0, 0.0, 0.0, 0.0, 0.8, 0.0, 0.2, 1.0, 1.0, 0.8, 1.0, 0.8, 1.0, 0.4, 1.0, 0.6, 0.2, 0.0, 1.0, 0.8, 0.0, 0.6, 0.0, 0.8, 0.0, 0.6, 0.2, 0.0, 0.0, 0.8, 0.8, 0.2, 1.0, 1.0, 0.0, 0.0, 0.0]
Length of BB result:  57
Computation time: 18.384706735610962 seconds


In [12]:
# Inspect the nested list stored in `response_fact_questions` after `evaluate`
# (one inner list per prompt in the rendered output).
# NOTE(review): the rendered entries read like short answers rather than
# questions — confirm what this attribute is meant to hold.
claim_qa3.response_fact_questions

[['British musician.',
  'Paul McCartney',
  'Musician.',
  'Musician.',
  'Paul McCartney.',
  'The Beatles',
  'June 18, 1942.',
  'Liverpool.',
  'His skiffle band performance.',
  'Paul McCartney.',
  'Co-writer and melodic contributor.',
  '"Hey Jude."',
  '"Hey Jude"',
  '"Yesterday"',
  '1970',
  'He formed Wings.',
  'Wings',
  'Co-founding The Beatles.',
  'Versatility.',
  'Active and influential.',
  'Advocates for peace, animal rights, and environmental causes.',
  'Advocates for veganism, supports renewable energy, participates in environmental campaigns.',
  'Innovative songwriting and timeless melodies.',
  'Paul McCartney has received numerous awards, including 18 Grammy Awards, induction into the Rock and Roll Hall of Fame, and a knighthood.',
  '18',
  'Influences songwriting, recording techniques, and genre fusion.',
  'Inspires through music and activism.'],
 ['British musician.',
  'Musician.',
  'Singer-songwriter.',
  'John Lennon.',
  'The Beatles.',
  'The Beat

In [13]:
# Inspect the nested list stored in `response_fact_questions_responses`; in the
# rendered output each innermost list holds five strings.
# NOTE(review): presumably these are the sampled LLM answers for each generated
# question — confirm.
claim_qa3.response_fact_questions_responses

[[['British musician.',
   'British musician.',
   'British musician.',
   'British musician.',
   'British musician.'],
  ['Paul McCartney.',
   'Paul McCartney.',
   'Paul McCartney.',
   'Paul McCartney.',
   'Paul McCartney.'],
  ['Musician.', 'Musician.', 'Musician', 'Musician.', 'Musician.'],
  ['Singer-songwriter.', 'Musician.', 'Musician', 'Musician', 'Musician.'],
  ['Paul McCartney.',
   'Paul McCartney.',
   'Paul McCartney',
   'Paul McCartney.',
   'Paul McCartney.'],
  ['The Beatles.',
   'The Beatles.',
   'The Beatles.',
   'The Beatles.',
   'The Beatles.'],
  ['June 18, 1942.',
   'June 18, 1942.',
   'June 18, 1942.',
   'June 18, 1942.',
   'June 18, 1942.'],
  ['Liverpool.',
   'Liverpool, England.',
   'Liverpool.',
   'Liverpool, England.',
   'Liverpool.'],
  ['His school talent show performance.',
   'His song "I\'ll Follow the Sun."',
   'His composition of "In Spite of All the Danger."',
   'Singing and playing guitar.',
   'Playing in local bands.'],
  ['Pau

In [14]:
# Inspect the factoids held by the scorer: one list of factoid sentences per
# prompt. Presumably these are the ones passed to `evaluate` via
# `factoids=claim_qa.factoids` — confirm.
claim_qa3.factoids

[['Paul McCartney is a British musician.',
  'Paul McCartney is a renowned musician.',
  'Paul McCartney is a singer.',
  'Paul McCartney is a songwriter.',
  'Paul McCartney is best known as a member of The Beatles.',
  'The Beatles revolutionized popular music in the 1960s.',
  'Paul McCartney was born on June 18, 1942.',
  'Paul McCartney was born in Liverpool, England.',
  'Paul McCartney showcased his musical talent from an early age.',
  'Paul McCartney was a key songwriter alongside John Lennon.',
  "Paul McCartney contributed to some of The Beatles' most beloved hits.",
  '"Hey Jude" is one of The Beatles\' hits.',
  '"Let It Be" is one of The Beatles\' hits.',
  '"Yesterday" is one of The Beatles\' hits.',
  'The Beatles disbanded in 1970.',
  'Paul McCartney embarked on a successful solo career.',
  'Paul McCartney formed the band Wings.',
  'Paul McCartney produced a string of chart-topping songs.',
  "Paul McCartney's versatility spans various genres.",
  'Paul McCartney re

In [15]:
# NOTE(review): this cell repeats the earlier `claim_qa3.response_fact_questions`
# display and produces the same output — consider removing one of the two.
claim_qa3.response_fact_questions

[['British musician.',
  'Paul McCartney',
  'Musician.',
  'Musician.',
  'Paul McCartney.',
  'The Beatles',
  'June 18, 1942.',
  'Liverpool.',
  'His skiffle band performance.',
  'Paul McCartney.',
  'Co-writer and melodic contributor.',
  '"Hey Jude."',
  '"Hey Jude"',
  '"Yesterday"',
  '1970',
  'He formed Wings.',
  'Wings',
  'Co-founding The Beatles.',
  'Versatility.',
  'Active and influential.',
  'Advocates for peace, animal rights, and environmental causes.',
  'Advocates for veganism, supports renewable energy, participates in environmental campaigns.',
  'Innovative songwriting and timeless melodies.',
  'Paul McCartney has received numerous awards, including 18 Grammy Awards, induction into the Rock and Roll Hall of Fame, and a knighthood.',
  '18',
  'Influences songwriting, recording techniques, and genre fusion.',
  'Inspires through music and activism.'],
 ['British musician.',
  'Musician.',
  'Singer-songwriter.',
  'John Lennon.',
  'The Beatles.',
  'The Beat

In [16]:
# Tabular view of the `evaluate` result; the rendered output has one row per
# prompt with the response, its response-level exact-match score, and the list
# of per-factoid scores.
result3.to_df()

Unnamed: 0,prompt,response,response_scores_exact_match,factoid_scores_exact_match
0,write a paragraph about Paul McCartney,"Paul McCartney is a renowned British musician,...",0.466667,"[1.0, 0.0, 0.8, 0.4, 0.8, 0.0, 1.0, 0.6, 0.0, ..."
1,write a paragraph about John Lennon,"John Lennon was a British musician, singer, an...",0.52,"[1.0, 1.0, 0.8, 1.0, 0.8, 1.0, 0.4, 1.0, 0.6, ..."
