# Hallucination Detection

In [13]:
import json
import os
from openai import OpenAI
from tqdm import tqdm
import requests
import httpx
from RefChecker.refchecker.extractor import extractor_prompts
# import RefChecker
import numpy as np
import spacy
from scorer import recompute_hard_labels
import glob
import re
import urllib3
# Silence urllib3's InsecureRequestWarning for the rest of the kernel session.
# NOTE(review): no cell shown here issues an unverified HTTPS request — confirm this is still needed.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [14]:
from openai import OpenAI

# Read the credential from the environment rather than embedding it in the notebook,
# so the key never ends up in version control or in a shared copy of this file.
# `os` is imported in the notebook's first import cell.
# When the variable is unset, `None` is passed and the OpenAI client applies its own
# key lookup/validation instead of silently sending an empty key with every request.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


## 1. Extracting Claims (Extractor) - Each claim is a merger of triple-structured knowledge.

In [15]:
# Prompt templates re-exported from RefChecker's `extractor_prompts` module.
# Of the three, only LLM_CLAIM_EXTRACTION_PROMPT_Q is referenced in the cells shown here
# (extract_triplets_to_claims fills its `q` and `r` fields).
# NOTE(review): the other two look unused in this notebook — confirm before removing.
LLM_TRIPLET_EXTRACTION_PROMPT_Q = extractor_prompts.LLM_TRIPLET_EXTRACTION_PROMPT_Q
LLM_Triplet_To_Claim_PROMPT_Q = extractor_prompts.LLM_Triplet_To_Claim_PROMPT_Q
LLM_CLAIM_EXTRACTION_PROMPT_Q = extractor_prompts.LLM_CLAIM_EXTRACTION_PROMPT_Q

In [16]:
def extract_triplets_to_claims(question, model_output_text):
    """Ask the chat model to extract claims from a model answer.

    The claim-extraction prompt template is filled with the question (`q`) and the
    answer text (`r`) and sent to gpt-3.5-turbo.

    Args:
        question: The question that was given to the model under evaluation.
        model_output_text: The answer text from which claims are extracted.

    Returns:
        The raw message content of the first choice (a string) on success.
        An empty list when the content is empty or when any exception is raised
        while calling the API or reading the response.
    """
    extraction_prompt = LLM_CLAIM_EXTRACTION_PROMPT_Q.format(q=question, r=model_output_text)
    conversation = [
        {"role": "system", "content": "You are an AI assistant who extracts claims."},
        {"role": "user", "content": extraction_prompt},
    ]

    try:
        completion = client.chat.completions.create(
            messages=conversation,
            model="gpt-3.5-turbo",
        )
        claims_text = completion.choices[0].message.content

        if claims_text:
            return claims_text

        # Empty or missing content: report it and fall back to "no claims".
        print(f"No response for the prompt: {extraction_prompt}")
        return []

    except Exception as e:
        # Best-effort: any failure is logged and treated as "no claims".
        print(f"OpenAI API Error: {e}")
        return []


## 2. Obtain Complete References

In [17]:
def get_reference_for_claim(claim):
    """Ask the chat model to verify and expand a single claim.

    Args:
        claim: The claim text interpolated into the verification prompt.

    Returns:
        The message content of the first choice (a string) when it contains
        non-whitespace text. An empty list when there are no choices, when the
        content is blank, or when any exception is raised along the way.
    """
    verification_prompt = f"""
    Please expand, provide additional relevant factual information and verify the following claim:
    Claims: {claim}

    If the claim is accurate, return the original claim.
    If the claim is inaccurate or incomplete, return a corrected, more detailed statement.
    """
    conversation = [
        {"role": "system", "content": "You are an AI assistant verifying claims."},
        {"role": "user", "content": verification_prompt},
    ]

    try:
        completion = client.chat.completions.create(
            messages=conversation,
            model="gpt-3.5-turbo",
        )

        if not completion.choices:
            print(f"No response for the prompt: {verification_prompt}")
            return []

        reference_text = completion.choices[0].message.content

        # A None content raises here and is handled by the except branch below.
        if reference_text.strip():
            return reference_text

        print(f"No content in the response for the prompt: {verification_prompt}")
        return []

    except Exception as e:
        # Best-effort: any failure is logged and treated as "no reference".
        print(f"OpenAI API Error: {e}")
        return []


In [18]:
def extract_and_get_references(claims, context, reference_fn=None):
    """Build one reference text from per-claim references plus the given context.

    Fixes two problems in the previous version:
    * The extractor returns the claims as a single string; iterating a string yields
      characters, so every character was sent to the model as its own "claim".
      A string is now split into non-empty lines first.
      NOTE(review): assumes the extractor emits one claim per line — confirm against the prompt.
    * The reference lookup returns an empty list on failure, which made
      `" ".join(...)` raise TypeError. Non-string or blank references are now skipped.

    Args:
        claims: Either a string of claims (one per line) or an iterable of claims.
            `None` is treated as "no claims".
        context: Extra reference text appended after the per-claim references.
        reference_fn: Optional callable mapping a claim to its reference text.
            Defaults to `get_reference_for_claim`.

    Returns:
        The usable references joined by single spaces, followed by a space and `context`.
    """
    if reference_fn is None:
        # Resolved lazily so the default is looked up only when it is actually needed.
        reference_fn = get_reference_for_claim

    if isinstance(claims, str):
        claims = [line.strip() for line in claims.splitlines()]

    references = []
    for claim in claims or []:
        if isinstance(claim, str) and not claim.strip():
            # Blank lines carry no claim; skip them instead of spending an API call.
            continue
        reference = reference_fn(claim)
        if isinstance(reference, str) and reference.strip():
            references.append(reference)

    return " ".join(references) + " " + context

## 3. Validate claims, `model_input`, `model_output_text`, and References (Checker)

The validation results are mapped back onto `model_output_text`: each hallucinated span is marked with its character positions and a probability, and the spans are returned as `soft_labels`.

In [19]:
def extract_hallucination_positions(model_output_text, hallucination_results):
    """Locate every occurrence of each flagged word in the model output.

    This variant expects `hallucination_results` to be a bare JSON document.
    Note that later cells in this notebook define the same function name again;
    whichever definition was executed last is the one in effect.

    Args:
        model_output_text: The text in which flagged words are searched.
        hallucination_results: JSON text holding a list of objects with a
            "word" (string) and a "prob" (number) entry. A single object is
            accepted and treated as a one-element list.

    Returns:
        A dict `{"soft_labels": [...]}` where each label has "start", "end"
        (exclusive) and "prob" (float). Unparseable input or entries that are
        malformed yield no labels instead of raising.
    """
    try:
        parsed = json.loads(hallucination_results)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers a None / non-text response from the model.
        print("Failed to decode JSON. Returning empty labels.")
        return {"soft_labels": []}

    if isinstance(parsed, dict):
        parsed = [parsed]
    elif not isinstance(parsed, list):
        return {"soft_labels": []}

    if not isinstance(model_output_text, str):
        return {"soft_labels": []}

    soft_labels = []

    for result in parsed:
        if not isinstance(result, dict):
            continue

        word = result.get("word")
        # An empty word matches at every offset without advancing the search,
        # which previously looped forever; skip it together with non-string words.
        if not isinstance(word, str) or not word:
            continue

        try:
            prob = float(result.get("prob"))
        except (TypeError, ValueError):
            continue

        # Record every non-overlapping occurrence of the word.
        start = model_output_text.find(word)
        while start != -1:
            end = start + len(word)
            soft_labels.append({
                "start": start,
                "end": end,
                "prob": prob
            })
            start = model_output_text.find(word, end)

    return {"soft_labels": soft_labels}


In [20]:
def triplets_and_references_checker(claims, model_output_text, references, question):
    """Ask the chat model to flag hallucinated words and map them to character spans.

    The prompt is capped at `MAX_TOKENS // 3` tokens. Previously the finished prompt
    was cut from the end, which removed the instructions and the JSON format
    specification whenever the references were long. The references are now
    shortened first so the rest of the prompt stays intact; the final cut remains
    only as a hard safety cap.

    Args:
        claims: Claims extracted from the model output (interpolated as text).
        model_output_text: The text being checked; also used to locate flagged words.
        references: Reference text the claims are compared against.
        question: The original question given to the model under evaluation.

    Returns:
        A dict `{"soft_labels": [...]}` as produced by
        `extract_hallucination_positions`. The list is empty when there is no
        usable response or when any exception is raised.
    """
    # tiktoken is imported at notebook level only in a later cell; import it here
    # so the name is always bound when this function runs.
    import tiktoken

    prompt_template = """
   Evaluate hallucinations in the model output text using the question, claims, and references.

    ### Question (Model Input)
    {question}

    ### Claims
    {claims}

    ### References
    {references}

    ### Model Output Text
    {model_output_text}

    ### Instructions
    1. Compare each claim with the provided references, question, and existing knowledge.
    2. Mark unsupported claims in `model output text` and return hallucinated words with character offsets and probabilities.
    3. Assign probabilities based on:
    0.7-1.0: Fully fabricated content.
    0.4-0.7: Partially incorrect content.
    0.1-0.4: Minor inaccuracies.
    4. Merge or adjust overlapping hallucinated words appropriately.
    5. Include hallucinated words even with low probabilities and return them strictly in JSON format:
    [
        {{"word": <example_word>, "prob": <probability>}},
     {{"word": <another_word>, "prob": <probability>}}
    ]

    """

    def render_prompt(reference_text):
        # Fill the template; only the reference section varies between renders.
        return prompt_template.format(
            question=question,
            claims=claims,
            references=reference_text,
            model_output_text=model_output_text,
        )

    token_budget = MAX_TOKENS // 3
    encoder = tiktoken.encoding_for_model("gpt-3.5-turbo")

    prompt = render_prompt(references)
    if len(encoder.encode(prompt)) > token_budget:
        # Tokens used by everything except the references.
        fixed_tokens = len(encoder.encode(render_prompt("")))
        # Token counts are not exactly additive across text boundaries, so keep
        # a few tokens of slack when sizing the reference section.
        reference_budget = max(token_budget - fixed_tokens - 8, 0)
        trimmed_references = truncate_text_to_max_tokens(str(references), reference_budget)
        prompt = render_prompt(trimmed_references)

    # Hard cap, in case the non-reference parts alone exceed the budget.
    prompt = truncate_text_to_max_tokens(prompt, token_budget)

    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system",
                 "content": "You are an AI assistant for hallucination detection."},
                {"role": "user", "content": prompt}
            ],
            model="gpt-3.5-turbo",
        )

        if not chat_completion.choices:
            print("Error during hallucination detection: No response choices")
            return {"soft_labels": []}

        raw_labels = chat_completion.choices[0].message.content

        if not raw_labels:
            # The content can be missing or empty; there is nothing to parse then.
            print("Error during hallucination detection: Empty response content")
            return {"soft_labels": []}

        return extract_hallucination_positions(model_output_text, raw_labels)

    except Exception as e:
        # Best-effort: API and parsing failures alike result in "no labels".
        print(f"OpenAI API Error: {e}")
        return {"soft_labels": []}


## Main Logic

In [21]:
def hallucination_detect(question, model_output_text, context):
    """Run the full pipeline: extract claims, gather references, check, label.

    Args:
        question: The question given to the model under evaluation.
        model_output_text: The model answer to inspect.
        context: Additional reference text appended to the gathered references.

    Returns:
        A `(soft_labels, hard_labels)` tuple. `soft_labels` comes from the
        checker result; `hard_labels` is `recompute_hard_labels(soft_labels)`.
    """
    extracted_claims = extract_triplets_to_claims(question, model_output_text)
    reference_text = extract_and_get_references(extracted_claims, context)
    checker_output = triplets_and_references_checker(
        extracted_claims, model_output_text, reference_text, question
    )

    # Fall back to an empty list when the checker result carries no soft labels.
    soft = checker_output.get("soft_labels", [])
    return soft, recompute_hard_labels(soft)

In [22]:
import tiktoken
# Upper bound on the token total used by the truncation helpers in this notebook.
# NOTE(review): presumably the gpt-3.5-turbo context window — confirm.
MAX_TOKENS = 16385


def truncate_text_to_max_tokens(text, max_tokens):
    """Return `text` limited to at most `max_tokens` gpt-3.5-turbo tokens.

    Args:
        text: The string to limit.
        max_tokens: Maximum number of tokens to keep. Negative values are
            treated as 0; previously a negative value sliced from the end of
            the token list and kept most of the text instead of none of it.

    Returns:
        `text` itself when it already fits, otherwise the decoded first
        `max_tokens` tokens.
        NOTE(review): cutting inside a multi-byte character may decode to a
        replacement character at the very end — confirm this is acceptable.
    """
    max_tokens = max(max_tokens, 0)
    tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo")
    tokens = tokenizer.encode(text)
    if len(tokens) <= max_tokens:
        # Nothing to cut: skip the decode round trip and hand back the input.
        return text
    return tokenizer.decode(tokens[:max_tokens])

def process_data(question, context, model_output_text, prompt):
    """Shrink the four text parts proportionally so their token total fits MAX_TOKENS.

    Inputs that already fit are returned unchanged. This also avoids the
    division by zero the previous version hit when every part was empty.

    Args:
        question: Question text.
        context: Context / reference text.
        model_output_text: Model answer text.
        prompt: Prompt text.

    Returns:
        A tuple `(question, context, model_output_text, prompt, total_tokens)`
        where `total_tokens` is the token count of the returned parts.
    """
    tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo")

    parts = [question, context, model_output_text, prompt]
    counts = [len(tokenizer.encode(part)) for part in parts]
    total_tokens = sum(counts)

    if total_tokens <= MAX_TOKENS:
        # Within budget (including the all-empty case): nothing to truncate.
        return question, context, model_output_text, prompt, total_tokens

    print(f"Warning: Total tokens ({total_tokens}) exceed the maximum limit ({MAX_TOKENS}). Adjusting inputs.")

    excess_tokens = total_tokens - MAX_TOKENS

    # Each part gives up a share of the excess proportional to its own size.
    # int() rounds down, so the budgets never add up to more than MAX_TOKENS.
    budgets = [int(count - (count / total_tokens) * excess_tokens) for count in counts]

    question, context, model_output_text, prompt = (
        truncate_text_to_max_tokens(part, budget) for part, budget in zip(parts, budgets)
    )

    # Recount after truncation so the reported total matches the returned text.
    total_tokens = sum(
        len(tokenizer.encode(part))
        for part in (question, context, model_output_text, prompt)
    )

    return question, context, model_output_text, prompt, total_tokens

Failed to decode JSON. Returning empty labels.
([], [])


In [23]:
import json
import re

def extract_hallucination_positions(model_output_text, hallucination_results):
    """Pull the first JSON array of objects out of a model reply and locate its words.

    The reply may wrap the JSON in prose or Markdown code fences; the array is
    found with a regular expression. The earlier `.strip('```json')` chain was
    removed: `str.strip` takes a set of characters rather than a prefix, and the
    regex match always starts with `[` and ends with `]`, so it never did anything.

    Note that a later cell in this notebook defines the same function name again;
    whichever definition was executed last is the one in effect.

    Args:
        model_output_text: The text in which flagged words are searched.
        hallucination_results: The raw reply text from the checker model.

    Returns:
        A dict `{"soft_labels": [...]}` where each label has "start", "end"
        (exclusive) and "prob" (float). Unusable replies and malformed entries
        yield no labels instead of raising.
    """
    print("hallucination_results:", hallucination_results)

    if not isinstance(hallucination_results, str) or not isinstance(model_output_text, str):
        # A missing (None) reply or text cannot be searched.
        print("No valid JSON found. Returning empty labels.")
        return {"soft_labels": []}

    json_matches = re.findall(r'\[\s*\{.*?\}\s*\]', hallucination_results, re.DOTALL)

    if not json_matches:
        print("No valid JSON found. Returning empty labels.")
        return {"soft_labels": []}

    try:
        parsed = json.loads(json_matches[0])
    except json.JSONDecodeError as e:
        print(f"Failed to decode extracted JSON. Error: {e}. Returning empty labels.")
        return {"soft_labels": []}

    soft_labels = []

    for result in parsed:
        if not isinstance(result, dict):
            continue

        word = result.get("word")
        # An empty word matches at every offset without advancing the search,
        # which previously looped forever; skip it together with non-string words.
        if not isinstance(word, str) or not word:
            continue

        try:
            prob = float(result.get("prob"))
        except (TypeError, ValueError):
            continue

        # Record every non-overlapping occurrence of the word.
        start = model_output_text.find(word)
        while start != -1:
            end = start + len(word)
            soft_labels.append({
                "start": start,
                "end": end,
                "prob": prob
            })
            start = model_output_text.find(word, end)

    return {"soft_labels": soft_labels}

In [24]:
import json
import re

def extract_hallucination_positions(model_output_text, hallucination_results):
    """Pull the first JSON array of objects out of a model reply and locate its words.

    The reply may surround the JSON with prose or Markdown code fences; the
    array is found with a regular expression and then parsed.

    Args:
        model_output_text: The text in which flagged words are searched.
        hallucination_results: The raw reply text from the checker model.

    Returns:
        A dict `{"soft_labels": [...]}` where each label has "start", "end"
        (exclusive) and "prob" (float). Replies without a parseable array, and
        entries without a non-empty string "word" and a numeric "prob", yield
        no labels instead of raising.
    """
    print("hallucination_results:", hallucination_results)

    empty_result = {"soft_labels": []}

    if not isinstance(hallucination_results, str) or not isinstance(model_output_text, str):
        # A missing (None) reply or text cannot be searched.
        print("No valid JSON found. Returning empty labels.")
        return empty_result

    json_matches = re.findall(r'\[\s*\{.*?\}\s*\]', hallucination_results, re.DOTALL)

    if not json_matches:
        print("No valid JSON found. Returning empty labels.")
        return empty_result

    try:
        parsed = json.loads(json_matches[0])
    except json.JSONDecodeError as e:
        print(f"Failed to decode extracted JSON. Error: {e}. Returning empty labels.")
        return empty_result

    soft_labels = []

    for entry in parsed:
        if not isinstance(entry, dict):
            continue

        word = entry.get("word")
        # An empty word is found at every offset and never advances the search,
        # which previously looped forever; skip it together with non-string words.
        if not isinstance(word, str) or not word:
            continue

        try:
            prob = float(entry.get("prob"))
        except (TypeError, ValueError):
            continue

        # Record every non-overlapping occurrence of the word.
        start = model_output_text.find(word)
        while start != -1:
            end = start + len(word)
            soft_labels.append({
                "start": start,
                "end": end,
                "prob": prob
            })
            start = model_output_text.find(word, end)

    return {"soft_labels": soft_labels}

In [25]:
import os

def get_project_root():
    """Return the parent directory of the current working directory."""
    parent_dir, _current_name = os.path.split(os.getcwd())
    return parent_dir

# Input and output folders are built under the parent of the current working directory.
input_folder = os.path.join(get_project_root(), "data/exknowledge/")
output_folder = os.path.join(get_project_root(), "data/detect_gpt/")

print("Input Folder Absolute Path:", input_folder)
# NOTE(review): `process_dataset` is not defined in any cell shown here. Unless it is
# defined elsewhere, this call raises NameError on a fresh kernel — confirm and restore it.
process_dataset(input_folder, output_folder)


Input Folder Absolute Path: /Users/wt/SemEvalTask3/NCL-UoR/data/exknowledge/


Processing Files: 0file [00:00, ?file/s]


## Evaluation

In [26]:
import pandas as pd
import json
import os
from scorer import load_jsonl_file_to_records, score_iou, score_cor, main, recompute_hard_labels
import argparse as ap
import ast

In [27]:
def evaluate_iou_and_cor(val_dir, detect_dir, output_file=None):
    """
    Evaluate IoU and Spearman correlation between the reference (val) and detected (detect) files.

    Every `.jsonl` file in `val_dir` is compared with the file of the same name in
    `detect_dir`. Files are processed in sorted order so the report is reproducible.

    :param val_dir: Directory containing the ground truth files (e.g., data/val/val/)
    :param detect_dir: Directory containing the detected files (e.g., data/detect/)
    :param output_file: Optional path; when given, the results are appended to this file
    :raises FileNotFoundError: If `detect_dir` is not an existing directory
    :return: None; results are printed and optionally appended to `output_file`
    """
    if not os.path.isdir(detect_dir):
        # Fail early with a clear message instead of warning once per file.
        raise FileNotFoundError(f"Detection directory not found: {detect_dir}")

    for val_file in sorted(os.listdir(val_dir)):
        # Skip non-JSONL files
        if not val_file.endswith('.jsonl'):
            continue

        # The detected file must carry the same name as the ground truth file
        detect_file_path = os.path.join(detect_dir, val_file)

        if not os.path.exists(detect_file_path):
            print(f"Warning: {detect_file_path} not found, skipping.")
            continue

        # Load ground truth (val) and detected (detect) data
        ref_dicts = load_jsonl_file_to_records(os.path.join(val_dir, val_file))
        pred_dicts = load_jsonl_file_to_records(detect_file_path)

        # Calculate IoU and Spearman correlation
        try:
            ious, cors = main(ref_dicts, pred_dicts)
        except IndexError as e:
            print(f"IndexError occurred for file: {val_file}, skipping this file. Error: {e}")
            continue

        # Build the report once so the printed and the saved text cannot drift apart
        report = (
            f"Results for {val_file}:\n"
            f"  Mean IoU: {ious.mean():.8f}\n"
            f"  Mean Spearman Correlation: {cors.mean():.8f}"
        )
        print(report)

        # Optionally, append the results to a file
        if output_file:
            with open(output_file, 'a', encoding='utf-8') as f:
                f.write(report + "\n\n")


# NOTE(review): these paths are relative to the current working directory, whereas the
# detection cell builds its output folder under the *parent* of the working directory —
# confirm that both refer to the same `data/detect_gpt/` folder.
val_dir = 'data/val/val/'
detect_dir = 'data/detect_gpt/'
output_file = 'evaluation_results_gpt.txt'
evaluate_iou_and_cor(val_dir, detect_dir, output_file)

Results for mushroom.ar-val.v2.jsonl:
  Mean IoU: 0.18210507
  Mean Spearman Correlation: 0.06644335
