# Hallucination Detection

In [101]:
import json
import os
from openai import OpenAI
from tqdm import tqdm
import requests
import httpx
from RefChecker.refchecker.extractor import extractor_prompts
# import RefChecker
import numpy as np
import spacy
from scorer import recompute_hard_labels
import glob
import re
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [102]:
from openai import OpenAI

client = OpenAI(
    api_key="")


## 1. Extracting Claims (Extractor) - Each claim is a merger of triple-structured knowledge.

In [103]:
LLM_TRIPLET_EXTRACTION_PROMPT_Q = extractor_prompts.LLM_TRIPLET_EXTRACTION_PROMPT_Q
LLM_Triplet_To_Claim_PROMPT_Q = extractor_prompts.LLM_Triplet_To_Claim_PROMPT_Q
LLM_CLAIM_EXTRACTION_PROMPT_Q = extractor_prompts.LLM_CLAIM_EXTRACTION_PROMPT_Q

In [104]:
def extract_triplets_to_claims(question, model_output_text):
    prompt = LLM_CLAIM_EXTRACTION_PROMPT_Q.format(q=question, r=model_output_text)

    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are an AI assistant who extracts claims."},
                {"role": "user", "content": prompt}
            ],
            model="gpt-3.5-turbo",
        )

        response_content = chat_completion.choices[0].message.content

        if not response_content:
            print(f"No response for the prompt: {prompt}")
            return []

        return response_content

    except Exception as e:
        print(f"OpenAI API Error: {e}")
        return []


## 2. Obtain Complete References

In [105]:
def get_reference_for_claim(claim):
    prompt = f"""
    Please expand, provide additional relevant factual information and verify the following claim:
    Claims: {claim}

    If the claim is accurate, return the original claim.
    If the claim is inaccurate or incomplete, return a corrected, more detailed statement.
    """

    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are an AI assistant verifying claims."},
                {"role": "user", "content": prompt}
            ],
            model="gpt-3.5-turbo",
        )

        if not chat_completion.choices or len(chat_completion.choices) == 0:
            print(f"No response for the prompt: {prompt}")
            return []

        response_content = chat_completion.choices[0].message.content

        if not response_content.strip():
            print(f"No content in the response for the prompt: {prompt}")
            return []

        return response_content

    except Exception as e:
        print(f"OpenAI API Error: {e}")
        return []


In [106]:
def extract_and_get_references(claims, context):
    references = []
    for claim in claims:
        verified_reference = get_reference_for_claim(claim)
        references.append(verified_reference)

    final_reference = " ".join(references) + " " + context

    return final_reference

## 3. Validate claims, `model_input`, `model_output_text`, and References (Checker)

The validation results should be mapped back to the `model_output_text`, marking hallucination positions and probabilities, and outputting them as `soft_labels`.

In [107]:
def extract_hallucination_positions(model_output_text, hallucination_results):
    # parse JSON data
    try:
        hallucination_results = json.loads(hallucination_results)
    except json.JSONDecodeError:
        print("Failed to decode JSON. Returning empty labels.")
        return {"soft_labels": []}

    soft_labels = []

    # find the position in the original text
    for result in hallucination_results:
        word = result['word']
        prob = result['prob']

        start = 0
        while True:
            start = model_output_text.find(word, start)
            if start == -1:
                break
            end = start + len(word)

            # save soft_labels
            soft_labels.append({
                "start": start,
                "end": end,
                "prob": prob
            })
            start = end

    return {"soft_labels": soft_labels}


In [108]:
def triplets_and_references_checker(claims, model_output_text, references, question):
    prompt = f"""
    Evaluate the model output text for hallucinations by comparing it to the provided references, existed fact, claims, and question (model input). Identify any hallucinated or potentially inaccurate parts in the entire model output text. Highlight the hallucinated word and assign a probability of the hallucination word in the `model output text`.

    ### Question (Model Input)
    {question}

    ### Claims
    {claims}

    ### References
    {references}

    ### Model Output Text
    {model_output_text}

    ### Instructions
    1. Compare each claim with the provided references, question and existing fact (internal knowledge).
    2. If a claim cannot be fully supported by the references, identify the hallucinated words and mark it to `model output text`.
    3. Return character-level offsetss and assign hallucination probabilities.
    4. If the claim is fully supported, hallucination should not to be labeled.
    5. Assign hallucination probabilities based on the following criteria:
       - **0.7 - 1.0**: Fully fabricated or highly speculative content with no supporting evidence.
       - **0.4 - 0.7**: Partially incorrect or speculative content, but some evidence supports parts of the claim.
       - **0.1 - 0.4**: Minor inaccuracies, such as spelling errors, wrong formatting, or small factual deviations.
    6. Ensure that the hallucinated words do not overlap or repeat. If overlapping occurs, merge them or seperate them appropriately.
    7. Ensure the words are shown in the `model output text`.
    8. Highlight text in `model output text` that could potentially be a hallucination even if not explicitly listed in the claims.
    9. Return **all the hallucinated words or phrases** and assign each a hallucination probability (between 0 and 1).
    10. Do not filter out hallucinations based on low probability. Return results for any potential hallucination.
    11. Do not include any explanations, summaries, or additional text. **Return the JSON list directly.**
    12. Ensure all potential hallucinations are listed, even those with probabilities as low as 0.1.

    ### Output Example
    Only return results with all hallucinated words or phrases and their probability **strictly in the following JSON format**:
    [
        {{"word": <example_word>, "prob": <probability>}},
        {{"word": <another_word>, "prob": <probability>}}
    ]

    """

    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system",
                 "content": "You are an AI assistant who checks the factual accuracy of claims and returns position and probability of the hallucination from model output text"},
                {"role": "user", "content": prompt}
            ],
            model="gpt-3.5-turbo",
        )

        if not chat_completion.choices or len(chat_completion.choices) == 0:
            print("Error during hallucination detection: No response choices")
            return {"soft_labels": []}

        raw_labels = chat_completion.choices[0].message.content

        return extract_hallucination_positions(model_output_text, raw_labels)

    except Exception as e:
        print(f"OpenAI API Error: {e}")
        return {"soft_labels": []}


## Main Logic

In [109]:
def hallucination_detect(question, model_output_text, context):
    claims = extract_triplets_to_claims(question, model_output_text)
    references = extract_and_get_references(claims, context)
    hallucination_results = triplets_and_references_checker(claims, model_output_text, references, question)

    soft_labels = hallucination_results.get("soft_labels", [])
    hard_labels = recompute_hard_labels(soft_labels)

    return soft_labels, hard_labels

## Apply on My Dataset

In [110]:
# process the dataset and save the results
def process_dataset(input_folder, output_folder):
    os.makedirs(output_folder, exist_ok=True)
    input_files = glob.glob(os.path.join(input_folder, "*.jsonl"))

    with tqdm(total=len(input_files), desc="Processing Files", unit="file") as file_progress:
        for file_path in input_files:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = [json.loads(line) for line in f]

            output_data = []

            with tqdm(total=len(data), desc=f"Processing {os.path.basename(file_path)}", unit="entry",
                      leave=False) as entry_progress:
                for entry in data:
                    try:
                        question = entry.get("model_input", "")
                        model_output_text = entry.get("model_output_text", "")
                        context = entry.get("context_googlecse", "")

                        soft_labels, hard_labels = hallucination_detect(
                            question, model_output_text, context
                        )

                        output_entry = {
                            "id": entry.get("id"),
                            "lang": entry.get("lang"),
                            "model_input": entry.get("model_input"),
                            "model_output_text": entry.get("model_output_text"),
                            "model_id": entry.get("model_id"),
                            "soft_labels": soft_labels,
                            "hard_labels": hard_labels,
                            "model_output_logits": entry.get("model_output_logits"),
                            "model_output_tokens": entry.get("model_output_tokens")
                        }

                        output_data.append(output_entry)

                    except Exception as e:
                        print(f"OpenAI API Error: {e}")
                        # return []
                        continue
                    entry_progress.update(1)

            output_file = os.path.join(output_folder, os.path.basename(file_path))
            with open(output_file, 'w', encoding='utf-8') as f:
                for item in output_data:
                    f.write(json.dumps(item, ensure_ascii=False) + '\n')

            file_progress.update(1)
            print(f"Processed and saved: {output_file}")

In [111]:
def extract_hallucination_positions(model_output_text, hallucination_results):
    json_matches = re.findall(r'\[\s*\{.*?\}\s*\]', hallucination_results, re.DOTALL)

    if not json_matches:
        print("No valid JSON found. Returning empty labels.")
        return {"soft_labels": []}

    try:
        hallucination_results = json.loads(json_matches[0])
    except json.JSONDecodeError:
        print("Failed to decode extracted JSON. Returning empty labels.")
        return {"soft_labels": []}

    soft_labels = []

    # find the position in the original text
    for result in hallucination_results:
        word = result['word']
        prob = result['prob']

        start = 0
        while True:
            start = model_output_text.find(word, start)
            if start == -1:
                break
            end = start + len(word)

            # save soft_labels
            soft_labels.append({
                "start": start,
                "end": end,
                "prob": prob
            })
            start = end

    return {"soft_labels": soft_labels}

In [112]:
import os

def get_project_root():
    return os.path.dirname(os.getcwd())

input_folder = os.path.join(get_project_root(), "data/exknowledge/")
output_folder = os.path.join(get_project_root(), "data/detect_gpt/")

print("Input Folder Absolute Path:", input_folder)
process_dataset(input_folder, output_folder)


Input Folder Absolute Path: /Users/wt/SemEvalTask3/NCL-UoR/Jalynn/Method1/data/exknowledge/


Processing Files:   0%|          | 0/10 [00:00<?, ?file/s]
Processing mushroom.ar-val.v2.jsonl:   0%|          | 0/50 [00:00<?, ?entry/s][A
Processing mushroom.ar-val.v2.jsonl:   2%|▏         | 1/50 [01:15<1:01:27, 75.26s/entry][A
Processing mushroom.ar-val.v2.jsonl:   4%|▍         | 2/50 [01:51<41:46, 52.21s/entry]  [A
Processing mushroom.ar-val.v2.jsonl:   6%|▌         | 3/50 [06:54<2:10:34, 166.68s/entry][A
Processing mushroom.ar-val.v2.jsonl:   8%|▊         | 4/50 [07:55<1:35:53, 125.07s/entry][A
Processing mushroom.ar-val.v2.jsonl:  10%|█         | 5/50 [08:36<1:11:01, 94.71s/entry] [A
Processing mushroom.ar-val.v2.jsonl:  12%|█▏        | 6/50 [13:24<1:57:45, 160.58s/entry][A
Processing mushroom.ar-val.v2.jsonl:  14%|█▍        | 7/50 [15:25<1:45:41, 147.47s/entry][A
Processing mushroom.ar-val.v2.jsonl:  16%|█▌        | 8/50 [16:05<1:19:17, 113.26s/entry][A
Processing mushroom.ar-val.v2.jsonl:  18%|█▊        | 9/50 [18:35<1:25:11, 124.67s/entry][A
Processing mushroom.ar-v

Processed and saved: /Users/wt/SemEvalTask3/NCL-UoR/Jalynn/Method1/data/detect_gpt/mushroom.ar-val.v2.jsonl



Processing mushroom.es-val.v2.jsonl:   0%|          | 0/50 [00:00<?, ?entry/s][A
Processing mushroom.es-val.v2.jsonl:   2%|▏         | 1/50 [03:31<2:52:38, 211.40s/entry][A
Processing mushroom.es-val.v2.jsonl:   4%|▍         | 2/50 [04:33<1:38:39, 123.31s/entry][A
Processing mushroom.es-val.v2.jsonl:   6%|▌         | 3/50 [05:40<1:16:42, 97.92s/entry] [A
Processing mushroom.es-val.v2.jsonl:   8%|▊         | 4/50 [16:23<3:59:50, 312.84s/entry][A

OpenAI API Error: Error code: 400 - {'error': {'message': "This model's maximum context length is 16385 tokens. However, your messages resulted in 24280 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}



Processing mushroom.es-val.v2.jsonl:  10%|█         | 5/50 [18:29<3:04:16, 245.71s/entry][A
Processing mushroom.es-val.v2.jsonl:  12%|█▏        | 6/50 [20:32<2:29:25, 203.77s/entry][A
Processing mushroom.es-val.v2.jsonl:  14%|█▍        | 7/50 [31:36<4:13:56, 354.35s/entry][A

OpenAI API Error: Error code: 400 - {'error': {'message': "This model's maximum context length is 16385 tokens. However, your messages resulted in 22008 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}



Processing mushroom.es-val.v2.jsonl:  16%|█▌        | 8/50 [35:02<3:35:00, 307.15s/entry][A
Processing mushroom.es-val.v2.jsonl:  18%|█▊        | 9/50 [36:59<2:49:20, 247.82s/entry][A
Processing mushroom.es-val.v2.jsonl:  20%|██        | 10/50 [41:09<2:45:32, 248.32s/entry][A
Processing mushroom.es-val.v2.jsonl:  22%|██▏       | 11/50 [42:17<2:05:39, 193.32s/entry][A
Processing mushroom.es-val.v2.jsonl:  24%|██▍       | 12/50 [45:03<1:57:04, 184.86s/entry][A
Processing Files:  10%|█         | 1/10 [2:03:11<18:28:41, 7391.31s/file]                 [A


KeyboardInterrupt: 

## Evaluation

In [64]:
import pandas as pd
import json
import os
from scorer import load_jsonl_file_to_records, score_iou, score_cor, main, recompute_hard_labels
import argparse as ap
import ast

In [65]:
def evaluate_iou_and_cor(val_dir, detect_dir, output_file):
    """
    Evaluate IoU and Spearman correlation between the reference (val) and detected (detect) files.

    :param val_dir: Directory containing the ground truth files (e.g., data/val/val/)
    :param detect_dir: Directory containing the detected files (e.g., data/detect/)
    :param output_file: Path to save the evaluation results (optional)
    """
    # List all files in the validation directory
    val_files = os.listdir(val_dir)
    detect_files = os.listdir(detect_dir)

    # Ensure that we are comparing the same files (same lang)
    for val_file in val_files:
        # Skip non-JSONL files
        if not val_file.endswith('.jsonl'):
            continue

        # Check if the corresponding detect file exists
        detect_file_path = os.path.join(detect_dir, val_file)

        if not os.path.exists(detect_file_path):
            print(f"Warning: {detect_file_path} not found, skipping.")
            continue

        # Load ground truth (val) and detected (detect) data
        ref_dicts = load_jsonl_file_to_records(os.path.join(val_dir, val_file))
        pred_dicts = load_jsonl_file_to_records(detect_file_path)

        # Calculate IoU and Spearman correlation
        try:
            ious, cors = main(ref_dicts, pred_dicts)
        except IndexError as e:
            print(f"IndexError occurred for file: {val_file}, skipping this file. Error: {e}")
            continue

        # Print or save the results
        print(f"Results for {val_file}:")
        print(f"  Mean IoU: {ious.mean():.8f}")
        print(f"  Mean Spearman Correlation: {cors.mean():.8f}")

        # Optionally, save the results to a file
        if output_file:
            with open(output_file, 'a', encoding='utf-8') as f:
                f.write(f"Results for {val_file}:\n")
                f.write(f"  Mean IoU: {ious.mean():.8f}\n")
                f.write(f"  Mean Spearman Correlation: {cors.mean():.8f}\n\n")


val_dir = 'data/val/val/'
detect_dir = 'data/detect_gpt/'
output_file = 'evaluation_results_gpt.txt'
evaluate_iou_and_cor(val_dir, detect_dir, output_file)

FileNotFoundError: [Errno 2] No such file or directory: 'data/val/val/'