In this notebook, we investigate how hallucinated insights can be attributed to the different input documents. To do so, we propose a variant of the substring-matching procedure that the GPT-4 paper uses in the context of data contamination. 
Our variant, however, considers every 50-character substring (or a shorter one, if the insight itself is shorter) that starts at a word of the insight, and checks whether it occurs in the input document at position i. If it does, we say that the insight belongs to that document.

In [1]:
from functools import partial
from collections import defaultdict
from typing import List

import numpy as np
import pandas as pd
import os, json, glob, string, pickle, tqdm

import sys; sys.path.append("../src")
from mitigation_base import SharedOnlyFilter
from utils_io import read_json

import matplotlib
from matplotlib.ticker import MultipleLocator, PercentFormatter

from plotting_utils import *

# Render all matplotlib text with a serif font family.
matplotlib.rc('font', family='serif')

# Figure widths; COL_WIDTH is passed as a ``figsize`` value by the plotting
# cells below (matplotlib interprets ``figsize`` in inches).
# NOTE(review): presumably the full-text and single-column widths of the
# target paper template -- confirm.
FULL_WIDTH = 6.75133
COL_WIDTH  = 3.25063

  from .autonotebook import tqdm as notebook_tqdm


### Setup

In [2]:
# Number of document positions (0 .. COMBINATION_SIZE - 1) inspected per insight.
COMBINATION_SIZE = 5
# Same string as the key read from ``data["evaluation_assignments"]`` when
# the correctness labels are loaded in the processing loop.
METRIC = "metric__bidirectional"

# Experiment grid: every (domain, prompt type, model) triple is processed.
DOMAINS = ("news", "conv")

MODELS = (
    "accounts/fireworks/models/llama-v3p1-70b-instruct",
    "gpt-4o-2024-05-13",
    "gpt-3.5-turbo-0125",
    "accounts/fireworks/models/qwen2-72b-instruct",
    "gemini-1.5-flash",
)

PROMPT_TYPES = ("subtopic", "subtopic_trustworthy")

# Keyword arguments shared by every document matcher.
_DOC_MATCHER_KWARGS = dict(
    # Matching parameters -- tweak these to your taste.
    substr_len=50,
    use_lowercase=True,
    remove_stopwords=False,
    remove_punctuation=True,
    # Fixed configuration.
    classname="contamination_base.SubstringMatch",
    shared_by_or_more=2,
    cache_only=False,
)

# Factory for document matchers: call ``create_doc_matcher(...)`` to obtain a
# NEW instance for each results file. A fresh instance is needed because the
# matcher is modified as a side effect of processing the documents of a file.
# (The documents are independent, so uuids should not clash anyway, but
# better safe than sorry.)
create_doc_matcher = partial(SharedOnlyFilter, **_DOC_MATCHER_KWARGS)

## Process data

In [3]:
def get_matches(data, cache_dir: str, combination_size: int, exclude_insights_with_no_matches: bool = True) -> pd.DataFrame:
    """Build a long-format table of document attributions for every predicted insight.

    A fresh document matcher is created via ``create_doc_matcher`` and run on
    ``data``. Its output under ``"postprocessing__SharedOnlyFilter"`` holds one
    record per predicted insight (regardless of its correctness label), each
    with a ``"matches"`` entry. Every insight is expanded into
    ``combination_size`` rows, one per document position.

    Args:
        data: Contents of one results file, passed unchanged to the matcher.
        cache_dir: Cache directory handed to the document matcher.
        combination_size: Number of document positions to emit per insight
            (positions ``0 .. combination_size - 1``).
        exclude_insights_with_no_matches: If true, insights whose ``"matches"``
            entry is empty are skipped entirely.

    Returns:
        A DataFrame with columns ``response_id``, ``pred_rank``, ``pred_text``,
        ``pred_uuid``, ``doc_position`` and ``is_present`` (1 if
        ``doc_position`` is contained in the insight's matches, else 0).
        The columns are present even when no rows are produced, so callers can
        always index on ``response_id`` / ``pred_uuid``.
    """
    doc_matcher = create_doc_matcher(cache_dir=cache_dir)
    doc_matcher.init_cache()
    match_results = doc_matcher.run(data)
    # Correctness labels are not part of the matcher output; they have to be
    # joined in afterwards using (response_id, pred_uuid).
    match_results_df = pd.DataFrame(match_results["postprocessing__SharedOnlyFilter"])

    columns = ["response_id", "pred_rank", "pred_text", "pred_uuid", "doc_position", "is_present"]
    records = []
    for _, row in match_results_df.iterrows():
        # NOTE(review): ``matches`` is assumed to be a collection of matched
        # document positions -- confirm against SharedOnlyFilter.
        matches = row["matches"]
        if exclude_insights_with_no_matches and len(matches) == 0:
            continue

        for doc_position in range(combination_size):
            records.append({
                "response_id": row["response_id"],
                "pred_rank": row["pred_rank"],
                "pred_text": row["pred_text"],
                "pred_uuid": row["pred_uuid"],
                "doc_position": doc_position,
                "is_present": int(doc_position in matches),
            })

    # Passing ``columns`` explicitly keeps the schema stable for empty results.
    return pd.DataFrame(records, columns=columns)


def get_correctness_labels(labels: List[dict], examples: pd.DataFrame) -> pd.DataFrame:
    """Attach coverage labels to the attribution rows in ``examples``.

    Args:
        labels: Label records (anything ``pd.DataFrame`` accepts, typically a
            list of dicts) with at least the fields ``response_idx``,
            ``pred_uuid`` and ``coverage``. May be empty.
        examples: Attribution rows containing the columns ``response_id`` and
            ``pred_uuid``.

    Returns:
        ``examples`` indexed by ``(response_id, pred_uuid)`` and left-joined
        with the labels. Rows without a label (or all rows, if ``labels`` is
        empty) get ``coverage == "NO_COVERAGE"``. ``examples`` is not modified.
    """
    index_cols = ["response_id", "pred_uuid"]
    indexed_examples = examples.set_index(index_cols)

    # The labels identify the response with "response_idx"; align the name
    # with the one used in ``examples`` before joining.
    labels_df = pd.DataFrame(labels).rename({"response_idx": "response_id"}, axis=1)

    if labels_df.columns.empty:
        # No labels at all: there is nothing to join on (and ``set_index``
        # would raise KeyError), so every insight is simply unlabelled.
        return indexed_examples.assign(coverage="NO_COVERAGE")

    coverage_labels = indexed_examples.join(labels_df.set_index(index_cols), how="left")
    coverage_labels["coverage"] = coverage_labels["coverage"].fillna("NO_COVERAGE")
    return coverage_labels

In [6]:
# Collect the document-attribution rows (with correctness labels) of every
# (domain, prompt type, model) combination into a single DataFrame.
all_results = []

for domain in DOMAINS:
    for prompt_type in PROMPT_TYPES:
        for model in MODELS:
            # NOTE(review): the path hard-codes "combinations-10" whereas the
            # cache directory below is derived from COMBINATION_SIZE -- confirm
            # that this mismatch is intended.
            pattern = f"../outputs_{domain}/run_evals_postprocessing-multi-request/gpt-4o-mini-2024-07-18/results_some_shared/{prompt_type}/SummHay__combinations-10/{model}/topic_{domain}*__*examples*.json"
            # glob returns paths in arbitrary order; sort for reproducible row order.
            result_filepaths = sorted(glob.glob(pattern))
            if len(result_filepaths) > 5:
                raise ValueError(
                    f"Unexpected number of result files ({len(result_filepaths)} > 5) for pattern: {pattern}"
                )

            for path in result_filepaths:
                data = read_json(path)
                matches = get_matches(
                    data,
                    combination_size=COMBINATION_SIZE,
                    exclude_insights_with_no_matches=True,
                    cache_dir=f"./document_provenance/{domain}/{prompt_type}/combinations-{COMBINATION_SIZE}",
                )
                matches = get_correctness_labels(
                    labels=data["evaluation_assignments"][METRIC]["labels"],
                    examples=matches,
                )
                matches["domain"] = domain
                matches["prompt_type"] = prompt_type
                # Keep only the last path component of the model identifier.
                matches["model"] = model.rpartition("/")[-1]
                all_results.append(matches.reset_index())

if not all_results:
    # pd.concat would fail with an opaque "No objects to concatenate".
    raise ValueError("No result files were found; check DOMAINS, PROMPT_TYPES, MODELS and the outputs directories.")

all_results = pd.concat(all_results).reset_index(drop=True)

50it [00:00, 87.36it/s] 
50it [00:00, 117.05it/s]
50it [00:00, 107.30it/s]
50it [00:00, 116.27it/s]
50it [00:00, 144.70it/s]
50it [00:00, 130.46it/s]
50it [00:00, 121.24it/s]
50it [00:00, 129.20it/s]
50it [00:00, 266.78it/s]
50it [00:00, 240.26it/s]
50it [00:00, 228.11it/s]
50it [00:00, 292.52it/s]
50it [00:00, 93.31it/s] 
50it [00:00, 140.01it/s]
50it [00:00, 128.70it/s]
50it [00:00, 133.17it/s]
100it [00:01, 98.38it/s]
100it [00:00, 101.16it/s]
100it [00:00, 131.71it/s]
100it [00:00, 102.35it/s]
50it [00:00, 184.36it/s]
50it [00:00, 153.92it/s]
50it [00:00, 143.54it/s]
50it [00:00, 166.02it/s]
50it [00:00, 196.32it/s]
50it [00:00, 158.71it/s]
50it [00:00, 162.85it/s]
50it [00:00, 170.98it/s]
50it [00:00, 294.10it/s]
50it [00:00, 275.36it/s]
50it [00:00, 285.19it/s]
50it [00:00, 349.13it/s]
50it [00:00, 171.89it/s]
50it [00:00, 167.19it/s]
50it [00:00, 161.73it/s]
50it [00:00, 153.76it/s]
100it [00:00, 102.48it/s]
100it [00:00, 116.69it/s]
100it [00:00, 167.14it/s]
100it [00:00, 123.6

## 1.1. Definition of correctness

In [7]:
def is_correct(coverage, use_partial=True):
    """Return whether a coverage label counts as correct.

    ``"FULL_COVERAGE"`` is always accepted; ``"PARTIAL_COVERAGE"`` is accepted
    only when ``use_partial`` is true. Any other value is incorrect.
    """
    if use_partial:
        accepted = ("FULL_COVERAGE", "PARTIAL_COVERAGE")
    else:
        accepted = ("FULL_COVERAGE",)
    return coverage in accepted

# Two notions of correctness: lenient (full or partial coverage) and strict
# (full coverage only).
all_results["correct (fc+pc)"] = all_results["coverage"].apply(partial(is_correct, use_partial=True))
all_results["correct (fc)"] = all_results["coverage"].apply(partial(is_correct, use_partial=False))

### Correctness definitions

In [None]:
# Rows of insights that are NOT correct under each correctness definition.
all_results_incorrect_fcpc = all_results.loc[~all_results["correct (fc+pc)"]]
all_results_incorrect_fc = all_results.loc[~all_results["correct (fc)"]]

## Subtopic

In [None]:
# Figure for the "subtopic" prompt: for insights that are incorrect under the
# lenient definition (neither full nor partial coverage), plot `is_present`
# against document position -- one panel per domain, one line per model.
prompt_type = "subtopic"
subset = all_results_incorrect_fcpc[all_results_incorrect_fcpc["prompt_type"] == prompt_type]
subset_news = subset[subset["domain"] == "news"]
subset_conv = subset[subset["domain"] == "conv"]

fig, axes = plt.subplots(2, 1, sharex=True, figsize=(COL_WIDTH, COL_WIDTH), dpi=300)

# Plotting and styling shared by both panels.
for ax, title, panel_data in zip(axes, ("(a) news", "(b) conv"), (subset_news, subset_conv)):
    ax.set_title(title, fontsize=10)
    sns.lineplot(panel_data, x="doc_position", y="is_present", hue="model", ax=ax)

    ax.xaxis.set_major_locator(MultipleLocator(1))
    ax.yaxis.set_minor_locator(MultipleLocator(0.05))
    ax.yaxis.set_major_locator(MultipleLocator(0.10))
    # `is_present` is 0/1, so y values are fractions; display 1.0 as 100%.
    ax.yaxis.set_major_formatter(PercentFormatter(1.0))

    ax.grid(axis='y', which="major", linewidth=1, linestyle='--', color="gray")
    ax.set_ylim((0.0, 0.5))
    ax.set_ylabel(None)

top_ax, bottom_ax = axes

# A single legend (in the bottom panel) serves both panels.
top_ax.get_legend().remove()
bottom_ax.legend(loc="upper left", ncols=2, bbox_to_anchor=(0.0, 1.0), fontsize=7.5)

bottom_ax.set_xlabel("Document Position")
# doc_position takes the values 0 .. COMBINATION_SIZE - 1; the x axis is
# shared, so this limit applies to both panels.
bottom_ax.set_xlim((0.0, COMBINATION_SIZE - 1))

for ax in axes:
    remove_axes(ax)
adjust(fig, hspace=0.2)
# NOTE(review): this filename does not encode COMBINATION_SIZE, so figures
# produced with different settings overwrite each other -- confirm intended.
save_fig(fig, "both_domains__subtopic__fc+pc.png", dpi=400)

### Subtopic trustworthy

In [5]:
# Figure for the "subtopic_trustworthy" prompt: for insights that are incorrect
# under the lenient definition (neither full nor partial coverage), plot
# `is_present` against document position -- one panel per domain, one line per
# model.
prompt_type = "subtopic_trustworthy"
subset = all_results_incorrect_fcpc[all_results_incorrect_fcpc["prompt_type"] == prompt_type]
subset_news = subset[subset["domain"] == "news"]
subset_conv = subset[subset["domain"] == "conv"]

fig, axes = plt.subplots(2, 1, sharex=True, figsize=(COL_WIDTH, COL_WIDTH), dpi=300)

# Plotting and styling shared by both panels.
for ax, title, panel_data in zip(axes, ("(a) news", "(b) conv"), (subset_news, subset_conv)):
    ax.set_title(title, fontsize=10)
    sns.lineplot(panel_data, x="doc_position", y="is_present", hue="model", ax=ax)

    ax.xaxis.set_major_locator(MultipleLocator(1))
    ax.yaxis.set_minor_locator(MultipleLocator(0.05))
    ax.yaxis.set_major_locator(MultipleLocator(0.10))
    # `is_present` is 0/1, so y values are fractions; display 1.0 as 100%.
    ax.yaxis.set_major_formatter(PercentFormatter(1.0))

    ax.grid(axis='y', which="major", linewidth=1, linestyle='--', color="gray")
    ax.set_ylim((0.0, 0.5))
    ax.set_ylabel(None)

top_ax, bottom_ax = axes

# A single legend (in the bottom panel) serves both panels.
top_ax.get_legend().remove()
bottom_ax.legend(loc="upper left", ncols=2, bbox_to_anchor=(0.0, 1.0), fontsize=7.5)

bottom_ax.set_xlabel("Document Position")
# doc_position takes the values 0 .. COMBINATION_SIZE - 1; the x axis is
# shared, so this limit applies to both panels.
bottom_ax.set_xlim((0.0, COMBINATION_SIZE - 1))

for ax in axes:
    remove_axes(ax)
adjust(fig, hspace=0.2)
save_fig(fig, f"both_domains__subtopic_trustworthy__fc+pc__{COMBINATION_SIZE}.png", dpi=400)

NameError: name 'all_results_incorrect_fcpc' is not defined