#### import from helpers

In [1]:
import importlib
import sys
from pathlib import Path

# Working directory of the kernel; the printed output shows it is the
# notebook's own folder (<project_root>/notebooks/imagenet16h).
nb_dir = Path.cwd()

# The project root sits two levels above the notebook folder.
project_root = nb_dir.parent.parent   # .../human-ai-collab-uq

print("Notebook dir: ", nb_dir)
print("Project root:", project_root)

# Put src/ at the front of the import path. The membership check keeps
# re-runs of this cell from stacking duplicate entries.
src_path = project_root.joinpath("src")
if not any(entry == str(src_path) for entry in sys.path):
    sys.path.insert(0, str(src_path))

# Import the helper module (must happen after src/ is on sys.path), then
# reload it so edits to the file are picked up when this cell is re-run.
import imagenet16h.offline_helpers as off
off = importlib.reload(off)

print("Module path:", off.__file__)

# Root folder of the data files used by the helpers.
data_root = project_root.joinpath("data")
print("Data root:", data_root)


Notebook dir:  /Users/nooranis/Downloads/Github/human-ai-collab-uq/notebooks/imagenet16h
Project root: /Users/nooranis/Downloads/Github/human-ai-collab-uq
Module path: /Users/nooranis/Downloads/Github/human-ai-collab-uq/src/imagenet16h/offline_helpers.py
Data root: /Users/nooranis/Downloads/Github/human-ai-collab-uq/data


#### Prep Data

In [2]:
# Build the paths object from the data root; every helper below takes it.
paths = off.Imagenet16HPaths.from_data_root(data_root)

# Build the label table from the raw CSV. Per the logged output this writes a
# labels CSV (1200 rows) and a classes.json metadata file.
off.build_labels_from_raw_csv(paths)

# For each model: split its predictions, then sort them. The log shows one
# output file per noise level for each of the two steps.
for m in off.MODEL_NAMES:
    off.split_model_predictions(paths, m)
    off.sort_model_predictions(paths, m)

# Prepare the human tables, then build the expert frequency probabilities
# once per noise level.
# NOTE(review): the outputs of these two helpers are not visible in the
# captured log here; confirm what they write in offline_helpers.
off.prep_human_tables(paths)
for nl in off.NOISE_LEVELS:
    off.build_expert_freq_probs(paths, noise_level=nl)


[labels] wrote /Users/nooranis/Downloads/Github/human-ai-collab-uq/data/imagenet16H.csv (1200 rows)
[labels] wrote /Users/nooranis/Downloads/Github/human-ai-collab-uq/data/imagenet16h/metadata/classes.json
[model] alexnet ω=80: 1200 rows -> /Users/nooranis/Downloads/Github/human-ai-collab-uq/data/models/noise80/alexnet.csv
[model] alexnet ω=95: 1200 rows -> /Users/nooranis/Downloads/Github/human-ai-collab-uq/data/models/noise95/alexnet.csv
[model] alexnet ω=110: 1200 rows -> /Users/nooranis/Downloads/Github/human-ai-collab-uq/data/models/noise110/alexnet.csv
[model] alexnet ω=125: 1200 rows -> /Users/nooranis/Downloads/Github/human-ai-collab-uq/data/models/noise125/alexnet.csv
[sorted] alexnet ω=80: 1200 rows -> /Users/nooranis/Downloads/Github/human-ai-collab-uq/data/models/noise80/alexnet_sorted.csv
[sorted] alexnet ω=95: 1200 rows -> /Users/nooranis/Downloads/Github/human-ai-collab-uq/data/models/noise95/alexnet_sorted.csv
[sorted] alexnet ω=110: 1200 rows -> /Users/nooranis/Downloa

### Example Run: CUP vs Human Alone

In [7]:
# Configuration for this example: one noise level and one model.
# These two names are reused by the AI-alone baseline call later on.
noise_level = 125
model_name = "vgg19"

# Parameter grid handed to the sweep. The displayed result has one row per
# (strategy, delta, epsilon) combination, i.e. two rows for this grid.
strategies = ["topk1", "topk2"]
deltas = [0.7]
epsilons = [0.05]


# Run the sweep over 10 splits. The resulting frame carries *_mean / *_std
# columns for human-alone and method coverage, set size and the two
# conditional metrics (see the output below).
df_collab = off.sweep_strategies_eps_delta(
    paths=paths,
    noise_level=noise_level,
    model_name=model_name,
    strategies=strategies,
    deltas=deltas,
    epsilons=epsilons,
    num_splits=10,
)
df_collab.head()



Unnamed: 0,strategy,delta,epsilon,human_coverage_mean,human_coverage_std,human_set_size_mean,human_set_size_std,method_coverage_mean,method_coverage_std,method_set_size_mean,method_set_size_std,cond_coverage_not_in_H_mean,cond_coverage_not_in_H_std,cond_error_in_H_mean,cond_error_in_H_std
0,topk1,0.7,0.05,0.725167,0.010474,1.0,0.0,0.881,0.011719,1.347333,0.066676,0.698775,0.041587,0.050056,0.012808
1,topk2,0.7,0.05,0.801833,0.009051,2.0,0.0,0.9015,0.008248,1.469667,0.042634,0.704338,0.037726,0.049866,0.010376


### AI Alone Baseline

In [8]:
import pandas as pd

def attach_ai_alone_at_cup_delta(
    df_collab: pd.DataFrame,
    paths: "off.Imagenet16HPaths",  # quoted: not evaluated when the function is defined
    noise_level: int,
    model_name: str,
    num_splits: int = 10,
    test_size: float = 0.5,
    base_seed: int = 123,
) -> pd.DataFrame:
    """Append AI-alone baseline columns to every row of a collaboration sweep.

    For each row of ``df_collab`` the AI-alone sweep (``off.sweep_ai_alone``)
    is run once with ``deltas=[<that row's method_coverage_mean>]`` and the
    row's ``epsilon``. The first row of the sweep result is copied into nine
    ``ai_*`` columns. Rows whose ``method_coverage_mean`` is NaN are not swept
    and receive NaN in every ``ai_*`` column.

    Parameters
    ----------
    df_collab : pd.DataFrame
        Collaboration sweep result; must contain the columns ``epsilon`` and
        ``method_coverage_mean``.
    paths : off.Imagenet16HPaths
        Passed through unchanged to ``off.sweep_ai_alone``.
    noise_level : int
        Passed through unchanged to ``off.sweep_ai_alone``.
    model_name : str
        Passed through unchanged to ``off.sweep_ai_alone``.
    num_splits : int, default 10
        Passed through unchanged to ``off.sweep_ai_alone``.
    test_size : float, default 0.5
        Passed through unchanged to ``off.sweep_ai_alone``.
    base_seed : int, default 123
        Passed through unchanged to ``off.sweep_ai_alone``.

    Returns
    -------
    pd.DataFrame
        ``df_collab`` with the ``ai_*`` columns concatenated on the right,
        aligned on ``df_collab``'s index. The ``ai_*`` columns are present
        even when ``df_collab`` has no rows.

    Raises
    ------
    KeyError
        If ``df_collab`` or the AI-alone sweep result lacks a required column.
    ValueError
        If the AI-alone sweep returns no rows for some input row.
    """
    # Output column -> column of the AI-alone sweep result it is copied from.
    # Keeping the schema in one place guarantees that the NaN placeholder rows
    # and the real rows always have identical columns.
    ai_column_sources = {
        "ai_delta_param": "delta",
        "ai_coverage_mean": "method_coverage_mean",
        "ai_coverage_std": "method_coverage_std",
        "ai_set_size_mean": "method_set_size_mean",
        "ai_set_size_std": "method_set_size_std",
        "ai_cond_coverage_not_in_H_mean": "cond_coverage_not_in_H_mean",
        "ai_cond_coverage_not_in_H_std": "cond_coverage_not_in_H_std",
        "ai_cond_error_in_H_mean": "cond_error_in_H_mean",
        "ai_cond_error_in_H_std": "cond_error_in_H_std",
    }

    ai_rows = []

    for _, collab_row in df_collab.iterrows():
        eps = float(collab_row["epsilon"])
        cup_cov = float(collab_row["method_coverage_mean"])

        # pd.isna rather than np.isnan: pandas is imported in this cell,
        # whereas numpy is only imported by a later cell, so the function no
        # longer depends on cell execution order.
        if pd.isna(cup_cov):
            ai_rows.append(dict.fromkeys(ai_column_sources, float("nan")))
            continue

        # run AI-alone baseline
        df_ai = off.sweep_ai_alone(
            paths=paths,
            noise_level=noise_level,
            model_name=model_name,
            deltas=[cup_cov],          # <-- δ = coverage achieved by CUP
            epsilon=eps,
            num_splits=num_splits,
            test_size=test_size,
            base_seed=base_seed,
        )

        # Fail with a readable message instead of a bare IndexError from
        # .iloc[0] when the sweep produced nothing.
        if len(df_ai) == 0:
            raise ValueError(
                f"AI-alone sweep returned no rows for delta={cup_cov}, epsilon={eps}"
            )

        # A single delta was requested, so only the first row is used.
        ai_row = df_ai.iloc[0]
        ai_rows.append(
            {out_col: ai_row[src_col] for out_col, src_col in ai_column_sources.items()}
        )

    # Passing `columns` keeps the ai_* columns present (and ordered) even when
    # df_collab is empty and ai_rows is therefore an empty list.
    df_ai_aug = pd.DataFrame(
        ai_rows,
        index=df_collab.index,
        columns=list(ai_column_sources),
    )
    return pd.concat([df_collab, df_ai_aug], axis=1)


In [9]:
import numpy as np

# Augment the collaboration sweep with the AI-alone baseline columns, using
# the same noise level, model and number of splits (10) as the sweep that
# produced df_collab.
# NOTE(review): in the output shown below the ai_cond_error_in_H_* columns are
# empty (NaN) — confirm that this is expected for the AI-alone baseline.
df_full = attach_ai_alone_at_cup_delta(
    df_collab=df_collab,
    paths=paths,
    noise_level=noise_level,
    model_name=model_name,
    num_splits=10,
)
df_full.head()


Unnamed: 0,strategy,delta,epsilon,human_coverage_mean,human_coverage_std,human_set_size_mean,human_set_size_std,method_coverage_mean,method_coverage_std,method_set_size_mean,...,cond_error_in_H_std,ai_delta_param,ai_coverage_mean,ai_coverage_std,ai_set_size_mean,ai_set_size_std,ai_cond_coverage_not_in_H_mean,ai_cond_coverage_not_in_H_std,ai_cond_error_in_H_mean,ai_cond_error_in_H_std
0,topk1,0.7,0.05,0.725167,0.010474,1.0,0.0,0.881,0.011719,1.347333,...,0.012808,0.881,0.8815,0.014013,1.464833,0.045895,0.8815,0.014013,,
1,topk2,0.7,0.05,0.801833,0.009051,2.0,0.0,0.9015,0.008248,1.469667,...,0.010376,0.9015,0.904667,0.013433,1.626167,0.06394,0.904667,0.013433,,


### Observing specific results from the previous full table

In [10]:
def summarize_row_human_cup_ai(df_full: pd.DataFrame, strategy: str, epsilon: float, delta: float) -> pd.DataFrame:
    """Build a three-row comparison table for one sweep configuration.

    Selects the first row of ``df_full`` whose ``strategy`` equals
    ``strategy`` and whose ``epsilon`` / ``delta`` are numerically close to
    the requested values, then lays its human-alone, CUP and AI-alone
    statistics out as one row per variant.

    Parameters
    ----------
    df_full : pd.DataFrame
        Frame holding the collaboration columns (``strategy``, ``epsilon``,
        ``delta``, ``human_*_mean``, ``method_*_mean``, ``cond_*_mean``) and
        the ``ai_*`` baseline columns read below.
    strategy : str
        Value to match exactly in the ``strategy`` column.
    epsilon : float
        Value to match in the ``epsilon`` column (within ``np.isclose``
        default tolerances).
    delta : float
        Value to match in the ``delta`` column (within ``np.isclose`` default
        tolerances).

    Returns
    -------
    pd.DataFrame
        Three rows (human-alone, CUP offline, AI-alone) with the columns
        ``variant``, ``epsilon``, ``delta_param``, ``marginal_coverage``,
        ``avg_set_size`` and the two conditional-probability columns. The
        human-alone row has NaN for ``delta_param`` and both conditional
        columns.

    Raises
    ------
    ValueError
        If no row matches the requested configuration.
    KeyError
        If ``df_full`` lacks one of the columns read here.
    """
    # Float parameters are compared with a tolerance: exact `==` silently
    # misses rows whose grid value carries rounding error (0.1 + 0.2 != 0.3).
    mask = (
        (df_full["strategy"] == strategy)
        & np.isclose(df_full["epsilon"].to_numpy(dtype=float), epsilon)
        & np.isclose(df_full["delta"].to_numpy(dtype=float), delta)
    )
    if not mask.any():
        raise ValueError(f"No row found for strategy={strategy}, eps={epsilon}, delta={delta}")

    # If several rows match, the first one wins.
    row = df_full[mask].iloc[0]

    cond_cov_col = "P(Y in C | Y ∉ H)"
    cond_err_col = "P(Y ∉ C | Y ∈ H)"

    rows = [
        {
            "variant": f"Human-alone ({strategy})",
            "epsilon": epsilon,
            "delta_param": np.nan,  # human doesn't use δ
            "marginal_coverage": row["human_coverage_mean"],
            "avg_set_size": row["human_set_size_mean"],
            cond_cov_col: np.nan,
            cond_err_col: np.nan,
        },
        {
            "variant": f"CUP offline ({strategy})",
            "epsilon": epsilon,
            "delta_param": delta,   # CUP's δ param
            "marginal_coverage": row["method_coverage_mean"],
            "avg_set_size": row["method_set_size_mean"],
            cond_cov_col: row["cond_coverage_not_in_H_mean"],
            cond_err_col: row["cond_error_in_H_mean"],
        },
        {
            "variant": "AI-alone (δ = CUP cov)",
            "epsilon": epsilon,
            # The AI-alone baseline was run with CUP's achieved coverage as δ.
            "delta_param": row["ai_delta_param"],
            "marginal_coverage": row["ai_coverage_mean"],
            "avg_set_size": row["ai_set_size_mean"],
            cond_cov_col: row["ai_cond_coverage_not_in_H_mean"],
            cond_err_col: row["ai_cond_error_in_H_mean"],
        },
    ]

    return pd.DataFrame(rows)


In [12]:
# Configuration to inspect; it must correspond to a row of df_full
# (summarize_row_human_cup_ai raises ValueError otherwise).
strategy0 = "topk2"
eps0 = 0.05
delta0 = 0.7

# One row each for human-alone, CUP offline and AI-alone at that configuration.
summary_table = summarize_row_human_cup_ai(df_full, strategy=strategy0, epsilon=eps0, delta=delta0)
summary_table


Unnamed: 0,variant,epsilon,delta_param,marginal_coverage,avg_set_size,P(Y in C | Y ∉ H),P(Y ∉ C | Y ∈ H)
0,Human-alone (topk2),0.05,,0.801833,2.0,,
1,CUP offline (topk2),0.05,0.7,0.9015,1.469667,0.704338,0.049866
2,AI-alone (δ = CUP cov),0.05,0.9015,0.904667,1.626167,0.904667,
