In [None]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm
from router_utils import train_weighter
from pac_utils import pac_labeling, zero_one_loss
from plotting_utils import pac_router_plot

### Load data

In [None]:
# Feature table for the whole notebook; later cells read the columns
# Y, Yhat_0, confidence_0, Yhat_1 and confidence_1 from it.
features_path = "bias_source_features.csv"
df = pd.read_csv(features_path)

### Train PAC router

In [None]:
# Switch between a cost-free comparison and one that charges per-model query costs.
costless = True
# Entry 0 is Claude and entry 1 is GPT (later cells read them as cost_claude / cost_gpt);
# the nonzero pair is the real ratio of claude sonnet and gpt-4o.
costs = np.array([0,0]) if costless else np.array([0.075, 0.25])
# Multiplied later by the number of points that were not expert-labeled.
expert_cost = 1
results = train_weighter(
    df,
    calibration_frac=0.1,
    costs=costs,
    expert_cost=expert_cost,
)

### Set parameters for PAC labeling

In [None]:
# Quantities handed to pac_labeling in the cells below. Those cells report the
# (1 - alpha) quantile of the per-trial error and draw epsilon as a reference line.
alpha = 0.05
epsilon = 0.05
num_trials = 1000  # number of repeated pac_labeling runs per method
K = 500  # forwarded unchanged to pac_labeling -- NOTE(review): meaning not visible in this notebook

# Seed NumPy's global generator so the tie-breaking noise drawn with
# np.random.randn in the cells below is identical on every Restart & Run All.
# Re-running a single trial cell on its own continues the stream instead.
seed = 0
np.random.seed(seed)

# Keep only the rows that results["label_collected"] does not flag
# (presumably the rows not consumed while training the router -- TODO confirm).
not_collected = ~results["label_collected"]

Y = df["Y"].to_numpy()[not_collected]
Yhat_routed = results["Yhat_routed"].to_numpy()[not_collected]
confidence_routed = results["confidence_routed"].to_numpy()[not_collected]
# Model index 0 is treated as Claude and index 1 as GPT, matching the order of `costs`.
Yhat_claude = df["Yhat_0"].to_numpy()[not_collected]
confidence_claude = df["confidence_0"].to_numpy()[not_collected]
Yhat_gpt = df["Yhat_1"].to_numpy()[not_collected]
confidence_gpt = df["confidence_1"].to_numpy()[not_collected]

pi = np.ones(len(Y))  # forwarded to pac_labeling; all ones here
cost_sensitive = (np.sum(costs) > 0)
if cost_sensitive:
    # Per-row query cost of whichever model results["routed_model"] names for that row.
    cost_Yhats = costs[results["routed_model"].to_numpy()[not_collected]]
    cost_claude = costs[0]
    cost_gpt = costs[1]

### Run PAC labeling with router

In [None]:
# Run pac_labeling on the routed predictions num_trials times, storing the
# realized zero-one error and the budget saved for every run.
n_points = len(Y)
Y_tilde = np.zeros(n_points)
budget_save_router = np.zeros(num_trials)
errs_router = np.zeros(num_trials)
for trial in tqdm(range(num_trials)):
    # Fresh, tiny Gaussian jitter on each trial so equal confidences do not tie.
    jitter = 1e-5*np.random.randn(n_points)
    uncertainty_routed = 1 - confidence_routed + jitter
    Y_tilde, labeled_inds, _ = pac_labeling(
        Y, Yhat_routed, zero_one_loss, epsilon, alpha,
        uncertainty_routed, pi, K, asymptotic=False,
    )
    if not cost_sensitive:
        # Cost-free setting: percentage of points whose labeled_inds entry is 0.
        saved = (labeled_inds == 0).mean()*100
    else:
        # Cost-sensitive setting: expert cost on the points not flagged in
        # labeled_inds, minus the routed models' query costs on those points.
        n_skipped = n_points - np.sum(labeled_inds)
        model_spend = np.sum(cost_Yhats[np.where(1-labeled_inds)])
        saved = n_skipped*expert_cost - model_spend
    budget_save_router[trial] = saved
    errs_router[trial] = zero_one_loss(Y, Y_tilde)
print(
    'Error:', np.quantile(errs_router, 1-alpha),
    'Budget save:(', np.mean(budget_save_router), '+/-', np.std(budget_save_router),')'
)

### Run PAC labeling with GPT/Claude individually

In [None]:
# Same experiment as for the router, but always using Claude's predictions
# and confidences.
n_points = len(Y)
Y_tilde = np.zeros(n_points)
budget_save_claude = np.zeros(num_trials)
errs_claude = np.zeros(num_trials)
for trial in tqdm(range(num_trials)):
    # Fresh, tiny Gaussian jitter on each trial so equal confidences do not tie.
    jitter = 1e-5*np.random.randn(n_points)
    uncertainty_claude = 1 - confidence_claude + jitter
    Y_tilde, labeled_inds, _ = pac_labeling(
        Y, Yhat_claude, zero_one_loss, epsilon, alpha,
        uncertainty_claude, pi, K, asymptotic=False,
    )
    if not cost_sensitive:
        # Cost-free setting: percentage of points whose labeled_inds entry is 0.
        saved = (labeled_inds == 0).mean()*100
    else:
        # Cost-sensitive setting: expert cost on the points not flagged in
        # labeled_inds, minus Claude's flat query cost on those points.
        n_skipped = n_points - np.sum(labeled_inds)
        model_spend = cost_claude*np.sum(1-labeled_inds)
        saved = n_skipped*expert_cost - model_spend
    budget_save_claude[trial] = saved
    errs_claude[trial] = zero_one_loss(Y, Y_tilde)
print(
    'Error:', np.quantile(errs_claude, 1-alpha),
    'Budget save:(', np.mean(budget_save_claude), '+/-', np.std(budget_save_claude),')'
)

In [None]:
# Same experiment as for the router, but always using GPT's predictions
# and confidences.
n_points = len(Y)
Y_tilde = np.zeros(n_points)
budget_save_gpt = np.zeros(num_trials)
errs_gpt = np.zeros(num_trials)
for trial in tqdm(range(num_trials)):
    # Fresh, tiny Gaussian jitter on each trial so equal confidences do not tie.
    jitter = 1e-5*np.random.randn(n_points)
    uncertainty_gpt = 1 - confidence_gpt + jitter
    Y_tilde, labeled_inds, _ = pac_labeling(
        Y, Yhat_gpt, zero_one_loss, epsilon, alpha,
        uncertainty_gpt, pi, K, asymptotic=False,
    )
    if not cost_sensitive:
        # Cost-free setting: percentage of points whose labeled_inds entry is 0.
        saved = (labeled_inds == 0).mean()*100
    else:
        # Cost-sensitive setting: expert cost on the points not flagged in
        # labeled_inds, minus GPT's flat query cost on those points.
        n_skipped = n_points - np.sum(labeled_inds)
        model_spend = cost_gpt*np.sum(1-labeled_inds)
        saved = n_skipped*expert_cost - model_spend
    budget_save_gpt[trial] = saved
    errs_gpt[trial] = zero_one_loss(Y, Y_tilde)
print(
    'Error:', np.quantile(errs_gpt, 1-alpha),
    'Budget save:(', np.mean(budget_save_gpt), '+/-', np.std(budget_save_gpt),')'
)

### Plot results

In [None]:
# Both lists are ordered router, GPT, Claude.
errors_by_method = [errs_router, errs_gpt, errs_claude]
savings_by_method = [budget_save_router, budget_save_gpt, budget_save_claude]
pac_router_plot(
    errors_by_method,
    savings_by_method,
    epsilon,
    "routed_costs_epsilon_",  # presumably an output-name prefix -- confirm in plotting_utils
    num_trials=num_trials,
    cost_free=(np.sum(costs)==0),
)

In [None]:
def _error_below_threshold(y_true, y_pred, uncertainty):
    """Return the L^u curve evaluated at every observed uncertainty value.

    For each threshold u taken from ``uncertainty`` in ascending order, the
    result holds the number of points with ``uncertainty < u`` (strictly) whose
    prediction differs from the true label, divided by the total number of
    points.

    Sorting once and taking a running sum gives the same values as re-scanning
    the whole array for every threshold, in O(n log n) instead of O(n^2).
    Assumes ``uncertainty`` contains no NaN.

    Parameters
    ----------
    y_true, y_pred, uncertainty : np.ndarray
        One-dimensional arrays of equal length.

    Returns
    -------
    np.ndarray
        One value per point, ordered by increasing uncertainty.
    """
    order = np.argsort(uncertainty, kind="stable")
    sorted_uncertainty = uncertainty[order]
    mistakes_sorted = (y_pred != y_true)[order]
    # mistakes_before[k] = number of mistakes among the k least-uncertain points.
    mistakes_before = np.concatenate(([0], np.cumsum(mistakes_sorted)))
    # side="left" counts the points strictly below each threshold, so tied
    # uncertainties are handled exactly like the `<` comparison.
    n_strictly_below = np.searchsorted(sorted_uncertainty, sorted_uncertainty, side="left")
    return mistakes_before[n_strictly_below] / len(y_true)


# The uncertainty_* arrays are the ones left over from the final trial of each
# labeling loop above, so this cell must run after those cells.
fig, ax = plt.subplots()
ax.axhline(y=epsilon, linestyle='--', color='black', linewidth=1.2, alpha=0.6)
for curve_label, curve_color, y_pred, uncertainty in [
    ('router', '#2274A5', Yhat_routed, uncertainty_routed),
    ('GPT', '#00CC66', Yhat_gpt, uncertainty_gpt),
    ('Claude', '#F75C03', Yhat_claude, uncertainty_claude),
]:
    curve = _error_below_threshold(Y, y_pred, uncertainty)
    # The x-axis is the rank of the threshold among the sorted uncertainties.
    ax.plot(range(len(curve)), curve, color=curve_color, label=curve_label)

ax.set_xlabel("$u$", fontsize=16)
ax.set_ylabel("$L^u$", fontsize=16)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.grid(True, linestyle=':', color='gray', alpha=0.4)
ax.legend(frameon=False, fontsize=16, loc='upper left')

# Add an extra y tick at epsilon and label it explicitly.
yticks = np.append(ax.get_yticks(), epsilon)
ax.set_yticks(yticks)
ax.set_yticklabels(
    [r'$\varepsilon=$' + str(epsilon) if np.isclose(tick, epsilon) else f'{tick:.2f}' for tick in yticks],
    fontsize=12
)
ax.tick_params(axis='x', labelsize=12)
ax.set_ylim([0, 0.5])
fig.tight_layout()
fig.savefig("Lu.pdf")
plt.show()