In [1]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm
from pac_utils import pac_labeling, zero_one_loss, squared_loss
from plotting_utils import pac_plot

In [2]:
# Seed NumPy's legacy global RNG. The np.random.normal tie-breaking noise drawn
# inside the trial loops of this notebook comes from this generator.
np.random.seed(seed=2)

## Demonstration on Bias Data

In [3]:
# Read the bias dataset and expose the 'Yhat (GPT4o)' column under the generic
# name 'Yhat', which is the column the rest of the notebook reads.
df = pd.read_csv('bias.csv').assign(
    Yhat=lambda frame: frame['Yhat (GPT4o)'],
)

### Initial Run

In [4]:
# Settings forwarded to pac_labeling by the trial loops.
# NOTE(review): epsilon / alpha are presumably the PAC error tolerance and the
# failure probability — confirm against pac_utils.
alpha, epsilon = 0.05, 0.05
K = 500            # passed through to pac_labeling unchanged
num_trials = 100   # number of repetitions of the labeling procedure

# Per-trial result buffers; every entry is overwritten by a trial loop.
errs, percent_saved = np.zeros(num_trials), np.zeros(num_trials)

# One entry per row of df, all equal to 1; handed to pac_labeling as `pi`.
pi = np.ones(df.shape[0])

In [5]:
# Reference labels and model predictions as plain NumPy arrays.
Y, Yhat = np.array(df['Y']), np.array(df['Yhat'])

for i in tqdm(range(num_trials)):
    # Tiny Gaussian jitter so rows with identical confidence are not tied.
    jitter = 1e-5 * np.random.normal(size=len(df))
    uncertainty = 1 - df['confidence'] + jitter

    Y_tilde, labeled_inds, _ = pac_labeling(
        Y, Yhat, zero_one_loss, epsilon, alpha, uncertainty, pi, K,
        asymptotic=False,
    )

    # Loss of the returned labels against the reference labels.
    errs[i] = zero_one_loss(Y, Y_tilde)
    # Percentage of rows whose labeled_inds entry equals 0.
    percent_saved[i] = 100 * np.mean(labeled_inds == 0.0)

# Report the (1 - alpha) quantile of the per-trial errors and mean +/- std of the savings.
print('Error:', np.quantile(errs, 1-alpha), 'Budget save:(', np.mean(percent_saved), '+/-', np.std(percent_saved),')')

  0%|          | 0/100 [00:00<?, ?it/s]

Error: 0.04104552276138069 Budget save:( 13.684342171085543 +/- 3.1861053779909403 )


### Next, calibrate the predictions

In [6]:
# Calibration hyperparameters (set without tuning).
#   calibration_frac: fraction of rows sampled into the calibration set.
#   granularity:      number of bins requested from pd.cut for the confidence.
#   min_size:         a (cluster, bin) group is corrected only when it holds
#                     more than this many calibration rows.
calibration_frac, granularity, min_size = 0.15, 3, 15

In [7]:
# News sources grouped into 5 clusters. Each source name maps to an integer
# code; negative codes are the left-leaning groups and positive codes the
# right-leaning ones:
#   -2  Very Liberal
#   -1  Liberal
#    0  Center/Unknown
#    1  Conservative
#    2  Very Conservative
# The dict is built group by group, so its insertion order runs from -2 to 2.
lookup_fine = {
    source: lean
    for lean, sources in (
        (-2, [
            "The Intercept",
            "Mother Jones",
            "Salon",
            "Media Matters",
            "The Root",
            "Democracy Now",
            "ACLU",
            "ThinkProgress",
            "Daily Kos",
            "TruthOut",
        ]),
        (-1, [
            "CNN - Editorial",
            "NPR Online News",
            "CNN (Web News)",
            "New York Times - News",
            "The Atlantic",
            "HuffPost",
            "Guest Writer - Left",
            "Vox",
            "NBC News (Online)",
            "Daily Beast",
            "The New Yorker",
            "FiveThirtyEight",
            "The Guardian",
            "The Boston Globe",
            "Slate",
            "Vanity Fair",
            "New Republic",
            "BuzzFeed News",
            "Juan Williams",
            "New York Magazine",
            "Julian Zelizer",
            "Chicago Sun-Times",
            "Center For American Progress",
            "Washington Post",
            "RollingStone.com",
            "Ezra Klein",
            "Bustle",
            "Pacific Standard",
            "Vice",
            "NBCNews.com",
            "New York Times - Opinion",
            "MSNBC",
            "Grist",
            "Detroit Free Press",
            "Esquire",
            "ProPublica",
            "The Week - Opinion",
            "New York Times (Online News)",
            "The Verge",
            "The Observer (New York)",
            "Time Magazine",
        ]),
        (0, [
            "BBC News",
            "Politico",
            "Christian Science Monitor",
            "The Hill",
            "Mediaite",
            "Thomas Frey",
            "USA TODAY",
            "Reuters",
            "Lifehacker",
            "Yahoo! The 360",
            "Associated Press",
            "Reason",
            "Wall Street Journal - News",
            "The Flip Side",
            "ABC News",
            "Atlanta Journal-Constitution",
            "Al Jazeera",
            "Deadline.com",
            "Business Insider",
            "Bloomberg",
            "Scientific American",
            "The Week - News",
            "MarketWatch",
            "Smithsonian Magazine",
            "CBS News",
            "Guest Writer",
            "Howard Kurtz",
            "TechCrunch",
            "Damon Linker",
            "The Dallas Morning News",
            "Yahoo! News",
            "Indy Online",
            "RealClearPolitics",
            "Foreign Policy",
            "Pew Research Center",
            "Brookings Institution",
            "CNBC",
            "The Economist",
            "Axios",
            "DAG Blog",
            "Nieman Lab",
            "The Texas Tribune",
            "ABC News (Online)",
            "Whitehouse.gov",
            "Bring Me The News",
            "NBC Today Show",
            "Bipartisan Policy Center",
            "PACE",
        ]),
        (1, [
            "Washington Times",
            "National Review",
            "Fox Online News",
            "Guest Writer - Right",
            "Fox News",
            "Fox News Opinion",
            "Allysia Finley (Wall Street Journal)",
            "The Daily Caller",
            "John Stossel",
            "David Brooks",
            "New York Post",
            "Scott Walker",
            "The Daily Signal",
            "Rich Lowry",
            "HotAir",
            "The Epoch Times",
            "Independent Journal Review",
            "The Dispatch",
            "City Journal",
            "John Fund",
            "Michael Brendan Dougherty",
            "Fox News (Online)",
            "Commonwealth Journal",
            "Jonah Goldberg",
            "Quillette",
            "The Libertarian Republic",
            "American Enterprise Institute",
            "Daily Mail",
            "Washington Free Beacon",
        ]),
        (2, [
            "Townhall",
            "CBN",
            "Newsmax",
            "TheBlaze.com",
            "Breitbart News",
            "Newsmax - Opinion",
            "Ann Coulter",
            "American Spectator",
            "Michelle Malkin",
            "Victor Hanson",
            "Newsmax - News",
            "Charles Krauthammer",
            "The Daily Wire",
            "NewsBusters",
            "Ben Shapiro",
            "Media Research Center",
            "Pat Buchanan",
            "Newsmax (News)",
            "InfoWars",
            "ZeroHedge",
            "The Western Journal",
            "CNS News",
        ]),
    )
    for source in sources
}

In [8]:
# Derived grouping columns used by the calibration step.
# confidence_bin: integer bin index of each row's confidence (pd.cut with
# `granularity` bins and labels=False).
confidence_bin = pd.cut(df['confidence'], bins=granularity, labels=False)
df['confidence_bin'] = confidence_bin
# source_type: lean code looked up in lookup_fine; dict.get returns None for a
# source that is not in the table.
df['source_type'] = df['source'].apply(lambda source: lookup_fine.get(source))

In [9]:
# Hold out a random calibration subset (sampled without replacement); every
# row whose index is not in that subset stays in rest_of_df.
calibration_df = df.sample(frac=calibration_frac, replace=False)
# .copy() makes rest_of_df an independent frame rather than a slice of df, so
# the later in-place `.loc[...] = ...` updates of its 'confidence' column do
# not raise SettingWithCopyWarning.
rest_of_df = df[~df.index.isin(calibration_df.index)].copy()
# Replace the nominal fraction with the fraction of rows actually held out.
calibration_frac = 1 - len(rest_of_df) / len(df)

In [11]:
# Shift the confidence of the non-calibration rows, one (cluster, confidence
# bin) group at a time, by the gap between observed accuracy and mean
# confidence measured on the calibration rows of that group.
cluster_col = 'source_type'
confidence_bins = df['confidence_bin'].unique()

for cluster_id in df[cluster_col].unique():
    print(f'Calibrating cluster {cluster_col}={cluster_id}...')
    cluster_df = calibration_df.loc[calibration_df[cluster_col] == cluster_id]

    for bin_id in confidence_bins:
        bin_df = cluster_df.loc[cluster_df["confidence_bin"] == bin_id]

        # Nothing to estimate from when the calibration set has no such rows.
        if bin_df.shape[0] == 0:
            print(f'Skipping empty bin {bin_id}')
            continue

        # Observed accuracy minus average stated confidence in this group.
        accuracy = (bin_df['Y'] == bin_df['Yhat']).mean()
        correction = accuracy - bin_df['confidence'].mean()
        print(bin_id, correction, len(bin_df))

        # Groups with at most min_size calibration rows are reported but left uncorrected.
        if len(bin_df) <= min_size:
            continue

        mask = (rest_of_df[cluster_col] == cluster_id) & (rest_of_df['confidence_bin'] == bin_id)
        shifted = rest_of_df.loc[mask, 'confidence'] + correction
        # Keep the corrected confidence inside [1e-5, 1 - 1e-5].
        rest_of_df.loc[mask, 'confidence'] = np.clip(shifted, 1e-5, 1 - 1e-5)

Calibrating cluster source_type=-2...
2 0.10000000000000009 6
Skipping empty bin 1
Skipping empty bin 0
Calibrating cluster source_type=1...
2 -0.05833333333333335 54
1 -0.05263157894736836 19
0 0.7 2
Calibrating cluster source_type=2...
2 -0.03888888888888886 18
1 -0.5428571428571429 7
0 0.7 4
Calibrating cluster source_type=-1...
2 -0.29047619047619044 63
1 -0.20434782608695634 23
0 -0.15 6
Calibrating cluster source_type=0...
2 -0.3749999999999999 58
1 -0.21282051282051273 39
0 -0.3 1


### Post-calibration PAC Labeling

In [12]:
# Renumber the rows from 0; drop=False (the default) keeps the previous index
# as a regular column. This rebinds rest_of_df to the reset frame, so
# re-running the cell resets the already-reset frame again — run it once.
rest_of_df = rest_of_df.reset_index(drop=False)

In [None]:
# Repeat the PAC labeling experiment on the rows that were not used for
# calibration, using their calibrated confidence.
Y, Yhat = np.array(rest_of_df['Y']), np.array(rest_of_df['Yhat'])
pi = np.ones(len(rest_of_df))

# epsilon is scaled up by 1 / (1 - calibration_frac) for this run and the
# reported numbers are scaled back by (1 - calibration_frac).
# NOTE(review): presumably this states the results relative to the full
# dataset, with the calibration rows counted as labeled — confirm.
remaining_frac = 1 - calibration_frac
epsilon_rest = epsilon * (1 / remaining_frac)

for i in tqdm(range(num_trials)):
    # Tiny Gaussian jitter so rows with identical confidence are not tied.
    jitter = 1e-5 * np.random.normal(size=len(rest_of_df))
    uncertainty = 1 - rest_of_df['confidence'] + jitter

    Y_tilde, labeled_inds, _ = pac_labeling(
        Y, Yhat, zero_one_loss, epsilon_rest, alpha, uncertainty, pi, K,
        asymptotic=False,
    )

    errs[i] = zero_one_loss(Y, Y_tilde)
    # Percentage of rows whose labeled_inds entry equals 0.
    percent_saved[i] = 100 * np.mean(labeled_inds == 0.0)

err = np.quantile(errs, 1-alpha) * remaining_frac
print('Error:', err, 
      'Budget save:(', np.mean(percent_saved) * remaining_frac, 
      '+/-', np.std(percent_saved) * remaining_frac,')')

  0%|          | 0/100 [00:00<?, ?it/s]