## Setup

In [1]:
import pandas as pd
import numpy as np
from joblib import load

from utils import (
    compute_nc_scores,
    find_threshold,
    predict_conformal_sets,
    evaluate_sets,
    summarize_by_indicator,
    summarize_for_predicate
)

  vect_normalized_discounted_cumulative_gain = vmap(
  monte_carlo_vect_ndcg = vmap(vect_normalized_discounted_cumulative_gain, in_dims=(0,))


## Data and Model Loading

In [2]:
# Calibration split (2015). "_f" files include the protected attributes,
# "_s" files omit them; labels are taken as the first column of the y_* file.
X_calib_f, X_calib_s = (
    pd.read_csv("./output/X_calib_f.csv"),
    pd.read_csv("./output/X_calib_s.csv"),
)
y_calib = pd.read_csv("./output/y_calib.csv").iloc[:, 0]

# Test split, following the same file naming as the calibration split.
X_test_f, X_test_s = (
    pd.read_csv("./output/X_test_f.csv"),
    pd.read_csv("./output/X_test_s.csv"),
)
y_test = pd.read_csv("./output/y_test.csv").iloc[:, 0]

In [3]:
# Predictions file for the test split (per its name; contents not shown here).
preds_test = pd.read_csv("./output/preds_test.csv")

# NOTE(review): joblib.load unpickles the file and can execute arbitrary code
# while doing so — only load model artifacts that come from a trusted source.
glm1 = load("./models/glm1.joblib")

## Conformal Prediction

In [4]:
# Miscoverage level handed to find_threshold. The usual conformal target is
# coverage >= 1 - alpha (0.90 here) — confirm against utils.find_threshold.
alpha = 0.1

In [None]:
# Calibration step: score the calibration split (features incl. protected
# attributes) with the fitted model, then derive the threshold for alpha.
probs_calib = glm1.predict_proba(X_calib_f)

# compute_nc_scores / find_threshold come from the local utils module;
# presumably "nc" stands for nonconformity — confirm against utils.
nc_scores = compute_nc_scores(probs_calib, y_calib)
q_hat = find_threshold(nc_scores, alpha) # q_hat is the data-driven threshold later passed to predict_conformal_sets

In [6]:
# Build the conformal prediction sets for the test split, using the fitted
# model and the threshold q_hat derived from the calibration split.
pred_sets = predict_conformal_sets(glm1, X_test_f, q_hat)

In [7]:
# Score the test-split prediction sets against the true labels and report
# the two summary metrics, one per line.
evaluation = evaluate_sets(pred_sets, y_test)
print(
    f"Coverage: {evaluation['coverage']:.2f}",
    f"Avg. set size: {evaluation['avg_size']:.2f}",
    sep="\n",
)

Coverage: 0.91
Avg. set size: 1.13


## Analyzing conformal prediction (CP) sets per group

In [8]:
# Build cp_groups: one row per test observation, sharing X_test_f's index (and
# therefore y_test's), holding the prediction set, the true label and the
# group indicators used in the per-group summaries below.
cp_groups = pd.DataFrame(index=X_test_f.index)
# Normalise every prediction set to a Python set of plain ints.
cp_groups['pred_set'] = pd.Series(pred_sets, index=X_test_f.index).apply(lambda s: {int(x) for x in s})
cp_groups['true_label'] = y_test.reindex(X_test_f.index)
cp_groups['frau1'] = X_test_f['frau1']

# nongerman: 1.0 where maxdeutsch1 == 0, 0.0 otherwise, and NaN where the
# 'maxdeutsch.Missing.' flag is set.
# The column is created as float with the NaNs already in place. Creating an
# integer column first and then writing NaN into it via .loc relies on silent
# upcasting, which pandas deprecates since 2.1 (FutureWarning, slated to raise).
# Side effect of the fix: the column is always float, even if no row is flagged
# as missing.
cp_groups['nongerman'] = np.where(
    X_test_f['maxdeutsch.Missing.'] == 1,
    np.nan,
    np.where(X_test_f['maxdeutsch1'] == 0, 1.0, 0.0)
)

# Intersectional indicators. Rows with unknown nationality compare False here
# and get 0, but they are removed by the dropna() below anyway.
cp_groups['nongerman_male'] = np.where(
    (cp_groups['nongerman'] == 1) & (cp_groups['frau1'] == 0),
    1,
    0
)
cp_groups['nongerman_female'] = np.where(
    (cp_groups['nongerman'] == 1) & (cp_groups['frau1'] == 1),
    1,
    0
)

# Drop every row that has a missing value in any column (this includes the
# rows whose nationality is unknown).
cp_groups = cp_groups.dropna()

In [9]:
# Composition of the rows whose prediction set is exactly {0}.
summarize_for_predicate(
    cp_groups,
    description="== {0}",
    predicate=lambda pred_set: {0} == set(pred_set),
)

Among cases where pred_set == {0}:
  Proportion true_label == 1:        0.097
  Proportion frau1 == 1:             0.420
  Proportion nongerman == 1:         0.218
  Proportion nongerman_male == 1:    0.139
  Proportion nongerman_female == 1:  0.079



In [10]:
# Composition of the rows whose prediction set is exactly {1}.
summarize_for_predicate(
    cp_groups,
    description="== {1}",
    predicate=lambda pred_set: {1} == set(pred_set),
)

Among cases where pred_set == {1}:
  Proportion true_label == 1:        0.433
  Proportion frau1 == 1:             0.371
  Proportion nongerman == 1:         0.092
  Proportion nongerman_male == 1:    0.049
  Proportion nongerman_female == 1:  0.043



In [11]:
# Composition of the rows whose prediction set contains both labels, {0, 1}.
summarize_for_predicate(
    cp_groups,
    description="== {0,1}",
    predicate=lambda pred_set: {0, 1} == set(pred_set),
)

Among cases where pred_set == {0,1}:
  Proportion true_label == 1:        0.319
  Proportion frau1 == 1:             0.477
  Proportion nongerman == 1:         0.124
  Proportion nongerman_male == 1:    0.056
  Proportion nongerman_female == 1:  0.069



In [12]:
# Prediction-set breakdown by gender: frau1 == 1 is labelled 'female',
# frau1 == 0 'male'.
counts_female, pct_female = summarize_by_indicator(
    cp_groups,
    indicator_col='frau1',
    negative_label='male',
    positive_label='female',
)

# One print call; sep="\n" reproduces the line breaks of separate prints.
print(
    "\nCounts by gender:\n",
    counts_female,
    "\nPercentages for by gender:\n",
    pct_female,
    sep="\n",
)


Counts by gender:

        is_ambiguous  is_zero_only  is_one_only
frau1                                          
male            5665         43722          501
female          5175         31625          295

Percentages for by gender:

        is_ambiguous  is_zero_only  is_one_only
frau1                                          
male       11.355436     87.640314     1.004250
female     13.950667     85.254077     0.795255


In [13]:
# Prediction-set breakdown by nationality: nongerman == 1 is labelled
# 'non‐German', nongerman == 0 'German'.
counts_ng, pct_ng = summarize_by_indicator(
    cp_groups,
    indicator_col='nongerman',
    negative_label='German',
    positive_label='non‐German',
)

# One print call; sep="\n" reproduces the line breaks of separate prints.
print(
    "Counts by nationality (German vs non‐German):\n",
    counts_ng,
    "\nPercentages by nationality:\n",
    pct_ng,
    sep="\n",
)

Counts by nationality (German vs non‐German):

            is_ambiguous  is_zero_only  is_one_only
nongerman                                          
German              9491         58956          723
non‐German          1349         16391           73

Percentages by nationality:

            is_ambiguous  is_zero_only  is_one_only
nongerman                                          
German         13.721266     85.233483     1.045251
non‐German      7.573121     92.017066     0.409813


In [14]:
# Prediction-set breakdown for non-German men (nongerman_male == 1) against
# everyone else (nongerman_male == 0).
counts_ng_male, pct_ng_male = summarize_by_indicator(
    cp_groups,
    indicator_col='nongerman_male',
    negative_label='Others',
    positive_label='non‐German Male',
)

# One print call; sep="\n" reproduces the line breaks of separate prints.
print(
    "\nCounts for non‐German Male vs Others:\n",
    counts_ng_male,
    "\nPercentages for non‐German Male vs Others:\n",
    pct_ng_male,
    sep="\n",
)


Counts for non‐German Male vs Others:

                 is_ambiguous  is_zero_only  is_one_only
nongerman_male                                          
Others                  10236         64887          757
non‐German Male           604         10460           39

Percentages for non‐German Male vs Others:

                 is_ambiguous  is_zero_only  is_one_only
nongerman_male                                          
Others              13.489721     85.512652     0.997628
non‐German Male      5.439971     94.208772     0.351256


In [15]:
# Prediction-set breakdown for non-German women (nongerman_female == 1)
# against everyone else (nongerman_female == 0).
counts_ng_female, pct_ng_female = summarize_by_indicator(
    cp_groups,
    indicator_col='nongerman_female',
    negative_label='Others',
    positive_label='non‐German Female',
)

# One print call; sep="\n" reproduces the line breaks of separate prints.
print(
    "\nCounts for non‐German Female vs Others:\n",
    counts_ng_female,
    "\nPercentages for non‐German Female vs Others:\n",
    pct_ng_female,
    sep="\n",
)


Counts for non‐German Female vs Others:

                   is_ambiguous  is_zero_only  is_one_only
nongerman_female                                          
Others                    10095         69416          762
non‐German Female           745          5931           34

Percentages for non‐German Female vs Others:

                   is_ambiguous  is_zero_only  is_one_only
nongerman_female                                          
Others                12.575835     86.474904     0.949261
non‐German Female     11.102832     88.390462     0.506706
