## Setup

In [1]:
import pandas as pd
import numpy as np
from joblib import load

from utils import (
    compute_nc_scores,
    find_threshold,
    predict_conformal_sets,
    evaluate_sets,
    summarize_by_indicator,
    summarize_for_predicate
)

## Data and Model Loading

In [2]:
# Calibration split (2015 data): "_f" keeps the protected attributes, "_s" omits them.
X_calib_f, X_calib_s = (
    pd.read_csv("./output/X_calib_f.csv"),
    pd.read_csv("./output/X_calib_s.csv"),
)
# Only the first column of the label file is kept, as a Series.
y_calib = pd.read_csv("./output/y_calib.csv").iloc[:, 0]

# Test split, read from files that follow the same naming scheme.
X_test_f, X_test_s = (
    pd.read_csv("./output/X_test_f.csv"),
    pd.read_csv("./output/X_test_s.csv"),
)
y_test = pd.read_csv("./output/y_test.csv").iloc[:, 0]

In [3]:
# Model used for conformal calibration and prediction below.
# joblib.load unpickles the file, so only point it at model artifacts you trust.
glm1 = load("./models/glm1.joblib")

# Saved test-set predictions.
# NOTE(review): preds_test is not referenced in the cells visible here — confirm it is still needed.
preds_test = pd.read_csv("./output/preds_test.csv")

## Conformal Prediction

In [4]:
# Miscoverage level passed to find_threshold below
# (presumably targets 1 - alpha = 0.90 coverage of the prediction sets — confirm in utils)
alpha = 0.1

In [5]:
# Class probabilities from glm1 on the calibration split (feature set with protected attributes)
probs_calib = glm1.predict_proba(X_calib_f)

# Nonconformity scores from the calibration probabilities and labels, then the
# threshold q_hat for miscoverage level alpha. Both helpers come from utils.
# NOTE(review): running this cell emits a NumPy deprecation warning about quantile
# interpolation modes; presumably it originates inside find_threshold — check utils.
nc_scores = compute_nc_scores(probs_calib, y_calib)
q_hat = find_threshold(nc_scores, alpha)

Users of the modes 'nearest', 'lower', 'higher', or 'midpoint' are encouraged to review the method they used. (Deprecated NumPy 1.22)
  q_hat = find_threshold(nc_scores, alpha)


In [6]:
# Build prediction sets for the test split from glm1 and the calibrated threshold q_hat
pred_sets = predict_conformal_sets(glm1, X_test_f, q_hat)

In [7]:
# Evaluate the test-split prediction sets against the true labels and report both metrics
evaluation = evaluate_sets(pred_sets, y_test)
print(
    f"Coverage: {evaluation['coverage']:.2f}",
    f"Avg. set size: {evaluation['avg_size']:.2f}",
    sep="\n",
)

Coverage: 0.90
Avg. set size: 1.13


## Analyzing Conformal Prediction Sets per Group

In [8]:
# Per-case frame on the same index as X_test_f (and y_test), so prediction sets,
# true labels, and group indicators stay row-aligned.
cp_groups = pd.DataFrame(index=X_test_f.index)
# Store every prediction set as a Python set of plain ints.
cp_groups['pred_set'] = pd.Series(pred_sets, index=X_test_f.index).apply(lambda s: {int(x) for x in s})
cp_groups['true_label'] = y_test.reindex(X_test_f.index)
cp_groups['frau1'] = X_test_f['frau1']

# Nationality indicator: 1.0 where maxdeutsch1 == 0, 0.0 otherwise, and NaN where the
# missing-value flag is set. The column is built as float in a single step: writing NaN
# into an integer column afterwards relies on implicit upcasting, which pandas deprecated.
nationality_missing = X_test_f['maxdeutsch.Missing.'] == 1
cp_groups['nongerman'] = np.where(
    nationality_missing,
    np.nan,
    np.where(X_test_f['maxdeutsch1'] == 0, 1.0, 0.0)
)

# Intersection indicators (nationality x gender).
# NOTE(review): NaN == 1 evaluates to False, so rows with missing nationality receive 0
# in both indicators below and are therefore counted in the negative group by any
# summary that splits on these columns — confirm this is intended.
cp_groups['nongerman_male'] = np.where(
    (cp_groups['nongerman'] == 1) & (cp_groups['frau1'] == 0),
    1,
    0
)
cp_groups['nongerman_female'] = np.where(
    (cp_groups['nongerman'] == 1) & (cp_groups['frau1'] == 1),
    1,
    0
)

In [9]:
# Group composition of the cases whose prediction set is exactly {0}
summarize_for_predicate(cp_groups, predicate=lambda members: {0} == set(members), description="== {0}")

Among cases where pred_set == {0}:
  Proportion true_label == 1:        0.096
  Proportion frau1 == 1:             0.415
  Proportion nongerman == 1:         0.218
  Proportion nongerman_male == 1:    0.140
  Proportion nongerman_female == 1:  0.078



In [10]:
# Group composition of the cases whose prediction set is exactly {1}
summarize_for_predicate(cp_groups, predicate=lambda members: {1} == set(members), description="== {1}")

Among cases where pred_set == {1}:
  Proportion true_label == 1:        0.359
  Proportion frau1 == 1:             0.480
  Proportion nongerman == 1:         0.066
  Proportion nongerman_male == 1:    0.025
  Proportion nongerman_female == 1:  0.041



In [11]:
# Group composition of the cases whose prediction set contains both classes
summarize_for_predicate(cp_groups, predicate=lambda members: {0, 1} == set(members), description="== {0,1}")

Among cases where pred_set == {0,1}:
  Proportion true_label == 1:        0.298
  Proportion frau1 == 1:             0.479
  Proportion nongerman == 1:         0.092
  Proportion nongerman_male == 1:    0.032
  Proportion nongerman_female == 1:  0.060



In [12]:
# Prediction-set type breakdown by gender: frau1 == 1 is labelled "female", 0 "male".
# The helper returns a counts table and a matching percentage table.
counts_female, pct_female = summarize_by_indicator(
    cp_groups,
    indicator_col='frau1',
    positive_label='female',
    negative_label='male'
)

print("\nCounts by gender:\n")
print(counts_female)
# Header wording fixed (previously read "Percentages for by gender").
print("\nPercentages by gender:\n")
print(pct_female)


Counts by gender:

        is_ambiguous  is_zero_only  is_one_only
frau1                                          
male            5969         44518         1125
female          5498         31562         1038

Percentages for by gender:

        is_ambiguous  is_zero_only  is_one_only
frau1                                          
male       11.565140     86.255134     2.179726
female     14.431204     82.844244     2.724552


In [13]:
# Prediction-set type breakdown by nationality: nongerman == 1 vs nongerman == 0
counts_ng, pct_ng = summarize_by_indicator(
    cp_groups, indicator_col='nongerman', positive_label='non‐German', negative_label='German'
)

# One print call; the newline separator reproduces the layout of four separate prints.
print(
    "Counts by nationality (German vs non‐German):\n",
    counts_ng,
    "\nPercentages by nationality:\n",
    pct_ng,
    sep="\n",
)

Counts by nationality (German vs non‐German):

            is_ambiguous  is_zero_only  is_one_only
nongerman                                          
German              9863         57482         1825
non‐German          1056         16615          142

Percentages by nationality:

            is_ambiguous  is_zero_only  is_one_only
nongerman                                          
German         14.259072     83.102501     2.638427
non‐German      5.928255     93.274575     0.797171


In [14]:
# Prediction-set type breakdown for nongerman_male == 1 against everyone else
counts_ng_male, pct_ng_male = summarize_by_indicator(
    cp_groups, indicator_col='nongerman_male', positive_label='non‐German Male', negative_label='Others'
)

# One print call; the newline separator reproduces the layout of four separate prints.
print(
    "\nCounts for non‐German Male vs Others:\n",
    counts_ng_male,
    "\nPercentages for non‐German Male vs Others:\n",
    pct_ng_male,
    sep="\n",
)


Counts for non‐German Male vs Others:

                 is_ambiguous  is_zero_only  is_one_only
nongerman_male                                          
Others                  11101         65396         2110
non‐German Male           366         10684           53

Percentages for non‐German Male vs Others:

                 is_ambiguous  is_zero_only  is_one_only
nongerman_male                                          
Others              14.122152     83.193609     2.684239
non‐German Male      3.296406     96.226245     0.477348


In [15]:
# Prediction-set type breakdown for nongerman_female == 1 against everyone else
counts_ng_female, pct_ng_female = summarize_by_indicator(
    cp_groups, indicator_col='nongerman_female', positive_label='non‐German Female', negative_label='Others'
)

# One print call; the newline separator reproduces the layout of four separate prints.
print(
    "\nCounts for non‐German Female vs Others:\n",
    counts_ng_female,
    "\nPercentages for non‐German Female vs Others:\n",
    pct_ng_female,
    sep="\n",
)


Counts for non‐German Female vs Others:

                   is_ambiguous  is_zero_only  is_one_only
nongerman_female                                          
Others                    10777         70149         2074
non‐German Female           690          5931           89

Percentages for non‐German Female vs Others:

                   is_ambiguous  is_zero_only  is_one_only
nongerman_female                                          
Others                12.984337     84.516867     2.498795
non‐German Female     10.283159     88.390462     1.326379
