# Setup

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from collections import defaultdict
from matplotlib.lines import Line2D
plt.style.use('seaborn-v0_8')

In [None]:
from sklearn.metrics import precision_score, recall_score, make_scorer, roc_curve, auc, precision_recall_curve, classification_report, confusion_matrix, accuracy_score, log_loss

In [None]:
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric, ClassificationMetric

In [None]:
from utils import aif_test, aif_plot, aif_plot2

In [None]:
# Load the exported model inputs and outputs from ./output.
# Per the notes in the fairness section, the "_f" feature frames include the
# protected attributes and the "_s" frames omit them.
X_train_f, X_train_s = (
    pd.read_csv(csv_path)
    for csv_path in ("./output/X_train_f.csv", "./output/X_train_s.csv")
)

X_test_f, X_test_s, y_test = (
    pd.read_csv(csv_path)
    for csv_path in ("./output/X_test_f.csv", "./output/X_test_s.csv", "./output/y_test.csv")
)

# Test-set predictions; one column per model/cutoff combination used below.
preds_test = pd.read_csv(filepath_or_buffer="./output/preds_test.csv")


## Descriptive

In [None]:
# Put predictions and the full test features side by side (column-wise concat).
comb_test = pd.concat((preds_test, X_test_f), axis='columns')

In [None]:
# Derive the group indicators used in the descriptive tables.
# nongerman: 1 where maxdeutsch1 == 0, 0 otherwise, NaN where the
# 'maxdeutsch.Missing.' flag is set.
# Series.mask performs the int -> float upcast itself; assigning NaN into the
# integer column through .loc relies on silent upcasting, which pandas has
# deprecated.
nongerman_flag = pd.Series(np.where(comb_test['maxdeutsch1'] == 0, 1, 0), index=comb_test.index)
comb_test['nongerman'] = nongerman_flag.mask(comb_test['maxdeutsch.Missing.'] == 1)

# Intersectional flags. Rows with a NaN nongerman value get 0 in both columns
# because NaN == 1 evaluates to False.
comb_test['nongerman_male'] = np.where((comb_test['nongerman'] == 1) & (comb_test['frau1'] == 0), 1, 0)
comb_test['nongerman_female'] = np.where((comb_test['nongerman'] == 1) & (comb_test['frau1'] == 1), 1, 0)

In [None]:
# Remove every row that has at least one missing value (this includes the
# rows whose nongerman flag was set to NaN).
comb_test = comb_test.dropna(axis=0, how='any')

In [None]:
# Mean of y_test within each nongerman group, i.e. the base rate
# (unemployment rate) among Germans and non-Germans.
comb_test.groupby(['nongerman'])[['y_test']].mean() # Baseline

# The same breakdown for predicted classes (currently disabled):
#comb_test[['rf2_c1', 'nongerman']].groupby(['nongerman']).mean() # High risk (w/o protected attributes)
#comb_test[['rf2_c2', 'nongerman']].groupby(['nongerman']).mean() # High risk (w/o protected attributes)
#comb_test[['rf2_c3', 'nongerman']].groupby(['nongerman']).mean() # Middle risk (w/o protected attributes)

In [None]:
# Share of each (sub)group among the observations with y_test == 0 and y_test == 1.
comb_test.groupby(['y_test'])[['frau1', 'nongerman', 'nongerman_male', 'nongerman_female']].mean() # Baseline
# The same breakdown for predicted classes (currently disabled):
#comb_test[['rf2_c1', 'frau1', 'nongerman', 'nongerman_male', 'nongerman_female']].groupby(['rf2_c1']).mean() # High risk (w/o protected attributes)
#comb_test[['rf2_c2', 'frau1', 'nongerman', 'nongerman_male', 'nongerman_female']].groupby(['rf2_c2']).mean() # High risk (w/o protected attributes)
#comb_test[['rf2_c3', 'frau1', 'nongerman', 'nongerman_male', 'nongerman_female']].groupby(['rf2_c3']).mean() # Middle risk (w/o protected attributes)

In [None]:
# Persist the combined test frame (without the row index).
comb_test.to_csv(path_or_buf='./output/comb_test.csv', index=False)

# 01 Fairness Metrics

In [None]:
# Observed labels joined column-wise with the test features.
label_test_s = pd.concat((y_test, X_test_s), axis='columns') # w/o protected attributes
# NOTE(review): this binds a second name to the same DataFrame object (no copy),
# so in-place changes to preds_test are visible through preds_test_s as well.
preds_test_s = preds_test

label_test = pd.concat((y_test, X_test_f), axis='columns') # with protected attributes

In [None]:
# Blank out the rows whose 'maxdeutsch.Missing.' flag is set, in the label
# frame and in the prediction frame alike, so that the subsequent dropna()
# removes the same rows from both.
# Series.mask upcasts int -> float on its own; assigning NaN into an integer
# column through .loc relies on silent upcasting, which pandas has deprecated.
missing_german = label_test['maxdeutsch.Missing.'] == 1
label_test['maxdeutsch1'] = label_test['maxdeutsch1'].mask(missing_german)
preds_test['y_test'] = preds_test['y_test'].mask(missing_german)

In [None]:
# Group indicators on the label frame.
# A NaN in maxdeutsch1 compares unequal to 0, so such rows get nongerman = 0 here.
label_test['nongerman'] = np.where(label_test['maxdeutsch1'].eq(0), 1, 0)

# Intersection of the non-German flag with sex (frau1: 0 -> male, 1 -> female).
is_nongerman = label_test['nongerman'].eq(1)
label_test['nongerman_male'] = np.where(is_nongerman & label_test['frau1'].eq(0), 1, 0)
label_test['nongerman_female'] = np.where(is_nongerman & label_test['frau1'].eq(1), 1, 0)

In [None]:
# Drop incomplete rows from the label frame and the prediction frame together.
# The predictions are later assigned to datasets built from label_test purely
# by row position, so both frames must keep exactly the same rows. Dropping
# NaNs on each frame independently would silently misalign them whenever one
# frame has a missing value in a row where the other does not.
complete_rows = pd.concat([label_test, preds_test], axis = 1).dropna().index
label_test = label_test.loc[complete_rows].reset_index(drop = True)
preds_test = preds_test.loc[complete_rows].reset_index(drop = True)

# 01 Stat. Parity Difference

In [None]:
# Baseline fairness of the observed label, sex comparison:
# frau1 == 1 (female) is the unprivileged group, frau1 == 0 the privileged one.

protected_attribute = ['frau1']
unprivileged_group, privileged_group = [{'frau1': 1}], [{'frau1': 0}]

In [None]:
# Wrap the pandas frame in an aif360 BinaryLabelDataset, the container the
# aif360 fairness metrics are computed on; 'ltue' is the label column.

test_label = BinaryLabelDataset(
    df=label_test,
    label_names=['ltue'],
    protected_attribute_names=protected_attribute,
)

In [None]:
# Fairness metrics on the true labels: how (un)fair the observed outcomes
# already are before any model is applied.

metric_test_label = BinaryLabelDatasetMetric(
    test_label,
    unprivileged_groups=unprivileged_group,
    privileged_groups=privileged_group,
)

In [None]:
# Statistical parity difference (SPD) of the observed label for the sex attribute.
# How to read the value:
# -  0: both groups have the same rate of positive outcomes
# - <0: the unprivileged group receives fewer positive outcomes
# - >0: the unprivileged group receives more positive outcomes
# NOTE(review): "positive" presumably refers to aif360's favorable label
# (default 1, i.e. ltue == 1 here) - confirm.

base_par_sex = metric_test_label.statistical_parity_difference()  # Label diff female

In [None]:
# Baseline SPD of the observed label: maxdeutsch1 == 0 (non-German, unprivileged)
# versus maxdeutsch1 == 1 (privileged).
protected_attribute = ['maxdeutsch1']
unprivileged_group, privileged_group = [{'maxdeutsch1': 0}], [{'maxdeutsch1': 1}]

test_label = BinaryLabelDataset(
    df=label_test,
    label_names=['ltue'],
    protected_attribute_names=protected_attribute,
)

metric_test_label = BinaryLabelDatasetMetric(
    test_label,
    unprivileged_groups=unprivileged_group,
    privileged_groups=privileged_group,
)

base_par_ger = metric_test_label.statistical_parity_difference()  # Label diff nongerman

In [None]:
# Baseline SPD of the observed label: non-German men (nongerman_male == 1,
# unprivileged) versus everyone with nongerman_male == 0 (privileged).
protected_attribute = ['nongerman_male']
unprivileged_group, privileged_group = [{'nongerman_male': 1}], [{'nongerman_male': 0}]

test_label = BinaryLabelDataset(
    df=label_test,
    label_names=['ltue'],
    protected_attribute_names=protected_attribute,
)

metric_test_label = BinaryLabelDatasetMetric(
    test_label,
    unprivileged_groups=unprivileged_group,
    privileged_groups=privileged_group,
)

base_par_ger_male = metric_test_label.statistical_parity_difference()  # Label diff nongerman male

In [None]:
# Baseline SPD of the observed label: non-German women (nongerman_female == 1,
# unprivileged) versus everyone with nongerman_female == 0 (privileged).
protected_attribute = ['nongerman_female']
unprivileged_group, privileged_group = [{'nongerman_female': 1}], [{'nongerman_female': 0}]

test_label = BinaryLabelDataset(
    df=label_test,
    label_names=['ltue'],
    protected_attribute_names=protected_attribute,
)

metric_test_label = BinaryLabelDatasetMetric(
    test_label,
    unprivileged_groups=unprivileged_group,
    privileged_groups=privileged_group,
)

base_par_ger_female = metric_test_label.statistical_parity_difference()  # Label diff nongerman female

# Loop over models (w protected attributes) and cutoffs to calculate metrics

In [None]:
# Calculates the statistical parity difference (SPD) of the predictions of
# each model/cutoff column for several protected-group comparisons and stores
# one row per column in fairness1:
#   [column, SPD female, SPD nongerman, SPD nongerman male, SPD nongerman female]

# One entry per comparison:
# (protected attribute names, unprivileged group, privileged group)
spd_groups = [
    (['frau1'], [{'frau1': 1}], [{'frau1': 0}]),                                   # female
    (['maxdeutsch1'], [{'maxdeutsch1': 0}], [{'maxdeutsch1': 1}]),                 # nongerman
    (['nongerman_male'], [{'nongerman_male': 1}], [{'nongerman_male': 0}]),        # nongerman male
    (['nongerman_female'], [{'nongerman_female': 1}], [{'nongerman_female': 0}]),  # nongerman female
]

# The aif360 dataset for a comparison does not depend on the model, so it is
# built once here instead of once per model column.
group_datasets = [
    (BinaryLabelDataset(df = label_test,
                        label_names = ['ltue'],
                        protected_attribute_names = attribute_names),
     unprivileged,
     privileged)
    for attribute_names, unprivileged, privileged in spd_groups
]

model_columns = ['glm1_c1', 'glm1_c2', 'glm1_c3'] # ,'net1_c1', 'net1_c2', 'net1_c3','rf1_c1', 'rf1_c2', 'rf1_c3','gbm1_c1', 'gbm1_c2', 'gbm1_c3'

fairness1 = []

for column in model_columns:

    # aif360 datasets hold their labels as an (n, 1) numpy array, so the
    # prediction column is converted instead of assigning the pandas Series.
    pred = preds_test[column].to_numpy().reshape(-1, 1)

    row = [column]

    for test_label, unprivileged_group, privileged_group in group_datasets:

        # Copy of the label dataset whose labels are replaced by the predictions
        test_pred = test_label.copy()
        test_pred.labels = pred

        metric_test_pred = BinaryLabelDatasetMetric(test_pred,
                                                    unprivileged_groups = unprivileged_group,
                                                    privileged_groups = privileged_group)

        row.append(metric_test_pred.statistical_parity_difference())

    fairness1.append(row)


In [None]:
# Turn the collected rows into a DataFrame (columns are numbered 0..4 for now).
fairness1 = pd.DataFrame(data=fairness1)

In [None]:
# Insert the SPD values of the true labels as the first row: the row is
# written at index -1 and sorting the index then moves it to the top.
label_row = ['label', base_par_sex, base_par_ger, base_par_ger_male, base_par_ger_female]
fairness1.loc[-1] = label_row
fairness1 = fairness1.sort_index(axis=0)

In [None]:
# Replace the positional column labels 0..4 with descriptive headers.
fairness1 = fairness1.rename(columns=dict(enumerate([
    "Model",
    "Parity Diff. (Female)",
    "Parity Diff. (Non-German)",
    "Parity Diff. (Non-German-Male)",
    "Parity Diff. (Non-German-Female)",
])))

In [None]:
# Export the SPD table: LaTeX with three decimals, plus a CSV copy that the
# "Combine all metrics" section reads back in.
fairness1.to_latex(buf='./output/test_fairness1.tex', index=False, float_format="%.3f")
fairness1.to_csv(path_or_buf='./output/test_fairness1.csv', index=False)

# Loop over models (w/o protected attributes) and cutoffs to calculate metrics

In [None]:
# skip for now, not calculated yet w/o protected attributes
#
# Calculates the statistical parity difference (SPD) of the predictions of
# each model/cutoff column (models w/o protected attributes) for several
# protected-group comparisons and stores the result in fairness2, one row per
# column:
#   [column, SPD female, SPD nongerman, SPD nongerman male, SPD nongerman female]

# One entry per comparison:
# (protected attribute names, unprivileged group, privileged group)
spd_groups = [
    (['frau1'], [{'frau1': 1}], [{'frau1': 0}]),                                   # female
    (['maxdeutsch1'], [{'maxdeutsch1': 0}], [{'maxdeutsch1': 1}]),                 # nongerman
    (['nongerman_male'], [{'nongerman_male': 1}], [{'nongerman_male': 0}]),        # nongerman male
    (['nongerman_female'], [{'nongerman_female': 1}], [{'nongerman_female': 0}]),  # nongerman female
]

# The aif360 dataset for a comparison does not depend on the model, so it is
# built once here instead of once per model column.
group_datasets = [
    (BinaryLabelDataset(df = label_test,
                        label_names = ['ltue'],
                        protected_attribute_names = attribute_names),
     unprivileged,
     privileged)
    for attribute_names, unprivileged, privileged in spd_groups
]

model_columns = ['glm2_c1', 'glm2_c2', 'glm2_c3',
                 'net2_c1', 'net2_c2', 'net2_c3',
                 'rf2_c1', 'rf2_c2', 'rf2_c3',
                 'gbm2_c1', 'gbm2_c2', 'gbm2_c3']

fairness2 = []

for column in model_columns:

    # aif360 datasets hold their labels as an (n, 1) numpy array, so the
    # prediction column is converted instead of assigning the pandas Series.
    # A column missing from preds_test raises KeyError here.
    pred = preds_test[column].to_numpy().reshape(-1, 1)

    row = [column]

    for test_label, unprivileged_group, privileged_group in group_datasets:

        # Copy of the label dataset whose labels are replaced by the predictions
        test_pred = test_label.copy()
        test_pred.labels = pred

        metric_test_pred = BinaryLabelDatasetMetric(test_pred,
                                                    unprivileged_groups = unprivileged_group,
                                                    privileged_groups = privileged_group)

        row.append(metric_test_pred.statistical_parity_difference())

    fairness2.append(row)

fairness2 = pd.DataFrame(fairness2)


In [None]:
# skip for now, not calculated yet w/o protected attributes

# Put the SPD values of the true labels on top: written at index -1, then the
# index is sorted so that row comes first.
label_row = ['label', base_par_sex, base_par_ger, base_par_ger_male, base_par_ger_female]
fairness2.loc[-1] = label_row
fairness2 = fairness2.sort_index(axis=0)

# Replace the positional column labels 0..4 with descriptive headers.
fairness2 = fairness2.rename(columns=dict(enumerate([
    "Model",
    "Parity Diff. (Female)",
    "Parity Diff. (Non-German)",
    "Parity Diff. (Non-German-Male)",
    "Parity Diff. (Non-German-Female)",
])))

# Export as LaTeX (three decimals) and CSV.
fairness2.to_latex(buf='./output/test_fairness2.tex', index=False, float_format="%.3f")
fairness2.to_csv(path_or_buf='./output/test_fairness2.csv', index=False)

# 02: Cond. Stat. Parity Difference (Edu = Abitur) 

In [None]:
# Baseline conditional fairness of the observed label, sex comparison.
# Both groups are restricted to maxschule9 == 1, so women and men are compared
# within that education level only.

protected_attribute = ['frau1', 'maxschule9']
unprivileged_group, privileged_group = (
    [{'frau1': 1, 'maxschule9': 1}],
    [{'frau1': 0, 'maxschule9': 1}],
)

In [None]:
# aif360 dataset of the observed labels for the currently selected
# protected attributes.
test_label = BinaryLabelDataset(
    df=label_test,
    label_names=['ltue'],
    protected_attribute_names=protected_attribute,
)

In [None]:
# Metric object comparing the currently selected unprivileged and privileged groups.
metric_test_label = BinaryLabelDatasetMetric(
    test_label,
    unprivileged_groups=unprivileged_group,
    privileged_groups=privileged_group,
)

In [None]:
# Conditional SPD of the observed label between women and men (edu = abi).
base_cpar_sex = metric_test_label.statistical_parity_difference()  # Label diff female (edu = abi)

In [None]:
# Baseline conditional SPD of the observed label: maxdeutsch1 == 0 (non-German,
# unprivileged) versus maxdeutsch1 == 1 (privileged), both with maxschule9 == 1.
protected_attribute = ['maxdeutsch1', 'maxschule9']
unprivileged_group, privileged_group = (
    [{'maxdeutsch1': 0, 'maxschule9': 1}],
    [{'maxdeutsch1': 1, 'maxschule9': 1}],
)

test_label = BinaryLabelDataset(
    df=label_test,
    label_names=['ltue'],
    protected_attribute_names=protected_attribute,
)

metric_test_label = BinaryLabelDatasetMetric(
    test_label,
    unprivileged_groups=unprivileged_group,
    privileged_groups=privileged_group,
)

base_cpar_ger = metric_test_label.statistical_parity_difference()  # Label diff nongerman (edu = abi)

In [None]:
# Baseline conditional SPD of the observed label: nongerman_male == 1
# (unprivileged) versus nongerman_male == 0 (privileged), both with maxschule9 == 1.
protected_attribute = ['nongerman_male', 'maxschule9']
unprivileged_group, privileged_group = (
    [{'nongerman_male': 1, 'maxschule9': 1}],
    [{'nongerman_male': 0, 'maxschule9': 1}],
)

test_label = BinaryLabelDataset(
    df=label_test,
    label_names=['ltue'],
    protected_attribute_names=protected_attribute,
)

metric_test_label = BinaryLabelDatasetMetric(
    test_label,
    unprivileged_groups=unprivileged_group,
    privileged_groups=privileged_group,
)

base_cpar_ger_male = metric_test_label.statistical_parity_difference()  # Label diff nongerman male (edu = abi)

In [None]:
# Baseline conditional SPD of the observed label: nongerman_female == 1
# (unprivileged) versus nongerman_female == 0 (privileged), both with maxschule9 == 1.
protected_attribute = ['nongerman_female', 'maxschule9']
unprivileged_group, privileged_group = (
    [{'nongerman_female': 1, 'maxschule9': 1}],
    [{'nongerman_female': 0, 'maxschule9': 1}],
)

test_label = BinaryLabelDataset(
    df=label_test,
    label_names=['ltue'],
    protected_attribute_names=protected_attribute,
)

metric_test_label = BinaryLabelDatasetMetric(
    test_label,
    unprivileged_groups=unprivileged_group,
    privileged_groups=privileged_group,
)

base_cpar_ger_female = metric_test_label.statistical_parity_difference()  # Label diff nongerman female (edu = abi)

# Loop over models (w protected attributes) and cutoffs to calculate metrics

In [None]:
# Calculates the conditional statistical parity difference (both groups
# restricted to maxschule9 == 1, edu = abi) of the predictions of each
# model/cutoff column and stores the result in cond_fair1, one row per column:
#   [column, SPD female, SPD nongerman, SPD nongerman male, SPD nongerman female]

# One entry per comparison:
# (protected attribute names, unprivileged group, privileged group)
cond_spd_groups = [
    (['frau1', 'maxschule9'],
     [{'frau1': 1, 'maxschule9': 1}],
     [{'frau1': 0, 'maxschule9': 1}]),             # female (edu = abi)
    (['maxdeutsch1', 'maxschule9'],
     [{'maxdeutsch1': 0, 'maxschule9': 1}],
     [{'maxdeutsch1': 1, 'maxschule9': 1}]),       # nongerman (edu = abi)
    (['nongerman_male', 'maxschule9'],
     [{'nongerman_male': 1, 'maxschule9': 1}],
     [{'nongerman_male': 0, 'maxschule9': 1}]),    # nongerman male (edu = abi)
    (['nongerman_female', 'maxschule9'],
     [{'nongerman_female': 1, 'maxschule9': 1}],
     [{'nongerman_female': 0, 'maxschule9': 1}]),  # nongerman female (edu = abi)
]

# The aif360 dataset for a comparison does not depend on the model, so it is
# built once here instead of once per model column.
cond_group_datasets = [
    (BinaryLabelDataset(df = label_test,
                        label_names = ['ltue'],
                        protected_attribute_names = attribute_names),
     unprivileged,
     privileged)
    for attribute_names, unprivileged, privileged in cond_spd_groups
]

model_columns = ['glm1_c1', 'glm1_c2', 'glm1_c3'] # ,'net1_c1', 'net1_c2', 'net1_c3','rf1_c1', 'rf1_c2', 'rf1_c3','gbm1_c1', 'gbm1_c2', 'gbm1_c3'

cond_fair1 = []

for column in model_columns:

    # aif360 datasets hold their labels as an (n, 1) numpy array, so the
    # prediction column is converted instead of assigning the pandas Series.
    pred = preds_test[column].to_numpy().reshape(-1, 1)

    row = [column]

    for test_label, unprivileged_group, privileged_group in cond_group_datasets:

        # Copy of the label dataset whose labels are replaced by the predictions
        test_pred = test_label.copy()
        test_pred.labels = pred

        metric_test_pred = BinaryLabelDatasetMetric(test_pred,
                                                    unprivileged_groups = unprivileged_group,
                                                    privileged_groups = privileged_group)

        row.append(metric_test_pred.statistical_parity_difference())

    cond_fair1.append(row)

cond_fair1 = pd.DataFrame(cond_fair1)

In [None]:
# Put the conditional SPD values of the true labels on top: written at
# index -1, then the index is sorted so that row comes first.
label_row = ['label', base_cpar_sex, base_cpar_ger, base_cpar_ger_male, base_cpar_ger_female]
cond_fair1.loc[-1] = label_row
cond_fair1 = cond_fair1.sort_index(axis=0)

# Replace the positional column labels 0..4 with descriptive headers.
cond_fair1 = cond_fair1.rename(columns=dict(enumerate([
    "Model",
    "Cond. Parity Diff. (Female)",
    "Cond. Parity Diff. (Non-German)",
    "Cond. Parity Diff. (Non-German-Male)",
    "Cond. Parity Diff. (Non-German-Female)",
])))


In [None]:
# Export the conditional SPD table: LaTeX with three decimals, plus a CSV copy
# that the "Combine all metrics" section reads back in.
cond_fair1.to_latex(buf='./output/test_cond_fairness1.tex', index=False, float_format="%.3f")
cond_fair1.to_csv(path_or_buf='./output/test_cond_fairness1.csv', index=False)

# Loop over models (w/o protected attributes) and cutoffs to calculate metrics

In [None]:
# ... skipped for now: the models w/o protected attributes have not been calculated yet

# Combine all metrics

In [None]:
# Read the two exported metric tables back from disk.
fairness1, cond_fair1 = (
    pd.read_csv(csv_path)
    for csv_path in ("./output/test_fairness1.csv", "./output/test_cond_fairness1.csv")
)

In [None]:
# The 'Model' column is already present in fairness1, so drop it here before
# the two tables are placed side by side.
cond_fair1 = cond_fair1.drop(columns=['Model'])

In [None]:
# Unconditional and conditional SPD columns side by side, one row per model.
test_full_fair1 = pd.concat((fairness1, cond_fair1), axis='columns')

In [None]:
# Export the combined table as LaTeX (two decimals, no row index).
test_full_fair1.to_latex(buf='./output/test_full_fairness1.tex', index=False, float_format="%.2f")
