# Bias Mitigation Experiments - COMPAS

In [1]:
# Dataset identifier; interpolated into the data and results paths used below.
data_name = "Compas"

In [22]:
# to ignore warnings
import warnings
# warnings.filterwarnings('ignore')

In [23]:
import numpy as np
import pandas as pd
import tensorflow.compat.v1 as tf
# tf.disable_eager_execution() # comment out when running TabTransformer models

## [1.](#Table-of-Contents) Prepare COMPAS data

In [24]:
# Lift pandas' column cap so wide frames are displayed in full.
pd.set_option('display.max_columns', None)
# Load the COMPAS two-year recidivism scores into a DataFrame.
df = pd.read_csv("../data/Compas/compas-scores-two-years.csv")
# Report (rows, columns), then preview the first records.
print(df.shape)
df.head()

(7214, 53)


Unnamed: 0,id,name,first,last,compas_screening_date,sex,dob,age,age_cat,race,juv_fel_count,decile_score,juv_misd_count,juv_other_count,priors_count,days_b_screening_arrest,c_jail_in,c_jail_out,c_case_number,c_offense_date,c_arrest_date,c_days_from_compas,c_charge_degree,c_charge_desc,is_recid,r_case_number,r_charge_degree,r_days_from_arrest,r_offense_date,r_charge_desc,r_jail_in,r_jail_out,violent_recid,is_violent_recid,vr_case_number,vr_charge_degree,vr_offense_date,vr_charge_desc,type_of_assessment,decile_score.1,score_text,screening_date,v_type_of_assessment,v_decile_score,v_score_text,v_screening_date,in_custody,out_custody,priors_count.1,start,end,event,two_year_recid
0,1,miguel hernandez,miguel,hernandez,2013-08-14,Male,1947-04-18,69,Greater than 45,Other,0,1,0,0,0,-1.0,2013-08-13 06:03:42,2013-08-14 05:41:20,13011352CF10A,2013-08-13,,1.0,F,Aggravated Assault w/Firearm,0,,,,,,,,,0,,,,,Risk of Recidivism,1,Low,2013-08-14,Risk of Violence,1,Low,2013-08-14,2014-07-07,2014-07-14,0,0,327,0,0
1,3,kevon dixon,kevon,dixon,2013-01-27,Male,1982-01-22,34,25 - 45,African-American,0,3,0,0,0,-1.0,2013-01-26 03:45:27,2013-02-05 05:36:53,13001275CF10A,2013-01-26,,1.0,F,Felony Battery w/Prior Convict,1,13009779CF10A,(F3),,2013-07-05,Felony Battery (Dom Strang),,,,1,13009779CF10A,(F3),2013-07-05,Felony Battery (Dom Strang),Risk of Recidivism,3,Low,2013-01-27,Risk of Violence,1,Low,2013-01-27,2013-01-26,2013-02-05,0,9,159,1,1
2,4,ed philo,ed,philo,2013-04-14,Male,1991-05-14,24,Less than 25,African-American,0,4,0,1,4,-1.0,2013-04-13 04:58:34,2013-04-14 07:02:04,13005330CF10A,2013-04-13,,1.0,F,Possession of Cocaine,1,13011511MM10A,(M1),0.0,2013-06-16,Driving Under The Influence,2013-06-16,2013-06-16,,0,,,,,Risk of Recidivism,4,Low,2013-04-14,Risk of Violence,3,Low,2013-04-14,2013-06-16,2013-06-16,4,0,63,0,1
3,5,marcu brown,marcu,brown,2013-01-13,Male,1993-01-21,23,Less than 25,African-American,0,8,1,0,1,,,,13000570CF10A,2013-01-12,,1.0,F,Possession of Cannabis,0,,,,,,,,,0,,,,,Risk of Recidivism,8,High,2013-01-13,Risk of Violence,6,Medium,2013-01-13,,,1,0,1174,0,0
4,6,bouthy pierrelouis,bouthy,pierrelouis,2013-03-26,Male,1973-01-22,43,25 - 45,Other,0,1,0,0,2,,,,12014130CF10A,,2013-01-09,76.0,F,arrest case no charge,0,,,,,,,,,0,,,,,Risk of Recidivism,1,Low,2013-03-26,Risk of Violence,1,Low,2013-03-26,,,2,0,1102,0,0


In [25]:
# Number of records per race category.
df.race.value_counts()

African-American    3696
Caucasian           2454
Hispanic             637
Other                377
Asian                 32
Native American       18
Name: race, dtype: int64

In [26]:
# data pre-processing
import re
label = 'two_year_recid'
protected_attribute = 'race'

# 1. select features of interest
keep_var = ['sex','age','race','juv_fel_count','juv_misd_count','juv_other_count','priors_count',
            'c_charge_degree', 'two_year_recid']

# 2. encode label and potential protected features
race_mapping = {'Caucasian': 1, 'African-American': 0}
sex_mapping = {'Female': 1, 'Male': 0} # female is the privileged group in this case

# Keep only the selected columns and only rows whose race is one of the mapped
# groups. .copy() gives an independent frame, so the assignments below never
# write into a view of the raw data.
df = df.loc[df['race'].isin(race_mapping.keys()), keep_var].copy()

# This cell overwrites `df`, so running it a second time would filter the
# already-encoded integer column against the string categories and leave no
# rows. Fail with a clear message instead of silently producing an empty frame.
if df.empty:
    raise ValueError(
        "No rows left after filtering on race; this cell expects the raw, "
        "un-encoded COMPAS frame (reload the CSV before re-running it)."
    )

# Assign the encoded columns back explicitly. The previous
# `df.race.replace(..., inplace=True)` was a chained in-place update on a
# filtered frame: it raises SettingWithCopyWarning and leaves `df` unchanged
# under pandas Copy-on-Write. Casting to int64 makes an unmapped category
# (which `map` turns into NaN) fail loudly instead of leaking into the model.
df = df.assign(
    race=df['race'].map(race_mapping).astype('int64'),
    sex=df['sex'].map(sex_mapping).astype('int64'),
)
df = df.reset_index(drop=True)

print(df.shape)
df.head()

(6150, 9)


Unnamed: 0,sex,age,race,juv_fel_count,juv_misd_count,juv_other_count,priors_count,c_charge_degree,two_year_recid
0,0,34,0,0,0,0,0,F,1
1,0,24,0,0,0,1,4,F,1
2,0,23,0,0,1,0,1,F,0
3,0,41,1,0,0,0,14,F,1
4,1,39,1,0,0,0,0,M,0


In [27]:
# Invert the target so that 1 = no recidivism within two years and 0 = recidivism.
# NOTE: this overwrites the column in place, so running the cell a second time
# flips the labels back.
df[label] = 1 - df[label].astype(bool).astype(int)

# Features: every column except the target (the protected attribute stays in X).
X = df.drop(columns=[label])
# Target: kept together with the protected attribute, which the original note
# says is required for AIF360.
y = df.loc[:, [protected_attribute, label]]

In [28]:
# Append the protected attribute as an extra index level.
# X keeps it as a regular column too (drop=False); y does not.
X = X.set_index(keys=[protected_attribute], drop=False, append=True)

# Reduce y to a 1-d Series of labels for modelling; the protected attribute
# stays available through the index.
y = y.set_index(keys=[protected_attribute], append=True)[label]

In [29]:
# Persist the prepared features and target next to the raw data so the
# experiments below can reload them without repeating the preprocessing.
X.to_pickle('../data/{0}/{0}_X'.format(data_name))
y.to_pickle('../data/{0}/{0}_y'.format(data_name))

## [2.](#Table-of-Contents) Proposed GridSearch Approach 

The proposed grid-search approach jointly searches over model hyperparameters, the classification threshold, and the bias-mitigation method.

In [31]:
from FairGridSearch import *
%load_ext autoreload
%autoreload 2
# allow automatic reloading of changes in FairGridSearch file

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [32]:
# None, better
# 'RW', worse
# 'LFR_pre', stuck
# 'LFR_in', stuck
# 'AD', stuck
# 'EGR',better
# 'ROC', better
# 'CEO', better

## 3. Case Study: COMPAS

In [33]:
# Imported explicitly: previously `multiprocessing` was only in scope as a side
# effect of `from FairGridSearch import *`.
import multiprocessing

# Target column and protected attribute for the COMPAS case study.
label = 'two_year_recid'
protected_attribute = 'race'

# NOTE(review): the data-preparation step flips the label so that 1 means
# "no recidivism", yet pos_label is 0 here -- confirm which outcome
# fair_GridsearchCV is meant to treat as positive.
pos_label = 0
# Privileged group value of the protected attribute (race is encoded with Caucasian = 1).
priv_group = 1

# Number of cross-validation folds.
cv = 10
# Leave one core free, but never request fewer than one worker
# (cpu_count() - 1 would be 0 on a single-core machine).
n_jobs = max(1, multiprocessing.cpu_count() - 1)

In [34]:
# Reload the prepared features/target written by the data-preparation section.
# (Pickle files can execute code on load; only read files produced by this notebook.)
X = pd.read_pickle('../data/{0}/{0}_X'.format(data_name))
y = pd.read_pickle('../data/{0}/{0}_y'.format(data_name))

In [35]:
# Metrics chosen for this use case: one for predictive performance, one for fairness.
# NOTE(review): neither name is referenced in the visible cells; presumably
# they are read by FairGridSearch or by later cells -- confirm.
ACC_METRIC = "avg_norm_mcc_score"
FAIR_METRIC = "abs_avg_ppvd_score"

### Logistic Regression

In [15]:
import os
import timeit

start = timeit.default_timer()

# Search space: LR hyperparameters x decision thresholds x bias-mitigation settings
# (None = no mitigation; 'A+B' entries combine two methods).
param_grid = {'hyperp_grid': {'C':[1, 10],'solver':['liblinear', 'saga'],'penalty':['l2']},
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','LFR_in','AD','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_lr = fair_GridsearchCV(base='LR', param_grid=param_grid,
                           prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                           cv=cv, n_jobs=n_jobs)
if __name__ == '__main__':
    clf_lr.fit(X=X, y=y)

# One row per (hyperparameters, mitigation, threshold) combination with its averaged scores.
results_lr = clf_lr.output_table
print(clf_lr._best_param)

# Report wall-clock time in seconds for short runs, otherwise in minutes.
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results directory exists: to_pickle does not create it, and a
# missing folder would otherwise raise only after the whole search has finished.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_lr.to_pickle('./{}_results/{}_results_LR'.format(data_name,data_name))
style_table(results_lr)

  0%|          | 0/4 [00:00<?, ?it/s]

{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [01:01<03:05, 61.93s/it]

{'C': 1, 'penalty': 'l2', 'solver': 'saga'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [02:10<02:12, 66.07s/it]

{'C': 10, 'penalty': 'l2', 'solver': 'liblinear'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [03:15<01:05, 65.62s/it]

{'C': 10, 'penalty': 'l2', 'solver': 'saga'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [04:16<00:00, 64.07s/it]


base_estimator                                                   LR
param              {'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}
Bias_Mitigation                                                  AD
threshold                                                       0.6
Name: 23, dtype: object
Time:  4.27735883 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.3,0.599675,0.574001,0.717826,0.722252,0.230838,0.615419,0.111578,0.101662,0.155706,0.085145,0.040474,0.041821,0.075919,0.084654,0.496159
1,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.4,0.644878,0.625993,0.731284,0.722252,0.307816,0.653908,0.167803,0.145699,0.200517,0.069432,0.005364,0.054992,0.092411,0.110363,0.513895
2,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.5,0.674797,0.668928,0.712626,0.722252,0.344032,0.672016,0.25955,0.227603,0.278039,0.022598,0.052065,0.054667,0.141747,0.191353,0.587534
3,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.6,0.631057,0.641059,0.58777,0.722252,0.292908,0.646454,0.272406,0.240094,0.228308,0.000951,0.067096,0.053626,0.247697,0.353812,0.625952
4,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.7,0.571545,0.592756,0.409891,0.722252,0.234684,0.617342,0.204943,0.179076,0.125459,0.017874,0.074657,0.038504,0.359579,0.509255,0.587601
5,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.3,0.59561,0.569373,0.716542,0.710705,0.223255,0.611627,0.035465,0.018977,0.020153,0.116823,0.022973,0.044585,0.074609,0.082661,0.423838
6,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.4,0.628943,0.609497,0.720747,0.710705,0.270825,0.635412,0.019245,-0.007191,-0.004052,0.11393,0.11599,0.056358,0.095209,0.115446,0.383832
7,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.5,0.653171,0.646175,0.697547,0.710705,0.299696,0.649848,-0.044163,-0.077048,-0.045988,0.098558,0.187841,0.062439,0.146172,0.197301,0.394315
8,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.6,0.631545,0.64138,0.589259,0.710705,0.293352,0.646676,0.017352,-0.01248,0.025209,0.055709,0.151855,0.049463,0.246495,0.352042,0.370676
9,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.7,0.565203,0.587241,0.390756,0.710705,0.226524,0.613262,0.056577,0.035529,0.030176,0.052975,0.11622,0.037691,0.370796,0.523575,0.443315


### Random Forest

In [16]:
import os
import timeit

start = timeit.default_timer()

# Search space: RF hyperparameters x decision thresholds x bias-mitigation settings
# (None = no mitigation; 'A+B' entries combine two methods).
param_grid = {'hyperp_grid': {'n_estimators':[10, 50],'criterion':['gini', 'entropy'], 'max_depth':[16]},
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_rf = fair_GridsearchCV(base='RF', param_grid=param_grid,
                           prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                           cv=cv, n_jobs=n_jobs)
if __name__ == '__main__':
    clf_rf.fit(X=X, y=y)

# One row per (hyperparameters, mitigation, threshold) combination with its averaged scores.
results_rf = clf_rf.output_table
print(clf_rf._best_param)

# Report wall-clock time in seconds for short runs, otherwise in minutes.
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results directory exists: to_pickle does not create it, and a
# missing folder would otherwise raise only after the whole search has finished.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_rf.to_pickle('./{}_results/{}_results_RF'.format(data_name, data_name))
style_table(results_rf)

  0%|          | 0/4 [00:00<?, ?it/s]

{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [00:41<02:04, 41.62s/it]

{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 50}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [01:47<01:51, 55.95s/it]

{'criterion': 'entropy', 'max_depth': 16, 'n_estimators': 10}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [02:39<00:54, 54.25s/it]

{'criterion': 'entropy', 'max_depth': 16, 'n_estimators': 50}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [04:02<00:00, 60.72s/it]


base_estimator                                                    RF
param              {'criterion': 'entropy', 'max_depth': 16, 'n_e...
Bias_Mitigation                                               RW+CEO
threshold                                                        0.4
Name: 116, dtype: object
Time:  4.056987171666667 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",,0.3,0.61561,0.598383,0.70314,0.66319,0.230911,0.615455,0.106297,0.089824,0.141914,0.077496,0.155259,0.184098,0.110417,0.140535,0.490842
1,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",,0.4,0.627317,0.616621,0.689357,0.66319,0.246955,0.623477,0.119614,0.097943,0.145161,0.069217,0.140409,0.208878,0.138534,0.184877,0.496137
2,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",,0.5,0.62439,0.621109,0.655208,0.66319,0.243601,0.6218,0.163223,0.142134,0.19676,0.041868,0.14151,0.230634,0.179442,0.247596,0.541423
3,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",,0.6,0.610244,0.614493,0.601527,0.66319,0.229984,0.614992,0.171675,0.150868,0.195339,0.024559,0.134833,0.240163,0.230142,0.322105,0.556682
4,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",,0.7,0.578699,0.59077,0.510974,0.66319,0.193138,0.596569,0.154254,0.137963,0.172242,0.008952,0.134942,0.230699,0.300365,0.418587,0.557685
5,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",RW,0.3,0.610732,0.593396,0.699751,0.660284,0.219199,0.609599,0.062055,0.045177,0.09055,0.084772,0.175159,0.183415,0.111536,0.14254,0.452455
6,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",RW,0.4,0.62065,0.60956,0.685165,0.660284,0.233071,0.616535,0.074983,0.055433,0.110467,0.069537,0.175014,0.211837,0.139298,0.186063,0.458447
7,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",RW,0.5,0.619187,0.615167,0.653931,0.660284,0.232207,0.616104,0.089016,0.069172,0.129281,0.051353,0.16999,0.23148,0.177888,0.245035,0.472913
8,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",RW,0.6,0.608943,0.613295,0.599541,0.660284,0.227649,0.613824,0.09623,0.076152,0.132296,0.030064,0.158787,0.244553,0.231567,0.324046,0.482405
9,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",RW,0.7,0.585691,0.597399,0.522257,0.660284,0.206279,0.603139,0.091651,0.074004,0.118019,0.016628,0.147578,0.232228,0.292466,0.408987,0.488511


### Gradient Boosting

In [18]:
import os
import timeit

start = timeit.default_timer()

# Search space: GB hyperparameters x decision thresholds x bias-mitigation settings
# (None = no mitigation; 'A+B' entries combine two methods).
param_grid = {'hyperp_grid': {'n_estimators':[10, 50],'criterion':['friedman_mse'],'max_depth':[8, 32]},
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_gb = fair_GridsearchCV(base='GB', param_grid=param_grid,
                           prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                           cv=cv, n_jobs=n_jobs)
if __name__ == '__main__':
    clf_gb.fit(X=X, y=y)

# One row per (hyperparameters, mitigation, threshold) combination with its averaged scores.
results_gb = clf_gb.output_table
print(clf_gb._best_param)

# Report wall-clock time in seconds for short runs, otherwise in minutes.
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results directory exists: to_pickle does not create it, and a
# missing folder would otherwise raise only after the whole search has finished.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_gb.to_pickle('./{}_results/{}_results_GB'.format(data_name, data_name))
style_table(results_gb)

  0%|          | 0/4 [00:00<?, ?it/s]

{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [00:55<02:45, 55.18s/it]

{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 50}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [02:31<02:38, 79.34s/it]

{'criterion': 'friedman_mse', 'max_depth': 32, 'n_estimators': 10}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [03:58<01:23, 83.07s/it]

{'criterion': 'friedman_mse', 'max_depth': 32, 'n_estimators': 50}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [08:43<00:00, 130.81s/it]


base_estimator                                                    GB
param              {'criterion': 'friedman_mse', 'max_depth': 8, ...
Bias_Mitigation                                                   RW
threshold                                                        0.6
Name: 8, dtype: object
Time:  8.725093631666663 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.3,0.585528,0.557847,0.713569,0.709351,0.205327,0.602664,0.078415,0.070334,0.112646,0.094769,0.083604,0.076715,0.071105,0.077293,0.475751
1,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.4,0.639837,0.621482,0.725702,0.709351,0.292978,0.646489,0.165388,0.145861,0.213883,0.06192,0.080595,0.11148,0.09665,0.117469,0.518899
2,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.5,0.66065,0.653486,0.704836,0.709351,0.315182,0.657591,0.214928,0.185914,0.236762,0.038588,0.083137,0.124488,0.141955,0.190915,0.557337
3,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.6,0.636098,0.643896,0.607726,0.709351,0.293926,0.646963,0.151886,0.119767,0.128976,0.038748,0.103809,0.125496,0.232619,0.331047,0.504923
4,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.7,0.534959,0.560009,0.302813,0.709351,0.175523,0.587762,0.125751,0.109632,0.078697,0.048033,0.099379,0.093171,0.423948,0.586155,0.537989
5,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.3,0.586667,0.558959,0.714492,0.698626,0.208391,0.604195,0.044967,0.033772,0.053297,0.107699,0.068374,0.078537,0.070626,0.076441,0.440772
6,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.4,0.633821,0.614769,0.723193,0.698626,0.281662,0.640831,0.08649,0.065084,0.113315,0.080974,0.139582,0.11535,0.095476,0.115685,0.445659
7,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.5,0.654146,0.646586,0.700699,0.698626,0.301673,0.650837,0.06889,0.037967,0.072496,0.077137,0.141469,0.137463,0.142807,0.192066,0.418053
8,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.6,0.632358,0.639537,0.60765,0.698626,0.283981,0.64199,0.001835,-0.028398,0.002354,0.076491,0.150613,0.121561,0.231764,0.32875,0.359844
9,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.7,0.524715,0.550549,0.274185,0.698626,0.154687,0.577343,0.005671,-0.00498,0.010669,0.050535,0.130648,0.088748,0.441825,0.605819,0.428328


### Support Vector Machine

In [17]:
import os
import timeit

start = timeit.default_timer()

# Search space: SVM kernels x decision thresholds x bias-mitigation settings
# (None = no mitigation; 'A+B' entries combine two methods).
# The recorded run of this cell took about 51 minutes.
param_grid = {'hyperp_grid': {'kernel':['rbf','linear','poly','sigmoid'],'gamma':['scale']},
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_svm = fair_GridsearchCV(base='SVM', param_grid=param_grid,
                            prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                            cv=cv, n_jobs=n_jobs)
if __name__ == '__main__':
    clf_svm.fit(X=X, y=y)

# One row per (hyperparameters, mitigation, threshold) combination with its averaged scores.
results_svm = clf_svm.output_table
print(clf_svm._best_param)

# Report wall-clock time in seconds for short runs, otherwise in minutes.
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results directory exists: to_pickle does not create it, and a
# missing folder would otherwise raise only after the whole search has finished.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_svm.to_pickle('./{}_results/{}_results_SVM'.format(data_name, data_name))
style_table(results_svm)

  0%|          | 0/4 [00:00<?, ?it/s]

{'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [20:12<1:00:38, 1212.87s/it]

{'gamma': 'scale', 'kernel': 'linear'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [29:35<27:40, 830.22s/it]   

{'gamma': 'scale', 'kernel': 'poly'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [41:56<13:09, 789.69s/it]

{'gamma': 'scale', 'kernel': 'sigmoid'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [51:32<00:00, 773.09s/it]


base_estimator                                        SVM
param              {'gamma': 'scale', 'kernel': 'linear'}
Bias_Mitigation                                        RW
threshold                                             0.5
Name: 47, dtype: object
Time:  51.548061155 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.3,0.638374,0.618835,0.728254,0.71922,0.294836,0.647418,0.189346,0.170681,0.236509,0.063433,-0.007808,0.055122,0.091635,0.109257,0.541928
1,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.4,0.659187,0.64498,0.728129,0.71922,0.322318,0.661159,0.224587,0.199339,0.261602,0.048607,0.016419,0.050179,0.108839,0.137291,0.563428
2,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.5,0.672195,0.664129,0.718378,0.71922,0.339463,0.669732,0.271906,0.242521,0.30291,0.022286,0.039299,0.054407,0.13244,0.176072,0.602174
3,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.6,0.670407,0.671506,0.679594,0.71922,0.342379,0.67119,0.305641,0.270018,0.288194,0.004983,0.035261,0.06052,0.177543,0.24919,0.634451
4,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.7,0.61187,0.626659,0.528443,0.71922,0.278641,0.63932,0.234751,0.203574,0.174362,0.004531,0.073784,0.069138,0.28756,0.411968,0.595431
5,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.3,0.630732,0.610309,0.725062,0.715772,0.280774,0.640387,0.09043,0.067233,0.09083,0.096648,0.051267,0.062634,0.090149,0.107008,0.450043
6,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.4,0.652846,0.638446,0.723651,0.715772,0.308351,0.654175,0.121529,0.092757,0.123918,0.081378,0.077018,0.048878,0.110284,0.139701,0.467354
7,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.5,0.66813,0.659453,0.717045,0.715772,0.33125,0.665625,0.097301,0.062551,0.089894,0.077044,0.107862,0.059545,0.131294,0.174072,0.431676
8,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.6,0.663577,0.664049,0.67563,0.715772,0.327551,0.663775,0.071281,0.035071,0.063339,0.065444,0.131827,0.061724,0.178379,0.249414,0.407506
9,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.7,0.599675,0.61661,0.493379,0.715772,0.26607,0.633035,0.037421,0.010971,0.023581,0.057156,0.12832,0.079122,0.309636,0.442455,0.404386


### Gaussian Naive Bayes

In [19]:
# Fair grid search with a Gaussian Naive Bayes base estimator.
import os
import timeit

start = timeit.default_timer()

# Search space: NB variance smoothing (1, 1e-3, 1e-6, 1e-9) x decision thresholds
# x bias-mitigation settings (None = no mitigation; 'A+B' entries combine two methods).
param_grid = {'hyperp_grid': {'var_smoothing': np.logspace(0,-9, num=4)},
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_nb = fair_GridsearchCV(base='NB', param_grid=param_grid,
                           prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                           cv=cv, n_jobs=n_jobs)
if __name__ == '__main__':
    clf_nb.fit(X=X, y=y)

# One row per (hyperparameters, mitigation, threshold) combination with its averaged scores.
results_nb = clf_nb.output_table
# NOTE(review): the best-parameter printout was disabled in the original cell
# (and referred to a non-existent `clf`); left disabled, with the name corrected.
# print(clf_nb._best_param)

# Report wall-clock time in seconds for short runs, otherwise in minutes.
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results directory exists: to_pickle does not create it, and a
# missing folder would otherwise raise only after the whole search has finished.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_nb.to_pickle('./{}_results/{}_results_NB'.format(data_name, data_name))
style_table(results_nb)

  0%|          | 0/4 [00:00<?, ?it/s]

{'var_smoothing': 1.0}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [00:34<01:42, 34.21s/it]

{'var_smoothing': 0.001}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [01:07<01:07, 33.52s/it]

{'var_smoothing': 1e-06}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [01:40<00:33, 33.50s/it]

{'var_smoothing': 1e-09}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [02:13<00:00, 33.26s/it]


Time:  2.22212151666666 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,NB,{'var_smoothing': 1.0},,0.3,0.575285,0.546649,0.70922,0.692631,0.177991,0.588996,0.070237,0.064005,0.098239,0.100305,0.086361,0.024163,0.069672,0.07549,0.481242
1,NB,{'var_smoothing': 1.0},,0.4,0.58878,0.562144,0.712883,0.692631,0.204197,0.602098,0.101381,0.093215,0.139549,0.091242,0.061901,0.024748,0.074756,0.08329,0.499282
2,NB,{'var_smoothing': 1.0},,0.5,0.601463,0.57736,0.714471,0.692631,0.223306,0.611653,0.13218,0.121785,0.175475,0.083372,0.068099,0.026602,0.082473,0.095528,0.520527
3,NB,{'var_smoothing': 1.0},,0.6,0.622764,0.602679,0.718093,0.692631,0.257808,0.628904,0.188148,0.173747,0.240596,0.066018,0.042329,0.03187,0.094212,0.114055,0.559244
4,NB,{'var_smoothing': 1.0},,0.7,0.644878,0.639246,0.684619,0.692631,0.283326,0.641663,0.421188,0.397104,0.431835,0.021911,-0.058857,0.036228,0.156826,0.213725,0.779525
5,NB,{'var_smoothing': 1.0},RW,0.3,0.571057,0.542067,0.707301,0.694583,0.16552,0.58276,0.060051,0.05372,0.078762,0.105622,0.065757,0.023154,0.069284,0.075092,0.477291
6,NB,{'var_smoothing': 1.0},RW,0.4,0.585203,0.55782,0.712545,0.694583,0.199458,0.599729,0.077976,0.069263,0.105228,0.097969,0.0632,0.02735,0.072439,0.079589,0.478247
7,NB,{'var_smoothing': 1.0},RW,0.5,0.597724,0.572845,0.714097,0.694583,0.217807,0.608903,0.106102,0.094993,0.137261,0.091372,0.06484,0.027545,0.080048,0.091676,0.497199
8,NB,{'var_smoothing': 1.0},RW,0.6,0.619024,0.598224,0.717429,0.694583,0.251908,0.625954,0.14473,0.12904,0.180728,0.078759,0.061126,0.031447,0.09216,0.110794,0.518776
9,NB,{'var_smoothing': 1.0},RW,0.7,0.637561,0.6348,0.664736,0.694583,0.272469,0.636234,0.215159,0.18803,0.212211,0.054071,0.079711,0.038146,0.175556,0.242236,0.578925


### TabTransformer

In [36]:
import os
import timeit

start = timeit.default_timer()

# Search space: TabTransformer training settings x decision thresholds
# x bias-mitigation settings (None = no mitigation; 'A+B' entries combine two methods).
# NOTE(review): 'learing_rate' looks like a misspelling of 'learning_rate'. The
# key is kept unchanged because FairGridSearch may look it up under exactly
# this name -- confirm there before renaming.
param_grid = {'hyperp_grid': {'epochs':[20, 30],'learing_rate':[1e-04, 1e-05]},
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','ROC','CEO','RW+ROC','RW+CEO']}

clf_tab = fair_GridsearchCV(base='TabTrans',param_grid=param_grid,
                            prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                            cv=cv, n_jobs=n_jobs)

clf_tab.fit(X=X, y=y)
# One row per (hyperparameters, mitigation, threshold) combination with its averaged scores.
results_tab = clf_tab.output_table
# NOTE(review): the best-parameter printout was disabled in the original cell
# (and referred to a non-existent `clf`); left disabled, with the name corrected.
# print(clf_tab._best_param)

# Report wall-clock time in seconds for short runs, otherwise in minutes.
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results directory exists: to_pickle does not create it, and a
# missing folder would otherwise raise only after the whole search has finished.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_tab.to_pickle('./{}_results/{}_results_TabTrans'.format(data_name, data_name))

style_table(results_tab)

  0%|          | 0/4 [00:00<?, ?it/s]

{'epochs': 20, 'learing_rate': 0.0001}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [05:02<15:06, 302.06s/it]

{'epochs': 20, 'learing_rate': 1e-05}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [09:53<09:51, 295.74s/it]

{'epochs': 30, 'learing_rate': 0.0001}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [15:36<05:17, 317.41s/it]

{'epochs': 30, 'learing_rate': 1e-05}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [21:58<00:00, 329.68s/it]


Time:  21.990890648333334 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",,0.3,0.553008,0.521394,0.702557,0.649415,0.108178,0.554089,0.031325,0.028844,0.046125,0.111003,0.272153,0.012813,0.062661,0.064985,0.477236
1,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",,0.4,0.605691,0.584062,0.70918,0.649415,0.225102,0.612551,0.119099,0.103935,0.138568,0.088041,0.12233,0.056325,0.093439,0.112907,0.506548
2,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",,0.5,0.58878,0.588609,0.581825,0.649415,0.199794,0.599897,0.150041,0.129832,0.122994,0.094595,0.045496,0.082407,0.227891,0.309626,0.550144
3,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",,0.6,0.520488,0.543133,0.276098,0.649415,0.08244,0.54122,0.071166,0.061641,0.061047,0.002665,0.116724,0.044715,0.437411,0.587345,0.529946
4,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",,0.7,0.473333,0.506184,0.031655,0.649415,0.017618,0.508809,0.007567,0.00605,0.001634,0.012558,0.118008,0.006407,0.556927,0.744515,0.498758
5,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",RW,0.3,0.57561,0.54721,0.708775,0.67283,0.167713,0.583857,0.068959,0.062688,0.093208,0.101475,0.125628,0.024553,0.070733,0.077254,0.485102
6,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",RW,0.4,0.616423,0.594591,0.718443,0.67283,0.25265,0.626325,0.135059,0.120703,0.174544,0.078087,0.03056,0.046927,0.088517,0.104775,0.508734
7,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",RW,0.5,0.64,0.632549,0.680353,0.67283,0.279587,0.639794,0.187977,0.16155,0.190663,0.070061,0.046827,0.070732,0.15526,0.206246,0.548183
8,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",RW,0.6,0.559837,0.576835,0.425011,0.67283,0.16354,0.58177,0.157996,0.140474,0.133762,0.143136,0.1063,0.069789,0.352984,0.485898,0.576226
9,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",RW,0.7,0.487805,0.518923,0.097339,0.67283,0.071504,0.535752,0.040797,0.035706,0.023589,-0.044152,0.113761,0.020715,0.525127,0.707391,0.505045
