# Bias Mitigation Experiments - StudentsPerformance

In [3]:
# Dataset identifier; reused below to build the data and results file paths
data_name = "StudentsPerformance"

In [4]:
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation / FutureWarning messages raised by the
# libraries used below — consider narrowing the filter if those matter.
import warnings
warnings.filterwarnings('ignore')

In [5]:
import numpy as np
import pandas as pd
import tensorflow.compat.v1 as tf
# tf.disable_eager_execution() # comment out when running TabTransformer models

## [1.](#Table-of-Contents) Prepare StudentsPerformance data

In [6]:
# Show every column when a DataFrame is rendered (no column truncation)
pd.set_option('display.max_columns', None)
# Load the raw StudentsPerformance CSV into a DataFrame
df = pd.read_csv("../data/StudentsPerformance/StudentsPerformance.csv")
# Report (rows, columns), then preview the first rows
print(df.shape)
df.head()

(1000, 8)


Unnamed: 0,gender,race,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [8]:
# Count the rows per gender value to see how balanced the protected attribute is
df['gender'].value_counts()

female    518
male      482
Name: gender, dtype: int64

In [9]:
# data pre-processing: encode the label and the protected attribute as 0/1 integers
import re
label = 'test preparation course'
protected_attribute = 'gender'
# 1. select features of interest
# 2. encode label and potential protected features
label_mapping = {'completed': 1, 'none': 0}
# race_mapping = {'group D': 1, 'group C': 0}
sex_mapping = {'female': 1, 'male': 0} # female is the privileged group in this case

# df = df[df.race.isin(race_mapping.keys())] # only include samples with certain race values
# df.race.replace(race_mapping, inplace=True)

# Assign the encoded columns back to the frame instead of calling
# `df.<col>.replace(..., inplace=True)`: an in-place method on a column selection is
# chained assignment, which stops updating `df` under pandas Copy-on-Write (and the
# corresponding warning is hidden by the global warnings filter above).
# `replace` leaves already-encoded values untouched, so re-running this cell is safe.
# The explicit cast keeps the columns integer-typed regardless of pandas' downcasting rules.
df[protected_attribute] = df[protected_attribute].replace(sex_mapping).astype('int64')
df[label] = df[label].replace(label_mapping).astype('int64')
df = df.reset_index(drop=True)
print(df.shape)
df.head()

(1000, 8)


Unnamed: 0,gender,race,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,1,group B,bachelor's degree,standard,0,72,72,74
1,1,group C,some college,standard,1,69,90,88
2,1,group B,master's degree,standard,0,90,95,93
3,0,group A,associate's degree,free/reduced,0,47,57,44
4,0,group C,some college,standard,0,76,78,75


In [10]:
# Split the encoded frame into model inputs and target.
# X: every column except the label (the protected attribute stays in as a feature)
X = df.drop(columns=[label])
# y: the label together with the protected attribute, kept alongside the target for AIF360
y = df[[protected_attribute, label]]

In [11]:
# Preview the first 100 encoded rows (gender and the label are now 0/1)
df.head(100)

Unnamed: 0,gender,race,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,1,group B,bachelor's degree,standard,0,72,72,74
1,1,group C,some college,standard,1,69,90,88
2,1,group B,master's degree,standard,0,90,95,93
3,0,group A,associate's degree,free/reduced,0,47,57,44
4,0,group C,some college,standard,0,76,78,75
...,...,...,...,...,...,...,...,...
95,0,group C,associate's degree,free/reduced,1,78,81,82
96,0,group B,some high school,standard,1,65,66,62
97,1,group E,some college,standard,1,63,72,70
98,1,group D,some college,free/reduced,0,58,67,62


In [12]:
# Append the protected attribute as an extra index level on both objects.
# X also keeps it as a regular column (drop=False); y hands it over to the index.
X = X.set_index(protected_attribute, append=True, drop=False)
# Reduce y to the 1-d label Series used for modelling; the index
# (original row index + protected attribute) is carried along unchanged.
y = y.set_index(protected_attribute, append=True)[label]

In [13]:
# Persist the prepared features and labels next to the raw data so later sections can reload them
X.to_pickle('../data/{name}/{name}_X'.format(name=data_name))
y.to_pickle('../data/{name}/{name}_y'.format(name=data_name))

## [2.](#Table-of-Contents) Proposed GridSearch Approach 

The GridSearch approach searches jointly over model hyperparameters, decision thresholds, and bias-mitigation methods.

In [20]:
# Pull every public name of the project's FairGridSearch module into the notebook namespace.
# NOTE(review): later cells use fair_GridsearchCV, style_table and multiprocessing without
# importing them in the visible cells — presumably they arrive through this star import; confirm.
from FairGridSearch import *
# Load autoreload and reload modules before each execution,
# so changes in the FairGridSearch file are picked up automatically
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
# None, better
# 'RW', worse
# 'LFR_pre', stuck
# 'LFR_in', stuck
# 'AD', stuck
# 'EGR',better
# 'ROC', better
# 'CEO', better

## 3. Case Study: StudentsPerformance

In [15]:
# Imported explicitly instead of relying on `from FairGridSearch import *` to provide it
import multiprocessing

# Target column and protected attribute handed to the grid search
label = 'test preparation course'
protected_attribute = 'gender'

# Encoded label value treated as the positive class (0 is 'none' in label_mapping above)
pos_label = 0
# Encoded value of the privileged group (1 is 'female' in sex_mapping above)
priv_group = 1

# Number of cross-validation folds
cv = 10
# Leave one core free for the rest of the system, but never go below one worker:
# on a single-core machine cpu_count() - 1 would be 0, which is not a valid worker count.
n_jobs = max(1, multiprocessing.cpu_count() - 1)

In [16]:
# Reload the prepared features and labels written by the data-preparation section
X = pd.read_pickle('../data/{name}/{name}_X'.format(name=data_name))
y = pd.read_pickle('../data/{name}/{name}_y'.format(name=data_name))

In [17]:
# Metric names chosen for this use case: one accuracy metric and one fairness metric
ACC_METRIC = "avg_norm_mcc_score"
FAIR_METRIC = "abs_avg_ppvd_score"

### Logistic Regression

In [21]:
# Fair grid search with a Logistic Regression base estimator ('LR'):
# every hyperparameter combination is evaluated for each decision threshold and
# each bias-mitigation option; the run is timed, the result table saved and displayed.
import os
import timeit

start = timeit.default_timer()

param_grid = {'hyperp_grid': {'C':[1, 10],'solver':['liblinear', 'saga'],'penalty':['l2']}, 
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','LFR_in','AD','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_lr = fair_GridsearchCV(base='LR', param_grid=param_grid, 
                           prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                           cv=cv, n_jobs=n_jobs)
if __name__ == '__main__':
    clf_lr.fit(X=X, y=y)

# One row per (hyperparameters, bias mitigation, threshold) with the averaged scores
results_lr = clf_lr.output_table
print(clf_lr._best_param)

# Report the wall-clock time in seconds, or in minutes for runs of a minute or more
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results folder exists so a finished multi-minute search is not lost
# to a "non-existent directory" error when pickling.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_lr.to_pickle('./{}_results/{}_results_LR'.format(data_name,data_name))
style_table(results_lr)

  0%|          | 0/4 [00:00<?, ?it/s]

{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [01:02<03:06, 62.15s/it]

{'C': 1, 'penalty': 'l2', 'solver': 'saga'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [01:51<01:49, 54.66s/it]

{'C': 10, 'penalty': 'l2', 'solver': 'liblinear'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [02:58<01:00, 60.39s/it]

{'C': 10, 'penalty': 'l2', 'solver': 'saga'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [03:48<00:00, 57.07s/it]


base_estimator                                                   LR
param              {'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}
Bias_Mitigation                                                  RW
threshold                                                       0.4
Name: 6, dtype: object
Time:  3.8193586916666664 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.3,0.697,0.722968,0.656472,0.792323,0.431083,0.715541,-0.015446,-0.015386,-0.001486,-0.009404,0.01435,0.3302,0.100632,0.123926,0.299905
1,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.4,0.729,0.718128,0.642237,0.792323,0.429727,0.714864,-0.006177,-0.01273,0.019357,-0.034352,0.028098,0.3226,0.123298,0.166998,0.291313
2,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.5,0.726,0.677983,0.569595,0.792323,0.388289,0.694144,0.004501,-0.001237,0.024405,-0.06778,0.014328,0.2732,0.154888,0.227287,0.310357
3,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.6,0.725,0.649361,0.495651,0.792323,0.370061,0.68503,0.015874,0.008752,0.039398,-0.088022,0.011046,0.2158,0.176696,0.272182,0.330844
4,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.7,0.707,0.603117,0.36434,0.792323,0.325355,0.662677,-0.01825,-0.032445,0.020865,-0.14869,0.02305,0.1346,0.204807,0.328793,0.355573
5,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.3,0.695,0.720798,0.654145,0.793243,0.427051,0.713525,-0.007404,-0.007874,0.007656,-0.013088,0.012931,0.328,0.101389,0.125127,0.293879
6,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.4,0.729,0.718077,0.642272,0.793243,0.429675,0.714838,0.001732,-0.003328,0.025469,-0.037481,0.023827,0.3224,0.12328,0.166976,0.286894
7,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.5,0.725,0.677214,0.56811,0.793243,0.385142,0.692571,0.0062,0.000797,0.027853,-0.072014,0.015654,0.2752,0.155581,0.227749,0.313629
8,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.6,0.725,0.649361,0.495651,0.793243,0.370061,0.68503,0.015874,0.008752,0.039398,-0.088022,0.011046,0.2158,0.176696,0.272182,0.330844
9,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.7,0.708,0.604546,0.36804,0.793243,0.328026,0.664013,-0.016289,-0.030173,0.020865,-0.14869,0.021649,0.1362,0.203881,0.327432,0.352276


### Random Forest

In [23]:
# Fair grid search with a Random Forest base estimator ('RF'):
# every hyperparameter combination is evaluated for each decision threshold and
# each bias-mitigation option; the run is timed, the result table saved and displayed.
import os
import timeit

start = timeit.default_timer()

param_grid = {'hyperp_grid': {'n_estimators':[10, 50],'criterion':['gini', 'entropy'], 'max_depth':[16]}, 
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_rf = fair_GridsearchCV(base='RF', param_grid=param_grid,
                           prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                           cv=cv, n_jobs=n_jobs)
if __name__ == '__main__':
    clf_rf.fit(X=X, y=y)

# One row per (hyperparameters, bias mitigation, threshold) with the averaged scores
results_rf = clf_rf.output_table
print(clf_rf._best_param)

# Report the wall-clock time in seconds, or in minutes for runs of a minute or more
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results folder exists so a finished search is not lost
# to a "non-existent directory" error when pickling.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_rf.to_pickle('./{}_results/{}_results_RF'.format(data_name, data_name))
style_table(results_rf)

  0%|          | 0/4 [00:00<?, ?it/s]

{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [00:18<00:56, 18.94s/it]

{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 50}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [00:42<00:42, 21.43s/it]

{'criterion': 'entropy', 'max_depth': 16, 'n_estimators': 10}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [00:58<00:19, 19.24s/it]

{'criterion': 'entropy', 'max_depth': 16, 'n_estimators': 50}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [01:21<00:00, 20.34s/it]


base_estimator                                                    RF
param              {'criterion': 'gini', 'max_depth': 16, 'n_esti...
Bias_Mitigation                                                   RW
threshold                                                        0.6
Name: 48, dtype: object
Time:  1.3592143400000003 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",,0.3,0.556,0.591734,0.535679,0.630226,0.180485,0.590242,0.067738,0.059409,0.090975,-0.039567,0.008203,0.3326,0.125215,0.166343,0.477496
1,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",,0.4,0.609,0.602014,0.512611,0.630226,0.19705,0.598525,0.091385,0.08441,0.127122,-0.080858,0.013608,0.3502,0.161879,0.220539,0.49286
2,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",,0.5,0.649,0.601546,0.466335,0.630226,0.209891,0.604945,0.094384,0.096944,0.108933,-0.076888,-0.005569,0.3072,0.195141,0.27331,0.489438
3,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",,0.6,0.661,0.588663,0.409722,0.630226,0.203146,0.601573,0.068499,0.069548,0.085076,-0.081756,0.00155,0.249,0.215232,0.309878,0.466926
4,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",,0.7,0.653,0.550311,0.277021,0.630226,0.141675,0.570837,0.069339,0.07817,0.056634,0.006548,-0.01922,0.1664,0.249056,0.368546,0.498501
5,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",RW,0.3,0.554,0.595935,0.543547,0.63935,0.190352,0.595176,0.061352,0.059801,0.068638,-0.017848,-0.008412,0.3292,0.118499,0.156127,0.466176
6,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",RW,0.4,0.61,0.608562,0.524861,0.63935,0.209685,0.604843,0.11689,0.118353,0.126449,-0.042704,-0.017745,0.3354,0.15458,0.209912,0.512047
7,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",RW,0.5,0.655,0.616365,0.498428,0.63935,0.239672,0.619836,0.106014,0.119465,0.087223,0.008074,-0.034806,0.3014,0.180392,0.253043,0.486178
8,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",RW,0.6,0.663,0.590484,0.414833,0.63935,0.214219,0.607109,0.081996,0.095736,0.056108,0.030933,-0.032203,0.2444,0.212242,0.308061,0.474887
9,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",RW,0.7,0.652,0.548461,0.271859,0.63935,0.142811,0.571406,0.042902,0.054489,0.021336,0.080754,-0.023754,0.1602,0.249731,0.370616,0.471497


### Gradient Boosting

In [24]:
# Fair grid search with a Gradient Boosting base estimator ('GB'):
# every hyperparameter combination is evaluated for each decision threshold and
# each bias-mitigation option; the run is timed, the result table saved and displayed.
import os
import timeit

start = timeit.default_timer()

param_grid = {'hyperp_grid': {'n_estimators':[10, 50],'criterion':['friedman_mse'],'max_depth':[8, 32]}, 
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_gb = fair_GridsearchCV(base='GB', param_grid=param_grid,
                           prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                           cv=cv, n_jobs=n_jobs)
if __name__ == '__main__':
    clf_gb.fit(X=X, y=y)

# One row per (hyperparameters, bias mitigation, threshold) with the averaged scores
results_gb = clf_gb.output_table
print(clf_gb._best_param)

# Report the wall-clock time in seconds, or in minutes for runs of a minute or more
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results folder exists so a finished search is not lost
# to a "non-existent directory" error when pickling.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_gb.to_pickle('./{}_results/{}_results_GB'.format(data_name, data_name))
style_table(results_gb)

  0%|          | 0/4 [00:00<?, ?it/s]

{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [00:18<00:55, 18.59s/it]

{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 50}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [01:34<01:45, 52.55s/it]

{'criterion': 'friedman_mse', 'max_depth': 32, 'n_estimators': 10}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [02:17<00:48, 48.13s/it]

{'criterion': 'friedman_mse', 'max_depth': 32, 'n_estimators': 50}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [04:21<00:00, 65.44s/it]


base_estimator                                                    GB
param              {'criterion': 'friedman_mse', 'max_depth': 8, ...
Bias_Mitigation                                                 None
threshold                                                        0.5
Name: 42, dtype: object
Time:  4.366142474999999 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.3,0.616,0.62253,0.546249,0.66776,0.23635,0.618175,0.082723,0.069458,0.115999,-0.061833,0.029942,0.3648,0.143179,0.192623,0.464548
1,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.4,0.649,0.615374,0.501187,0.66776,0.233474,0.616737,0.030607,0.011313,0.091105,-0.117102,0.051441,0.33,0.178282,0.248114,0.41387
2,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.5,0.662,0.592077,0.420902,0.66776,0.21126,0.60563,0.012292,-0.002639,0.051587,-0.127417,0.025793,0.2594,0.211674,0.30485,0.406662
3,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.6,0.669,0.577883,0.354424,0.66776,0.20701,0.603505,0.022423,0.019964,0.03064,-0.075904,-5.8e-05,0.197,0.226694,0.336879,0.418918
4,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.7,0.658,0.541043,0.207464,0.66776,0.144559,0.572279,0.023536,0.025385,0.014398,-0.129167,-0.012797,0.112,0.253686,0.389049,0.451257
5,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.3,0.614,0.622811,0.546451,0.665619,0.23725,0.618625,0.053199,0.042289,0.091622,-0.06217,0.039968,0.3514,0.141771,0.189968,0.434575
6,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.4,0.651,0.612008,0.491475,0.665619,0.230492,0.615246,0.03853,0.027631,0.077035,-0.07341,0.030534,0.3156,0.182931,0.256226,0.423284
7,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.5,0.66,0.5887,0.413719,0.665619,0.20384,0.60192,0.024348,0.020521,0.037376,-0.051744,0.003349,0.257,0.21463,0.308795,0.422428
8,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.6,0.67,0.576869,0.347212,0.665619,0.206957,0.603479,0.022486,0.0153,0.036676,-0.107663,0.004299,0.1874,0.22817,0.339951,0.419007
9,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.7,0.662,0.542834,0.204866,0.665619,0.1602,0.5801,0.023771,0.024314,0.019982,-0.032857,-0.00901,0.101,0.252001,0.389512,0.443671


### Support Vector Machine

In [25]:
# Fair grid search with a Support Vector Machine base estimator ('SVM'):
# every kernel is evaluated for each decision threshold and each bias-mitigation
# option; the run is timed, the result table saved and displayed.
import os
import timeit

start = timeit.default_timer()

param_grid = {'hyperp_grid': {'kernel':['rbf','linear','poly','sigmoid'],'gamma':['scale']},
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_svm = fair_GridsearchCV(base='SVM', param_grid=param_grid,
                            prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                            cv=cv, n_jobs=n_jobs)
if __name__ == '__main__':
    clf_svm.fit(X=X, y=y)

# One row per (hyperparameters, bias mitigation, threshold) with the averaged scores
results_svm = clf_svm.output_table
print(clf_svm._best_param)

# Report the wall-clock time in seconds, or in minutes for runs of a minute or more
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results folder exists so a finished search is not lost
# to a "non-existent directory" error when pickling.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_svm.to_pickle('./{}_results/{}_results_SVM'.format(data_name, data_name))
style_table(results_svm)

  0%|          | 0/4 [00:00<?, ?it/s]

{'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [00:30<01:32, 30.76s/it]

{'gamma': 'scale', 'kernel': 'linear'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [00:53<00:52, 26.09s/it]

{'gamma': 'scale', 'kernel': 'poly'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [01:16<00:24, 24.54s/it]

{'gamma': 'scale', 'kernel': 'sigmoid'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [01:42<00:00, 25.72s/it]


base_estimator                                        SVM
param              {'gamma': 'scale', 'kernel': 'linear'}
Bias_Mitigation                                      None
threshold                                             0.3
Name: 40, dtype: object
Time:  1.7186631583333338 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.3,0.602,0.632631,0.570282,0.695848,0.258233,0.629117,0.026388,0.018686,0.067185,-0.04656,0.050136,0.2828,0.120328,0.156685,0.397272
1,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.4,0.666,0.626316,0.509478,0.695848,0.259237,0.629618,0.02399,0.020692,0.042571,-0.053419,0.007193,0.2466,0.178204,0.249809,0.394372
2,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.5,0.66,0.568942,0.344423,0.695848,0.185159,0.59258,0.024302,0.018193,0.042294,-0.064661,0.00778,0.1572,0.232289,0.3426,0.431723
3,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.6,0.64,0.520118,0.159467,0.695848,0.075758,0.537879,0.001849,-0.007586,0.021473,-0.066667,0.008339,0.078,0.270981,0.408055,0.46397
4,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.7,0.643,0.508228,0.061066,0.695848,0.043834,0.521917,-0.012109,-0.016024,-0.004245,-0.073333,-0.000781,0.0256,0.276341,0.430821,0.490192
5,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.3,0.602,0.632631,0.570282,0.697731,0.258233,0.629117,0.030447,0.021839,0.073491,-0.04987,0.052481,0.2824,0.120328,0.156685,0.40133
6,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.4,0.669,0.628664,0.512135,0.697731,0.264877,0.632438,0.030306,0.024052,0.050629,-0.060461,0.00967,0.2468,0.17746,0.249183,0.397868
7,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.5,0.666,0.576814,0.360745,0.697731,0.202722,0.601361,0.036186,0.031614,0.045071,-0.065256,0.000386,0.1604,0.226909,0.33549,0.434825
8,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.6,0.643,0.523058,0.16466,0.697731,0.088099,0.544049,0.00785,-0.001785,0.027192,-0.079762,0.008148,0.078,0.268234,0.405662,0.463801
9,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.7,0.642,0.508054,0.064897,0.697731,0.040728,0.520364,-0.006241,-0.010337,0.001866,-0.14,-0.000606,0.0284,0.276906,0.430266,0.485878


### Gaussian Naive Bayes

In [26]:
# Fair grid search with a Gaussian Naive Bayes base estimator ('NB'):
# every var_smoothing value is evaluated for each decision threshold and each
# bias-mitigation option; the run is timed, the result table saved and displayed.
import os
import timeit

start = timeit.default_timer()

param_grid = {'hyperp_grid': {'var_smoothing': np.logspace(0,-9, num=4)}, 
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_nb = fair_GridsearchCV(base='NB', param_grid=param_grid, 
                           prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                           cv=cv, n_jobs=n_jobs)
if __name__ == '__main__':
    clf_nb.fit(X=X, y=y)

# One row per (hyperparameters, bias mitigation, threshold) with the averaged scores
results_nb = clf_nb.output_table
# Best-parameter printout is left disabled here, as in the recorded run.
# print(clf_nb._best_param)

# Report the wall-clock time in seconds, or in minutes for runs of a minute or more
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results folder exists so a finished search is not lost
# to a "non-existent directory" error when pickling.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_nb.to_pickle('./{}_results/{}_results_NB'.format(data_name, data_name))
style_table(results_nb)

  0%|          | 0/4 [00:00<?, ?it/s]

{'var_smoothing': 1.0}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [00:15<00:46, 15.37s/it]

{'var_smoothing': 0.001}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [00:31<00:31, 15.88s/it]

{'var_smoothing': 1e-06}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [00:47<00:15, 15.79s/it]

{'var_smoothing': 1e-09}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [01:02<00:00, 15.72s/it]


Time:  1.0512449766666672 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,NB,{'var_smoothing': 1.0},,0.3,0.558,0.601351,0.548648,0.648252,0.202301,0.60115,0.139358,0.139186,0.153943,-0.033881,-0.021087,0.1956,0.116295,0.152162,0.538208
1,NB,{'var_smoothing': 1.0},,0.4,0.615,0.602583,0.508316,0.648252,0.199366,0.599683,0.13744,0.132135,0.162514,-0.085907,-0.000438,0.2038,0.166454,0.227735,0.537757
2,NB,{'var_smoothing': 1.0},,0.5,0.652,0.585604,0.419719,0.648252,0.193765,0.596882,0.095045,0.083441,0.128361,-0.173783,0.014305,0.1774,0.213201,0.30479,0.498162
3,NB,{'var_smoothing': 1.0},,0.6,0.658,0.54957,0.258212,0.648252,0.156119,0.578059,0.062018,0.062703,0.062497,-0.260177,-0.008912,0.094,0.249151,0.37481,0.483959
4,NB,{'var_smoothing': 1.0},,0.7,0.643,0.507001,0.052099,0.648252,0.038458,0.519229,0.016178,0.019537,0.008046,-0.133333,-0.011738,0.0194,0.276984,0.432914,0.496949
5,NB,{'var_smoothing': 1.0},RW,0.3,0.559,0.60212,0.54929,0.649213,0.20368,0.60184,0.14536,0.14356,0.162692,-0.03731,-0.016971,0.194,0.116297,0.152135,0.54352
6,NB,{'var_smoothing': 1.0},RW,0.4,0.615,0.603202,0.50971,0.649213,0.200515,0.600257,0.141186,0.13624,0.165963,-0.085673,-0.001649,0.2056,0.165644,0.226562,0.540929
7,NB,{'var_smoothing': 1.0},RW,0.5,0.655,0.589191,0.425877,0.649213,0.201676,0.600838,0.101086,0.090886,0.131486,-0.171616,0.011376,0.1784,0.210895,0.301853,0.500247
8,NB,{'var_smoothing': 1.0},RW,0.6,0.661,0.553856,0.269215,0.649213,0.166283,0.583141,0.071686,0.073049,0.068216,-0.260357,-0.012067,0.0966,0.246042,0.370591,0.488545
9,NB,{'var_smoothing': 1.0},RW,0.7,0.644,0.50839,0.057108,0.649213,0.052143,0.526071,0.02226,0.025072,0.013854,-0.2,-0.011788,0.0204,0.275593,0.431302,0.496189


### TabTransformer

In [18]:
# Fair grid search with a TabTransformer base estimator ('TabTrans'):
# every hyperparameter combination is evaluated for each decision threshold and
# each bias-mitigation option; the run is timed, the result table saved and displayed.
# Per the note next to the TensorFlow import, tf.disable_eager_execution() must stay
# commented out when this cell is run.
import os
import timeit

start = timeit.default_timer()

# NOTE(review): 'learing_rate' looks like a misspelling of 'learning_rate'. The key is kept
# as-is because the recorded run accepted it — confirm which spelling FairGridSearch reads
# before renaming, otherwise the setting may be rejected or silently ignored.
param_grid = {'hyperp_grid': {'epochs':[20, 30],'learing_rate':[1e-04, 1e-05]},
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','ROC','CEO','RW+ROC','RW+CEO']}

clf_tab = fair_GridsearchCV(base='TabTrans',param_grid=param_grid, 
                            prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                            cv=cv, n_jobs=n_jobs)

clf_tab.fit(X=X, y=y)
# One row per (hyperparameters, bias mitigation, threshold) with the averaged scores
results_tab = clf_tab.output_table
# Best-parameter printout is left disabled here, as in the recorded run.
# print(clf_tab._best_param)

# Report the wall-clock time in seconds, or in minutes for runs of a minute or more
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results folder exists so a finished search is not lost
# to a "non-existent directory" error when pickling.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_tab.to_pickle('./{}_results/{}_results_TabTrans'.format(data_name, data_name))

style_table(results_tab)

  0%|          | 0/4 [00:00<?, ?it/s]

{'epochs': 20, 'learing_rate': 0.0001}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [02:07<06:21, 127.28s/it]

{'epochs': 20, 'learing_rate': 1e-05}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [04:13<04:12, 126.38s/it]

{'epochs': 30, 'learing_rate': 0.0001}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [06:19<02:06, 126.59s/it]

{'epochs': 30, 'learing_rate': 1e-05}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [08:59<00:00, 135.00s/it]


Time:  9.006122326666667 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",,0.3,0.453,0.495383,0.375037,0.516574,-0.010323,0.494839,-0.01212,-0.01514,-0.007647,-0.115755,0.063725,0.1126,0.13642,0.191193,0.517281
1,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",,0.4,0.505,0.515326,0.348304,0.516574,0.035825,0.517913,-0.004951,-0.003861,-0.013865,0.011801,0.132666,0.1366,0.1544,0.225766,0.487038
2,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",,0.5,0.53,0.506705,0.293956,0.516574,0.01499,0.507495,0.014541,0.016749,0.006328,0.01803,0.008087,0.1394,0.18856,0.276992,0.507046
3,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",,0.6,0.552,0.495428,0.206411,0.516574,0.00144,0.50072,0.002182,-7.3e-05,0.003826,0.125308,0.108219,0.1028,0.220365,0.329295,0.501462
4,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",,0.7,0.587,0.509288,0.167958,0.516574,0.023059,0.51153,-0.009115,-0.008977,-0.007864,0.038426,0.023844,0.0796,0.227417,0.349902,0.497585
5,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",RW,0.3,0.459,0.506746,0.40966,0.529936,0.018757,0.509379,-0.003798,-0.006356,-0.003087,-0.084418,0.000298,0.1252,0.129459,0.178732,0.49442
6,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",RW,0.4,0.502,0.509789,0.345845,0.529936,0.015097,0.507548,-0.014044,-0.014496,-0.013922,-0.037242,0.033963,0.1502,0.161873,0.233358,0.506496
7,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",RW,0.5,0.543,0.513168,0.29676,0.529936,0.032473,0.516236,0.01778,0.020289,0.00947,0.030598,0.001506,0.1484,0.190033,0.281761,0.501544
8,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",RW,0.6,0.566,0.49901,0.190322,0.529936,0.003987,0.501993,0.030768,0.030394,0.029419,0.114938,0.112612,0.1108,0.227538,0.342336,0.528775
9,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",RW,0.7,0.603,0.502344,0.127155,0.529936,0.00638,0.50319,0.03978,0.039848,0.039802,-0.006476,-0.028268,0.095,0.257294,0.389987,0.536589
