# Bias Mitigation Experiments - Bank_Customer_Churn_Prediction

In [1]:
data_name = 'Bank_Customer_Churn_Prediction'

In [2]:
# to ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [3]:
import numpy as np
import pandas as pd
import tensorflow.compat.v1 as tf

## [1.](#Table-of-Contents) Prepare Bank_Customer_Churn_Prediction data

In [4]:
# read in data as pandas data frame
df = pd.read_csv("../data/Bank_Customer_Churn_Prediction/Bank_Customer_Churn_Prediction.csv")
# check columns
pd.set_option('display.max_columns', None) # expand all columns
print(df.shape)
df.head()

(10000, 12)


Unnamed: 0,customer_id,credit_score,country,gender,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,churn
0,15634602,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,15647311,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,15619304,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,15701354,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,15737888,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [5]:
df['gender'].value_counts()

Male      5457
Female    4543
Name: gender, dtype: int64

In [6]:
# data pre-processing
import re
label = 'churn'
protected_attribute = 'gender'
# 1. select features of interest
# 2. encode label and potential protected features 
# label_mapping = {'completed': 1, 'none': 0}
# race_mapping = {'group D': 1, 'group C': 0}
sex_mapping = {'Male': 1, 'Female': 0} # female is the privileged group in this case

# df = df[df.race.isin(race_mapping.keys())] # only include samples with certain race values
# df.race.replace(race_mapping, inplace=True)
df.gender.replace(sex_mapping, inplace=True)
# df[label].replace(label_mapping, inplace=True)
df = df.reset_index(drop=True)
print(df.shape)
df.head()

(10000, 12)


Unnamed: 0,customer_id,credit_score,country,gender,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,churn
0,15634602,619,France,0,42,2,0.0,1,1,1,101348.88,1
1,15647311,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,15619304,502,France,0,42,8,159660.8,3,1,0,113931.57,1
3,15701354,699,France,0,39,1,0.0,2,0,0,93826.63,0
4,15737888,850,Spain,0,43,2,125510.82,1,1,1,79084.1,0


In [7]:
# flip labels to ensure pos_label = 1, not becoming recidivist

# select descriptive features and target variable
X = df.drop(columns=label, axis=1) # select all features but target feature
y = df[[protected_attribute, label]] # include protected feature in order to AIF360

In [8]:
df.head(100)

Unnamed: 0,customer_id,credit_score,country,gender,age,tenure,balance,products_number,credit_card,active_member,estimated_salary,churn
0,15634602,619,France,0,42,2,0.00,1,1,1,101348.88,1
1,15647311,608,Spain,0,41,1,83807.86,1,0,1,112542.58,0
2,15619304,502,France,0,42,8,159660.80,3,1,0,113931.57,1
3,15701354,699,France,0,39,1,0.00,2,0,0,93826.63,0
4,15737888,850,Spain,0,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...
95,15699461,515,Spain,1,35,10,176273.95,1,0,1,121277.78,0
96,15738721,773,Spain,1,41,9,102827.44,1,0,1,64595.25,0
97,15693683,814,Germany,1,29,8,97086.40,2,1,1,197276.13,0
98,15604348,710,Spain,1,22,8,0.00,2,0,0,99645.04,0


In [11]:
# set protected attribute as index
X = X.set_index([protected_attribute], append = True, drop = False)
y = y.set_index([protected_attribute], append = True)

# make y data frames to 1d array to pass modeling, but keep index (protected attribute)
y = pd.Series(y[label], index=y.index)

In [12]:
X.to_pickle('../data/{}/{}_X'.format(data_name, data_name))
y.to_pickle('../data/{}/{}_y'.format(data_name, data_name))

## [2.](#Table-of-Contents) Proposed GridSearch Approach 

The GridSearch Approach includes hyperparameter, threshold, and Bias Mitigation

In [26]:
from FairGridSearch import *
%load_ext autoreload
%autoreload 2
# allow automatic reloading of changes in FairGridSearch file

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
# None, better
# 'RW', worse
# 'LFR_pre', stuck
# 'LFR_in', stuck
# 'AD', stuck
# 'EGR',better
# 'ROC', better
# 'CEO', better

## 3. Case Study: Bank_Customer_Churn_Prediction

In [15]:
label = 'churn'
protected_attribute = 'gender'

pos_label = 0
priv_group = 1

cv=10
n_jobs=multiprocessing.cpu_count()-1

In [16]:
X = pd.read_pickle('../data/{}/{}_X'.format(data_name, data_name))
y = pd.read_pickle('../data/{}/{}_y'.format(data_name, data_name))

In [17]:
# define desired metric for the use case
ACC_METRIC = 'avg_norm_mcc_score'
FAIR_METRIC = 'abs_avg_ppvd_score'

### Logistic Regression

In [18]:
import timeit
start = timeit.default_timer()

param_grid = {'hyperp_grid': {'C':[1, 10],'solver':['liblinear', 'saga'],'penalty':['l2']}, 
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','LFR_in','AD','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_lr = fair_GridsearchCV(base='LR', param_grid=param_grid, 
                           prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                           cv=cv, n_jobs=n_jobs)
if __name__ == '__main__':
    clf_lr.fit(X=X, y=y)
    
results_lr = clf_lr.output_table
print(clf_lr._best_param)

stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else: print('Time: ', runtime/60, 'min')
results_lr.to_pickle('./{}_results/{}_results_LR'.format(data_name,data_name))
style_table(results_lr)

  0%|          | 0/4 [00:00<?, ?it/s]

{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [01:41<05:04, 101.40s/it]

{'C': 1, 'penalty': 'l2', 'solver': 'saga'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [03:25<03:25, 102.88s/it]

{'C': 10, 'penalty': 'l2', 'solver': 'liblinear'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [04:58<01:38, 98.25s/it] 

{'C': 10, 'penalty': 'l2', 'solver': 'saga'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [06:39<00:00, 99.77s/it]


base_estimator                                                   LR
param              {'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}
Bias_Mitigation                                                  AD
threshold                                                       0.5
Name: 22, dtype: object
Time:  6.658235028333333 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.3,0.7829,0.679015,0.486223,0.765,0.349451,0.674726,-0.175984,-0.160897,-0.137101,0.00481,-0.037763,0.113,0.105132,0.143566,0.501258
1,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.4,0.8035,0.627831,0.407139,0.765,0.308848,0.654424,-0.115516,-0.112376,-0.082367,0.026357,-0.054935,0.07774,0.11159,0.169349,0.461091
2,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.5,0.8091,0.586777,0.310796,0.765,0.268692,0.634346,-0.076144,-0.086999,-0.044683,0.000735,-0.06013,0.0514,0.114902,0.187875,0.441799
3,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.6,0.8091,0.55261,0.202763,0.765,0.227525,0.613762,-0.043,-0.061711,-0.015389,-0.095039,-0.064688,0.02976,0.11746,0.202877,0.429237
4,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.7,0.8014,0.520005,0.084395,0.765,0.139908,0.569954,-0.01823,-0.0291,-0.005289,-0.20881,-0.076543,0.01176,0.123837,0.218235,0.448276
5,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.3,0.7768,0.667335,0.468436,0.756225,0.327728,0.663864,-0.033938,0.004245,-0.011876,-0.102964,-0.068692,0.11344,0.108741,0.14894,0.370074
6,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.4,0.8057,0.628116,0.407502,0.756225,0.31286,0.65643,-0.028983,0.006616,-0.016777,-0.056964,-0.078887,0.07772,0.110836,0.169172,0.372553
7,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.5,0.8126,0.584229,0.301481,0.756225,0.275648,0.637824,-0.016679,-0.000106,-0.003773,-0.093341,-0.077989,0.0479,0.113359,0.188627,0.378855
8,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.6,0.8085,0.549316,0.192288,0.756225,0.221494,0.610747,-0.00598,0.004581,0.00075,-0.095402,-0.082466,0.02658,0.118015,0.204398,0.395233
9,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.7,0.8009,0.518783,0.080258,0.756225,0.134507,0.567253,-0.004747,-0.005271,0.000823,-0.168683,-0.0825,0.01186,0.124231,0.218858,0.437493


### Random Forest

In [19]:
import timeit
start = timeit.default_timer()

param_grid = {'hyperp_grid': {'n_estimators':[10, 50],'criterion':['gini', 'entropy'], 'max_depth':[16]}, 
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_rf = fair_GridsearchCV(base='RF', param_grid=param_grid,
                           prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                           cv=cv, n_jobs=n_jobs)
if __name__ == '__main__':
    clf_rf.fit(X=X, y=y)
    
results_rf = clf_rf.output_table
print(clf_rf._best_param)

stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else: print('Time: ', runtime/60, 'min')
results_rf.to_pickle('./{}_results/{}_results_RF'.format(data_name, data_name))
style_table(results_rf)

  0%|          | 0/4 [00:00<?, ?it/s]

{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [01:04<03:14, 64.75s/it]

{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 50}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [03:07<03:18, 99.15s/it]

{'criterion': 'entropy', 'max_depth': 16, 'n_estimators': 10}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [04:13<01:23, 83.89s/it]

{'criterion': 'entropy', 'max_depth': 16, 'n_estimators': 50}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [06:24<00:00, 96.22s/it] 


base_estimator                                                    RF
param              {'criterion': 'gini', 'max_depth': 16, 'n_esti...
Bias_Mitigation                                                  ROC
threshold                                                        0.5
Name: 62, dtype: object
Time:  6.420090558333333 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",,0.3,0.7977,0.747297,0.571671,0.824542,0.449901,0.72495,-0.106665,-0.060164,-0.067924,-0.05292,-0.039788,0.19416,0.087279,0.111063,0.381714
1,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",,0.4,0.8384,0.73851,0.589511,0.824542,0.490144,0.745072,-0.083539,-0.037332,-0.047085,-0.01797,-0.050764,0.1472,0.082836,0.117636,0.338467
2,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",,0.5,0.8494,0.710356,0.562445,0.824542,0.487609,0.743805,-0.063951,-0.027684,-0.028737,-0.016678,-0.056451,0.11126,0.083453,0.129854,0.320146
3,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",,0.6,0.8534,0.686027,0.528112,0.824542,0.484287,0.742144,-0.050453,-0.027461,-0.012608,-0.04568,-0.056294,0.08756,0.08406,0.139875,0.308309
4,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",,0.7,0.8479,0.648956,0.455718,0.824542,0.452302,0.726151,-0.044516,-0.03609,-0.007017,-0.043274,-0.057428,0.0647,0.089256,0.156031,0.318364
5,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",RW,0.3,0.8005,0.748511,0.574478,0.832304,0.454008,0.727004,-0.075652,-0.034628,-0.031786,-0.100268,-0.039365,0.1931,0.086766,0.110758,0.348648
6,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",RW,0.4,0.8355,0.732872,0.58072,0.832304,0.479622,0.739811,-0.063979,-0.026571,-0.022546,-0.085073,-0.048426,0.1474,0.084567,0.12024,0.324168
7,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",RW,0.5,0.8521,0.713699,0.569503,0.832304,0.497062,0.748531,-0.055606,-0.020365,-0.018145,-0.051741,-0.055334,0.11234,0.081968,0.128184,0.307075
8,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",RW,0.6,0.8537,0.682734,0.523101,0.832304,0.484094,0.742047,-0.043362,-0.016099,-0.009452,-0.046559,-0.060416,0.0853,0.084142,0.141201,0.301315
9,RF,"{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}",RW,0.7,0.8468,0.646996,0.451151,0.832304,0.447245,0.723622,-0.038168,-0.023605,-0.005613,-0.046311,-0.06195,0.06538,0.089983,0.157015,0.314545


### Gradient Boosting

In [20]:
import timeit
start = timeit.default_timer()

param_grid = {'hyperp_grid': {'n_estimators':[10, 50],'criterion':['friedman_mse'],'max_depth':[8, 32]}, 
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_gb = fair_GridsearchCV(base='GB', param_grid=param_grid,
                           prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                           cv=cv, n_jobs=n_jobs)
if __name__ == '__main__':
    clf_gb.fit(X=X, y=y)
    
results_gb = clf_gb.output_table
print(clf_gb._best_param)

stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else: print('Time: ', runtime/60, 'min')
results_gb.to_pickle('./{}_results/{}_results_GB'.format(data_name, data_name))
style_table(results_gb)

  0%|          | 0/4 [00:00<?, ?it/s]

{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [01:42<05:08, 102.82s/it]

{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 50}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [05:29<05:51, 175.96s/it]

{'criterion': 'friedman_mse', 'max_depth': 32, 'n_estimators': 10}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [19:55<08:10, 490.86s/it]

{'criterion': 'friedman_mse', 'max_depth': 32, 'n_estimators': 50}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [1:13:39<00:00, 1104.83s/it]


base_estimator                                                    GB
param              {'criterion': 'friedman_mse', 'max_depth': 8, ...
Bias_Mitigation                                                  ROC
threshold                                                        0.5
Name: 62, dtype: object
Time:  73.66032555 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.3,0.8505,0.748477,0.610981,0.850593,0.521095,0.760547,-0.065285,-0.018378,-0.025526,-0.054008,-0.050527,0.13324,0.077897,0.112978,0.304738
1,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.4,0.8574,0.712451,0.571893,0.850593,0.509825,0.754912,-0.044843,-0.008807,-0.007959,-0.072553,-0.057359,0.10182,0.079956,0.128302,0.289931
2,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.5,0.8562,0.678639,0.517547,0.850593,0.492175,0.746088,-0.040655,-0.02049,-0.003056,-0.071607,-0.057599,0.0763,0.083071,0.142551,0.294568
3,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.6,0.8478,0.640664,0.437687,0.850593,0.453268,0.726634,-0.033692,-0.019695,-0.002897,-0.030078,-0.063822,0.0536,0.08955,0.159396,0.307058
4,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.7,0.819,0.559376,0.213659,0.850593,0.296464,0.648232,-0.017055,-0.014751,-0.001816,0.007286,-0.075756,0.02118,0.110491,0.198177,0.368823
5,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.3,0.8444,0.739352,0.595455,0.849674,0.50129,0.750645,-0.042427,0.005468,-0.005655,-0.100862,-0.056127,0.1343,0.081118,0.117217,0.291781
6,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.4,0.8567,0.712011,0.570693,0.849674,0.507724,0.753862,-0.037843,0.002902,-0.004573,-0.081167,-0.06121,0.10164,0.080289,0.128547,0.283981
7,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.5,0.856,0.6798,0.519517,0.849674,0.4914,0.7457,-0.036963,-0.011816,-0.002211,-0.07448,-0.060332,0.07684,0.083139,0.142117,0.291263
8,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.6,0.8463,0.637898,0.431077,0.849674,0.445403,0.722702,-0.031129,-0.014764,-0.002737,-0.038761,-0.06586,0.05464,0.090593,0.160797,0.308427
9,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.7,0.8183,0.558017,0.209617,0.849674,0.29119,0.645595,-0.014208,-0.009645,-0.000847,-0.026364,-0.077292,0.02054,0.111011,0.19888,0.368613


### Support Vector Machine

In [21]:
import timeit
start = timeit.default_timer()

param_grid = {'hyperp_grid': {'kernel':['rbf','linear','poly','sigmoid'],'gamma':['scale']},
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_svm = fair_GridsearchCV(base='SVM', param_grid=param_grid,
                            prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                            cv=cv, n_jobs=n_jobs)
if __name__ == '__main__':
    clf_svm.fit(X=X, y=y)
    
results_svm = clf_svm.output_table
print(clf_svm._best_param)

stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else: print('Time: ', runtime/60, 'min')
results_svm.to_pickle('./{}_results/{}_results_SVM'.format(data_name, data_name))
style_table(results_svm)

  0%|          | 0/4 [00:00<?, ?it/s]

{'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [43:04<2:09:14, 2584.90s/it]

{'gamma': 'scale', 'kernel': 'linear'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [59:31<54:49, 1644.89s/it]  

{'gamma': 'scale', 'kernel': 'poly'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [1:21:32<24:56, 1496.64s/it]

{'gamma': 'scale', 'kernel': 'sigmoid'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [1:43:07<00:00, 1546.77s/it]


base_estimator                                     SVM
param              {'gamma': 'scale', 'kernel': 'rbf'}
Bias_Mitigation                                 RW+ROC
threshold                                          0.5
Name: 32, dtype: object
Time:  103.12543410833334 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.3,0.8522,0.732383,0.5938,0.824726,0.510886,0.755443,-0.080539,-0.04711,-0.036895,-0.017063,-0.046045,0.10358,0.079686,0.120062,0.325096
1,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.4,0.856,0.711938,0.570353,0.824726,0.506226,0.753113,-0.065285,-0.038623,-0.022618,-0.025558,-0.049914,0.08944,0.080547,0.128611,0.312172
2,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.5,0.8571,0.695465,0.546358,0.824726,0.500858,0.750429,-0.061119,-0.042814,-0.017325,-0.024061,-0.049991,0.07832,0.081507,0.135464,0.31069
3,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.6,0.8559,0.676815,0.513923,0.824726,0.490222,0.745111,-0.058446,-0.052616,-0.011532,-0.035208,-0.04803,0.06818,0.083365,0.143366,0.313335
4,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.7,0.8518,0.65799,0.475717,0.824726,0.470863,0.735431,-0.0506,-0.046097,-0.008304,-0.030033,-0.052827,0.05796,0.086564,0.1517,0.315168
5,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.3,0.852,0.72898,0.589369,0.821988,0.50777,0.753885,-0.028117,0.026366,0.000782,-0.095128,-0.065812,0.10458,0.080201,0.121569,0.274232
6,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.4,0.8558,0.710174,0.567535,0.821988,0.504238,0.752119,-0.022992,0.021281,0.007756,-0.124052,-0.064794,0.0913,0.080851,0.129408,0.270872
7,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.5,0.8574,0.695839,0.547281,0.821988,0.502023,0.751011,-0.024477,0.012822,0.006624,-0.115919,-0.064438,0.08012,0.081331,0.13527,0.273465
8,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.6,0.854,0.673429,0.506779,0.821988,0.482316,0.741158,-0.027329,-0.00055,0.004672,-0.111254,-0.063263,0.06744,0.084568,0.145022,0.286171
9,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.7,0.8499,0.654412,0.467937,0.821988,0.462321,0.73116,-0.026154,-0.000952,0.001735,-0.086925,-0.066762,0.05842,0.087815,0.153454,0.294993


### Gaussian Naive Bayes

In [22]:
# model for Reweighing
import timeit
start = timeit.default_timer()

param_grid = {'hyperp_grid': {'var_smoothing': np.logspace(0,-9, num=4)}, 
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_nb = fair_GridsearchCV(base='NB', param_grid=param_grid, 
                           prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                           cv=cv, n_jobs=n_jobs)
if __name__ == '__main__':
    clf_nb.fit(X=X, y=y)
    
results_nb = clf_nb.output_table
# print(clf._best_param)

stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else: print('Time: ', runtime/60, 'min')
results_nb.to_pickle('./{}_results/{}_results_NB'.format(data_name, data_name))
style_table(results_nb)

  0%|          | 0/4 [00:00<?, ?it/s]

{'var_smoothing': 1.0}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [01:05<03:15, 65.26s/it]

{'var_smoothing': 0.001}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [01:58<01:55, 57.95s/it]

{'var_smoothing': 1e-06}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [02:50<00:55, 55.41s/it]

{'var_smoothing': 1e-09}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [03:43<00:00, 55.82s/it]


Time:  3.727368016666666 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,NB,{'var_smoothing': 1.0},,0.3,0.8131,0.640266,0.431313,0.779731,0.341791,0.670896,-0.10255,-0.095513,-0.067782,0.02659,-0.054161,0.0578,0.106327,0.16325,0.431654
1,NB,{'var_smoothing': 1.0},,0.4,0.8184,0.575264,0.269451,0.779731,0.292334,0.646167,-0.043335,-0.055075,-0.014244,-0.039701,-0.06364,0.03238,0.110473,0.191634,0.397168
2,NB,{'var_smoothing': 1.0},,0.5,0.8059,0.527406,0.107369,0.779731,0.189428,0.594714,-0.018142,-0.028977,-0.00305,-0.083005,-0.075036,0.01074,0.120407,0.214193,0.423428
3,NB,{'var_smoothing': 1.0},,0.6,0.8004,0.510432,0.041246,0.779731,0.124099,0.56205,-0.004579,-0.006102,-0.000585,0.047619,-0.083529,0.00318,0.124689,0.222492,0.44253
4,NB,{'var_smoothing': 1.0},,0.7,0.7979,0.503926,0.015516,0.779731,0.068686,0.534343,-0.001446,-0.001703,0.0,0.0,-0.085358,0.00112,0.126646,0.225773,0.467103
5,NB,{'var_smoothing': 1.0},RW,0.3,0.8107,0.635291,0.421122,0.772041,0.33091,0.665455,-0.035448,-0.009794,-0.014047,-0.083182,-0.070359,0.06152,0.107955,0.165663,0.369993
6,NB,{'var_smoothing': 1.0},RW,0.4,0.818,0.572454,0.261018,0.772041,0.288492,0.644246,-0.008824,0.005575,0.001958,-0.108646,-0.079802,0.03032,0.110862,0.192897,0.364578
7,NB,{'var_smoothing': 1.0},RW,0.5,0.8039,0.522127,0.088094,0.772041,0.166886,0.583443,-0.006822,-0.007755,5.2e-05,-0.166623,-0.081571,0.00898,0.121957,0.216829,0.423379
8,NB,{'var_smoothing': 1.0},RW,0.6,0.7997,0.508892,0.035556,0.772041,0.112568,0.556284,-0.002692,-0.002417,-0.000362,-0.052381,-0.0848,0.00272,0.125235,0.223289,0.446408
9,NB,{'var_smoothing': 1.0},RW,0.7,0.7978,0.503683,0.014564,0.772041,0.066713,0.533356,-0.000441,0.000672,0.0,0.0,-0.086144,0.00112,0.126725,0.225899,0.467085


### TabTransformer

In [25]:
import timeit
start = timeit.default_timer()

param_grid = {'hyperp_grid': {'epochs':[20, 30],'learing_rate':[1e-04, 1e-05]},
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','ROC','CEO','RW+ROC','RW+CEO']}

clf_tab = fair_GridsearchCV(base='TabTrans',param_grid=param_grid, 
                            prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                            cv=cv, n_jobs=n_jobs)

clf_tab.fit(X=X, y=y)
results_tab = clf_tab.output_table
# print(clf._best_param)

stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else: print('Time: ', runtime/60, 'min')
results_tab.to_pickle('./{}_results/{}_results_TabTrans'.format(data_name, data_name))

style_table(results_tab)

  0%|          | 0/4 [00:00<?, ?it/s]

{'epochs': 20, 'learing_rate': 0.0001}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [06:59<20:58, 419.66s/it]

{'epochs': 20, 'learing_rate': 1e-05}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [14:45<14:53, 446.89s/it]

{'epochs': 30, 'learing_rate': 0.0001}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [23:38<08:06, 486.25s/it]

{'epochs': 30, 'learing_rate': 1e-05}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [32:41<00:00, 490.28s/it]


Time:  32.69316389333338 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",,0.3,0.7082,0.593397,0.358251,0.597033,0.173925,0.586962,-0.021391,0.002665,-0.01052,-0.097864,-0.078244,0.23936,0.13185,0.178148,0.434429
1,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",,0.4,0.759,0.555932,0.192237,0.597033,0.104394,0.552197,-0.009317,0.003493,-0.001764,-0.056384,-0.080552,0.11834,0.127235,0.199207,0.45712
2,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",,0.5,0.788,0.507182,0.032151,0.597033,0.013995,0.506998,0.001439,0.002576,0.002494,-0.01095,-0.08562,0.0207,0.129008,0.224099,0.494441
3,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",,0.6,0.7963,0.5,0.0,0.597033,0.0,0.5,0.0,0.0,0.0,0.0,-0.086349,0.0,0.127904,0.227779,0.5
4,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",,0.7,0.7963,0.5,0.0,0.597033,0.0,0.5,0.0,0.0,0.0,0.0,-0.086349,0.0,0.127904,0.227779,0.5
5,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",RW,0.3,0.7082,0.593397,0.358251,0.595861,0.173925,0.586962,-0.021391,0.002665,-0.01052,-0.097864,-0.078244,0.23936,0.13185,0.178148,0.434429
6,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",RW,0.4,0.7617,0.55617,0.19256,0.595861,0.106699,0.55335,-0.010502,0.003913,-0.004036,-0.052481,-0.081579,0.11432,0.126983,0.199412,0.457152
7,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",RW,0.5,0.7963,0.5,0.0,0.595861,0.0,0.5,0.0,0.0,0.0,0.0,-0.086349,0.0,0.127904,0.227779,0.5
8,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",RW,0.6,0.7963,0.5,0.0,0.595861,0.0,0.5,0.0,0.0,0.0,0.0,-0.086349,0.0,0.127904,0.227779,0.5
9,TabTrans,"{'epochs': 20, 'learing_rate': 0.0001}",RW,0.7,0.7963,0.5,0.0,0.595861,0.0,0.5,0.0,0.0,0.0,0.0,-0.086349,0.0,0.127904,0.227779,0.5
