# Bias Mitigation Experiments - German Credit

In [1]:
# Dataset identifier; reused below to build the data and results file paths.
data_name = 'German_Credit'

In [2]:
# Warning suppression is currently DISABLED: the filter below is commented out,
# so library warnings will appear in the cell outputs. Uncomment it to silence them.
import warnings
# warnings.filterwarnings('ignore')

In [3]:
import multiprocessing
import os

import numpy as np
import pandas as pd
import tensorflow.compat.v1 as tf
# tf.disable_eager_execution() # comment out when running TabTransformer models

## [1.](#Table-of-Contents) Prepare Dataset

In [4]:
# Load the raw German Credit file into a pandas DataFrame.
# The file has no header row; column names follow german.doc.
column_names = [
    'status', 'month', 'credit_history', 'purpose', 'credit_amount',
    'savings', 'employment', 'investment_as_income_percentage',
    'personal_status', 'other_debtors', 'residence_since', 'property',
    'age', 'installment_plans', 'housing', 'number_of_credits',
    'skill_level', 'people_liable_for', 'telephone', 'foreign_worker',
    'credit',
]
df = pd.read_csv(
    "../data/German_Credit/german.data",
    sep=' ',
    header=None,
    names=column_names,
    na_values=[],
)

# Quick sanity check: show every column, print the frame's shape, preview the first rows.
pd.set_option('display.max_columns', None)
print(df.shape)
df.head()

(1000, 21)


Unnamed: 0,status,month,credit_history,purpose,credit_amount,savings,employment,investment_as_income_percentage,personal_status,other_debtors,residence_since,property,age,installment_plans,housing,number_of_credits,skill_level,people_liable_for,telephone,foreign_worker,credit
0,A11,6,A34,A43,1169,A65,A75,4,A93,A101,4,A121,67,A143,A152,2,A173,1,A192,A201,1
1,A12,48,A32,A43,5951,A61,A73,2,A92,A101,2,A121,22,A143,A152,1,A173,1,A191,A201,2
2,A14,12,A34,A46,2096,A61,A74,2,A93,A101,3,A121,49,A143,A152,1,A172,2,A191,A201,1
3,A11,42,A32,A42,7882,A61,A74,2,A93,A103,4,A122,45,A143,A153,1,A173,2,A191,A201,1
4,A11,24,A33,A40,4870,A61,A73,3,A93,A101,4,A124,53,A143,A153,2,A173,2,A191,A201,2


In [5]:
# Data pre-processing: derive the protected attribute, drop the raw source
# column, and encode the label and the protected attribute as 0/1 integers.
import re
label = 'credit'
protected_attribute = 'sex'

# 1. Derive the "sex" variable from column "personal_status".
#    Codes without an entry in status_map are left unchanged here and are
#    removed by the filter in step 3.
status_map = {'A91': 'male', 'A93': 'male', 'A94': 'male',
              'A92': 'female', 'A95': 'female'}
df['sex'] = df['personal_status'].replace(status_map)

# 2. Drop the raw column now that "sex" has been extracted from it.
df = df.drop(columns=['personal_status'])

# 3. Encode the label and the protected attribute.
label_mapping = {1: 1, 2: 0} # according to german.doc: 1 = Good, 2 = Bad
sex_mapping = {'male': 1, 'female': 0}

# Keep only rows whose sex value is one we can encode. The explicit .copy()
# makes the filtered frame independent, so the assignments below are not
# chained assignments on a slice.
df = df[df['sex'].isin(list(sex_mapping))].copy()

# Assign the encoded columns back instead of calling
# `df.col.replace(..., inplace=True)`: in-place replace on a selected column
# is a chained assignment that does not reliably update `df` (and does nothing
# under pandas Copy-on-Write).
df['sex'] = df['sex'].map(sex_mapping)  # every remaining value is a key of sex_mapping
df[label] = df[label].replace(label_mapping)
df = df.reset_index(drop=True)

print(df.shape)
df.head()

(1000, 21)


Unnamed: 0,status,month,credit_history,purpose,credit_amount,savings,employment,investment_as_income_percentage,other_debtors,residence_since,property,age,installment_plans,housing,number_of_credits,skill_level,people_liable_for,telephone,foreign_worker,credit,sex
0,A11,6,A34,A43,1169,A65,A75,4,A101,4,A121,67,A143,A152,2,A173,1,A192,A201,1,1
1,A12,48,A32,A43,5951,A61,A73,2,A101,2,A121,22,A143,A152,1,A173,1,A191,A201,0,0
2,A14,12,A34,A46,2096,A61,A74,2,A101,3,A121,49,A143,A152,1,A172,2,A191,A201,1,1
3,A11,42,A32,A42,7882,A61,A74,2,A103,4,A122,45,A143,A153,1,A173,2,A191,A201,1,1
4,A11,24,A33,A40,4870,A61,A73,3,A101,4,A124,53,A143,A153,2,A173,2,A191,A201,0,1


In [6]:
# number of samples per value of the protected attribute (sex)
df[protected_attribute].value_counts()

1    690
0    310
Name: sex, dtype: int64

In [7]:
# Split the frame into descriptive features (X) and target (y).
X = df.drop(columns=[label])  # every column except the target
# y keeps the protected attribute next to the label (the original note says AIF360 needs it).
y = df.loc[:, [protected_attribute, label]]

In [8]:
# Append the protected attribute as an additional index level on X and y.
# X also keeps it as a regular column (drop=False); y gives up its column.
X = X.set_index(protected_attribute, append=True, drop=False)
y = y.set_index(protected_attribute, append=True)

# Reduce y to a 1-d Series of labels for modelling while keeping the index
# (which now carries the protected attribute).
y = pd.Series(y[label], index=y.index)

In [9]:
# Persist the prepared features and labels next to the raw data
# (../data/<data_name>/<data_name>_X and ..._y) so later cells can reload them.
X.to_pickle('../data/{}/{}_X'.format(data_name, data_name))
y.to_pickle('../data/{}/{}_y'.format(data_name, data_name))

## [2.](#Table-of-Contents) Proposed GridSearch Approach 

The grid search approach jointly searches over model hyperparameters, the decision threshold, and the bias-mitigation method.

In [10]:
# import FairGridSearch_new as FairGridSearch
# NOTE(review): the star import hides where names come from. The cells below use
# `fair_GridsearchCV` and `style_table`, which are not defined in this notebook and
# presumably come from this module -- consider importing them explicitly.
from FairGridSearch_combo import *
# Load IPython's autoreload extension and set it to mode 2 (reload modules before running code).
%load_ext autoreload
%autoreload 2
# allow automatic reloading of changes in FairGridSearch file

  from .autonotebook import tqdm as notebook_tqdm
pip install 'aif360[FairAdapt]'


In [11]:
# None, better
# 'RW', worse
# 'LFR_pre', stuck
# 'LFR_in', stuck
# 'AD', stuck
# 'EGR',better
# 'ROC', better
# 'CEO', better

## 3. Case Study: German Credit

In [12]:
# Shared settings for every model cell below.
label = 'credit'
protected_attribute = 'sex'

pos_label = 1   # positive class value passed to fair_GridsearchCV
priv_group = 1  # value of the protected attribute treated as the privileged group

cv = 10  # passed as `cv` to fair_GridsearchCV (presumably the number of CV folds -- confirm)
# Leave one core free for the system, but never request fewer than one worker:
# on a single-core machine cpu_count() - 1 would be 0.
n_jobs = max(1, multiprocessing.cpu_count() - 1)

In [13]:
# Reload the prepared features and labels from ../data/<data_name>/.
# NOTE: only unpickle files you created yourself -- loading a pickle can execute arbitrary code.
X = pd.read_pickle('../data/{}/{}_X'.format(data_name, data_name))
y = pd.read_pickle('../data/{}/{}_y'.format(data_name, data_name))

In [14]:
# define desired metric for the use case
# NOTE(review): neither constant is passed to fair_GridsearchCV in the cells below;
# presumably they are read elsewhere (e.g. by the imported module) -- confirm.
ACC_METRIC = 'avg_norm_mcc_score'
FAIR_METRIC = 'abs_avg_spd_score'

### Logistic Regression

In [15]:
# Fair grid search with Logistic Regression ('LR') as the base estimator.
import timeit
start = timeit.default_timer()

# Search space: base-estimator hyperparameters x decision threshold
# (0.3, 0.4, ..., 0.7) x bias-mitigation method (None = no mitigation).
param_grid = {'hyperp_grid': {'C':[1, 10],'solver':['liblinear', 'saga'],'penalty':['l2']},
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','LFR_in','AD','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_lr = fair_GridsearchCV(base='LR', param_grid=param_grid,
                           prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                           cv=cv, n_jobs=n_jobs)
# Guard kept from the original: the fit only runs when this code executes as the main module.
if __name__ == '__main__':
    clf_lr.fit(X=X, y=y)

results_lr = clf_lr.output_table
print(clf_lr._best_param)

# Report wall-clock time in seconds, or in minutes once it reaches a minute.
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results directory exists before writing; otherwise to_pickle
# fails and the results of the whole search are lost at the final write.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_lr.to_pickle('./{}_results/{}_results_LR'.format(data_name,data_name))
style_table(results_lr)

  0%|          | 0/4 [00:00<?, ?it/s]

{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [00:56<02:49, 56.40s/it]

{'C': 1, 'penalty': 'l2', 'solver': 'saga'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [01:47<01:46, 53.45s/it]

{'C': 10, 'penalty': 'l2', 'solver': 'liblinear'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [02:27<00:47, 47.22s/it]

{'C': 10, 'penalty': 'l2', 'solver': 'saga'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [03:09<00:00, 47.26s/it]


base_estimator                                              LR
param              {'C': 1, 'penalty': 'l2', 'solver': 'saga'}
Bias_Mitigation                                             RW
threshold                                                  0.4
Name: 56, dtype: object
Time:  3.1595263283333335 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.3,0.738,0.591905,0.836347,0.78619,0.289396,0.644698,-0.066994,-0.060125,-0.03695,-7.9e-05,-0.051646,0.0998,0.076659,0.08369,0.422296
1,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.4,0.765,0.655952,0.846905,0.78619,0.386606,0.693303,-0.088203,-0.077101,-0.038273,-0.079435,-0.030706,0.1526,0.083957,0.099237,0.3949
2,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.5,0.76,0.687619,0.834817,0.78619,0.404027,0.702014,-0.139313,-0.133722,-0.073295,-0.068408,-0.001388,0.2058,0.106288,0.140002,0.437299
3,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.6,0.744,0.706667,0.813147,0.78619,0.408403,0.704201,-0.116301,-0.098689,-0.062672,-0.068371,-0.006087,0.244,0.134859,0.189776,0.4121
4,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",,0.7,0.719,0.715476,0.781941,0.78619,0.404405,0.702203,-0.115555,-0.104395,-0.051571,-0.09693,0.025323,0.2574,0.169881,0.248275,0.413352
5,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.3,0.74,0.591429,0.838353,0.783619,0.289342,0.644671,-0.010974,0.010906,-0.010859,-0.001032,-0.076735,0.0922,0.074164,0.079498,0.366303
6,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.4,0.752,0.64,0.838584,0.783619,0.346948,0.673474,-0.009007,0.024947,-0.005364,-0.076094,-0.077583,0.1504,0.088681,0.10665,0.335533
7,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.5,0.757,0.680714,0.833535,0.783619,0.393659,0.69683,-0.044898,-0.030746,-0.000802,-0.139112,-0.036104,0.2018,0.106068,0.138751,0.348068
8,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.6,0.749,0.71119,0.816945,0.783619,0.419296,0.709648,-0.045681,-0.019873,-0.006461,-0.098618,-0.039917,0.2404,0.13172,0.184984,0.336034
9,LR,"{'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}",RW,0.7,0.715,0.710714,0.779159,0.783619,0.394761,0.69738,-0.057764,-0.043136,0.000794,-0.120115,-0.003542,0.2594,0.172145,0.251144,0.360384


### Random Forest

In [None]:
# Fair grid search with Random Forest ('RF') as the base estimator.
import timeit
start = timeit.default_timer()

# Search space: base-estimator hyperparameters x decision threshold
# (0.3, 0.4, ..., 0.7) x bias-mitigation method (None = no mitigation).
param_grid = {'hyperp_grid': {'n_estimators':[10, 50],'criterion':['gini', 'entropy'], 'max_depth':[16]},
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_rf = fair_GridsearchCV(base='RF', param_grid=param_grid,
                           prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                           cv=cv, n_jobs=n_jobs)
# Guard kept from the original: the fit only runs when this code executes as the main module.
if __name__ == '__main__':
    clf_rf.fit(X=X, y=y)

results_rf = clf_rf.output_table
print(clf_rf._best_param)

# Report wall-clock time in seconds, or in minutes once it reaches a minute.
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results directory exists before writing; otherwise to_pickle
# fails and the results of the whole search are lost at the final write.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_rf.to_pickle('./{}_results/{}_results_RF'.format(data_name, data_name))
style_table(results_rf)

  0%|          | 0/4 [00:00<?, ?it/s]

{'criterion': 'gini', 'max_depth': 16, 'n_estimators': 10}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


### Gradient Boosting

In [None]:
# Fair grid search with Gradient Boosting ('GB') as the base estimator.
import timeit
start = timeit.default_timer()

# Search space: base-estimator hyperparameters x decision threshold
# (0.3, 0.4, ..., 0.7) x bias-mitigation method (None = no mitigation).
param_grid = {'hyperp_grid': {'n_estimators':[10, 50],'criterion':['friedman_mse'],'max_depth':[8, 32]},
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_gb = fair_GridsearchCV(base='GB', param_grid=param_grid,
                           prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                           cv=cv, n_jobs=n_jobs)
# Guard kept from the original: the fit only runs when this code executes as the main module.
if __name__ == '__main__':
    clf_gb.fit(X=X, y=y)

results_gb = clf_gb.output_table
print(clf_gb._best_param)

# Report wall-clock time in seconds, or in minutes once it reaches a minute.
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results directory exists before writing; otherwise to_pickle
# fails and the results of the whole search are lost at the final write.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_gb.to_pickle('./{}_results/{}_results_GB'.format(data_name, data_name))
# style_table(results_gb)

  0%|          | 0/4 [00:00<?, ?it/s]

{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambd

{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 50}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"

{'criterion': 'friedman_mse', 'max_depth': 32, 'n_estimators': 10}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_e

{'criterion': 'friedman_mse', 'max_depth': 32, 'n_estimators': 50}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"

In [10]:
# Display the Gradient Boosting results table (requires `results_gb` from the cell above).
style_table(results_gb)

Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.3,0.712,0.534286,0.826399,0.733405,0.134516,0.567258,0.009109,0.014645,0.010157,-0.13,-0.073064,0.0488,0.069812,0.07119,0.441851
1,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.4,0.733,0.584524,0.833677,0.733405,0.264133,0.632066,-0.002496,0.008424,0.007471,-0.130029,-0.069423,0.1126,0.077382,0.084965,0.37043
2,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.5,0.737,0.628333,0.827152,0.733405,0.308601,0.654301,-0.000114,0.017349,0.013606,-0.129975,-0.067648,0.1942,0.098378,0.122361,0.345813
3,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.6,0.715,0.647857,0.800277,0.733405,0.30387,0.651935,-0.040123,-0.011933,-0.033677,-0.039669,-0.068946,0.267,0.134481,0.183795,0.388188
4,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",,0.7,0.683,0.673571,0.754977,0.733405,0.323648,0.661824,-0.086862,-0.049035,-0.085928,-0.021232,-0.063871,0.326,0.190555,0.275641,0.425038
5,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.3,0.707,0.529762,0.822999,0.7325,0.110861,0.55543,0.004852,0.008121,0.003444,-0.175,-0.074052,0.0528,0.072313,0.075479,0.449422
6,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.4,0.725,0.579762,0.827641,0.7325,0.233678,0.616839,-0.017719,-0.025552,0.017871,-0.210931,-0.047122,0.1242,0.082836,0.094372,0.40088
7,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.5,0.717,0.609286,0.813116,0.7325,0.253575,0.626787,-0.000581,0.00583,0.021791,-0.147544,-0.058406,0.201,0.108698,0.139294,0.373793
8,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.6,0.722,0.663333,0.803246,0.7325,0.328955,0.664478,-0.017873,0.01147,-0.012745,-0.048876,-0.071927,0.2638,0.134973,0.186305,0.353395
9,GB,"{'criterion': 'friedman_mse', 'max_depth': 8, 'n_estimators': 10}",RW,0.7,0.682,0.671905,0.754084,0.7325,0.321163,0.660581,-0.076326,-0.050347,-0.055222,-0.060412,-0.044318,0.3158,0.19114,0.276123,0.415745


### Support Vector Machine

In [14]:
# Fair grid search with a Support Vector Machine ('SVM') as the base estimator.
import timeit
start = timeit.default_timer()

# Search space: base-estimator hyperparameters x decision threshold
# (0.3, 0.4, ..., 0.7) x bias-mitigation method (None = no mitigation).
param_grid = {'hyperp_grid': {'kernel':['rbf','linear','poly','sigmoid'],'gamma':['scale']},
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_svm = fair_GridsearchCV(base='SVM', param_grid=param_grid,
                            prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                            cv=cv, n_jobs=n_jobs)
# Guard kept from the original: the fit only runs when this code executes as the main module.
if __name__ == '__main__':
    clf_svm.fit(X=X, y=y)

results_svm = clf_svm.output_table
print(clf_svm._best_param)

# Report wall-clock time in seconds, or in minutes once it reaches a minute.
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results directory exists before writing; otherwise to_pickle
# fails and the results of the whole search are lost at the final write.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_svm.to_pickle('./{}_results/{}_results_SVM'.format(data_name, data_name))
style_table(results_svm)

  0%|          | 0/4 [00:00<?, ?it/s]

{'gamma': 'scale', 'kernel': 'rbf'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, m

{'gamma': 'scale', 'kernel': 'linear'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  lambda_event = (lambda_vec["+"] - self.ratio * lambd

{'gamma': 'scale', 'kernel': 'poly'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  lambda_event = (lambda_vec["+"

{'gamma': 'scale', 'kernel': 'sigmoid'}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  lambda_event = (lambda_vec["+"] - self.ratio * lambda_vec["-"]).sum(level=_EVENT) / \
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, m

base_estimator                                         SVM
param              {'gamma': 'scale', 'kernel': 'sigmoid'}
Bias_Mitigation                                         RW
threshold                                              0.7
Name: 129, dtype: object
Time:  5.601162014183319 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.3,0.749,0.595952,0.845235,0.79369,0.324107,0.662053,-0.066609,-0.063815,-0.034539,0.033571,-0.046903,0.0952,0.067663,0.06817,0.404556
1,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.4,0.763,0.636429,0.849236,0.79369,0.372009,0.686004,-0.10887,-0.116295,-0.045888,-0.010635,-0.018646,0.1394,0.075551,0.0835,0.422865
2,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.5,0.764,0.671429,0.842458,0.79369,0.395607,0.697804,-0.098229,-0.100238,-0.024016,-0.110274,-0.00562,0.19,0.093654,0.11664,0.400425
3,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.6,0.748,0.695238,0.820654,0.79369,0.397523,0.698762,-0.116054,-0.108964,-0.059548,-0.064829,-0.00511,0.2314,0.124209,0.170473,0.417293
4,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",,0.7,0.725,0.717857,0.788756,0.79369,0.409954,0.704977,-0.132393,-0.118146,-0.077216,-0.069331,0.015392,0.2674,0.162666,0.238034,0.427415
5,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.3,0.745,0.590238,0.842973,0.794452,0.308345,0.654173,-0.05011,-0.039339,-0.032539,0.030476,-0.059552,0.0924,0.068464,0.069481,0.395937
6,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.4,0.755,0.625952,0.844303,0.794452,0.345636,0.672818,-0.064409,-0.057806,-0.026652,-0.046205,-0.043054,0.1386,0.078025,0.087346,0.391591
7,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.5,0.771,0.677381,0.847699,0.794452,0.413465,0.706733,-0.082798,-0.080469,-0.015818,-0.119129,-0.014715,0.1852,0.089337,0.109772,0.376065
8,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.6,0.747,0.693571,0.820068,0.794452,0.394536,0.697268,-0.069032,-0.064408,-0.008223,-0.119507,-0.0115,0.2298,0.124492,0.170701,0.371764
9,SVM,"{'gamma': 'scale', 'kernel': 'rbf'}",RW,0.7,0.722,0.716667,0.785517,0.794452,0.407061,0.703531,-0.077435,-0.058079,-0.034368,-0.085243,-0.014875,0.2732,0.165985,0.24297,0.373904


### Gaussian Naive Bayes

In [20]:
# Fair grid search with Gaussian Naive Bayes ('NB') as the base estimator.
import timeit
start = timeit.default_timer()

# Search space: var_smoothing in {1, 1e-3, 1e-6, 1e-9} x decision threshold
# (0.3, 0.4, ..., 0.7) x bias-mitigation method (None = no mitigation).
param_grid = {'hyperp_grid': {'var_smoothing': np.logspace(0,-9, num=4)},
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','LFR_pre','EGR','ROC','CEO','RW+ROC','RW+CEO']}

clf_nb = fair_GridsearchCV(base='NB', param_grid=param_grid,
                           prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                           cv=cv, n_jobs=n_jobs)
# Guard kept from the original: the fit only runs when this code executes as the main module.
if __name__ == '__main__':
    clf_nb.fit(X=X, y=y)

results_nb = clf_nb.output_table
# print(clf_nb._best_param)

# Report wall-clock time in seconds, or in minutes once it reaches a minute.
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results directory exists before writing; otherwise to_pickle
# fails and the results of the whole search are lost at the final write.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_nb.to_pickle('./{}_results/{}_results_NB'.format(data_name, data_name))
style_table(results_nb)

  0%|          | 0/4 [00:00<?, ?it/s]

{'var_smoothing': 1.0}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 25%|██▌       | 1/4 [00:33<01:41, 33.75s/it]

{'var_smoothing': 0.001}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 50%|█████     | 2/4 [01:09<01:10, 35.02s/it]

{'var_smoothing': 1e-06}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


 75%|███████▌  | 3/4 [01:40<00:32, 32.97s/it]

{'var_smoothing': 1e-09}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


100%|██████████| 4/4 [02:09<00:00, 32.32s/it]


Time:  2.1622928800000003 min


Unnamed: 0,base_estimator,param,Bias_Mitigation,threshold,avg_acc_score,avg_bacc_score,avg_f1_score,avg_auc_score,avg_mcc_score,avg_norm_mcc_score,avg_spd_score,avg_aod_score,avg_eod_score,avg_ford_score,avg_ppvd_score,avg_(1-consistency_score),avg_gei_score,avg_ti_score,cost
0,NB,{'var_smoothing': 1.0},,0.3,0.72,0.615238,0.814238,0.76219,0.267205,0.633603,-0.00823,0.010439,0.004797,-0.114481,-0.067995,0.1344,0.109215,0.14016,0.374627
1,NB,{'var_smoothing': 1.0},,0.4,0.723,0.638333,0.811114,0.76219,0.299185,0.649593,-0.040703,-0.028465,-0.011298,-0.100546,-0.048514,0.1666,0.119499,0.158546,0.39111
2,NB,{'var_smoothing': 1.0},,0.5,0.733,0.675952,0.810798,0.76219,0.357722,0.678861,-0.085197,-0.082568,-0.025281,-0.124982,-0.007776,0.195,0.1298,0.178671,0.406336
3,NB,{'var_smoothing': 1.0},,0.6,0.723,0.691667,0.795294,0.76219,0.370574,0.685287,-0.077216,-0.056453,-0.042345,-0.075534,-0.027438,0.214,0.150219,0.214295,0.391929
4,NB,{'var_smoothing': 1.0},,0.7,0.715,0.709762,0.77966,0.76219,0.393646,0.696823,-0.06019,-0.030549,-0.025901,-0.079599,-0.028906,0.2324,0.170612,0.249635,0.363367
5,NB,{'var_smoothing': 1.0},RW,0.3,0.722,0.618571,0.815396,0.758143,0.273073,0.636537,0.01498,0.035595,0.025876,-0.137603,-0.071047,0.131,0.108849,0.139806,0.378444
6,NB,{'var_smoothing': 1.0},RW,0.4,0.72,0.630476,0.810101,0.758143,0.287119,0.643559,0.002055,0.027755,0.010278,-0.108648,-0.072864,0.1528,0.118486,0.156183,0.358496
7,NB,{'var_smoothing': 1.0},RW,0.5,0.723,0.663095,0.804151,0.758143,0.331318,0.665659,-0.033196,-0.017875,-0.003148,-0.097962,-0.044732,0.1872,0.133898,0.184344,0.367536
8,NB,{'var_smoothing': 1.0},RW,0.6,0.719,0.682143,0.793622,0.758143,0.355565,0.677782,-0.050112,-0.034009,-0.015017,-0.10402,-0.035769,0.2078,0.150117,0.212487,0.372329
9,NB,{'var_smoothing': 1.0},RW,0.7,0.72,0.712381,0.784704,0.758143,0.399999,0.699999,-0.042195,-0.013315,-0.01027,-0.093204,-0.036934,0.2332,0.165457,0.242159,0.342195


### TabTransformer

In [21]:
# Fair grid search with TabTransformer ('TabTrans') as the base estimator.
# NOTE(review): the recorded run of this cell failed with
# OperatorNotAllowedInGraphError (TensorFlow was in graph mode). Presumably eager
# execution had been disabled earlier in the same kernel -- restart the kernel and
# confirm that nothing calls tf.disable_eager_execution() before running this cell.
import timeit
start = timeit.default_timer()

# Search space: training hyperparameters x decision threshold (0.3, 0.4, ..., 0.7)
# x bias-mitigation method (None = no mitigation).
# NOTE(review): the key 'learing_rate' looks like a misspelling of 'learning_rate'.
# It is left unchanged because the spelling expected by the imported module is not
# visible here -- verify against FairGridSearch_combo.
param_grid = {'hyperp_grid': {'epochs':[20, 30],'learing_rate':[1e-04, 1e-05]},
              'threshold': np.linspace(0.3, 0.7, 5),
              'Bias_Mitigation':[None,'RW','ROC','CEO','RW+ROC','RW+CEO']}

clf_tab = fair_GridsearchCV(base='TabTrans',param_grid=param_grid,
                            prot_attr=protected_attribute, pos_label=pos_label, priv_group=priv_group,
                            cv=cv, n_jobs=n_jobs)

clf_tab.fit(X=X, y=y)
results_tab = clf_tab.output_table
# print(clf_tab._best_param)

# Report wall-clock time in seconds, or in minutes once it reaches a minute.
stop = timeit.default_timer()
runtime = stop - start
if runtime < 60:
    print('Time: ', runtime, 'sec')
else:
    print('Time: ', runtime/60, 'min')

# Make sure the results directory exists before writing; otherwise to_pickle
# fails and the results of the whole search are lost at the final write.
os.makedirs('./{}_results'.format(data_name), exist_ok=True)
results_tab.to_pickle('./{}_results/{}_results_TabTrans'.format(data_name, data_name))

style_table(results_tab)

  0%|          | 0/4 [00:00<?, ?it/s]

{'epochs': 20, 'learing_rate': 0.0001}
------------------------------------------------------------------------------------------
start multiprocessing
------------------------------------------------------------------------------------------


  0%|          | 0/4 [00:17<?, ?it/s]


OperatorNotAllowedInGraphError: Using a symbolic `tf.Tensor` as a Python `bool` is not allowed in Graph execution. Use Eager execution or decorate this function with @tf.function.