In [51]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, roc_auc_score, roc_curve, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import make_column_transformer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import RandomizedSearchCV
from sklearn import clone
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted
from sklearn.exceptions import NotFittedError
import itertools
from pprint import pprint
from tempeh.configurations import datasets
# from fairlearn.widget import FairlearnDashboard
from fairlearn.reductions import GridSearch#, DemographicParity
from sklearn.calibration import CalibratedClassifierCV
from fairlearn.postprocessing import ThresholdOptimizer
%matplotlib inline

In [66]:
path = 'datasets/adult.data'
#csv_url = 'https://raw.githubusercontent.com/propublica/compas-analysis/master/compas-scores-two-years.csv'
df = pd.read_csv(path)
# df.drop(columns=['Loan_ID'], inplace=True)
# 

In [67]:
df.info()
df.head()
df['workclass']

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32561 entries, 0 to 32560
Data columns (total 15 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   age             32561 non-null  int64 
 1   workclass       32561 non-null  object
 2   fnlwgt          32561 non-null  int64 
 3   education       32561 non-null  object
 4   education-num   32561 non-null  int64 
 5   marital-status  32561 non-null  object
 6   occupation      32561 non-null  object
 7   relationship    32561 non-null  object
 8   race            32561 non-null  object
 9   sex             32561 non-null  object
 10  capital-gain    32561 non-null  int64 
 11  capital-loss    32561 non-null  int64 
 12  hours           32561 non-null  int64 
 13  country         32561 non-null  object
 14  label           32561 non-null  object
dtypes: int64(6), object(9)
memory usage: 3.7+ MB


Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours,country,label
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


0                State-gov
1         Self-emp-not-inc
2                  Private
3                  Private
4                  Private
               ...        
32556              Private
32557              Private
32558              Private
32559              Private
32560         Self-emp-inc
Name: workclass, Length: 32561, dtype: object

In [68]:
df.isnull().sum()

age               0
workclass         0
fnlwgt            0
education         0
education-num     0
marital-status    0
occupation        0
relationship      0
race              0
sex               0
capital-gain      0
capital-loss      0
hours             0
country           0
label             0
dtype: int64

In [55]:
len(df)

32561

In [69]:
# df['workclass'] = df.workclass.astype(object)
categorical_features = df.columns[df.dtypes==object].tolist() 
le = LabelEncoder()

df[categorical_features] = df[categorical_features].apply(lambda col: le.fit_transform(col))

# Apply OneHotEncoder on each of the categorical columns
categorical_cols = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'country', 'label']

encoded_features = []
ohe = OneHotEncoder()
for feature in categorical_cols:
    encoded_feat = OneHotEncoder(drop='first').fit_transform(df[feature].values.reshape(-1, 1)).toarray()
    n = df[feature].nunique()
    cols = ['{}_{}'.format(feature, n) for n in range(0, n-1)]
    encoded_df = pd.DataFrame(encoded_feat, columns=cols)
    encoded_df.index = df.index
    encoded_features.append(encoded_df)
df.head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours,country,label
0,39,7,77516,9,13,4,1,1,4,1,2174,0,40,39,0
1,50,6,83311,9,13,2,4,0,4,1,0,0,13,39,0
2,38,4,215646,11,9,0,6,1,4,1,0,0,40,39,0
3,53,4,234721,1,7,2,6,0,2,1,0,0,40,39,0
4,28,4,338409,9,13,2,10,5,2,0,0,0,40,5,0


In [70]:
black_under = len(df[(df['race'] == 2) & (df['label'] == 0)])
black_over = len(df[(df['race'] == 2) & (df['label'] == 1)])

white_under = len(df[(df['race'] == 4) & (df['label'] == 0)])
white_over = len(df[(df['race'] == 4) & (df['label'] == 1)])

male_under = len(df[(df['sex'] == 1) & (df['label'] == 0)])
male_over = len(df[(df['sex'] == 1) & (df['label'] == 1)])

female_under = len(df[(df['sex'] == 0) & (df['label'] == 0)])
female_over = len(df[(df['sex'] == 0) & (df['label'] == 1)])

male_over_rate = male_over/(male_over + male_under)
female_over_rate = female_over/(female_over + female_under)
print(f'Male Over 50k Rate: {male_over_rate}\nFemale Over 50k Rate: {female_over_rate}\n')

white_over_rate = white_over/(white_over + white_under)
black_over_rate = black_over/(black_over + black_under)
print(f'White Over 50k Rate: {white_over_rate}\nBlack Over 50k Rate: {black_over_rate}')

'''
Severe Data imbalance but similar data rates
'''

Male Over 50k Rate: 0.3057365764111978
Female Over 50k Rate: 0.10946058861758426

White Over 50k Rate: 0.2558599367270636
Black Over 50k Rate: 0.12387964148527529


'\nSevere Data imbalance but similar data rates\n'

In [71]:
data = df.copy()
labels = data.pop('label')
sensitive_attributes = data['sex']

X_train, X_test, y_train, y_test, sensitive_attributes_train, sensitive_attributes_test = train_test_split(data, labels,
                                                                                                         sensitive_attributes,
                                                                                                         stratify=df['label'], 
                                                                                                         test_size=0.25,
                                                                                                         random_state = 42)


In [72]:
#Training Logisitic Regression
fairness_unaware_lr = LogisticRegression(solver='liblinear', fit_intercept=True)
fairness_unaware_lr.fit(X_train, y_train)

y_preds_lr = fairness_unaware_lr.predict(X_test)
accuracy_score(y_test, y_preds_lr)

LogisticRegression(solver='liblinear')

0.7959710109323179

In [73]:
merged_test_lr = pd.concat([X_test, y_test], axis=1)
merged_test_lr['label_Preds'] = y_preds_lr
merged_test_lr.head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours,country,label,label_Preds
17581,49,6,162856,15,10,0,4,1,0,0,0,0,40,39,0,0
16280,27,4,187981,11,9,4,6,3,4,1,0,0,40,39,0,0
24719,33,4,259301,15,10,2,14,0,4,1,0,0,41,39,0,0
3986,30,4,159442,11,9,5,1,1,4,0,0,0,40,39,0,0
5138,41,1,57924,15,10,4,11,3,4,1,0,0,40,39,0,0


In [74]:
#Training Random Forest
fairness_unaware_rf = RandomForestClassifier(
                      n_jobs = -1,
                      random_state = 42,
                      max_features = 'auto')

fairness_unaware_rf.fit(X_train, y_train)
y_preds_rf = fairness_unaware_rf.predict(X_test)
accuracy_score(y_test, y_preds_rf)

RandomForestClassifier(n_jobs=-1, random_state=42)

0.8573885272079597

In [75]:
merged_test_rf = pd.concat([X_test, y_test], axis=1)
merged_test_rf['label_Preds'] = y_preds_rf
merged_test_rf.head()

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours,country,label,label_Preds
17581,49,6,162856,15,10,0,4,1,0,0,0,0,40,39,0,0
16280,27,4,187981,11,9,4,6,3,4,1,0,0,40,39,0,0
24719,33,4,259301,15,10,2,14,0,4,1,0,0,41,39,0,0
3986,30,4,159442,11,9,5,1,1,4,0,0,0,40,39,0,0
5138,41,1,57924,15,10,4,11,3,4,1,0,0,40,39,0,0


In [76]:
# confusion_values = confusion_matrix(merged_test_lr['Loan_Status'], merged_test_lr['Loan_Status_Preds'])
# [[true_negatives , false_positives],[false_negatives , true_positives]] = confusion_values
# [[true_negatives , false_positives],[false_negatives , true_positives]]
# recall = true_positives/ (true_positives + false_negatives)
# precision = true_positives/ (true_positives + false_positives)
# recall
# precision

def calc_precision_recall(df):
    confusion_values = confusion_matrix(df['label'], df['label_Preds'])
    [[true_negatives , false_positives],[false_negatives , true_positives]] = confusion_values
    [[true_negatives , false_positives],[false_negatives , true_positives]]
    recall = true_positives/ (true_positives + false_negatives)
    precision = true_positives/ (true_positives + false_positives)
    return precision, recall

lr_precision, lr_recall = calc_precision_recall(merged_test_lr)
rf_precision, rf_recall = calc_precision_recall(merged_test_rf)

print(f'LR Precision, Recall:{lr_precision, lr_recall}\nRF Precision, Recall:{rf_precision, rf_recall}')

LR Precision, Recall:(0.6816524908869988, 0.28622448979591836)
RF Precision, Recall:(0.7365304914150385, 0.6346938775510204)


In [77]:
'''
Overall Accuracy Equality Using Logistic Regression
'''
(merged_test_lr['label'] == merged_test_lr['label_Preds']).astype(int).groupby(merged_test_lr['sex']).mean()
'''
Overall Accuracy Equality Using Random Forest
'''
(merged_test_rf['label'] == merged_test_rf['label_Preds']).astype(int).groupby(merged_test_rf['sex']).mean()

'\nOverall Accuracy Equality Using Logistic Regression\n'

sex
0    0.893752
1    0.748171
dtype: float64

'\nOverall Accuracy Equality Using Random Forest\n'

sex
0    0.931163
1    0.821324
dtype: float64

In [78]:
'''
checking probability for loan status for those who got their loan approved

Fairness Type: Predictive Parity Using Logistic Regression
'''
merged_test_lr[merged_test_lr['label']==1]['label_Preds'].groupby(merged_test_lr['sex']).mean()

'''
checking probability for loan status for those who got their loan approved

Fairness Type: Predictive Parity Using Random Forest
'''
merged_test_rf[merged_test_rf['label']==1]['label_Preds'].groupby(merged_test_rf['sex']).mean()

'\nchecking probability for loan status for those who got their loan approved\n\nFairness Type: Predictive Parity Using Logistic Regression\n'

sex
0    0.301695
1    0.283483
Name: label_Preds, dtype: float64

'\nchecking probability for loan status for those who got their loan approved\n\nFairness Type: Predictive Parity Using Random Forest\n'

sex
0    0.610169
1    0.639039
Name: label_Preds, dtype: float64

In [80]:
'''
Checking average over 50k predictions of both genders on LR
'''
merged_test_lr.groupby('sex').agg({'label': 'mean',  
                        'label_Preds': 'mean'}).transpose()

'''
Checking average over 50k predictions of both genders on RF
'''
merged_test_rf.groupby('sex').agg({'label': 'mean',  
                        'label_Preds': 'mean'}).transpose()

'\nChecking average over 50k predictions of both genders on LR\n'

sex,0,1
label,0.110363,0.304499
label_Preds,0.062477,0.119971


'\nChecking average over 50k predictions of both genders on RF\n'

sex,0,1
label,0.110363,0.304499
label_Preds,0.093154,0.26335


In [81]:
# male_df = merged_test_lr[merged_test_lr['Gender'] == 1]
# female_df = merged_test_lr[merged_test_lr['Gender'] == 0]


# male_confusion_values = confusion_matrix(male_df['Loan_Status'], male_df['Loan_Status_Preds'])
# [[male_tn , male_fp],[male_fn , male_tp]] = male_confusion_values

# female_confusion_values = confusion_matrix(female_df['Loan_Status'], female_df['Loan_Status_Preds'])
# [[female_tn , female_fp],[female_fn , female_tp]] = female_confusion_values
# [[male_tn , male_fp],[male_fn , male_tp]]
# [[female_tn , female_fp],[female_fn , female_tp]]

def calc_confusion_metrics(df, tag=False):
    male_df = df[df['sex'] == 1]
    female_df = df[df['sex'] == 0]

    pred_col = 'label_Fair' if tag else 'label_Preds'
        
    male_confusion_values = confusion_matrix(male_df['label'], male_df[pred_col])
    [[male_tn , male_fp],[male_fn , male_tp]] = male_confusion_values

    female_confusion_values = confusion_matrix(female_df['label'], female_df[pred_col])
    [[female_tn , female_fp],[female_fn , female_tp]] = female_confusion_values
    return [[male_tn , male_fp],[male_fn , male_tp]],[[female_tn , female_fp],[female_fn , female_tp]]

lr_male_metrics, lr_female_metrics = calc_confusion_metrics(merged_test_lr)
rf_male_metrics, rf_female_metrics = calc_confusion_metrics(merged_test_rf)
# [[white_tn , white_fp],[white_fn , white_tp]] = 
# [[white_tn , white_fp],[white_fn , white_tp]]

print(f'LR CM for M: {lr_male_metrics}\tF: {lr_female_metrics}')
print(f'RF CM for M: {rf_male_metrics}\tF: {rf_female_metrics}')

LR CM for M: [[3619, 184], [1193, 472]]	F: [[2300, 78], [206, 89]]
RF CM for M: [[3427, 376], [601, 1064]]	F: [[2309, 69], [115, 180]]


In [82]:
def fpr_fnr(male_metrics,female_metrics, tag= 'LR'):
    [[male_tn , male_fp],[male_fn , male_tp]] = male_metrics
    [[female_tn , female_fp],[female_fn , female_tp]] = female_metrics
    
    female_fpr = female_fp/ (female_fp + female_tn)
    male_fpr = male_fp/ (male_fp + male_tn)

    female_fnr = female_fn/ (female_fn + female_tp)
    male_fnr = male_fn/ (male_fn + male_tp)
    print(tag)
    print(f'FPR for Female Gender: {female_fpr} | FPR for Male Gender: {male_fpr}')
    print(f'FNR for Female Gender: {female_fnr} | FNR for Male Gender: {male_fnr}\n\n')
    
fpr_fnr(lr_male_metrics, lr_female_metrics, tag= 'LR')
fpr_fnr(rf_male_metrics, rf_female_metrics, tag= 'RF')

LR
FPR for Female Gender: 0.03280067283431455 | FPR for Male Gender: 0.04838285564028399
FNR for Female Gender: 0.6983050847457627 | FNR for Male Gender: 0.7165165165165165


RF
FPR for Female Gender: 0.029015979814970564 | FPR for Male Gender: 0.09886931369971075
FNR for Female Gender: 0.3898305084745763 | FNR for Male Gender: 0.360960960960961




# Mitigating Equalised Odds Unfairness

In [87]:
class RF(BaseEstimator, ClassifierMixin):
    def __init__(self, model):
        self.model = model

    def fit(self, X, y):
        try:
            check_is_fitted(self.model)
            self.model_ = self.model
        except NotFittedError:
            self.model_ = clone(
                self.model
            ).fit(X, y)
        return self

    def predict(self, X):
        scores = self.model_.predict_proba(X)[:, 1]
        return scores
    
class LR(BaseEstimator, ClassifierMixin):
    def __init__(self, model):
        self.model = model

    def fit(self, X, y):
        try:
            check_is_fitted(self.model)
            self.model_ = self.model
        except NotFittedError:
            self.model_ = clone(
                self.model
            ).fit(X, y)
        return self

    def predict(self, X):
        scores = self.model_.predict_proba(X)[:, 1]
        return scores

In [88]:
'''
First Unfairness Mitigation in  Random Forest Classifier
'''
estimator_wrapper_rf = RF(fairness_unaware_rf).fit(X_train, y_train)

postprocessed_predictor_EO_rf = ThresholdOptimizer(
    estimator=estimator_wrapper_rf, constraints="equalized_odds", prefit=True
    #estimator=estimator_wrapper_rf, constraints="false_positive_rate_parity", prefit=True
)

postprocessed_predictor_EO_rf.fit(
    X_train, y_train, sensitive_features=sensitive_attributes_train
)

fairness_aware_predictions_EO_train_rf = postprocessed_predictor_EO_rf.predict(
    X_train, sensitive_features=sensitive_attributes_train
)
fairness_aware_predictions_EO_test_rf = postprocessed_predictor_EO_rf.predict(
    X_test, sensitive_features=sensitive_attributes_test
)

'\nFirst Unfairness Mitigation in  Random Forest Classifier\n'



ThresholdOptimizer(constraints='equalized_odds',
                   estimator=RF(model=RandomForestClassifier(n_jobs=-1,
                                                             random_state=42)),
                   prefit=True)

In [89]:
unaware_y_pred = fairness_unaware_rf.predict(X_test)
accuracy_score(y_test, unaware_y_pred)

0.8573885272079597

In [90]:
accuracy_score(y_test, fairness_aware_predictions_EO_test_rf)

0.8573885272079597

In [91]:
merged_test_rf['label_Fair'] =  fairness_aware_predictions_EO_test_rf
'''
Before Fairness Constraint Applied
'''
(merged_test_rf['label'] == merged_test_rf['label_Preds']).astype(int).groupby(merged_test_rf['sex']).mean()
'''
After Fairness Constraint Applied. Slight Decrease for males
'''
(merged_test_rf['label'] == merged_test_rf['label_Fair']).astype(int).groupby(merged_test_rf['sex']).mean()

'\nBefore Fairness Constraint Applied\n'

sex
0    0.931163
1    0.821324
dtype: float64

'\nAfter Fairness Constraint Applied. Slight Decrease for males\n'

sex
0    0.930789
1    0.821507
dtype: float64

In [92]:
'''
checking probability for loan status for those who got their loan approved

Fairness Type: Predictive Parity Using Random Forest
'''
merged_test_rf[merged_test_rf['label']==1]['label_Preds'].groupby(merged_test_rf['sex']).mean()
'''
After FPR Parity solved. Dip in fairness
'''
merged_test_rf[merged_test_rf['label']==1]['label_Fair'].groupby(merged_test_rf['sex']).mean()

'\nchecking probability for loan status for those who got their loan approved\n\nFairness Type: Predictive Parity Using Random Forest\n'

sex
0    0.610169
1    0.639039
Name: label_Preds, dtype: float64

'\nAfter FPR Parity solved. Dip in fairness\n'

sex
0    0.623729
1    0.661261
Name: label_Fair, dtype: float64

In [93]:
'''
Checking average >50k predictions of both genders on RF
'''
merged_test_rf.groupby('sex').agg({'label': 'mean',  
                        'label_Preds': 'mean'}).transpose()
'''
Checking average >50k predictions of both genders on RF After Fairness. Slight Improvement for males
'''
merged_test_rf.groupby('sex').agg({'label': 'mean',  
                        'label_Fair': 'mean'}).transpose()

'\nChecking average >50k predictions of both genders on RF\n'

sex,0,1
label,0.110363,0.304499
label_Preds,0.093154,0.26335


'\nChecking average >50k predictions of both genders on RF After Fairness. Slight Improvement for males\n'

sex,0,1
label,0.110363,0.304499
label_Fair,0.096521,0.276701


In [94]:
rf_male_metrics, rf_female_metrics = calc_confusion_metrics(merged_test_rf, tag = False)
fpr_fnr(rf_male_metrics, rf_female_metrics, tag= 'RF')
'''
After fairness
'''
rf_male_metrics, rf_female_metrics = calc_confusion_metrics(merged_test_rf, tag = True)
fpr_fnr(rf_male_metrics, rf_female_metrics, tag= 'RF')

RF
FPR for Female Gender: 0.029015979814970564 | FPR for Male Gender: 0.09886931369971075
FNR for Female Gender: 0.3898305084745763 | FNR for Male Gender: 0.360960960960961




'\nAfter fairness\n'

RF
FPR for Female Gender: 0.031118587047939444 | FPR for Male Gender: 0.10833552458585327
FNR for Female Gender: 0.376271186440678 | FNR for Male Gender: 0.3387387387387387




# Mitigating Logistic Regression Unfairness

In [95]:
'''
First Unfairness Mitigation in  Logistic Regression Classifier
'''
estimator_wrapper_lr = LR(fairness_unaware_lr).fit(X_train, y_train)

postprocessed_predictor_EO_lr = ThresholdOptimizer(
    estimator=estimator_wrapper_lr, constraints="equalized_odds", prefit=True
    #estimator=estimator_wrapper_rf, constraints="false_positive_rate_parity", prefit=True
)

postprocessed_predictor_EO_lr.fit(
    X_train, y_train, sensitive_features=sensitive_attributes_train
)

fairness_aware_predictions_EO_train_lr = postprocessed_predictor_EO_lr.predict(
    X_train, sensitive_features=sensitive_attributes_train
)
fairness_aware_predictions_EO_test_lr = postprocessed_predictor_EO_lr.predict(
    X_test, sensitive_features=sensitive_attributes_test
)
unaware_y_pred_lr = fairness_unaware_lr.predict(X_test)
accuracy_score(y_test, unaware_y_pred_lr)

'\nFirst Unfairness Mitigation in  Logistic Regression Classifier\n'



ThresholdOptimizer(constraints='equalized_odds',
                   estimator=LR(model=LogisticRegression(solver='liblinear')),
                   prefit=True)

0.7959710109323179

In [96]:
accuracy_score(y_test, fairness_aware_predictions_EO_test_lr)

0.7996560619088564

In [97]:
merged_test_lr['label_Fair'] =  fairness_aware_predictions_EO_test_lr
'''
Before Fairness Constraint Applied
'''
(merged_test_lr['label'] == merged_test_lr['label_Preds']).astype(int).groupby(merged_test_lr['sex']).mean()
'''
After Fairness Constraint Applied. Significant Increase
'''
(merged_test_lr['label'] == merged_test_lr['label_Fair']).astype(int).groupby(merged_test_lr['sex']).mean()

'\nBefore Fairness Constraint Applied\n'

sex
0    0.893752
1    0.748171
dtype: float64

'\nAfter Fairness Constraint Applied. Significant Increase\n'

sex
0    0.906472
1    0.747440
dtype: float64

In [98]:
'''
checking probability for >50k 

Fairness Type: Predictive Parity Using LR
'''
merged_test_lr[merged_test_lr['label']==1]['label_Preds'].groupby(merged_test_lr['sex']).mean()
'''
After FPR Parity solved. Significant Improvement.
'''
merged_test_lr[merged_test_lr['label']==1]['label_Fair'].groupby(merged_test_lr['sex']).mean()

'\nchecking probability for >50k \n\nFairness Type: Predictive Parity Using LR\n'

sex
0    0.301695
1    0.283483
Name: label_Preds, dtype: float64

'\nAfter FPR Parity solved. Significant Improvement.\n'

sex
0    0.223729
1    0.192192
Name: label_Fair, dtype: float64

In [99]:
'''
Checking average >50k of both genders on LR
'''
merged_test_lr.groupby('sex').agg({'label': 'mean',  
                        'label_Preds': 'mean'}).transpose()
'''
Checking average >50k of both genders on RF After Fairness. Slight Improvement for males
'''
merged_test_lr.groupby('sex').agg({'label': 'mean',  
                        'label_Fair': 'mean'}).transpose()

'\nChecking average >50k of both genders on LR\n'

sex,0,1
label,0.110363,0.304499
label_Preds,0.062477,0.119971


'\nChecking average >50k of both genders on RF After Fairness. Slight Improvement for males\n'

sex,0,1
label,0.110363,0.304499
label_Fair,0.032548,0.065106


In [100]:
'''
Equalised Odds
'''
lr_male_metrics, lr_female_metrics = calc_confusion_metrics(merged_test_lr, tag = False)
fpr_fnr(lr_male_metrics, lr_female_metrics, tag= 'LR')
'''
After fairness
'''
lr_male_metrics, lr_female_metrics = calc_confusion_metrics(merged_test_lr, tag = True)
fpr_fnr(lr_male_metrics, lr_female_metrics, tag= 'LR')

'\nEqualised Odds\n'

LR
FPR for Female Gender: 0.03280067283431455 | FPR for Male Gender: 0.04838285564028399
FNR for Female Gender: 0.6983050847457627 | FNR for Male Gender: 0.7165165165165165




'\nAfter fairness\n'

LR
FPR for Female Gender: 0.008830950378469302 | FPR for Male Gender: 0.00946621088614252
FNR for Female Gender: 0.7762711864406779 | FNR for Male Gender: 0.8078078078078078




In [101]:
def show_proportions(
    X, sensitive_features, y_pred, y=None, description=None, plot_row_index=1
):
    print("\n" + description)
    plt.figure(plot_row_index)
    plt.title(description)
    plt.ylabel("P[recidivism predicted | conditions]")

    indices = {}
    positive_indices = {}
    negative_indices = {}
    recidivism_count = {}
    recidivism_pct = {}
    groups = np.unique(sensitive_features.values)
    n_groups = len(groups)
    max_group_length = 1#max([len(str(group)) for group in groups])
    color = cm.rainbow(np.linspace(0, 1, n_groups))
    x_tick_labels_basic = []
    x_tick_labels_by_label = []
    for index, group in enumerate(groups):
        indices[group] = sensitive_features.index[sensitive_features == group]
        recidivism_count[group] = sum(y_pred[indices[group]])
        recidivism_pct[group] = recidivism_count[group] / len(indices[group])
        print(
            "P[recidivism predicted | {}]                {}= {}".format(
                group, " " * (max_group_length - len(group)), recidivism_pct[group]
            )
        )

        plt.bar(index + 1, recidivism_pct[group], color=color[index])
        x_tick_labels_basic.append(group)

        if y is not None:
            positive_indices[group] = sensitive_features.index[
                (sensitive_features == group) & (y == 1)
            ]
            negative_indices[group] = sensitive_features.index[
                (sensitive_features == group) & (y == 0)
            ]
            prob_1 = sum(y_pred[positive_indices[group]]) / len(positive_indices[group])
            prob_0 = sum(y_pred[negative_indices[group]]) / len(negative_indices[group])
            print(
                "P[recidivism predicted | {}, recidivism]    {}= {}".format(
                    group, " " * (max_group_length - len(group)), prob_1
                )
            )
            print(
                "P[recidivism predicted | {}, no recidivism] {}= {}".format(
                    group, " " * (max_group_length - len(group)), prob_0
                )
            )

            plt.bar(n_groups + 1 + 2 * index, prob_1, color=color[index])
            plt.bar(n_groups + 2 + 2 * index, prob_0, color=color[index])
            x_tick_labels_by_label.extend(
                ["{} recidivism".format(group), "{} no recidivism".format(group)]
            )

    x_tick_labels = x_tick_labels_basic + x_tick_labels_by_label
    plt.xticks(
        range(1, len(x_tick_labels) + 1),
        x_tick_labels,
        rotation=45,
        horizontalalignment="right",
    )

In [102]:
#X_test = X_test.drop(['Gender'], axis = 1)
X_test['sex'] = sensitive_attributes_test.replace({"Female": 0, "Male": 1}, inplace=True)
sensitive_attributes_test.replace({0: "Female", 1: "Male"}, inplace=True)
show_proportions(
    X_test,
    sensitive_attributes_test,
    fairness_unaware_lr.predict(X_test),
    y_test,
    description="Fairness Unaware LR",
    plot_row_index=1,
)
# show_proportions(
#     X_test,
#     sensitive_attributes_test,
#     fairness_aware_predictions_EO_test_lr,
#     y_test,
#     description="Equalized Odds Postprocessing Using Fairlearn",
#     plot_row_index=2,
# )
# plt.show()

TypeError: Cannot compare types 'ndarray(dtype=int32)' and 'str'