# 1. Imports

In [1]:
import os

# NOTE: the relative order of the imports below is kept as-is on purpose:
# wildcard imports shadow earlier names, so reordering could change which
# definition a name resolves to.
from impt_functions import *
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score
import pandas as pd
import numpy as np
from fairlearn.reductions import ExponentiatedGradient, GridSearch, DemographicParity, EqualizedOdds, \
    TruePositiveRateParity, FalsePositiveRateParity, ErrorRateParity, BoundedGroupLoss
from fairlearn.metrics import *
from raiwidgets import FairnessDashboard
from sklearn.metrics import balanced_accuracy_score, roc_auc_score
%matplotlib inline

# 2. Prepare data

In [2]:
# The CSV location is machine-specific, so it can be overridden through the
# DELAYEDIMPACT_DATA_CSV environment variable. The original absolute path is
# kept as the default so existing setups behave exactly as before.
DATA_CSV = os.environ.get(
    'DELAYEDIMPACT_DATA_CSV',
    '/home/mackenzie/git_repositories/delayedimpact/data/simData_oom100.csv',
)

# Load the simulated data set (the recorded run shows a 100000 x 4 table with
# columns score, repay_probability, race and repay_indices).
data = get_data(DATA_CSV)

       score  repay_probability  race  repay_indices
0        601              75.21     1              1
1        693              95.15     1              1
2        791              98.62     1              1
3        637              86.69     1              1
4        775              98.45     1              1
...      ...                ...   ...            ...
99995    797              98.73     1              1
99996    562              58.57     1              1
99997    687              94.60     1              1
99998    589              70.61     1              1
99999    555              52.97     1              0

[100000 rows x 4 columns]


In [3]:
# Split the loaded data into train/test pieces (test_size=0.3, i.e. 30% held
# out): features, labels, race (the sensitive feature) and sample weights.
# NOTE(review): the meaning of weight_index=1 is defined in
# impt_functions.prep_data — confirm there.
(
    X_train, X_test,
    y_train, y_test,
    race_train, race_test,
    sample_weight_train, sample_weight_test,
) = prep_data(data=data, test_size=0.3, weight_index=1)

Here are the x values:  [[601   1]
 [693   1]
 [791   1]
 ...
 [687   1]
 [589   1]
 [555   1]] 

Here are the y values:  [1 1 1 ... 1 1 0]
Sample weights are all equal.


# Decision Tree Classifier

## Train DTC classifier + Collect Predictions
NOTE: at the moment, the sample weights are all 1.

In [4]:
# Reference: https://www.datacamp.com/community/tutorials/decision-tree-classification-python

# Initialize classifier.
# random_state is pinned so that refitting after a kernel restart yields the
# same tree: scikit-learn permutes the candidate features at every split, so
# without a fixed seed ties between equally good splits may be broken
# differently from run to run, changing every metric computed below.
clf = DecisionTreeClassifier(random_state=0)

# Train the classifier (fit() returns the fitted estimator itself, so `model`
# and `clf` refer to the same object):
model = clf.fit(X_train, y_train)

# Make hard 0/1 predictions with the classifier:
y_predict = model.predict(X_test)

# Scores on test set: column 1 of predict_proba, i.e. the predicted
# probability of the second class (label 1 for the 0/1 labels used here).
test_scores = model.predict_proba(X_test)[:, 1]

### Evaluation of classifier overall

In [5]:
# Metrics: register the unmitigated model's (hard predictions, scores) pair
# under its display name, then tabulate the metrics against the test labels
# and race. The table is shown as the cell's output.
models_dict = dict(Unmitigated=(y_predict, test_scores))
get_metrics_df(models_dict, y_test, race_test)

Unnamed: 0,Unmitigated
Overall selection rate,0.729567
Demographic parity difference,0.450625
Demographic parity ratio,0.424364
------,
Overall balanced error rate,0.151732
Balanced error rate difference,0.0170221
------,
True positive rate difference,0.190934
True negative rate difference,0.156889
False positive rate difference,0.156889


In [6]:
# Overall (all groups pooled) evaluation of the unmitigated tree on the test set.
cm = confusion_matrix(y_true=y_test, y_pred=y_predict)
print(cm)
print(classification_report(y_true=y_test, y_pred=y_predict))

# Project helpers that print outcome rates, F1 scores and the overall
# selection rate (see the recorded output).
# NOTE(review): the recorded run also shows a deprecation warning about a
# positional 'metric' argument, presumably raised inside one of these helpers;
# the meaning of the trailing 0 passed to get_selection_rates is defined in
# impt_functions — confirm there.
evaluation_outcome_rates(y_test, y_predict, sample_weight_test)
get_f1_scores(y_test, y_predict)
get_selection_rates(y_test, y_predict, race_test, 0)

[[ 6644  2036]
 [ 1469 19851]]
              precision    recall  f1-score   support

           0       0.82      0.77      0.79      8680
           1       0.91      0.93      0.92     21320

    accuracy                           0.88     30000
   macro avg       0.86      0.85      0.86     30000
weighted avg       0.88      0.88      0.88     30000

TNR=TN/(TN+FP)=  0.7654377880184332
TPR=TP/(FP+FN)=  0.9310975609756098
FNER=FN/(FN+TP)=  0.06890243902439025
FPER=FP/(FP+TN)=  0.23456221198156682
F1 score micro: 
0.8831666666666667
F1 score weighted: 
0.8819608768731572
F1 score binary: 
0.9188788853657973
Selection Rate Overall:  0.7295666666666667


The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


### Evaluation of classifier by race

In [7]:
# Per-group breakdown of the unmitigated predictions: the recorded output
# shows the selection rate by group followed by a confusion matrix,
# classification report, outcome rates and F1 scores for each race group.
evaluation_by_race(
    X_test,
    y_test,
    race_test,
    y_predict,
    sample_weight_test,
)

Selection Rate By Group:  sensitive_feature_0
0    0.332205
1    0.782831
Name: selection_rate, dtype: object 

EVALUATION FOR BLACK GROUP
[[2071  283]
 [ 297  895]]
              precision    recall  f1-score   support

           0       0.87      0.88      0.88      2354
           1       0.76      0.75      0.76      1192

    accuracy                           0.84      3546
   macro avg       0.82      0.82      0.82      3546
weighted avg       0.84      0.84      0.84      3546

TNR=TN/(TN+FP)=  0.8797790994052677
TPR=TP/(FP+FN)=  0.7508389261744967
FNER=FN/(FN+TP)=  0.24916107382550334
FPER=FP/(FP+TN)=  0.12022090059473237
F1 score micro: 
0.8364354201917654
F1 score weighted: 
0.8361947899037017
F1 score binary: 
0.7552742616033755

EVALUATION FOR WHITE GROUP
[[ 4573  1753]
 [ 1172 18956]]
              precision    recall  f1-score   support

           0       0.80      0.72      0.76      6326
           1       0.92      0.94      0.93     20128

    accuracy            

### Delayed impact calculated by race

In [8]:
# Print the delayed impact of the unmitigated predictions for each race group
# (one value per group in the recorded output).
calculate_delayed_impact(
    X_test,
    y_test,
    y_predict,
    race_test,
)

The delayed impact of the black group is:  6.958544839255499
The delayed impact of the white group is:  43.80244953504196


### Fairness Metric Evaluation of classifier

In [9]:
# Print the demographic-parity and equalized-odds difference/ratio of the
# unmitigated predictions with respect to race.
print_fairness_metrics(
    y_test,
    y_predict,
    race_test,
)

DP Difference:  0.45062527207781544
-->difference of 0 means that all groups have the same selection rate
DP Ratio: 0.42436424030390024
-->ratio of 1 means that all groups have the same selection rate 

EOD Difference:  0.1909337288334525
-->difference of 0 means that all groups have the same TN, TN, FP, and FN rates
EOD Ratio: 0.4338376595335293
-->ratio of 1 means that all groups have the same TN, TN, FP, and FN rates rates 



## Exponentiated Gradient Reduction Alg for Adding Fairness Constraints

### Demographic Parity

In [10]:
# Apply the Exponentiated Gradient reduction ('EG') with a demographic-parity
# ('DP') constraint to the unmitigated model. The helper prints the overall and
# per-race evaluation of the constrained classifier; its return value is kept
# in eg_dp.
eg_dp = add_contraint(
    model, 'DP', 'EG',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict,
    sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

Exponentiated Gradient Reduction Alg is used here with  DP  as the fairness constraint.

Evaluation of  DP -constrained classifier overall:
[[ 5473  3207]
 [ 1272 20048]]
              precision    recall  f1-score   support

           0       0.81      0.63      0.71      8680
           1       0.86      0.94      0.90     21320

    accuracy                           0.85     30000
   macro avg       0.84      0.79      0.80     30000
weighted avg       0.85      0.85      0.84     30000

TNR=TN/(TN+FP)=  0.6305299539170507
TPR=TP/(FP+FN)=  0.9403377110694184
FNER=FN/(FN+TP)=  0.05966228893058161
FPER=FP/(FP+TN)=  0.3694700460829493
F1 score micro: 
0.8507000000000001
F1 score weighted: 
0.8445760333639062
F1 score binary: 
0.8995176668536176


The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


Selection Rate Overall:  0.7751666666666667


Evaluation of  DP -constrained classifier by race:
Selection Rate By Group:  sensitive_feature_0
0    0.759729
1    0.777236
Name: selection_rate, dtype: object 

EVALUATION FOR BLACK GROUP
[[ 822 1532]
 [  30 1162]]
              precision    recall  f1-score   support

           0       0.96      0.35      0.51      2354
           1       0.43      0.97      0.60      1192

    accuracy                           0.56      3546
   macro avg       0.70      0.66      0.56      3546
weighted avg       0.79      0.56      0.54      3546

TNR=TN/(TN+FP)=  0.3491928632115548
TPR=TP/(FP+FN)=  0.9748322147651006
FNER=FN/(FN+TP)=  0.025167785234899327
FPER=FP/(FP+TN)=  0.6508071367884452
F1 score micro: 
0.5595036661026509
F1 score weighted: 
0.541447529417134
F1 score binary: 
0.5980442614513639

EVALUATION FOR WHITE GROUP
[[ 4651  1675]
 [ 1242 18886]]
              precision    recall  f1-score   support

           0       0.79      0.74    

### Equalized Odds

In [11]:
# Apply the Exponentiated Gradient reduction ('EG') with an equalized-odds
# ('EO') constraint to the unmitigated model. The helper prints the overall and
# per-race evaluation of the constrained classifier; its return value is kept
# in eg_eo.
eg_eo = add_contraint(
    model, 'EO', 'EG',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict,
    sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

Exponentiated Gradient Reduction Alg is used here with  EO  as the fairness constraint.

Evaluation of  EO -constrained classifier overall:
[[ 6561  2119]
 [ 2192 19128]]
              precision    recall  f1-score   support

           0       0.75      0.76      0.75      8680
           1       0.90      0.90      0.90     21320

    accuracy                           0.86     30000
   macro avg       0.82      0.83      0.83     30000
weighted avg       0.86      0.86      0.86     30000

TNR=TN/(TN+FP)=  0.7558755760368664
TPR=TP/(FP+FN)=  0.8971857410881802
FNER=FN/(FN+TP)=  0.10281425891181989
FPER=FP/(FP+TN)=  0.24412442396313364
F1 score micro: 
0.8563
F1 score weighted: 
0.8564776503509381
F1 score binary: 
0.8987243639439002


The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


Selection Rate Overall:  0.7082333333333334


Evaluation of  EO -constrained classifier by race:
Selection Rate By Group:  sensitive_feature_0
0    0.466441
1    0.740644
Name: selection_rate, dtype: object 

EVALUATION FOR BLACK GROUP
[[1748  606]
 [ 144 1048]]
              precision    recall  f1-score   support

           0       0.92      0.74      0.82      2354
           1       0.63      0.88      0.74      1192

    accuracy                           0.79      3546
   macro avg       0.78      0.81      0.78      3546
weighted avg       0.83      0.79      0.79      3546

TNR=TN/(TN+FP)=  0.7425658453695837
TPR=TP/(FP+FN)=  0.8791946308724832
FNER=FN/(FN+TP)=  0.12080536912751678
FPER=FP/(FP+TN)=  0.2574341546304163
F1 score micro: 
0.7884940778341794
F1 score weighted: 
0.7941544848700293
F1 score binary: 
0.7364722417427969

EVALUATION FOR WHITE GROUP
[[ 4813  1513]
 [ 2048 18080]]
              precision    recall  f1-score   support

           0       0.70      0.76    

### EOO (True Positive Rate Parity)

In [12]:
# Apply the Exponentiated Gradient reduction ('EG') with a true-positive-rate
# parity ('TPRP') constraint to the unmitigated model. The helper prints the
# overall and per-race evaluation of the constrained classifier; its return
# value is kept in eg_tprp.
eg_tprp = add_contraint(
    model, 'TPRP', 'EG',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict,
    sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

Exponentiated Gradient Reduction Alg is used here with  TPRP  as the fairness constraint.

Evaluation of  TPRP -constrained classifier overall:
[[ 6216  2464]
 [ 1448 19872]]
              precision    recall  f1-score   support

           0       0.81      0.72      0.76      8680
           1       0.89      0.93      0.91     21320

    accuracy                           0.87     30000
   macro avg       0.85      0.82      0.84     30000
weighted avg       0.87      0.87      0.87     30000

TNR=TN/(TN+FP)=  0.7161290322580646
TPR=TP/(FP+FN)=  0.9320825515947467
FNER=FN/(FN+TP)=  0.06791744840525328
FPER=FP/(FP+TN)=  0.2838709677419355
F1 score micro: 
0.8695999999999999
F1 score weighted: 
0.8670643312809774
F1 score binary: 
0.9103903243540407


The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


Selection Rate Overall:  0.7445333333333334


Evaluation of  TPRP -constrained classifier by race:
Selection Rate By Group:  sensitive_feature_0
0    0.547095
1    0.770999
Name: selection_rate, dtype: object 

EVALUATION FOR BLACK GROUP
[[1496  858]
 [ 110 1082]]
              precision    recall  f1-score   support

           0       0.93      0.64      0.76      2354
           1       0.56      0.91      0.69      1192

    accuracy                           0.73      3546
   macro avg       0.74      0.77      0.72      3546
weighted avg       0.81      0.73      0.73      3546

TNR=TN/(TN+FP)=  0.6355140186915887
TPR=TP/(FP+FN)=  0.9077181208053692
FNER=FN/(FN+TP)=  0.09228187919463088
FPER=FP/(FP+TN)=  0.3644859813084112
F1 score micro: 
0.7270163564579808
F1 score weighted: 
0.7338322315936725
F1 score binary: 
0.6909323116219669

EVALUATION FOR WHITE GROUP
[[ 4720  1606]
 [ 1338 18790]]
              precision    recall  f1-score   support

           0       0.78      0.75  

### False Positive Rate Parity

In [13]:
# Apply the Exponentiated Gradient reduction ('EG') with a false-positive-rate
# parity ('FPRP') constraint to the unmitigated model. The helper prints the
# overall and per-race evaluation of the constrained classifier; its return
# value is kept in eg_fprp.
eg_fprp = add_contraint(
    model, 'FPRP', 'EG',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict,
    sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

Exponentiated Gradient Reduction Alg is used here with  FPRP  as the fairness constraint.

Evaluation of  FPRP -constrained classifier overall:
[[ 6393  2287]
 [ 1388 19932]]
              precision    recall  f1-score   support

           0       0.82      0.74      0.78      8680
           1       0.90      0.93      0.92     21320

    accuracy                           0.88     30000
   macro avg       0.86      0.84      0.85     30000
weighted avg       0.88      0.88      0.88     30000

TNR=TN/(TN+FP)=  0.7365207373271889
TPR=TP/(FP+FN)=  0.9348968105065666
FNER=FN/(FN+TP)=  0.0651031894934334
FPER=FP/(FP+TN)=  0.2634792626728111
F1 score micro: 
0.8775
F1 score weighted: 
0.8754195958866932
F1 score binary: 
0.9155929166953766


The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


Selection Rate Overall:  0.7406333333333334


Evaluation of  FPRP -constrained classifier by race:
Selection Rate By Group:  sensitive_feature_0
0    0.467569
1    0.777236
Name: selection_rate, dtype: object 

EVALUATION FOR BLACK GROUP
[[1742  612]
 [ 146 1046]]
              precision    recall  f1-score   support

           0       0.92      0.74      0.82      2354
           1       0.63      0.88      0.73      1192

    accuracy                           0.79      3546
   macro avg       0.78      0.81      0.78      3546
weighted avg       0.82      0.79      0.79      3546

TNR=TN/(TN+FP)=  0.740016992353441
TPR=TP/(FP+FN)=  0.87751677852349
FNER=FN/(FN+TP)=  0.12248322147651007
FPER=FP/(FP+TN)=  0.2599830076465591
F1 score micro: 
0.7862380146644106
F1 score weighted: 
0.7919727067804931
F1 score binary: 
0.7340350877192983

EVALUATION FOR WHITE GROUP
[[ 4651  1675]
 [ 1242 18886]]
              precision    recall  f1-score   support

           0       0.79      0.74     

### Error Rate Parity

In [14]:
# Apply the Exponentiated Gradient reduction ('EG') with an error-rate parity
# ('ERP') constraint to the unmitigated model.
# NOTE: in the recorded run this cell was stopped with a KeyboardInterrupt
# before finishing, so eg_erp was not assigned in that session.
eg_erp = add_contraint(
    model, 'ERP', 'EG',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict,
    sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

Exponentiated Gradient Reduction Alg is used here with  ERP  as the fairness constraint.



KeyboardInterrupt: 

### Bounded Group Loss (TODO: unresolved issue — need to figure out the loss parameter)

In [None]:
#add_contraint(model, 'BGL', 'EG', X_train, y_train, race_train, race_test, X_test, y_test, y_predict, sample_weight_test, False)

## Grid Search Reduction Alg for Adding Fairness Constraints

### Demographic Parity

In [None]:
# Same helper as in the Exponentiated Gradient section, now called with 'GS'
# (the Grid Search reduction, per this section's heading) and a
# demographic-parity ('DP') constraint. The return value is kept in gs_dp.
gs_dp = add_contraint(
    model, 'DP', 'GS',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict,
    sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

In [None]:
# We can examine the values of lambda_i chosen for us: bind the lambda_vecs_
# attribute of the DP grid-search result and print its entry 0.
# NOTE(review): presumably entry 0 is the multiplier vector of the first grid
# point — confirm against the fairlearn GridSearch documentation.
# The name `lambda_vecs` is rebound by the matching cell of every later
# grid-search section, so it always reflects the most recently run one.
lambda_vecs = gs_dp.lambda_vecs_
print(lambda_vecs[0])

The next few cells come from: https://github.com/fairlearn/fairlearn/blob/main/notebooks/Binary%20Classification%20with%20the%20UCI%20Credit-card%20Default%20Dataset.ipynb

Note: we train multiple models corresponding to different trade-off points between the performance metric (balanced accuracy) and fairness metric.

In [None]:
# Show the grid-search models for the DP constraint, scored with
# demographic_parity_difference.
# NOTE(review): presumably this registers the result in models_dict under
# 'GS DPD' (the next cell pops that key); the meaning of the trailing 0.3 is
# defined in impt_functions — confirm there.
grid_search_show(
    gs_dp, demographic_parity_difference,
    y_predict, X_test, y_test, race_test,
    'DemParityDifference', 'GS DPD',
    models_dict,
    0.3,
)

In [None]:
# Drop the 'GS DPD' entry from models_dict and display what remains.
# A default is passed to pop() so that re-running this cell, or running it
# when the key was never added, does not raise KeyError.
models_dict.pop('GS DPD', None)
models_dict

### Equalized Odds

In [None]:
# Same helper as in the Exponentiated Gradient section, now called with 'GS'
# (the Grid Search reduction, per this section's heading) and an
# equalized-odds ('EO') constraint. The return value is kept in gs_eo.
gs_eo = add_contraint(
    model, 'EO', 'GS',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict,
    sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

In [None]:
# We can examine the values of lambda_i chosen for us: bind the lambda_vecs_
# attribute of the EO grid-search result and print its entry 0.
# NOTE(review): presumably entry 0 is the multiplier vector of the first grid
# point — confirm against the fairlearn GridSearch documentation.
# This rebinds the notebook-wide name `lambda_vecs`.
lambda_vecs = gs_eo.lambda_vecs_
print(lambda_vecs[0])

In [None]:
# Show the grid-search models for the EO constraint, scored with
# equalized_odds_difference.
# NOTE(review): presumably this registers the result in models_dict under
# 'GS EO' (the next cell pops that key); the meaning of the trailing 0.3 is
# defined in impt_functions — confirm there.
grid_search_show(
    gs_eo, equalized_odds_difference,
    y_predict, X_test, y_test, race_test,
    'EOddsDifference', 'GS EO',
    models_dict,
    0.3,
)

In [None]:
# Drop the 'GS EO' entry from models_dict and display what remains.
# A default is passed to pop() so that re-running this cell, or running it
# when the key was never added, does not raise KeyError.
models_dict.pop('GS EO', None)
models_dict

### EOO (True Positive Rate Parity)

In [None]:
# Same helper as in the Exponentiated Gradient section, now called with 'GS'
# (the Grid Search reduction, per this section's heading) and a
# true-positive-rate parity ('TPRP') constraint. The return value is kept in
# gs_tprp.
gs_tprp = add_contraint(
    model, 'TPRP', 'GS',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict,
    sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

In [None]:
# We can examine the values of lambda_i chosen for us: bind the lambda_vecs_
# attribute of the TPRP grid-search result and print its entry 0.
# NOTE(review): presumably entry 0 is the multiplier vector of the first grid
# point — confirm against the fairlearn GridSearch documentation.
# This rebinds the notebook-wide name `lambda_vecs`.
lambda_vecs = gs_tprp.lambda_vecs_
print(lambda_vecs[0])

In [None]:
# Show the grid-search models for the TPRP constraint, scored with
# true_positive_rate_difference.
# NOTE(review): presumably this registers the result in models_dict under
# 'GS TPRP' (the next cell pops that key); the meaning of the trailing 0.3 is
# defined in impt_functions — confirm there.
grid_search_show(
    gs_tprp, true_positive_rate_difference,
    y_predict, X_test, y_test, race_test,
    'TPRPDifference', 'GS TPRP',
    models_dict,
    0.3,
)

In [None]:
# Drop the 'GS TPRP' entry from models_dict and display what remains.
# A default is passed to pop() so that re-running this cell, or running it
# when the key was never added, does not raise KeyError.
models_dict.pop('GS TPRP', None)
models_dict

### False Positive Rate Parity

In [None]:
# Same helper as in the Exponentiated Gradient section, now called with 'GS'
# (the Grid Search reduction, per this section's heading) and a
# false-positive-rate parity ('FPRP') constraint. The return value is kept in
# gs_fprp.
gs_fprp = add_contraint(
    model, 'FPRP', 'GS',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict,
    sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

In [None]:
# We can examine the values of lambda_i chosen for us: bind the lambda_vecs_
# attribute of the FPRP grid-search result and print its entry 0.
# NOTE(review): presumably entry 0 is the multiplier vector of the first grid
# point — confirm against the fairlearn GridSearch documentation.
# This rebinds the notebook-wide name `lambda_vecs`.
lambda_vecs = gs_fprp.lambda_vecs_
print(lambda_vecs[0])

In [None]:
# NOTE: the below models are the same!!

In [None]:
# Show the grid-search models for the FPRP constraint, scored with
# false_positive_rate_difference.
# NOTE(review): the trailing value is 0.4 here whereas the DP, EO and TPRP
# calls pass 0.3; its meaning is defined in impt_functions — confirm the
# difference is intentional. Presumably the call registers the result in
# models_dict under 'GS FPRP' (the next cell pops that key).
grid_search_show(
    gs_fprp, false_positive_rate_difference,
    y_predict, X_test, y_test, race_test,
    'FPRPDifference', 'GS FPRP',
    models_dict,
    0.4,
)

In [None]:
# Drop the 'GS FPRP' entry from models_dict and display what remains.
# A default is passed to pop() so that re-running this cell, or running it
# when the key was never added, does not raise KeyError.
models_dict.pop('GS FPRP', None)
models_dict

### Error Rate Parity

In [None]:
# Same helper as in the Exponentiated Gradient section, now called with 'GS'
# (the Grid Search reduction, per this section's heading) and an error-rate
# parity ('ERP') constraint. The return value is kept in gs_erp.
gs_erp = add_contraint(
    model, 'ERP', 'GS',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict,
    sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

In [None]:
# We can examine the values of lambda_i chosen for us: bind the lambda_vecs_
# attribute of the ERP grid-search result and print its entry 0.
# NOTE(review): presumably entry 0 is the multiplier vector of the first grid
# point — confirm against the fairlearn GridSearch documentation.
# This rebinds the notebook-wide name `lambda_vecs`.
lambda_vecs = gs_erp.lambda_vecs_
print(lambda_vecs[0])

In [None]:
# Fairlearn doesn't have an error-rate-parity difference metric for the call below
#grid_search_show(gs_erp, error_difference, y_predict, X_test, y_test, race_test, 'ERDifference','GS ERP', models_dict, 0.3)
#models_dict.pop('GS ERP')
#models_dict

### Bounded Group Loss (TODO: unresolved issue — need to figure out the loss parameter)

In [None]:
#gs_bgl = add_contraint(model, 'BGL', 'GS', X_train, y_train, race_train, race_test, X_test, y_test, y_predict, sample_weight_test, False)

In [None]:
# We can examine the values of lambda_i chosen for us:
#lambda_vecs = gs_bgl.lambda_vecs_
#print(lambda_vecs[0])