# 0. IMPORTANT--Specify classifier to be trained and dataset

In [1]:
# Key of the classifier to train. Keys handled by the selection cell in section 3:
#   'dt'         - Decision Tree
#   'gnb'        - Gaussian Naive Bayes
#   'lgr'        - Logistic Regression
#   'svm_linear' - Support Vector Machine (linear kernel)
#   'gbt'        - Gradient Boosted Trees
#                  info: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html
# NOTE(review): the original notes also listed an 'auc' key with an unknown ('??') model
# name; no branch in the selection cell handles it.
model_name = 'svm_linear'
# Input dataset; sibling files follow the pattern ...oom10, ...oom50, ...oom100.
# NOTE(review): this is an absolute, machine-specific path - adjust before running elsewhere.
data_file = '/home/mackenzie/git_repositories/delayedimpact/data/simData_oom10.csv'

# 1. Imports and Set Up

In [2]:
from impt_functions import *
import csv
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, \
    balanced_accuracy_score, roc_auc_score
import pandas as pd 
import numpy as np
from fairlearn.reductions import ExponentiatedGradient, GridSearch, DemographicParity, EqualizedOdds, \
    TruePositiveRateParity, FalsePositiveRateParity, ErrorRateParity, BoundedGroupLoss
from fairlearn.metrics import *
from raiwidgets import FairnessDashboard
%matplotlib inline

In [3]:
# Result containers. Each list collects the metrics of one run and later becomes
# one entry in the results dictionaries that are written to csv, e.g.
#   overall       = {'Unmitigated': [], 'EG DP': [], 'EG EO': [], 'EG EOO': [],
#                    'EG FPRP': [], 'EG ERP': [], 'GS DP': [], 'GS EO': [],
#                    'GS EOO': [], 'GS FPRP': [], 'GS ERP': []}
#   black_results = {'Unmitigated Black': [], 'EG DP Black': [], ..., 'GS ERP Black': []}
#
# Every name gets its own, independent empty list (overall / black group / white group).

# Baseline (no fairness constraint)
unmitigated, unmitigated_black, unmitigated_white = ([] for _ in range(3))

# Exponentiated Gradient (EG) runs, one triple per constraint
eg_dp, eg_dp_black, eg_dp_white = ([] for _ in range(3))
eg_eo, eg_eo_black, eg_eo_white = ([] for _ in range(3))
eg_eoo, eg_eoo_black, eg_eoo_white = ([] for _ in range(3))
eg_fprp, eg_fprp_black, eg_fprp_white = ([] for _ in range(3))
eg_erp, eg_erp_black, eg_erp_white = ([] for _ in range(3))

# Grid Search (GS) runs, one triple per constraint
gs_dp, gs_dp_black, gs_dp_white = ([] for _ in range(3))
gs_eo, gs_eo_black, gs_eo_white = ([] for _ in range(3))
gs_eoo, gs_eoo_black, gs_eoo_white = ([] for _ in range(3))
gs_fprp, gs_fprp_black, gs_fprp_white = ([] for _ in range(3))
gs_erp, gs_erp_black, gs_erp_white = ([] for _ in range(3))

# 2. Prepare data

In [4]:
# Load the dataset named in section 0 via the project helper `get_data`.
# The recorded output below shows a 10000 x 4 table with the columns
# score, repay_probability, race and repay_indices.
data = get_data(data_file)

      score  repay_probability  race  repay_indices
0       832              98.99     1              1
1       724              97.08     1              0
2       746              97.96     1              0
3       475              20.06     1              0
4       687              94.60     1              1
...     ...                ...   ...            ...
9995    716              96.62     1              0
9996    721              96.90     1              1
9997    439              13.95     1              0
9998    802              98.79     1              1
9999    324               1.20     0              0

[10000 rows x 4 columns]


In [5]:
# Split the data into train/test features, labels, race attribute and sample
# weights using the project helper `prep_data` (test_size=0.3).
# NOTE(review): the meaning of weight_index=1 is defined in impt_functions - confirm.
(
    X_train, X_test,
    y_train, y_test,
    race_train, race_test,
    sample_weight_train, sample_weight_test,
) = prep_data(data=data, test_size=0.3, weight_index=1)

Here are the x values:  [[832   1]
 [724   1]
 [746   1]
 ...
 [439   1]
 [802   1]
 [324   0]] 

Here are the y values:  [1 0 0 ... 0 1 0]
Sample weights are all equal.


# 3. Classifier

In [6]:
# Echo the classifier key chosen in section 0 so the run is self-describing.
print('The classifier trained below is: ', model_name)

The classifier trained below is:  svm_linear


In [7]:
# Build the (not yet fitted) estimator that matches `model_name` from section 0.
if model_name == 'dt':
    classifier = DecisionTreeClassifier()
elif model_name == 'gnb':
    classifier = GaussianNB()
elif model_name == 'lgr':
    # Reference: https://towardsdatascience.com/logistic-regression-using-python-sklearn-numpy-mnist-handwriting-recognition-matplotlib-a6b31e2b166a
    classifier = LogisticRegression()
elif model_name == 'svm_linear':
    # Reference: https://www.datacamp.com/community/tutorials/svm-classification-scikit-learn-python
    # probability=True is required because predict_proba is called on the fitted model later.
    classifier = svm.SVC(kernel='linear', probability=True)
elif model_name == 'gbt':
    # Reference: https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html
    # Note: max_depth default is 3 but tune this parameter for best performance
    classifier = GradientBoostingClassifier(n_estimators=100)
else:
    # Fail here instead of printing and continuing: otherwise `classifier` would be
    # undefined (or left over from an earlier run) when the training cell executes.
    raise ValueError(
        f"PROBLEM: input a specified classifier above (got model_name={model_name!r})"
    )

## Train classifier and collect predictions
NOTE: at the moment, all sample weights are equal to 1.

In [8]:
# Reference: https://www.datacamp.com/community/tutorials/decision-tree-classification-python

# Fit the selected estimator on the training split, passing the per-sample weights.
model = classifier.fit(X_train, y_train, sample_weight=sample_weight_train)

# Hard class predictions for the held-out split.
y_predict = model.predict(X_test)

# Predicted probability of class index 1 for each test row (column 1 of predict_proba).
test_scores = model.predict_proba(X_test)[:, 1]

### Evaluation of classifier overall

In [9]:
# Metrics
# Register the baseline model as (hard predictions, scores) under the key "Unmitigated",
# then display the metrics table produced by the project helper `get_metrics_df`.
models_dict = {"Unmitigated": (y_predict, test_scores)}
get_metrics_df(models_dict, y_test, race_test)

Unnamed: 0,Unmitigated
Overall selection rate,0.742333
Demographic parity difference,0.510095
Demographic parity ratio,0.367202
------,
Overall balanced error rate,0.167679
Balanced error rate difference,0.000917695
------,
True positive rate difference,0.276165
True negative rate difference,0.278001
False positive rate difference,0.278001


In [10]:
# NOTE: this cell appends to `unmitigated`; re-running it without first re-running the
# initialisation cell adds the same entries a second time.

# Confusion matrix and per-class report for the unmitigated model.
cm = confusion_matrix(y_test, y_predict)
print(cm)
results_dict = classification_report(y_test, y_predict, output_dict=True)
print(classification_report(y_test, y_predict))

# Results row, entry 1: accuracy as a percentage.
unmitigated.append(round(results_dict['accuracy']*100, 2))

# Results row, entry 2: "micro/weighted/binary" F1 scores as percentages.
f1_micro, f1_weighted, f1_binary = get_f1_scores(y_test, y_predict)
f1_str = "/".join(str(round(f1*100, 2)) for f1 in (f1_micro, f1_weighted, f1_binary))
unmitigated.append(f1_str)

# Results row, entry 3: selection rate as a percentage.
sr = get_selection_rates(y_test, y_predict, race_test, 0)
unmitigated.append(round(sr*100, 2))

# Results row, entries 4-7: TNR, TPR, FNER and FPER as percentages.
tnr, tpr, fner, fper = evaluation_outcome_rates(y_test, y_predict, sample_weight_test)
unmitigated.extend(round(rate*100, 2) for rate in (tnr, tpr, fner, fper))

[[ 614  218]
 [ 159 2009]]
              precision    recall  f1-score   support

           0       0.79      0.74      0.77       832
           1       0.90      0.93      0.91      2168

    accuracy                           0.87      3000
   macro avg       0.85      0.83      0.84      3000
weighted avg       0.87      0.87      0.87      3000

F1 score micro: 
0.8743333333333333
F1 score weighted: 
0.8728670685677226
F1 score binary: 
0.9142207053469852

Selection Rate Overall:  0.7423333333333333
TNR=TN/(TN+FP)=  0.7379807692307693
TPR=TP/(FP+FN)=  0.926660516605166
FNER=FN/(FN+TP)=  0.07333948339483395
FPER=FP/(FP+TN)=  0.2620192307692308


The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


### Delayed impact calculated

In [11]:
# Delayed impact per group, stored in the results row as "black/white"
# (rounded to two decimals, not scaled to a percentage).
di_black, di_white = calculate_delayed_impact(X_test, y_test, y_predict, race_test)
di_str = "/".join(str(round(di, 2)) for di in (di_black, di_white))
unmitigated.append(di_str)

The delayed impact of the black group is:  13.8
The delayed impact of the white group is:  42.97142857142857


### Fairness Metric Evaluation of classifier

In [12]:
# Print the fairness metrics and keep the five difference values.
dp_diff, eod_diff, eoo_dif, fpr_dif, er_dif = print_fairness_metrics(y_test, y_predict, race_test, sample_weight_test)

# Append the differences to the results row as percentages, in the order
# DP, EOD, EOO/TPR, FPR, ER.
unmitigated.extend(
    round(diff*100, 2) for diff in (dp_diff, eod_diff, eoo_dif, fpr_dif, er_dif)
)

DP Difference:  0.5100952380952382
-->difference of 0 means that all groups have the same selection rate
DP Ratio: 0.3672022684310019
-->ratio of 1 means that all groups have the same selection rate 

EOD Difference:  0.2780008666762964
-->difference of 0 means that all groups have the same TN, TN, FP, and FN rates
EOD Ratio: 0.17962489343563512
-->ratio of 1 means that all groups have the same TN, TN, FP, and FN rates rates 

EOO/TPR Difference:  0.27616547633252087
FPR Difference:  0.2780008666762964
ER Difference:  0.04533333333333334



### Evaluation of classifier by race

In [13]:
# Per-group evaluation of the unmitigated model; the returned rows are added
# in place to the group-specific result lists.
results_black, results_white = evaluation_by_race(
    X_test, y_test, race_test, y_predict, sample_weight_test
)
unmitigated_black += results_black
unmitigated_white += results_white

EVALUATION FOR BLACK GROUP
[[216  14]
 [ 48  97]]
              precision    recall  f1-score   support

           0       0.82      0.94      0.87       230
           1       0.87      0.67      0.76       145

    accuracy                           0.83       375
   macro avg       0.85      0.80      0.82       375
weighted avg       0.84      0.83      0.83       375

F1 score micro: 
0.8346666666666667
F1 score weighted: 
0.8293771086369772
F1 score binary: 
0.7578125

TNR=TN/(TN+FP)=  0.9391304347826087
TPR=TP/(FP+FN)=  0.6689655172413793
FNER=FN/(FN+TP)=  0.3310344827586207
FPER=FP/(FP+TN)=  0.06086956521739131

EVALUATION FOR WHITE GROUP
[[ 398  204]
 [ 111 1912]]
              precision    recall  f1-score   support

           0       0.78      0.66      0.72       602
           1       0.90      0.95      0.92      2023

    accuracy                           0.88      2625
   macro avg       0.84      0.80      0.82      2625
weighted avg       0.88      0.88      0.88  

The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


### Save results to dictionaries

In [14]:
# Start the three results dictionaries with the baseline run, then echo them.
run_key = 'Unmitigated'
overall_results_dict = add_values_in_dict({}, run_key, unmitigated)
black_results_dict = add_values_in_dict({}, run_key, unmitigated_black)
white_results_dict = add_values_in_dict({}, run_key, unmitigated_white)
print(overall_results_dict, black_results_dict, white_results_dict, sep='\n')

{'Unmitigated': [87.43, '87.43/87.29/91.42', 74.23, 73.8, 92.67, 7.33, 26.2, '13.8/42.97', 51.01, 27.8, 27.62, 27.8, 4.53]}
{'Unmitigated': [83.47, '83.47/82.94/75.78', 29.6, 93.91, 66.9, 33.1, 6.09, 13.8]}
{'Unmitigated': [88.0, '88.0/87.63/92.39', 80.61, 66.11, 94.51, 5.49, 33.89, 42.97]}


## Exponentiated Gradient Reduction Alg for Adding Fairness Constraints

In [15]:
# Use the public IPython.display API; importing `display` from IPython.core.display
# is deprecated.
from IPython.display import display, HTML

# Make scrollable output areas taller (60em) so the long mitigation logs stay readable.
display(HTML("<style>div.output_scroll { height: 60em; }</style>"))

### Demographic Parity

In [None]:
# Exponentiated Gradient ('EG') reduction with Demographic Parity ('DP') as the constraint.
mitigator, results_overall, results_black, results_white = add_constraint(
    model, 'DP', 'EG',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict, sample_weight_test,
    True,  # NOTE(review): meaning of this flag is defined in impt_functions - confirm
)

Exponentiated Gradient Reduction Alg is used here with  DP  as the fairness constraint.



### Save results to dictionaries

In [None]:
# Record the EG + Demographic Parity run in the three results dictionaries, then echo them.
run_key = 'EG DP Mitigated'
overall_results_dict = add_values_in_dict(overall_results_dict, run_key, results_overall)
black_results_dict = add_values_in_dict(black_results_dict, run_key, results_black)
white_results_dict = add_values_in_dict(white_results_dict, run_key, results_white)
print(overall_results_dict, black_results_dict, white_results_dict, sep='\n')

### Equalized Odds

In [None]:
# Exponentiated Gradient ('EG') reduction with Equalized Odds ('EO') as the constraint.
mitigator, results_overall, results_black, results_white = add_constraint(
    model, 'EO', 'EG',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict, sample_weight_test,
    True,  # NOTE(review): meaning of this flag is defined in impt_functions - confirm
)

### Save results to dictionaries

In [None]:
# Record the EG + Equalized Odds run in the three results dictionaries, then echo them.
run_key = 'EG EO Mitigated'
overall_results_dict = add_values_in_dict(overall_results_dict, run_key, results_overall)
black_results_dict = add_values_in_dict(black_results_dict, run_key, results_black)
white_results_dict = add_values_in_dict(white_results_dict, run_key, results_white)
print(overall_results_dict, black_results_dict, white_results_dict, sep='\n')

### EOO (True Positive Rate Parity)

In [None]:
# Exponentiated Gradient ('EG') reduction with True Positive Rate Parity ('TPRP', i.e. EOO).
mitigator, results_overall, results_black, results_white = add_constraint(
    model, 'TPRP', 'EG',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict, sample_weight_test,
    True,  # NOTE(review): meaning of this flag is defined in impt_functions - confirm
)

### Save results to dictionaries

In [None]:
# Record the EG + EOO (true positive rate parity) run in the results dictionaries, then echo them.
run_key = 'EG EOO Mitigated'
overall_results_dict = add_values_in_dict(overall_results_dict, run_key, results_overall)
black_results_dict = add_values_in_dict(black_results_dict, run_key, results_black)
white_results_dict = add_values_in_dict(white_results_dict, run_key, results_white)
print(overall_results_dict, black_results_dict, white_results_dict, sep='\n')

### False Positive Rate Parity

In [None]:
# Exponentiated Gradient ('EG') reduction with False Positive Rate Parity ('FPRP').
mitigator, results_overall, results_black, results_white = add_constraint(
    model, 'FPRP', 'EG',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict, sample_weight_test,
    True,  # NOTE(review): meaning of this flag is defined in impt_functions - confirm
)

### Save results to dictionaries

In [None]:
# Record the EG + False Positive Rate Parity run in the results dictionaries, then echo them.
run_key = 'EG FPRP Mitigated'
overall_results_dict = add_values_in_dict(overall_results_dict, run_key, results_overall)
black_results_dict = add_values_in_dict(black_results_dict, run_key, results_black)
white_results_dict = add_values_in_dict(white_results_dict, run_key, results_white)
print(overall_results_dict, black_results_dict, white_results_dict, sep='\n')

### Error Rate Parity

In [None]:
# Exponentiated Gradient ('EG') reduction with Error Rate Parity ('ERP').
mitigator, results_overall, results_black, results_white = add_constraint(
    model, 'ERP', 'EG',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict, sample_weight_test,
    True,  # NOTE(review): meaning of this flag is defined in impt_functions - confirm
)

### Save results to dictionaries

In [None]:
# Record the EG + Error Rate Parity run in the results dictionaries, then echo them.
run_key = 'EG ERP Mitigated'
overall_results_dict = add_values_in_dict(overall_results_dict, run_key, results_overall)
black_results_dict = add_values_in_dict(black_results_dict, run_key, results_black)
white_results_dict = add_values_in_dict(white_results_dict, run_key, results_white)
print(overall_results_dict, black_results_dict, white_results_dict, sep='\n')

### Bounded Group Loss (not working yet: the loss parameter still needs to be determined)

In [None]:
# DISABLED: the Bounded Group Loss run is not usable until its loss parameter is settled.
# The code is kept as comments rather than a bare string literal so that the cell
# produces no output, and the run key is distinct from the EG Demographic Parity run
# (it previously reused 'EG DP Mitigated').
#
# mitigator, results_overall, results_black, results_white= add_constraint(model, 'BGL', 'EG', X_train, y_train, race_train, race_test, X_test, y_test, y_predict, sample_weight_test, False)
# run_key = 'EG BGL Mitigated'
# overall_results_dict = add_values_in_dict(overall_results_dict, run_key, results_overall)
# black_results_dict = add_values_in_dict(black_results_dict, run_key, results_black)
# white_results_dict = add_values_in_dict(white_results_dict, run_key, results_white)
# print(overall_results_dict)
# print(black_results_dict)
# print(white_results_dict)

## Grid Search Reduction Alg for Adding Fairness Constraints

### Demographic Parity

In [None]:
# Grid Search ('GS') reduction with Demographic Parity ('DP') as the constraint.
mitigator, results_overall, results_black, results_white = add_constraint(
    model, 'DP', 'GS',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict, sample_weight_test,
    True,  # NOTE(review): meaning of this flag is defined in impt_functions - confirm
)

In [None]:
# Examine the lambda_i values chosen for us: read the lambda vectors stored on the
# mitigator returned by the cell above and print the entry at key 0.
# NOTE(review): assumes `mitigator` is the fitted Grid Search object - confirm.
lambda_vecs = mitigator.lambda_vecs_
print(lambda_vecs[0])

The next few cells come from: https://github.com/fairlearn/fairlearn/blob/main/notebooks/Binary%20Classification%20with%20the%20UCI%20Credit-card%20Default%20Dataset.ipynb

Note: we train multiple models corresponding to different trade-off points between the performance metric (balanced accuracy) and fairness metric.

In [None]:
# Show the Grid Search models against the demographic parity difference metric.
# NOTE(review): the meaning of the trailing 0.3 is defined in impt_functions - confirm.
grid_search_show(
    mitigator, demographic_parity_difference,
    y_predict, X_test, y_test, race_test,
    'DemParityDifference', 'GS DPD',
    models_dict, 0.3,
)

In [None]:
# Remove the 'GS DPD' entry (presumably added by grid_search_show above) and
# display what is left in models_dict.
del models_dict['GS DPD']
models_dict

### Save results to dictionaries

In [None]:
# Record the GS + Demographic Parity run in the results dictionaries, then echo them.
run_key = 'GS DP Mitigated'
overall_results_dict = add_values_in_dict(overall_results_dict, run_key, results_overall)
black_results_dict = add_values_in_dict(black_results_dict, run_key, results_black)
white_results_dict = add_values_in_dict(white_results_dict, run_key, results_white)
print(overall_results_dict, black_results_dict, white_results_dict, sep='\n')

### Equalized Odds

In [None]:
# Grid Search ('GS') reduction with Equalized Odds ('EO') as the constraint.
mitigator, results_overall, results_black, results_white = add_constraint(
    model, 'EO', 'GS',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict, sample_weight_test,
    True,  # NOTE(review): meaning of this flag is defined in impt_functions - confirm
)

In [None]:
# Examine the lambda_i values chosen for us: read the lambda vectors stored on the
# mitigator returned by the cell above and print the entry at key 0.
# NOTE(review): assumes `mitigator` is the fitted Grid Search object - confirm.
lambda_vecs = mitigator.lambda_vecs_
print(lambda_vecs[0])

In [None]:
# Show the Grid Search models against the equalized odds difference metric.
# NOTE(review): the meaning of the trailing 0.3 is defined in impt_functions - confirm.
grid_search_show(
    mitigator, equalized_odds_difference,
    y_predict, X_test, y_test, race_test,
    'EOddsDifference', 'GS EO',
    models_dict, 0.3,
)

In [None]:
# Remove the 'GS EO' entry (presumably added by grid_search_show above) and
# display what is left in models_dict.
del models_dict['GS EO']
models_dict

In [None]:
# Record the GS + Equalized Odds run in the results dictionaries, then echo them.
run_key = 'GS EO Mitigated'
overall_results_dict = add_values_in_dict(overall_results_dict, run_key, results_overall)
black_results_dict = add_values_in_dict(black_results_dict, run_key, results_black)
white_results_dict = add_values_in_dict(white_results_dict, run_key, results_white)
print(overall_results_dict, black_results_dict, white_results_dict, sep='\n')

### EOO (True Positive Rate Parity)

In [None]:
# Grid Search ('GS') reduction with True Positive Rate Parity ('TPRP', i.e. EOO).
mitigator, results_overall, results_black, results_white = add_constraint(
    model, 'TPRP', 'GS',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict, sample_weight_test,
    True,  # NOTE(review): meaning of this flag is defined in impt_functions - confirm
)

In [None]:
# Examine the lambda_i values chosen for us: read the lambda vectors stored on the
# mitigator returned by the cell above and print the entry at key 0.
# NOTE(review): assumes `mitigator` is the fitted Grid Search object - confirm.
lambda_vecs = mitigator.lambda_vecs_
print(lambda_vecs[0])

In [None]:
# Show the Grid Search models against the true positive rate difference metric.
# NOTE(review): the meaning of the trailing 0.3 is defined in impt_functions - confirm.
grid_search_show(
    mitigator, true_positive_rate_difference,
    y_predict, X_test, y_test, race_test,
    'TPRPDifference', 'GS TPRP',
    models_dict, 0.3,
)

In [None]:
# Remove the 'GS TPRP' entry (presumably added by grid_search_show above) and
# display what is left in models_dict.
del models_dict['GS TPRP']
models_dict

In [None]:
# Record the GS + EOO (true positive rate parity) run in the results dictionaries, then echo them.
run_key = 'GS EOO Mitigated'
overall_results_dict = add_values_in_dict(overall_results_dict, run_key, results_overall)
black_results_dict = add_values_in_dict(black_results_dict, run_key, results_black)
white_results_dict = add_values_in_dict(white_results_dict, run_key, results_white)
print(overall_results_dict, black_results_dict, white_results_dict, sep='\n')

### False Positive Rate Parity

In [None]:
# Grid Search ('GS') reduction with False Positive Rate Parity ('FPRP').
mitigator, results_overall, results_black, results_white = add_constraint(
    model, 'FPRP', 'GS',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict, sample_weight_test,
    True,  # NOTE(review): meaning of this flag is defined in impt_functions - confirm
)

In [None]:
# Examine the lambda_i values chosen for us: read the lambda vectors stored on the
# mitigator returned by the cell above and print the entry at key 0.
# NOTE(review): assumes `mitigator` is the fitted Grid Search object - confirm.
lambda_vecs = mitigator.lambda_vecs_
print(lambda_vecs[0])

In [None]:
# NOTE: the below models are the same for DT classifier!!

In [None]:
# Show the Grid Search models against the false positive rate difference metric.
# NOTE(review): this call uses 0.4 where the other constraints use 0.3; the meaning of
# the value is defined in impt_functions - confirm.
grid_search_show(
    mitigator, false_positive_rate_difference,
    y_predict, X_test, y_test, race_test,
    'FPRPDifference', 'GS FPRP',
    models_dict, 0.4,
)

In [None]:
# Remove the 'GS FPRP' entry (presumably added by grid_search_show above) and
# display what is left in models_dict.
del models_dict['GS FPRP']
models_dict

In [None]:
# Record the GS + False Positive Rate Parity run in the results dictionaries, then echo them.
run_key = 'GS FPRP Mitigated'
overall_results_dict = add_values_in_dict(overall_results_dict, run_key, results_overall)
black_results_dict = add_values_in_dict(black_results_dict, run_key, results_black)
white_results_dict = add_values_in_dict(white_results_dict, run_key, results_white)
print(overall_results_dict, black_results_dict, white_results_dict, sep='\n')

### Error Rate Parity

In [None]:
# Grid Search ('GS') reduction with Error Rate Parity ('ERP').
mitigator, results_overall, results_black, results_white = add_constraint(
    model, 'ERP', 'GS',
    X_train, y_train, race_train, race_test,
    X_test, y_test, y_predict, sample_weight_test,
    True,  # NOTE(review): meaning of this flag is defined in impt_functions - confirm
)

In [None]:
# Examine the lambda_i values chosen for us: read the lambda vectors stored on the
# mitigator returned by the cell above and print the entry at key 0.
# NOTE(review): assumes `mitigator` is the fitted Grid Search object - confirm.
lambda_vecs = mitigator.lambda_vecs_
print(lambda_vecs[0])

In [None]:
# Fairlearn doesn't provide an error-rate-parity difference metric, so the calls below stay disabled
#grid_search_show(gs_erp, error_difference, y_predict, X_test, y_test, race_test, 'ERDifference','GS ERP', models_dict, 0.3)
#models_dict.pop('GS FPRP')
#models_dict

In [None]:
# Record the GS + Error Rate Parity run in the results dictionaries, then echo them.
run_key = 'GS ERP Mitigated'
overall_results_dict = add_values_in_dict(overall_results_dict, run_key, results_overall)
black_results_dict = add_values_in_dict(black_results_dict, run_key, results_black)
white_results_dict = add_values_in_dict(white_results_dict, run_key, results_white)
print(overall_results_dict, black_results_dict, white_results_dict, sep='\n')

### Bounded Group Loss (not working yet: the loss parameter still needs to be determined)

In [None]:
# DISABLED: the Bounded Group Loss run is not usable until its loss parameter is settled.
# The code is kept as comments rather than a bare string literal so that the cell
# produces no output, and the run key is distinct from the GS Equalized Odds run
# (it previously reused 'GS EO Mitigated').
#
# mitigator, results_overall, results_black, results_white= add_constraint(model, 'BGL', 'GS', X_train, y_train, race_train, race_test, X_test, y_test, y_predict, sample_weight_test, False)
# run_key = 'GS BGL Mitigated'
# overall_results_dict = add_values_in_dict(overall_results_dict, run_key, results_overall)
# black_results_dict = add_values_in_dict(black_results_dict, run_key, results_black)
# white_results_dict = add_values_in_dict(white_results_dict, run_key, results_white)
# print(overall_results_dict)
# print(black_results_dict)
# print(white_results_dict)

In [None]:
# We can examine the values of lambda_i chosen for us:
#lambda_vecs = gs_dp.lambda_vecs_
#print(lambda_vecs[0])

# 4. Save results to csv files

In [None]:
# Column headers for the csv exports. The first column ('Run') holds the run label;
# the remaining columns follow the order in which values were appended to each results row.
overall_fieldnames = [
    'Run', 'Acc', 'F1micro/F1w/F1bsr', 'SelectionRate',
    'TNR rate', 'TPR rate', 'FNER', 'FPER', 'DIB/DIW',
    'DP Diff', 'EO Diff', 'TPR Diff', 'FPR Diff', 'ER Diff',
]
# The per-race tables use the same columns up to and including 'DIB/DIW'
# (they carry no fairness-difference columns).
byrace_fieldnames = overall_fieldnames[:9]

# One csv per table, each prefixed with the classifier key.
save_dict_2_csv(overall_results_dict, overall_fieldnames, model_name+'_overall_results.csv')
save_dict_2_csv(black_results_dict, byrace_fieldnames, model_name+'_black_results.csv')
save_dict_2_csv(white_results_dict, byrace_fieldnames, model_name+'_white_results.csv')