# 1. Imports

In [1]:
import os

# NOTE: the relative order of the imports below is intentionally unchanged;
# reordering the wildcard imports could change which names shadow which.
from impt_functions import *
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import pandas as pd
import numpy as np
from fairlearn.reductions import ExponentiatedGradient, GridSearch, DemographicParity, EqualizedOdds, \
    TruePositiveRateParity, FalsePositiveRateParity, ErrorRateParity, BoundedGroupLoss
from fairlearn.metrics import *
from raiwidgets import FairnessDashboard

# 2. Prepare data

In [2]:
# Location of the simulated dataset. The default is the original author's
# machine-specific absolute path; set the DELAYEDIMPACT_DATA_CSV environment
# variable to point at your own copy so the notebook runs on other machines
# without editing this cell.
DATA_PATH = os.environ.get(
    'DELAYEDIMPACT_DATA_CSV',
    '/home/mackenzie/git_repositories/delayedimpact/data/simData_oom10.csv',
)

# Load the dataset through the project helper.
data = get_data(DATA_PATH)

      score  repay_probability  race  repay_indices
0       610              78.90     1              1
1       568              47.77     0              0
2       750              98.13     1              1
3       775              98.45     1              1
4       704              95.88     1              1
...     ...                ...   ...            ...
9995    832              98.99     1              1
9996    416              10.91     1              0
9997    444              14.63     1              0
9998    778              98.47     1              1
9999    738              97.68     1              1

[10000 rows x 4 columns]


In [3]:
# Split the loaded data into train/test partitions (test_size=0.3 holds out 30%).
# prep_data returns eight values, unpacked here in the order it yields them.
# NOTE(review): weight_index=1 appears to select equal sample weights (the cell
# output reports "Sample weights are all equal.") — confirm in impt_functions.
(
    X_train,
    X_test,
    y_train,
    y_test,
    race_train,
    race_test,
    sample_weight_train,
    sample_weight_test,
) = prep_data(data=data, test_size=0.3, weight_index=1)

Here are the x values:  [[610   1]
 [568   0]
 [750   1]
 ...
 [444   1]
 [778   1]
 [738   1]] 

Here are the y values:  [1 0 1 ... 0 1 1]
Sample weights are all equal.


# 3. Support Vector Machines with Fairlearn

Reference: https://www.datacamp.com/community/tutorials/svm-classification-scikit-learn-python

## SVM classifier + Collect Predictions

In [4]:
# TODO: add cells from above!!
# TODO: try other svm kernels??

## Linear Kernel

In [5]:
# Build a support vector classifier with a linear kernel
# (swap the `kernel` argument to experiment with other kernels).
clf = svm.SVC(kernel='linear')

# Fit on the training split. scikit-learn's fit() returns the estimator itself,
# so `model` is simply another name for the fitted `clf`.
clf.fit(X_train, y_train)
model = clf

# Predict labels for the held-out test features.
y_predict = model.predict(X_test)

### Evaluation of classifier overall

In [6]:
# Confusion matrix of the baseline predictions on the test split.
cm = confusion_matrix(y_test, y_predict)
print(cm)

# Per-class precision / recall / F1 summary.
overall_report = classification_report(y_test, y_predict)
print(overall_report)

# Project helper that prints the outcome rates shown in the cell output
# (FNER, FPER, TNR, TPR); kept as the final call of the cell.
evaluation_outcome_rates(y_test, y_predict, sample_weight_test)

[[ 646  203]
 [ 131 2020]]
              precision    recall  f1-score   support

           0       0.83      0.76      0.79       849
           1       0.91      0.94      0.92      2151

    accuracy                           0.89      3000
   macro avg       0.87      0.85      0.86      3000
weighted avg       0.89      0.89      0.89      3000

FNER=FN/(FN+TP)=  0.06090190609019061
FPER=FP/(FP+TN)=  0.23910482921083628
TNR=TN/(TN+FP)=  0.7608951707891637
TPR=TP/(FP+FN)=  0.9390980939098094


### Evaluation of classifier by race

In [7]:
# Repeat the baseline evaluation separately for each race group in the test split.
evaluation_by_race(
    X_test,
    y_test,
    race_test,
    y_predict,
    sample_weight_test,
)

EVALUATION FOR BLACK GROUP
[[213  20]
 [ 40  86]]
              precision    recall  f1-score   support

           0       0.84      0.91      0.88       233
           1       0.81      0.68      0.74       126

    accuracy                           0.83       359
   macro avg       0.83      0.80      0.81       359
weighted avg       0.83      0.83      0.83       359

FNER=FN/(FN+TP)=  0.31746031746031744
FPER=FP/(FP+TN)=  0.08583690987124463
TNR=TN/(TN+FP)=  0.9141630901287554
TPR=TP/(FP+FN)=  0.6825396825396826

EVALUATION FOR WHITE GROUP
[[ 433  183]
 [  91 1934]]
              precision    recall  f1-score   support

           0       0.83      0.70      0.76       616
           1       0.91      0.96      0.93      2025

    accuracy                           0.90      2641
   macro avg       0.87      0.83      0.85      2641
weighted avg       0.89      0.90      0.89      2641

FNER=FN/(FN+TP)=  0.044938271604938275
FPER=FP/(FP+TN)=  0.29707792207792205
TNR=TN/(TN+FP)= 

### Delayed impact calculated by race

In [8]:
# Report the delayed impact of the baseline predictions for each race group.
calculate_delayed_impact(
    X_test,
    y_test,
    y_predict,
    race_test,
)

The delayed impact of the black group is:  9.610027855153204
The delayed impact of the white group is:  44.52858765619084


### Fairness Metric Evaluation of classifier

In [9]:
# Print selection rates plus demographic-parity and equalized-odds
# differences/ratios for the baseline predictions, grouped by race.
print_fairness_metrics(
    y_test,
    y_predict,
    race_test,
)

Selection Rate Overall:  0.741
Selection Rate By Group:  sensitive_feature_0
0    0.295265
1     0.80159
Name: selection_rate, dtype: object 

DP Difference:  0.5063256827465751
-->difference of 0 means that all groups have the same selection rate
DP Ratio: 0.3683485459925816
-->ratio of 1 means that all groups have the same selection rate 

EOD Difference:  0.2725220458553792
-->difference of 0 means that all groups have the same TN, TN, FP, and FN rates
EOD Ratio: 0.2889373578179601
-->ratio of 1 means that all groups have the same TN, TN, FP, and FN rates rates 



The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


## Exponentiated Gradient Reduction Algorithm for Adding Fairness Constraints

### Demographic Parity

In [10]:
# Exponentiated Gradient reduction with Demographic Parity ('DP') as the constraint.
add_contraint(
    model, 'DP', 'EG',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

Exponentiated Gradient Reduction Alg is used here with  DP  as the fairness constraint.

Evaluation of  DP -constrained classifier overall:
[[ 527  322]
 [ 114 2037]]
              precision    recall  f1-score   support

           0       0.82      0.62      0.71       849
           1       0.86      0.95      0.90      2151

    accuracy                           0.85      3000
   macro avg       0.84      0.78      0.81      3000
weighted avg       0.85      0.85      0.85      3000

FNER=FN/(FN+TP)=  0.05299860529986053
FPER=FP/(FP+TN)=  0.3792697290930506
TNR=TN/(TN+FP)=  0.6207302709069493
TPR=TP/(FP+FN)=  0.9470013947001394


Evaluation of  DP -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[ 66 167]
 [  1 125]]
              precision    recall  f1-score   support

           0       0.99      0.28      0.44       233
           1       0.43      0.99      0.60       126

    accuracy                           0.53       359
   macro avg       0.71      0.64     

The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


### Equalized Odds

In [11]:
# Exponentiated Gradient reduction with Equalized Odds ('EO') as the constraint.
add_contraint(
    model, 'EO', 'EG',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

Exponentiated Gradient Reduction Alg is used here with  EO  as the fairness constraint.

Evaluation of  EO -constrained classifier overall:
[[ 664  185]
 [ 300 1851]]
              precision    recall  f1-score   support

           0       0.69      0.78      0.73       849
           1       0.91      0.86      0.88      2151

    accuracy                           0.84      3000
   macro avg       0.80      0.82      0.81      3000
weighted avg       0.85      0.84      0.84      3000

FNER=FN/(FN+TP)=  0.1394700139470014
FPER=FP/(FP+TN)=  0.21790341578327443
TNR=TN/(TN+FP)=  0.7820965842167256
TPR=TP/(FP+FN)=  0.8605299860529986


Evaluation of  EO -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[177  56]
 [ 17 109]]
              precision    recall  f1-score   support

           0       0.91      0.76      0.83       233
           1       0.66      0.87      0.75       126

    accuracy                           0.80       359
   macro avg       0.79      0.81     

The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


### True Positive Rate Parity

In [12]:
# Exponentiated Gradient reduction with True Positive Rate Parity ('TPRP') as the constraint.
add_contraint(
    model, 'TPRP', 'EG',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

Exponentiated Gradient Reduction Alg is used here with  TPRP  as the fairness constraint.

Evaluation of  TPRP -constrained classifier overall:
[[ 604  245]
 [ 159 1992]]
              precision    recall  f1-score   support

           0       0.79      0.71      0.75       849
           1       0.89      0.93      0.91      2151

    accuracy                           0.87      3000
   macro avg       0.84      0.82      0.83      3000
weighted avg       0.86      0.87      0.86      3000

FNER=FN/(FN+TP)=  0.07391910739191074
FPER=FP/(FP+TN)=  0.28857479387514723
TNR=TN/(TN+FP)=  0.7114252061248527
TPR=TP/(FP+FN)=  0.9260808926080892


Evaluation of  TPRP -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[122 111]
 [  5 121]]
              precision    recall  f1-score   support

           0       0.96      0.52      0.68       233
           1       0.52      0.96      0.68       126

    accuracy                           0.68       359
   macro avg       0.74      0.

The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


### False Positive Rate Parity

In [13]:
# Exponentiated Gradient reduction with False Positive Rate Parity ('FPRP') as the constraint.
add_contraint(
    model, 'FPRP', 'EG',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

Exponentiated Gradient Reduction Alg is used here with  FPRP  as the fairness constraint.

Evaluation of  FPRP -constrained classifier overall:
[[ 637  212]
 [ 128 2023]]
              precision    recall  f1-score   support

           0       0.83      0.75      0.79       849
           1       0.91      0.94      0.92      2151

    accuracy                           0.89      3000
   macro avg       0.87      0.85      0.86      3000
weighted avg       0.88      0.89      0.88      3000

FNER=FN/(FN+TP)=  0.05950720595072059
FPER=FP/(FP+TN)=  0.2497055359246172
TNR=TN/(TN+FP)=  0.7502944640753828
TPR=TP/(FP+FN)=  0.9404927940492794


Evaluation of  FPRP -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[177  56]
 [ 16 110]]
              precision    recall  f1-score   support

           0       0.92      0.76      0.83       233
           1       0.66      0.87      0.75       126

    accuracy                           0.80       359
   macro avg       0.79      0.8

The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


### Error Rate Parity

In [14]:
# Exponentiated Gradient reduction with Error Rate Parity ('ERP') as the constraint.
add_contraint(
    model, 'ERP', 'EG',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

Exponentiated Gradient Reduction Alg is used here with  ERP  as the fairness constraint.

Evaluation of  ERP -constrained classifier overall:
[[ 662  187]
 [ 271 1880]]
              precision    recall  f1-score   support

           0       0.71      0.78      0.74       849
           1       0.91      0.87      0.89      2151

    accuracy                           0.85      3000
   macro avg       0.81      0.83      0.82      3000
weighted avg       0.85      0.85      0.85      3000

FNER=FN/(FN+TP)=  0.12598791259879125
FPER=FP/(FP+TN)=  0.22025912838633688
TNR=TN/(TN+FP)=  0.7797408716136631
TPR=TP/(FP+FN)=  0.8740120874012087


Evaluation of  ERP -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[213  20]
 [ 40  86]]
              precision    recall  f1-score   support

           0       0.84      0.91      0.88       233
           1       0.81      0.68      0.74       126

    accuracy                           0.83       359
   macro avg       0.83      0.80 

The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


### Bounded Group Loss (TODO: not runnable yet — a `loss` argument for `BoundedGroupLoss` still needs to be chosen)

In [15]:
#add_contraint(model, 'BGL', 'EG', X_train, y_train, race_train, race_test, X_test, y_test, y_predict, sample_weight_test, False)

## Grid Search Reduction Algorithm for Adding Fairness Constraints

### Demographic Parity

In [16]:
# Grid Search reduction with Demographic Parity ('DP') as the constraint.
add_contraint(
    model, 'DP', 'GS',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

Grid Search Reduction Alg is used here with  DP  as the fairness constraint.

Evaluation of  DP -constrained classifier overall:
[[ 572  277]
 [ 116 2035]]
              precision    recall  f1-score   support

           0       0.83      0.67      0.74       849
           1       0.88      0.95      0.91      2151

    accuracy                           0.87      3000
   macro avg       0.86      0.81      0.83      3000
weighted avg       0.87      0.87      0.86      3000

FNER=FN/(FN+TP)=  0.05392840539284054
FPER=FP/(FP+TN)=  0.32626619552414604
TNR=TN/(TN+FP)=  0.673733804475854
TPR=TP/(FP+FN)=  0.9460715946071595


Evaluation of  DP -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[113 120]
 [  4 122]]
              precision    recall  f1-score   support

           0       0.97      0.48      0.65       233
           1       0.50      0.97      0.66       126

    accuracy                           0.65       359
   macro avg       0.73      0.73      0.65      

The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


### Equalized Odds

In [17]:
# Grid Search reduction with Equalized Odds ('EO') as the constraint.
add_contraint(
    model, 'EO', 'GS',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

Grid Search Reduction Alg is used here with  EO  as the fairness constraint.

Evaluation of  EO -constrained classifier overall:
[[ 608  241]
 [ 116 2035]]
              precision    recall  f1-score   support

           0       0.84      0.72      0.77       849
           1       0.89      0.95      0.92      2151

    accuracy                           0.88      3000
   macro avg       0.87      0.83      0.85      3000
weighted avg       0.88      0.88      0.88      3000

FNER=FN/(FN+TP)=  0.05392840539284054
FPER=FP/(FP+TN)=  0.2838633686690224
TNR=TN/(TN+FP)=  0.7161366313309776
TPR=TP/(FP+FN)=  0.9460715946071595


Evaluation of  EO -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[157  76]
 [ 10 116]]
              precision    recall  f1-score   support

           0       0.94      0.67      0.79       233
           1       0.60      0.92      0.73       126

    accuracy                           0.76       359
   macro avg       0.77      0.80      0.76      

The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


### True Positive Rate Parity

In [18]:
# Grid Search reduction with True Positive Rate Parity ('TPRP') as the constraint.
add_contraint(
    model, 'TPRP', 'GS',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

Grid Search Reduction Alg is used here with  TPRP  as the fairness constraint.

Evaluation of  TPRP -constrained classifier overall:
[[ 604  245]
 [ 111 2040]]
              precision    recall  f1-score   support

           0       0.84      0.71      0.77       849
           1       0.89      0.95      0.92      2151

    accuracy                           0.88      3000
   macro avg       0.87      0.83      0.85      3000
weighted avg       0.88      0.88      0.88      3000

FNER=FN/(FN+TP)=  0.05160390516039052
FPER=FP/(FP+TN)=  0.28857479387514723
TNR=TN/(TN+FP)=  0.7114252061248527
TPR=TP/(FP+FN)=  0.9483960948396095


Evaluation of  TPRP -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[161  72]
 [ 12 114]]
              precision    recall  f1-score   support

           0       0.93      0.69      0.79       233
           1       0.61      0.90      0.73       126

    accuracy                           0.77       359
   macro avg       0.77      0.80      0.7

The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


### False Positive Rate Parity

In [19]:
# Grid Search reduction with False Positive Rate Parity ('FPRP') as the constraint.
add_contraint(
    model, 'FPRP', 'GS',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

Grid Search Reduction Alg is used here with  FPRP  as the fairness constraint.

Evaluation of  FPRP -constrained classifier overall:
[[ 646  203]
 [ 131 2020]]
              precision    recall  f1-score   support

           0       0.83      0.76      0.79       849
           1       0.91      0.94      0.92      2151

    accuracy                           0.89      3000
   macro avg       0.87      0.85      0.86      3000
weighted avg       0.89      0.89      0.89      3000

FNER=FN/(FN+TP)=  0.06090190609019061
FPER=FP/(FP+TN)=  0.23910482921083628
TNR=TN/(TN+FP)=  0.7608951707891637
TPR=TP/(FP+FN)=  0.9390980939098094


Evaluation of  FPRP -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[213  20]
 [ 40  86]]
              precision    recall  f1-score   support

           0       0.84      0.91      0.88       233
           1       0.81      0.68      0.74       126

    accuracy                           0.83       359
   macro avg       0.83      0.80      0.8

The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


### Error Rate Parity

In [20]:
# Grid Search reduction with Error Rate Parity ('ERP') as the constraint.
add_contraint(
    model, 'ERP', 'GS',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # NOTE(review): meaning of this flag is defined in impt_functions — confirm
)

Grid Search Reduction Alg is used here with  ERP  as the fairness constraint.

Evaluation of  ERP -constrained classifier overall:
[[ 646  203]
 [ 131 2020]]
              precision    recall  f1-score   support

           0       0.83      0.76      0.79       849
           1       0.91      0.94      0.92      2151

    accuracy                           0.89      3000
   macro avg       0.87      0.85      0.86      3000
weighted avg       0.89      0.89      0.89      3000

FNER=FN/(FN+TP)=  0.06090190609019061
FPER=FP/(FP+TN)=  0.23910482921083628
TNR=TN/(TN+FP)=  0.7608951707891637
TPR=TP/(FP+FN)=  0.9390980939098094


Evaluation of  ERP -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[213  20]
 [ 40  86]]
              precision    recall  f1-score   support

           0       0.84      0.91      0.88       233
           1       0.81      0.68      0.74       126

    accuracy                           0.83       359
   macro avg       0.83      0.80      0.81  

The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


### Bounded Group Loss (TODO: not runnable yet — a `loss` argument for `BoundedGroupLoss` still needs to be chosen)

In [21]:
#add_contraint(model, 'BGL', 'GS', X_train, y_train, race_train, race_test, X_test, y_test, y_predict, sample_weight_test, False)