# 1. Imports

In [1]:
import os

# NOTE: the relative order of the imports below is kept as-is on purpose; the two
# star-imports may export overlapping names, and reordering them would change
# which definition wins.
from impt_functions import *
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import pandas as pd
import numpy as np
from fairlearn.reductions import ExponentiatedGradient, GridSearch, DemographicParity, EqualizedOdds, \
    TruePositiveRateParity, FalsePositiveRateParity, ErrorRateParity, BoundedGroupLoss
from fairlearn.metrics import *
from raiwidgets import FairnessDashboard

# 2. Prepare data

In [2]:
# Location of the input CSV. The original machine-specific path is kept as the
# default so existing runs behave exactly as before; set the
# DELAYEDIMPACT_DATA_PATH environment variable to load the file from elsewhere
# without editing the notebook.
DATA_PATH = os.environ.get(
    'DELAYEDIMPACT_DATA_PATH',
    '/home/mackenzie/git_repositories/delayedimpact/data/simData_oom10.csv',
)
data = get_data(DATA_PATH)

      score  repay_probability  race  repay_indices
0       610              78.90     1              1
1       568              47.77     0              0
2       750              98.13     1              1
3       775              98.45     1              1
4       704              95.88     1              1
...     ...                ...   ...            ...
9995    832              98.99     1              1
9996    416              10.91     1              0
9997    444              14.63     1              0
9998    778              98.47     1              1
9999    738              97.68     1              1

[10000 rows x 4 columns]


In [3]:
# Unpack the eight values returned by prep_data: features, labels, race labels
# and sample weights, each as a train/test pair. test_size=0.3 and
# weight_index=1 are forwarded unchanged (their exact handling lives in
# impt_functions, which is not visible here).
(
    X_train, X_test,
    y_train, y_test,
    race_train, race_test,
    sample_weight_train, sample_weight_test,
) = prep_data(data=data, test_size=0.3, weight_index=1)

Sample weights are all equal.


# 3. Support Vector Machines with Fairlearn

Reference: https://www.datacamp.com/community/tutorials/svm-classification-scikit-learn-python

## SVM classifier + Collect Predictions

In [4]:
# TODO: add cells from above!!
# TODO: try other SVM kernels (e.g. 'rbf', 'poly', 'sigmoid') and compare against the linear one.

## Linear Kernel

In [5]:
# Linear-kernel support vector classifier; swap the kernel name to try others.
clf = svm.SVC(kernel='linear')

# Fit on the training split. fit() returns the estimator itself, so `model`
# below is the same fitted object as `clf`; later cells use the name `model`.
clf.fit(X_train, y_train)
model = clf

# Predicted labels for the test split.
y_predict = model.predict(X_test)

### Evaluation of classifier overall

In [6]:
# Confusion matrix and per-class precision/recall/F1 for the whole test split.
cm = confusion_matrix(y_test, y_predict)
overall_report = classification_report(y_test, y_predict)
print(cm, overall_report, sep="\n")

# Project helper that prints the FNER / FPER / TNR / TPR outcome rates.
evaluation_outcome_rates(y_test, y_predict, sample_weight_test)

[[ 646  203]
 [ 131 2020]]
              precision    recall  f1-score   support

           0       0.83      0.76      0.79       849
           1       0.91      0.94      0.92      2151

    accuracy                           0.89      3000
   macro avg       0.87      0.85      0.86      3000
weighted avg       0.89      0.89      0.89      3000

FNER=FN/(FN+TP)=  0.06090190609019061
FPER=FP/(FP+TN)=  0.23910482921083628
TNR=TN/(TN+FP)=  0.7608951707891637
TPR=TP/(TP+FN)=  0.9390980939098094


### Evaluation of classifier by race

In [7]:
# Same evaluation as above, reported separately for each group in race_test.
evaluation_by_race(
    X_test,
    y_test,
    race_test,
    y_predict,
    sample_weight_test,
)

EVALUATION FOR BLACK GROUP
[[213  20]
 [ 40  86]]
              precision    recall  f1-score   support

           0       0.84      0.91      0.88       233
           1       0.81      0.68      0.74       126

    accuracy                           0.83       359
   macro avg       0.83      0.80      0.81       359
weighted avg       0.83      0.83      0.83       359

FNER=FN/(FN+TP)=  0.31746031746031744
FPER=FP/(FP+TN)=  0.08583690987124463
TNR=TN/(TN+FP)=  0.9141630901287554
TPR=TP/(TP+FN)=  0.6825396825396826

EVALUATION FOR WHITE GROUP
[[ 433  183]
 [  91 1934]]
              precision    recall  f1-score   support

           0       0.83      0.70      0.76       616
           1       0.91      0.96      0.93      2025

    accuracy                           0.90      2641
   macro avg       0.87      0.83      0.85      2641
weighted avg       0.89      0.90      0.89      2641

FNER=FN/(FN+TP)=  0.044938271604938275
FPER=FP/(FP+TN)=  0.29707792207792205
TNR=TN/(TN+FP)= 

### Delayed impact calculated by race

In [8]:
# Print the delayed-impact figure for each race group on the test split.
calculate_delayed_impact(
    X_test,
    y_test,
    y_predict,
    race_test,
)

The delayed impact of the black group is:  9.610027855153204
The delayed impact of the white group is:  44.52858765619084


### Fairness Metric Evaluation of classifier

In [9]:
# Print selection rates plus the demographic-parity and equalized-odds
# difference/ratio, using race_test as the sensitive feature.
print_fairness_metrics(
    y_test,
    y_predict,
    race_test,
)

Selection Rate Overall:  0.741
Selection Rate By Group:  sensitive_feature_0
0    0.295265
1     0.80159
Name: selection_rate, dtype: object 

DP Difference:  0.5063256827465751
-->difference of 0 means that all groups have the same selection rate
DP Ratio: 0.3683485459925816
-->ratio of 1 means that all groups have the same selection rate 

EOD Difference:  0.2725220458553792
-->difference of 0 means that all groups have the same TP, TN, FP, and FN rates
EOD Ratio: 0.2889373578179601
-->ratio of 1 means that all groups have the same TP, TN, FP, and FN rates



The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


## Exponentiated Gradient Reduction Algorithm for Adding Fairness Constraints

### Demographic Parity

In [10]:
# Exponentiated Gradient reduction ('EG') with demographic parity ('DP') as the
# fairness constraint.
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt, so no
# results are shown below it; presumably the fit is slow - confirm.
add_contraint(
    model, 'DP', 'EG',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # trailing positional flag; its meaning is defined in impt_functions
)

Exponentiated Gradient Reduction Alg is used here with  DP  as the fairness constraint.



KeyboardInterrupt: 

### Equalized Odds

In [None]:
# Exponentiated Gradient reduction ('EG') with the equalized-odds ('EO') constraint.
add_contraint(
    model, 'EO', 'EG',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # trailing positional flag; its meaning is defined in impt_functions
)

### True Positive Rate Parity

In [None]:
# Exponentiated Gradient reduction ('EG') with the true-positive-rate-parity
# ('TPRP') constraint.
add_contraint(
    model, 'TPRP', 'EG',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # trailing positional flag; its meaning is defined in impt_functions
)

### False Positive Rate Parity

In [None]:
# Exponentiated Gradient reduction ('EG') with the false-positive-rate-parity
# ('FPRP') constraint.
add_contraint(
    model, 'FPRP', 'EG',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # trailing positional flag; its meaning is defined in impt_functions
)

### Error Rate Parity

In [None]:
# Exponentiated Gradient reduction ('EG') with the error-rate-parity ('ERP')
# constraint.
add_contraint(
    model, 'ERP', 'EG',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # trailing positional flag; its meaning is defined in impt_functions
)

### Bounded Group Loss (TODO: disabled until the `loss` parameter is decided)

In [None]:
#add_contraint(model, 'BGL', 'EG', X_train, y_train, race_train, race_test, X_test, y_test, y_predict, sample_weight_test, False)

## Grid Search Reduction Algorithm for Adding Fairness Constraints

### Demographic Parity

In [11]:
# Grid Search reduction ('GS') with demographic parity ('DP') as the fairness
# constraint.
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt, so no
# results are shown below it; presumably the fit is slow - confirm.
add_contraint(
    model, 'DP', 'GS',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # trailing positional flag; its meaning is defined in impt_functions
)

Grid Search Reduction Alg is used here with  DP  as the fairness constraint.



KeyboardInterrupt: 

### Equalized Odds

In [None]:
# Grid Search reduction ('GS') with the equalized-odds ('EO') constraint.
add_contraint(
    model, 'EO', 'GS',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # trailing positional flag; its meaning is defined in impt_functions
)

### True Positive Rate Parity

In [None]:
# Grid Search reduction ('GS') with the true-positive-rate-parity ('TPRP')
# constraint.
add_contraint(
    model, 'TPRP', 'GS',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # trailing positional flag; its meaning is defined in impt_functions
)

### False Positive Rate Parity

In [None]:
# Grid Search reduction ('GS') with the false-positive-rate-parity ('FPRP')
# constraint.
add_contraint(
    model, 'FPRP', 'GS',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # trailing positional flag; its meaning is defined in impt_functions
)

### Error Rate Parity

In [None]:
# Grid Search reduction ('GS') with the error-rate-parity ('ERP') constraint.
add_contraint(
    model, 'ERP', 'GS',
    X_train, y_train, race_train,
    race_test, X_test, y_test,
    y_predict, sample_weight_test,
    False,  # trailing positional flag; its meaning is defined in impt_functions
)

### Bounded Group Loss (TODO: disabled until the `loss` parameter is decided)

In [None]:
#add_contraint(model, 'BGL', 'GS', X_train, y_train, race_train, race_test, X_test, y_test, y_predict, sample_weight_test, False)