# 1. Imports

In [1]:
%matplotlib inline

In [2]:
import seaborn as sns
import matplotlib.pyplot as plt   
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import pandas as pd 
import numpy as np
from fairlearn.reductions import ExponentiatedGradient, GridSearch, DemographicParity, EqualizedOdds, \
    TruePositiveRateParity, FalsePositiveRateParity, ErrorRateParity, BoundedGroupLoss
from fairlearn.metrics import *
from raiwidgets import FairnessDashboard

# 2. Extract data from csv

In [3]:
# Read the simulated credit data and cast the score and race columns to integers
# in one chained expression, then show the resulting frame.
data = pd.read_csv(
    '/home/mackenzie/git_repositories/delayedimpact/data/simData_oom10.csv'
).astype({'score': int, 'race': int})
print(data)

      score  repay_probability  race  repay_indices
0       610              78.90     1              1
1       568              47.77     0              0
2       750              98.13     1              1
3       775              98.45     1              1
4       704              95.88     1              1
...     ...                ...   ...            ...
9995    832              98.99     1              1
9996    416              10.91     1              0
9997    444              14.63     1              0
9998    778              98.47     1              1
9999    738              97.68     1              1

[10000 rows x 4 columns]


# 3. Prepare data into train/test form

In [4]:
# Feature matrix is (score, race); the target is the binary repay indicator.
x = data[['score', 'race']].to_numpy()
y = data['repay_indices'].to_numpy()

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=42, test_size=0.3)

# Column 1 of the feature matrix is the sensitive attribute (race).
race_train, race_test = X_train[:, 1], X_test[:, 1]

# The fairlearn mitigator algorithms need weighted data; for now every sample
# gets the same weight of 1.
# TODO: add correct sample weights according to http://www.surveystar.com/startips/weighting.pdf
#       and https://www.nlsinfo.org/content/cohorts/nlsy97/using-and-understanding-the-data/sample-weights-design-effects/page/0/0/#intro
sample_weight_train = np.ones(len(y_train))
sample_weight_test = np.ones(len(y_test))
#sample_weight[y_train[:,1] == 0] = 1.5

# Example of per-class weighting, from https://androidkt.com/set-sample-weight-in-keras/
#sample_weight[y_train == 3] = 1.5

# Open question: does the data need to be scaled?
# If so, standardize features by removing the mean and scaling to unit variance:
#scaler = StandardScaler()
#scaler.fit(X_train)
#X_train = scaler.transform(X_train)
#X_test = scaler.transform(X_test)

# 4. Functions

In [5]:
def display_cm(cm, title):
    """Draw a confusion matrix as an annotated seaborn heatmap on a new figure.

    Parameters
    ----------
    cm : array-like
        Confusion matrix for the binary default/repay labels, laid out as
        ``sklearn.metrics.confusion_matrix`` returns it: rows are true labels,
        columns are predicted labels, both in ascending label order.
    title : str
        Title placed above the heatmap.

    Returns
    -------
    None
    """
    # A fresh figure/axes per call, so repeated calls never draw on top of an
    # earlier heatmap (plt.subplot() would reuse the current axes).
    _, ax = plt.subplots()

    # annot=True writes the count into each cell; fmt='g' disables scientific notation.
    sns.heatmap(cm, annot=True, fmt='g', ax=ax)

    ax.set_xlabel('Predicted Labels')
    ax.set_ylabel('True Labels')
    ax.set_title(title)

    # confusion_matrix sorts labels ascending, so index 0 is label 0 (default)
    # and index 1 is label 1 (repay). The tick labels must follow that order.
    tick_labels = ['default', 'repay']
    ax.xaxis.set_ticklabels(tick_labels)
    ax.yaxis.set_ticklabels(tick_labels)
    return

def evaluation_outcome_rates(y_true, y_pred, sample_weight):
    """Print the four outcome rates of a binary prediction, treating label 1 as positive.

    The rates are printed one per line in the order FNER, FPER, TNR, TPR, each
    computed by the corresponding fairlearn metric with ``pos_label=1``.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels.
    sample_weight : array-like
        Per-sample weights forwarded to every metric.

    Returns
    -------
    None
    """
    labelled_metrics = (
        ('FNER', false_negative_rate),
        ('FPER', false_positive_rate),
        ('TNR', true_negative_rate),
        ('TPR', true_positive_rate),
    )
    for tag, rate_fn in labelled_metrics:
        rate = rate_fn(y_true, y_pred, pos_label=1, sample_weight=sample_weight)
        print(tag, rate)
    return

def evaluation_by_race(X_test, y_test, y_predict, sample_weight=None):
    """Print a confusion matrix, classification report and outcome rates per race group.

    The race of each row is read from column 1 of ``X_test`` (the feature
    matrix is built from the ``['score', 'race']`` columns), so the function no
    longer depends on a notebook-level ``race_test`` variable.
    Race code 0 is reported as the Black group and code 1 as the White group.

    Parameters
    ----------
    X_test : array-like of shape (n_samples, 2)
        Test features; column 1 must hold the raw 0/1 race codes.
        NOTE(review): if the features are ever standardized, this column will
        no longer contain 0/1 and every row will be reported as unexpected.
    y_test : array-like of shape (n_samples,)
        Ground-truth labels.
    y_predict : array-like of shape (n_samples,)
        Predicted labels.
    sample_weight : array-like of shape (n_samples,), optional
        Per-sample weights used for the outcome rates. Defaults to a weight of
        1 for every sample.

    Returns
    -------
    None
    """
    y_true_all = np.asarray(y_test)
    y_pred_all = np.asarray(y_predict)
    if sample_weight is None:
        weights_all = np.ones(len(y_true_all))
    else:
        weights_all = np.asarray(sample_weight)

    # Boolean masks replace the per-row Python loop over the race column.
    race = np.asarray(X_test)[:, 1]
    black_mask = race == 0
    white_mask = race == 1

    # Rows with any other race code are left out of both groups; say so once.
    if not np.all(black_mask | white_mask):
        print('You should not end up here...')

    group_sections = (
        ('EVALUATION FOR BLACK GROUP', black_mask),
        ('\nEVALUATION FOR WHITE GROUP', white_mask),
    )
    for header, mask in group_sections:
        y_true_group = y_true_all[mask]
        y_pred_group = y_pred_all[mask]
        weights_group = weights_all[mask]

        print(header)
        print(confusion_matrix(y_true_group, y_pred_group))
        print(classification_report(y_true_group, y_pred_group))
        evaluation_outcome_rates(y_true_group, y_pred_group, weights_group)
    return

In [6]:
# Reference: https://fairlearn.org/v0.5.0/api_reference/fairlearn.metrics.html

def add_contraint(constraint_str, reduction_alg, X_train, y_train, race_train, X_test, y_test, sample_weight_test):
    """Fit a fairlearn reduction around the baseline model and report its test-set evaluation.

    The function relies on two notebook globals that must exist before it is
    called: ``model`` (the baseline estimator handed to the reduction) and
    ``y_predict`` (the baseline predictions on ``X_test``, shown next to the
    mitigated predictions in the dashboard).

    Parameters
    ----------
    constraint_str : str
        Fairness constraint code: 'DP' (DemographicParity), 'EO' (EqualizedOdds),
        'TPRP' (TruePositiveRateParity), 'FPRP' (FalsePositiveRateParity),
        'ERP' (ErrorRateParity) or 'BGL' (BoundedGroupLoss).
    reduction_alg : str
        Reduction code: 'EG' (ExponentiatedGradient) or 'GS' (GridSearch).
    X_train, y_train : array-like
        Training features and labels.
    race_train : array-like
        Sensitive feature for the training rows.
    X_test, y_test : array-like
        Test features and labels. Column 1 of ``X_test`` is used as the
        sensitive feature for the test rows (the feature matrix is built from
        the ``['score', 'race']`` columns).
        NOTE(review): this assumes ``X_test`` still holds the raw race codes,
        i.e. that the features have not been standardized.
    sample_weight_test : array-like
        Per-sample weights for the test rows, used for the outcome rates.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``constraint_str`` or ``reduction_alg`` is not a supported code.
    """
    supported_constraints = ('DP', 'EO', 'TPRP', 'FPRP', 'ERP', 'BGL')
    supported_reductions = ('EG', 'GS')

    # Reject bad codes before any work is done; previously an unknown code fell
    # through the if/elif chains and surfaced later as an unbound-variable error.
    if constraint_str not in supported_constraints:
        raise ValueError(
            f'Unknown constraint_str {constraint_str!r}; expected one of {supported_constraints}'
        )
    if reduction_alg not in supported_reductions:
        raise ValueError(
            f'Unknown reduction_alg {reduction_alg!r}; expected one of {supported_reductions}'
        )

    # set seed for consistent results with ExponentiatedGradient
    np.random.seed(0)

    if constraint_str == 'DP':
        constraint = DemographicParity()
    elif constraint_str == 'EO':
        constraint = EqualizedOdds()
    elif constraint_str == 'TPRP':
        constraint = TruePositiveRateParity()
    elif constraint_str == 'FPRP':
        constraint = FalsePositiveRateParity()
    elif constraint_str == 'ERP':
        constraint = ErrorRateParity()
    else:  # 'BGL'
        # Parameters:
        #   loss : {SquareLoss, AbsoluteLoss}
        #   A loss object with an `eval` method, e.g. `SquareLoss` or `AbsoluteLoss`
        # NOTE(review): a string is passed here although a loss object is
        # expected; this branch is unresolved (its callers are commented out).
        constraint = BoundedGroupLoss('SquareLoss')

    if reduction_alg == 'EG':
        mitigator = ExponentiatedGradient(model, constraint)
        print('Exponentiated Gradient Reduction Alg is used here with ', constraint_str, ' as the fairness constraint.\n')
    else:  # 'GS'
        mitigator = GridSearch(model, constraint)
        print('Grid Search Reduction Alg is used here with ', constraint_str, ' as the fairness constraint.\n')

    mitigator.fit(X_train, y_train, sensitive_features=race_train)
    y_pred_mitigated = mitigator.predict(X_test)

    # Sensitive feature of the test rows, taken from the argument rather than
    # from a notebook global.
    race_test = X_test[:, 1]

    print('Evaluation of ', constraint_str, '-constrained classifier overall:')
    cm = confusion_matrix(y_test, y_pred_mitigated)
    display_cm(cm, 'Confusion Matrix for Mitigated Model')
    print(cm)
    print(classification_report(y_test, y_pred_mitigated))
    evaluation_outcome_rates(y_test, y_pred_mitigated, sample_weight_test)
    print('\n')

    print('Evaluation of ', constraint_str, '-constrained classifier by race:')
    evaluation_by_race(X_test, y_test, y_pred_mitigated, sample_weight_test)
    print('\n')

    print('Fairness metric evaluation of ', constraint_str, '-constrained classifier')
    print_fairness_metrics(y_true=y_test, y_pred=y_pred_mitigated, sensitive_features=race_test)

    # Side-by-side dashboard of the baseline (global y_predict) and mitigated predictions.
    FairnessDashboard(sensitive_features=race_test,
                   y_true=y_test,
                   y_pred={"initial model": y_predict, "mitigated model": y_pred_mitigated})
    return


def print_fairness_metrics(y_true, y_pred, sensitive_features):
    """Print selection rates plus demographic-parity and equalized-odds summaries.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels.
    sensitive_features : array-like
        Group membership of each sample, used to split every metric by group.

    Returns
    -------
    None
    """
    # `metrics=` is the current keyword; the old `metric=` spelling is
    # deprecated and is announced to become an error in fairlearn 0.10.0.
    selection_frame = MetricFrame(metrics=selection_rate, y_true=y_true, y_pred=y_pred,
                                  sensitive_features=sensitive_features)
    print('Selection Rate Overall: ', selection_frame.overall)
    print('Selection Rate By Group: ', selection_frame.by_group, '\n')

    print('Note: difference of 0 means that all groups have the same selection rate.')
    dp_diff = demographic_parity_difference(y_true=y_true, y_pred=y_pred, sensitive_features=sensitive_features)
    print('DP Difference: ', dp_diff)
    print('Note: ratio of 1 means that all groups have the same selection rate.')
    dp_ratio = demographic_parity_ratio(y_true=y_true, y_pred=y_pred, sensitive_features=sensitive_features)
    print('DP Ratio:', dp_ratio, '\n')

    print('Note: difference of 0 means that all groups have the same TP, TN, FP, and FN rates.')
    eod_diff = equalized_odds_difference(y_true=y_true, y_pred=y_pred, sensitive_features=sensitive_features)
    print('EOD Difference: ', eod_diff)
    print('Note: ratio of 1 means that all groups have the same TP, TN, FP, and FN rates.')
    eod_ratio = equalized_odds_ratio(y_true=y_true, y_pred=y_pred, sensitive_features=sensitive_features)
    print('EOD Ratio:', eod_ratio, '\n')

    return

# Support Vector Machines with Fairlearn

Reference: https://www.datacamp.com/community/tutorials/svm-classification-scikit-learn-python

## SVM classifier + Collect Predictions
NOTE: at the moment, every sample weight is 1.

In [7]:
# TODO: add cells from above!!
# TODO: try other svm kernels??

## Linear Kernel

In [15]:
# Instantiate classifier:
# Linear-kernel support vector classifier (other kernels can be tried here).
clf = svm.SVC(kernel='linear')

# Train on the training split. fit() returns the estimator itself, so `model`
# and `clf` name the same fitted object.
clf.fit(X_train, y_train)
model = clf

# Baseline predictions on the held-out test split.
y_predict = model.predict(X_test)

### Evaluation of classifier overall

In [16]:
# Confusion matrix of the baseline classifier on the test split: plotted, then printed.
cm = confusion_matrix(y_true=y_test, y_pred=y_predict)
display_cm(cm=cm, title='Confusion Matrix for baseline model')
print(cm)

# Per-class precision/recall followed by the four outcome rates.
print(classification_report(y_true=y_test, y_pred=y_predict))
evaluation_outcome_rates(y_true=y_test, y_pred=y_predict, sample_weight=sample_weight_test)

[[ 646  203]
 [ 131 2020]]
              precision    recall  f1-score   support

           0       0.83      0.76      0.79       849
           1       0.91      0.94      0.92      2151

    accuracy                           0.89      3000
   macro avg       0.87      0.85      0.86      3000
weighted avg       0.89      0.89      0.89      3000

FNER 0.06090190609019061
FPER 0.23910482921083628
TNR 0.7608951707891637
TPR 0.9390980939098094




### Evaluation of classifier by race

In [17]:
# Break the baseline classifier's test-set evaluation down by race group.
evaluation_by_race(
    X_test=X_test,
    y_test=y_test,
    y_predict=y_predict,
    sample_weight=sample_weight_test,
)

EVALUATION FOR BLACK GROUP
[[213  20]
 [ 40  86]]
              precision    recall  f1-score   support

           0       0.84      0.91      0.88       233
           1       0.81      0.68      0.74       126

    accuracy                           0.83       359
   macro avg       0.83      0.80      0.81       359
weighted avg       0.83      0.83      0.83       359

FNER 0.31746031746031744
FPER 0.08583690987124463
TNR 0.9141630901287554
TPR 0.6825396825396826

EVALUATION FOR WHITE GROUP
[[ 433  183]
 [  91 1934]]
              precision    recall  f1-score   support

           0       0.83      0.70      0.76       616
           1       0.91      0.96      0.93      2025

    accuracy                           0.90      2641
   macro avg       0.87      0.83      0.85      2641
weighted avg       0.89      0.90      0.89      2641

FNER 0.044938271604938275
FPER 0.29707792207792205
TNR 0.702922077922078
TPR 0.9550617283950618


### Fairness Metric Evaluation of classifier

In [18]:
# Selection-rate, demographic-parity and equalized-odds summary for the baseline classifier.
print_fairness_metrics(
    y_true=y_test,
    y_pred=y_predict,
    sensitive_features=race_test,
)

Selection Rate Overall:  0.741
Selection Rate By Group:  sensitive_feature_0
0    0.295265
1     0.80159
Name: selection_rate, dtype: object 

Note: difference of 0 means that all groups have the same selection rate.
DP Difference:  0.5063256827465751
Note: ratio of 1 means that all groups have the same selection rate.
DP Ratio: 0.3683485459925816 

Note: difference of 0 means that all groups have the same TN, TN, FP, and FN rates.
EOD Difference:  0.2725220458553792
Note: ratio of 1 means that all groups have the same TN, TN, FP, and FN rates rates.
EOD Ratio: 0.2889373578179601 



The positional argument 'metric' has been replaced by a keyword argument 'metrics'. From version 0.10.0 passing it as a positional argument or as a keyword argument 'metric' will result in an error


## Exponentiated Gradient Reduction Alg for Adding Fairness Constraints

### Demographic Parity

In [19]:
# ExponentiatedGradient reduction with a DemographicParity constraint.
add_contraint(
    constraint_str='DP',
    reduction_alg='EG',
    X_train=X_train,
    y_train=y_train,
    race_train=race_train,
    X_test=X_test,
    y_test=y_test,
    sample_weight_test=sample_weight_test,
)

Exponentiated Gradient Reduction Alg is used here with  DP  as the fairness constraint.



KeyboardInterrupt: 

### Equalized Odds

In [None]:
# ExponentiatedGradient reduction with an EqualizedOdds constraint.
add_contraint(
    constraint_str='EO',
    reduction_alg='EG',
    X_train=X_train,
    y_train=y_train,
    race_train=race_train,
    X_test=X_test,
    y_test=y_test,
    sample_weight_test=sample_weight_test,
)

### True Positive Rate Parity

In [None]:
# ExponentiatedGradient reduction with a TruePositiveRateParity constraint.
add_contraint(
    constraint_str='TPRP',
    reduction_alg='EG',
    X_train=X_train,
    y_train=y_train,
    race_train=race_train,
    X_test=X_test,
    y_test=y_test,
    sample_weight_test=sample_weight_test,
)

### False Positive Rate Parity

In [None]:
# ExponentiatedGradient reduction with a FalsePositiveRateParity constraint.
add_contraint(
    constraint_str='FPRP',
    reduction_alg='EG',
    X_train=X_train,
    y_train=y_train,
    race_train=race_train,
    X_test=X_test,
    y_test=y_test,
    sample_weight_test=sample_weight_test,
)

### Error Rate Parity

In [None]:
# ExponentiatedGradient reduction with an ErrorRateParity constraint.
add_contraint(
    constraint_str='ERP',
    reduction_alg='EG',
    X_train=X_train,
    y_train=y_train,
    race_train=race_train,
    X_test=X_test,
    y_test=y_test,
    sample_weight_test=sample_weight_test,
)

### Bounded Group Loss (TODO: issue, need to figure out loss parameter)

In [None]:
#add_contraint('BGL', 'EG', X_train, y_train, race_train, X_test, y_test, sample_weight_test)

## Grid Search Reduction Alg for Adding Fairness Constraints

### Demographic Parity

In [None]:
# GridSearch reduction with a DemographicParity constraint.
add_contraint(
    constraint_str='DP',
    reduction_alg='GS',
    X_train=X_train,
    y_train=y_train,
    race_train=race_train,
    X_test=X_test,
    y_test=y_test,
    sample_weight_test=sample_weight_test,
)

### Equalized Odds

In [None]:
# GridSearch reduction with an EqualizedOdds constraint.
add_contraint(
    constraint_str='EO',
    reduction_alg='GS',
    X_train=X_train,
    y_train=y_train,
    race_train=race_train,
    X_test=X_test,
    y_test=y_test,
    sample_weight_test=sample_weight_test,
)

### True Positive Rate Parity

In [None]:
# GridSearch reduction with a TruePositiveRateParity constraint.
add_contraint(
    constraint_str='TPRP',
    reduction_alg='GS',
    X_train=X_train,
    y_train=y_train,
    race_train=race_train,
    X_test=X_test,
    y_test=y_test,
    sample_weight_test=sample_weight_test,
)

### False Positive Rate Parity

In [None]:
# GridSearch reduction with a FalsePositiveRateParity constraint.
add_contraint(
    constraint_str='FPRP',
    reduction_alg='GS',
    X_train=X_train,
    y_train=y_train,
    race_train=race_train,
    X_test=X_test,
    y_test=y_test,
    sample_weight_test=sample_weight_test,
)

### Error Rate Parity

In [None]:
# GridSearch reduction with an ErrorRateParity constraint.
add_contraint(
    constraint_str='ERP',
    reduction_alg='GS',
    X_train=X_train,
    y_train=y_train,
    race_train=race_train,
    X_test=X_test,
    y_test=y_test,
    sample_weight_test=sample_weight_test,
)

### Bounded Group Loss (TODO: issue, need to figure out loss parameter)

In [None]:
#add_contraint('BGL', 'GS', X_train, y_train, race_train, X_test, y_test, sample_weight_test)

In [None]:
# Per-race evaluation of the baseline predictions. The sample weights are passed
# explicitly, as in the earlier by-race cell, so the call supplies every argument
# of evaluation_by_race.
evaluation_by_race(X_test, y_test, y_predict, sample_weight_test)