# 1. Imports

In [39]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import pandas as pd 
import numpy as np
from fairlearn.reductions import ExponentiatedGradient, GridSearch, DemographicParity, EqualizedOdds, \
    TruePositiveRateParity, FalsePositiveRateParity, ErrorRateParity, BoundedGroupLoss
from fairlearn.metrics import *

# 2. Extract data from csv

In [40]:
data = pd.read_csv('/home/mackenzie/git_repositories/delayedimpact/data/simData_oom10.csv')
data[['score', 'race']] = data[['score', 'race']].astype(int)
print(data)

      score  repay_probability  race  repay_indices
0       610              78.90     1              1
1       568              47.77     0              0
2       750              98.13     1              1
3       775              98.45     1              1
4       704              95.88     1              1
...     ...                ...   ...            ...
9995    832              98.99     1              1
9996    416              10.91     1              0
9997    444              14.63     1              0
9998    778              98.47     1              1
9999    738              97.68     1              1

[10000 rows x 4 columns]


# 3. Prepare data into train/test form

In [41]:
x = data[['score', 'race']].values
y = data['repay_indices'].values
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

# collect our sensitive attribute
race_train = X_train[:, 1]
race_test = X_test[:, 1]

# for fairlearn mitigator algs to work, I have to weigh the data
# for now I'm weighing everything the same
# TODO: add correct sample weights according to, http://www.surveystar.com/startips/weighting.pdf
#       and https://www.nlsinfo.org/content/cohorts/nlsy97/using-and-understanding-the-data/sample-weights-design-effects/page/0/0/#intro
sample_weight_train = np.ones(shape=(len(y_train),))
sample_weight_test = np.ones(shape=(len(y_test),))
#sample_weight[y_train[:,1] == 0] = 1.5 

# Below example from: https://androidkt.com/set-sample-weight-in-keras/
#sample_weight[y_train == 3] = 1.5

# Q: do I need to scale the data??
# Standardize features by removing mean and scaling to unit variance:
#scaler = StandardScaler()
#scaler.fit(X_train)
#X_train = scaler.transform(X_train)
#X_test = scaler.transform(X_test) 

# 4. Functions

In [42]:
def evaluation_outcome_rates(y_true, y_pred, sample_weight):
    fner = false_negative_rate(y_true, y_pred, pos_label=1, sample_weight=sample_weight)
    print('FNER', fner)
    fper = false_positive_rate(y_true, y_pred, pos_label=1, sample_weight=sample_weight)
    print('FPER', fper)
    tnr = true_negative_rate(y_true, y_pred, pos_label=1, sample_weight=sample_weight)
    print('TNR', tnr)
    tpr = true_positive_rate(y_true, y_pred, pos_label=1, sample_weight=sample_weight)
    print('TPR', tpr)
    return

def evaluation_by_race(X_test, y_test, y_predict, sample_weight):

    y_test_black, y_pred_black, sw_black, y_test_white, y_pred_white, sw_white = [],[],[],[],[],[]
    
    # splitting up the y_test and y_pred values by race to then use for race specific classification reports
    for index, race in enumerate(race_test):
        if(race == 0):  # black
            y_test_black.append(y_test[index])
            y_pred_black.append(y_predict[index])
            sw_black.append(sample_weight[index])
        elif(race == 1):  # white
            y_test_white.append(y_test[index])
            y_pred_white.append(y_predict[index])
            sw_white.append(sample_weight[index])

        else:
            print('You should not end up here...')
            
    print('EVALUATION FOR BLACK GROUP')
    print(confusion_matrix(y_test_black, y_pred_black))
    print(classification_report(y_test_black, y_pred_black)) 
    evaluation_outcome_rates(y_test_black, y_pred_black, sw_black)
    
    print('\nEVALUATION FOR WHITE GROUP')
    print(confusion_matrix(y_test_white, y_pred_white))
    print(classification_report(y_test_white, y_pred_white))
    evaluation_outcome_rates(y_test_white, y_pred_white, sw_white)
    return

In [43]:
# Reference: https://fairlearn.org/v0.5.0/api_reference/fairlearn.metrics.html

def add_contraint(constraint_str, reduction_alg, X_train, y_train, race_train, X_test, y_test, sample_weight_test):
    # set seed for consistent results with ExponentiatedGradient
    np.random.seed(0)  
    
    if constraint_str=='DP':
        constraint = DemographicParity()
    elif constraint_str=='EO':
        constraint = EqualizedOdds()
    elif constraint_str=='TPRP':
        constraint = TruePositiveRateParity()
    elif constraint_str=='FPRP':
        constraint = FalsePositiveRateParity()
    elif constraint_str=='ERP':
        constraint = ErrorRateParity()
    elif constraint_str=='BGL':
        # Parameters: 
        #   loss : {SquareLoss, AbsoluteLoss}
        #   A loss object with an `eval` method, e.g. `SquareLoss` or `AbsoluteLoss`
        constraint = BoundedGroupLoss('SquareLoss')
    
    if reduction_alg=='EG':
        mitigator = ExponentiatedGradient(model, constraint)
        print('Exponentiated Gradient Reduction Alg is used here with ', constraint_str, ' as the fairness constraint.\n')
    elif reduction_alg=='GS':
        mitigator = GridSearch(model, constraint)
        print('Grid Search Reduction Alg is used here with ', constraint_str, ' as the fairness constraint.\n')
    else:
        print('ISSUE: need to put in a valid reduction_alg parameter')

        
    mitigator.fit(X_train, y_train, sensitive_features=race_train)
    y_pred_mitigated = mitigator.predict(X_test)
    
    print('Evaluation of ', constraint_str, '-constrained classifier overall:')
    print(confusion_matrix(y_test, y_pred_mitigated))
    print(classification_report(y_test, y_pred_mitigated)) 
    evaluation_outcome_rates(y_test, y_pred_mitigated, sample_weight_test)
    print('\n')
    
    print('Evaluation of ', constraint_str, '-constrained classifier by race:')
    evaluation_by_race(X_test, y_test, y_pred_mitigated, sample_weight_test)
    print('\n')
    
    print('Fairness metric evaluation of ', constraint_str, '-constrained classifier')
    print_fairness_metrics(y_true=y_test, y_pred=y_pred_mitigated, sensitive_features=race_test)
    return


def print_fairness_metrics(y_true, y_pred, sensitive_features):
    sr_mitigated = MetricFrame(metric=selection_rate, y_true=y_true, y_pred=y_pred, sensitive_features=sensitive_features)
    print('Selection Rate Overall: ', sr_mitigated.overall)
    print('Selection Rate By Group: ',sr_mitigated.by_group, '\n')

    print('Note: difference of 0 means that all groups have the same selection rate.')
    dp_diff = demographic_parity_difference(y_true=y_true, y_pred=y_pred, sensitive_features=sensitive_features)
    print('DP Difference: ', dp_diff)
    print('Note: ratio of 1 means that all groups have the same selection rate.')
    dp_ratio = demographic_parity_ratio(y_true=y_true, y_pred=y_pred, sensitive_features=sensitive_features)
    print('DP Ratio:', dp_ratio, '\n')
    
    print('Note: difference of 0 means that all groups have the same TN, TN, FP, and FN rates.')
    eod_diff = equalized_odds_difference(y_true=y_true, y_pred=y_pred, sensitive_features=sensitive_features)
    print('EOD Difference: ', eod_diff)
    print('Note: ratio of 1 means that all groups have the same TN, TN, FP, and FN rates rates.')
    eod_ratio = equalized_odds_ratio(y_true=y_true, y_pred=y_pred, sensitive_features=sensitive_features)
    print('EOD Ratio:', eod_ratio, '\n')
    
    return

# TODOS:
- Get fairlearn widget to work below by updating the widget/using the raiwidgets tool?
- Add code pipeline for last 3 classifiers 
- Add the delayed impact results to the evaluation of results
- Calculate sample weights and make that a boolean/if+else option in the code and/or make a new notebook?
- Try to figure out bounded group loss metric, need a loss parameter. Definition: 'asks that the prediction error restricted to any protected group remain below some pre-determined level' from https://arxiv.org/abs/1905.12843


# Notes:
- Fairness constraint options: DP refers to demographic parity, EO to equalized odds, TPRP to true positive rate parity, FPRP to false positive rate parity, ERP to error rate parity, and BGL to bounded group loss.
- Couldn't use K-nearest neighbors as an ML classifier bc the fit function does not take in sample weights parameter
- CAN use (their fit functions take in sample weights): gaussian naive bayes, decision tree, logistic regression, and svm

# Gaussian Naive Bayes classifier (Fairlearn used)

## Train GNB classifier + Collect Predictions
NOTE: atm sample_weight are all 1s

In [44]:
# Initialize classifier:
gnb = GaussianNB()

# Train the classifier:
model = gnb.fit(X_train, y_train, sample_weight_train)

# Make predictions with the classifier:
y_predict = gnb.predict(X_test)

### Evaluation of classifier overall

In [45]:
print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict)) 
evaluation_outcome_rates(y_test, y_predict, sample_weight_test)

[[ 617  232]
 [ 188 1963]]
              precision    recall  f1-score   support

           0       0.77      0.73      0.75       849
           1       0.89      0.91      0.90      2151

    accuracy                           0.86      3000
   macro avg       0.83      0.82      0.82      3000
weighted avg       0.86      0.86      0.86      3000

FNER 0.08740120874012088
FPER 0.27326266195524146
TNR 0.7267373380447585
TPR 0.9125987912598791


### Evaluation of classifier by race

In [46]:
evaluation_by_race(X_test, y_test, y_predict, sample_weight_test)

EVALUATION FOR BLACK GROUP
[[233   0]
 [122   4]]
              precision    recall  f1-score   support

           0       0.66      1.00      0.79       233
           1       1.00      0.03      0.06       126

    accuracy                           0.66       359
   macro avg       0.83      0.52      0.43       359
weighted avg       0.78      0.66      0.54       359

FNER 0.9682539682539683
FPER 0.0
TNR 1.0
TPR 0.031746031746031744

EVALUATION FOR WHITE GROUP
[[ 384  232]
 [  66 1959]]
              precision    recall  f1-score   support

           0       0.85      0.62      0.72       616
           1       0.89      0.97      0.93      2025

    accuracy                           0.89      2641
   macro avg       0.87      0.80      0.82      2641
weighted avg       0.88      0.89      0.88      2641

FNER 0.03259259259259259
FPER 0.37662337662337664
TNR 0.6233766233766234
TPR 0.9674074074074074


### Fairness Metric Evaluation of classifier

In [47]:
print_fairness_metrics(y_test, y_predict, race_test)

Selection Rate Overall:  0.7316666666666667
Selection Rate By Group:  sensitive_feature_0
0    0.0111421
1      0.82961
Name: selection_rate, dtype: object 

Note: difference of 0 means that all groups have the same selection rate.
DP Difference:  0.8184679349322184
Note: ratio of 1 means that all groups have the same selection rate.
DP Ratio: 0.013430480987681945 

Note: difference of 0 means that all groups have the same TN, TN, FP, and FN rates.
EOD Difference:  0.9356613756613756
Note: ratio of 1 means that all groups have the same TN, TN, FP, and FN rates rates.
EOD Ratio: 0.0 



## Exponentiated Gradient Reduction Alg for Adding Fairness Constraints

### Demographic Parity

In [48]:
add_contraint('DP', 'EG', X_train, y_train, race_train, X_test, y_test, sample_weight_test)

Exponentiated Gradient Reduction Alg is used here with  DP  as the fairness constraint.

Evaluation of  DP -constrained classifier overall:
[[ 615  234]
 [ 909 1242]]
              precision    recall  f1-score   support

           0       0.40      0.72      0.52       849
           1       0.84      0.58      0.68      2151

    accuracy                           0.62      3000
   macro avg       0.62      0.65      0.60      3000
weighted avg       0.72      0.62      0.64      3000

FNER 0.4225941422594142
FPER 0.2756183745583039
TNR 0.7243816254416962
TPR 0.5774058577405857


Evaluation of  DP -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[114 119]
 [ 72  54]]
              precision    recall  f1-score   support

           0       0.61      0.49      0.54       233
           1       0.31      0.43      0.36       126

    accuracy                           0.47       359
   macro avg       0.46      0.46      0.45       359
weighted avg       0.51      0.47    

### Equalized Odds

In [49]:
add_contraint('EO', 'EG', X_train, y_train, race_train, X_test, y_test, sample_weight_test)

Exponentiated Gradient Reduction Alg is used here with  EO  as the fairness constraint.

Evaluation of  EO -constrained classifier overall:
[[ 602  247]
 [ 292 1859]]
              precision    recall  f1-score   support

           0       0.67      0.71      0.69       849
           1       0.88      0.86      0.87      2151

    accuracy                           0.82      3000
   macro avg       0.78      0.79      0.78      3000
weighted avg       0.82      0.82      0.82      3000

FNER 0.13575081357508137
FPER 0.29093050647820967
TNR 0.7090694935217904
TPR 0.8642491864249187


Evaluation of  EO -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[163  70]
 [ 17 109]]
              precision    recall  f1-score   support

           0       0.91      0.70      0.79       233
           1       0.61      0.87      0.71       126

    accuracy                           0.76       359
   macro avg       0.76      0.78      0.75       359
weighted avg       0.80      0.76  

In [50]:
DemographicParity, EqualizedOdds, \
    TruePositiveRateParity, FalsePositiveRateParity, ErrorRateParity, BoundedGroupLoss

(fairlearn.reductions._moments.utility_parity.DemographicParity,
 fairlearn.reductions._moments.utility_parity.EqualizedOdds,
 fairlearn.reductions._moments.utility_parity.TruePositiveRateParity,
 fairlearn.reductions._moments.utility_parity.FalsePositiveRateParity,
 fairlearn.reductions._moments.utility_parity.ErrorRateParity,
 fairlearn.reductions._moments.bounded_group_loss.BoundedGroupLoss)

### True Positive Rate Parity

In [51]:
add_contraint('TPRP', 'EG', X_train, y_train, race_train, X_test, y_test, sample_weight_test)

Exponentiated Gradient Reduction Alg is used here with  TPRP  as the fairness constraint.

Evaluation of  TPRP -constrained classifier overall:
[[ 591  258]
 [ 136 2015]]
              precision    recall  f1-score   support

           0       0.81      0.70      0.75       849
           1       0.89      0.94      0.91      2151

    accuracy                           0.87      3000
   macro avg       0.85      0.82      0.83      3000
weighted avg       0.87      0.87      0.87      3000

FNER 0.06322640632264064
FPER 0.303886925795053
TNR 0.696113074204947
TPR 0.9367735936773594


Evaluation of  TPRP -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[114 119]
 [  5 121]]
              precision    recall  f1-score   support

           0       0.96      0.49      0.65       233
           1       0.50      0.96      0.66       126

    accuracy                           0.65       359
   macro avg       0.73      0.72      0.65       359
weighted avg       0.80      0.6

### False Positive Rate Parity

In [52]:
add_contraint('FPRP', 'EG', X_train, y_train, race_train, X_test, y_test, sample_weight_test)

Exponentiated Gradient Reduction Alg is used here with  FPRP  as the fairness constraint.

Evaluation of  FPRP -constrained classifier overall:
[[ 663  186]
 [ 559 1592]]
              precision    recall  f1-score   support

           0       0.54      0.78      0.64       849
           1       0.90      0.74      0.81      2151

    accuracy                           0.75      3000
   macro avg       0.72      0.76      0.73      3000
weighted avg       0.80      0.75      0.76      3000

FNER 0.2598791259879126
FPER 0.21908127208480566
TNR 0.7809187279151943
TPR 0.7401208740120874


Evaluation of  FPRP -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[187  46]
 [ 73  53]]
              precision    recall  f1-score   support

           0       0.72      0.80      0.76       233
           1       0.54      0.42      0.47       126

    accuracy                           0.67       359
   macro avg       0.63      0.61      0.61       359
weighted avg       0.65      0

### Error Rate Parity

In [53]:
add_contraint('ERP', 'EG', X_train, y_train, race_train, X_test, y_test, sample_weight_test)

Exponentiated Gradient Reduction Alg is used here with  ERP  as the fairness constraint.

Evaluation of  ERP -constrained classifier overall:
[[ 618  231]
 [ 272 1879]]
              precision    recall  f1-score   support

           0       0.69      0.73      0.71       849
           1       0.89      0.87      0.88      2151

    accuracy                           0.83      3000
   macro avg       0.79      0.80      0.80      3000
weighted avg       0.84      0.83      0.83      3000

FNER 0.12645281264528127
FPER 0.27208480565371024
TNR 0.7279151943462897
TPR 0.8735471873547187


Evaluation of  ERP -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[221  12]
 [ 52  74]]
              precision    recall  f1-score   support

           0       0.81      0.95      0.87       233
           1       0.86      0.59      0.70       126

    accuracy                           0.82       359
   macro avg       0.83      0.77      0.79       359
weighted avg       0.83      0.8

### Bounded Group Loss (TODO: issue, need to figure out loss parameter)

In [54]:
#add_contraint('BGL', 'EG', X_train, y_train, race_train, X_test, y_test, sample_weight_test)

## Grid Search Reduction Alg for Adding Fairness Constraints

### Demographic Parity

In [55]:
add_contraint('DP', 'GS', X_train, y_train, race_train, X_test, y_test, sample_weight_test)

Grid Search Reduction Alg is used here with  DP  as the fairness constraint.

Evaluation of  DP -constrained classifier overall:
[[ 552  297]
 [ 122 2029]]
              precision    recall  f1-score   support

           0       0.82      0.65      0.72       849
           1       0.87      0.94      0.91      2151

    accuracy                           0.86      3000
   macro avg       0.85      0.80      0.82      3000
weighted avg       0.86      0.86      0.86      3000

FNER 0.056717805671780565
FPER 0.3498233215547703
TNR 0.6501766784452296
TPR 0.9432821943282195


Evaluation of  DP -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[ 82 151]
 [  1 125]]
              precision    recall  f1-score   support

           0       0.99      0.35      0.52       233
           1       0.45      0.99      0.62       126

    accuracy                           0.58       359
   macro avg       0.72      0.67      0.57       359
weighted avg       0.80      0.58      0.56   

### Equalized Odds Used

In [56]:
add_contraint('EO', 'GS', X_train, y_train, race_train, X_test, y_test, sample_weight_test)

Grid Search Reduction Alg is used here with  EO  as the fairness constraint.

Evaluation of  EO -constrained classifier overall:
[[ 620  229]
 [ 112 2039]]
              precision    recall  f1-score   support

           0       0.85      0.73      0.78       849
           1       0.90      0.95      0.92      2151

    accuracy                           0.89      3000
   macro avg       0.87      0.84      0.85      3000
weighted avg       0.88      0.89      0.88      3000

FNER 0.05206880520688052
FPER 0.2697290930506478
TNR 0.7302709069493521
TPR 0.9479311947931195


Evaluation of  EO -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[209  24]
 [ 30  96]]
              precision    recall  f1-score   support

           0       0.87      0.90      0.89       233
           1       0.80      0.76      0.78       126

    accuracy                           0.85       359
   macro avg       0.84      0.83      0.83       359
weighted avg       0.85      0.85      0.85    

### True Positive Rate Parity

In [57]:
add_contraint('TPRP', 'GS', X_train, y_train, race_train, X_test, y_test, sample_weight_test)

Grid Search Reduction Alg is used here with  TPRP  as the fairness constraint.

Evaluation of  TPRP -constrained classifier overall:
[[ 617  232]
 [ 116 2035]]
              precision    recall  f1-score   support

           0       0.84      0.73      0.78       849
           1       0.90      0.95      0.92      2151

    accuracy                           0.88      3000
   macro avg       0.87      0.84      0.85      3000
weighted avg       0.88      0.88      0.88      3000

FNER 0.05392840539284054
FPER 0.27326266195524146
TNR 0.7267373380447585
TPR 0.9460715946071595


Evaluation of  TPRP -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[213  20]
 [ 39  87]]
              precision    recall  f1-score   support

           0       0.85      0.91      0.88       233
           1       0.81      0.69      0.75       126

    accuracy                           0.84       359
   macro avg       0.83      0.80      0.81       359
weighted avg       0.83      0.84      0

### False Positive Rate Parity

In [58]:
add_contraint('FPRP', 'GS', X_train, y_train, race_train, X_test, y_test, sample_weight_test)

Grid Search Reduction Alg is used here with  FPRP  as the fairness constraint.

Evaluation of  FPRP -constrained classifier overall:
[[ 617  232]
 [ 188 1963]]
              precision    recall  f1-score   support

           0       0.77      0.73      0.75       849
           1       0.89      0.91      0.90      2151

    accuracy                           0.86      3000
   macro avg       0.83      0.82      0.82      3000
weighted avg       0.86      0.86      0.86      3000

FNER 0.08740120874012088
FPER 0.27326266195524146
TNR 0.7267373380447585
TPR 0.9125987912598791


Evaluation of  FPRP -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[233   0]
 [122   4]]
              precision    recall  f1-score   support

           0       0.66      1.00      0.79       233
           1       1.00      0.03      0.06       126

    accuracy                           0.66       359
   macro avg       0.83      0.52      0.43       359
weighted avg       0.78      0.66      0

### Error Rate Parity

In [59]:
add_contraint('ERP', 'GS', X_train, y_train, race_train, X_test, y_test, sample_weight_test)

Grid Search Reduction Alg is used here with  ERP  as the fairness constraint.

Evaluation of  ERP -constrained classifier overall:
[[ 607  242]
 [ 122 2029]]
              precision    recall  f1-score   support

           0       0.83      0.71      0.77       849
           1       0.89      0.94      0.92      2151

    accuracy                           0.88      3000
   macro avg       0.86      0.83      0.84      3000
weighted avg       0.88      0.88      0.88      3000

FNER 0.056717805671780565
FPER 0.2850412249705536
TNR 0.7149587750294464
TPR 0.9432821943282195


Evaluation of  ERP -constrained classifier by race:
EVALUATION FOR BLACK GROUP
[[223  10]
 [ 56  70]]
              precision    recall  f1-score   support

           0       0.80      0.96      0.87       233
           1       0.88      0.56      0.68       126

    accuracy                           0.82       359
   macro avg       0.84      0.76      0.78       359
weighted avg       0.83      0.82      0.80

### Bounded Group Loss (TODO: issue, need to figure out loss parameter)

In [None]:
#add_contraint('BGL', 'GS', X_train, y_train, race_train, X_test, y_test, sample_weight_test)

### TODO: USE BELOW CHUNK TO TEST FAIRLEARN DASHBOARD??

## Use Reduction Algorithm to make the classifier DP-constrained

In [13]:
# Reference for this cell's code: https://fairlearn.org/main/quickstart.html
# Reduction Algs explained here: https://fairlearn.org/main/user_guide/mitigation.html#reductions

# TODO--try Gridsearch reduction alg: https://fairlearn.org/main/user_guide/mitigation.html

# set seed for consistent results with ExponentiatedGradient
np.random.seed(0)  
constraint = DemographicParity()
mitigator = ExponentiatedGradient(model, constraint)
mitigator.fit(X_train, y_train, sensitive_features=race_train)
y_pred_mitigated = mitigator.predict(X_test)

### Evaluation of DP-constrained classifier overall

In [14]:
print(confusion_matrix(y_test, y_pred_mitigated))
print(classification_report(y_test, y_pred_mitigated)) 
evaluation_outcome_rates(y_test, y_pred_mitigated, sample_weight_test)

[[ 615  234]
 [ 909 1242]]
              precision    recall  f1-score   support

           0       0.40      0.72      0.52       849
           1       0.84      0.58      0.68      2151

    accuracy                           0.62      3000
   macro avg       0.62      0.65      0.60      3000
weighted avg       0.72      0.62      0.64      3000

FNER 0.4225941422594142
FPER 0.2756183745583039
TNR 0.7243816254416962
TPR 0.5774058577405857


### Evaluation of DP-constrained classifier by race

In [15]:
evaluation_by_race(X_test, y_test, y_pred_mitigated, sample_weight_test)

EVALUATION FOR BLACK GROUP
[[114 119]
 [ 72  54]]
              precision    recall  f1-score   support

           0       0.61      0.49      0.54       233
           1       0.31      0.43      0.36       126

    accuracy                           0.47       359
   macro avg       0.46      0.46      0.45       359
weighted avg       0.51      0.47      0.48       359

FNER 0.5714285714285714
FPER 0.5107296137339056
TNR 0.4892703862660944
TPR 0.42857142857142855

EVALUATION FOR WHITE GROUP
[[ 501  115]
 [ 837 1188]]
              precision    recall  f1-score   support

           0       0.37      0.81      0.51       616
           1       0.91      0.59      0.71      2025

    accuracy                           0.64      2641
   macro avg       0.64      0.70      0.61      2641
weighted avg       0.79      0.64      0.67      2641

FNER 0.41333333333333333
FPER 0.18668831168831168
TNR 0.8133116883116883
TPR 0.5866666666666667


### Fairness Metric Evaluation of DP-constrained classifier

In [16]:
print_fairness_metrics(y_true=y_test, y_pred=y_pred_mitigated, sensitive_features=race_test)

Selection Rate Overall:  0.492
Selection Rate By Group:  sensitive_feature_0
0    0.481894
1    0.493374
Name: selection_rate, dtype: object 

Note: difference of 0 means that all groups have the same selection rate.
DP Difference:  0.011479571657144305
Note: ratio of 1 means that all groups have the same selection rate.
DP Ratio: 0.9767325028806462 

Note: difference of 0 means that all groups have the same TN, TN, FP, and FN rates.
EOD Difference:  0.3240413020455939
Note: ratio of 1 means that all groups have the same TN, TN, FP, and FN rates rates.
EOD Ratio: 0.36553257666703043 



In [17]:
# TODO: figure out how to get the widget to work, https://fairlearn.org/v0.6.2/api_reference/fairlearn.widget.html
# Roman said that there was a new version out / try using raiwidgets
# TODO: update fairlearn widget and try to get dashboard to go
from fairlearn.widget import FairlearnDashboard
FairlearnDashboard(sensitive_features=race_test,
                   sensitive_feature_names=['race'],
                   y_true=y_test,
                   y_pred={"initial model": y_predict, "mitigated model": y_pred_mitigated}) 

  warn("The FairlearnDashboard will move from Fairlearn to the "


FairlearnWidget(value={'true_y': [0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1…

<fairlearn.widget._fairlearn_dashboard.FairlearnDashboard at 0x7f483df21c40>

In [18]:
from fairlearn import show_versions
show_versions()


System:
    python: 3.8.10 (default, Jun  2 2021, 10:49:15)  [GCC 9.4.0]
executable: /usr/bin/python3
   machine: Linux-5.8.0-59-generic-x86_64-with-glibc2.29

Python dependencies:
    Cython: 0.29.21
matplotlib: 3.3.3
     numpy: 1.19.5
    pandas: 1.1.5
       pip: 20.0.2
     scipy: 1.4.1
setuptools: 45.2.0
   sklearn: 0.24.1
    tempeh: None


In [19]:
pip show fairlearn jupyter notebook

Name: fairlearn
Version: 0.6.2
Summary: Algorithms for mitigating unfairness in supervised machine learning
Home-page: https://github.com/fairlearn/fairlearn
Author: Miroslav Dudik, Richard Edgar, Brandon Horn, Roman Lutz
Author-email: fairlearn@microsoft.com
License: UNKNOWN
Location: /home/mackenzie/.local/lib/python3.8/site-packages
Requires: numpy, pandas, scikit-learn, scipy
Required-by: parity-fairness
---
Name: jupyter
Version: 1.0.0
Summary: Jupyter metapackage. Install all the Jupyter components in one go.
Home-page: http://jupyter.org
Author: Jupyter Development Team
Author-email: jupyter@googlegroups.org
License: BSD
Location: /home/mackenzie/.local/lib/python3.8/site-packages
Requires: ipywidgets, notebook, ipykernel, qtconsole, jupyter-console, nbconvert
Required-by: witwidget
---
Name: notebook
Version: 6.4.0
Summary: A web-based notebook environment for interactive computing
Home-page: http://jupyter.org
Author: Jupyter Development Team
Author-email: jupyter@googlegroups

# Decision Tree Classifier with Fairlearn

In [None]:
# TODO: add cells from above!!

In [11]:
# Reference: https://www.datacamp.com/community/tutorials/decision-tree-classification-python

# Initialize classifier:
clf = DecisionTreeClassifier()

# Train the classifier:
clf = clf.fit(X_train,y_train)

In [12]:
# Make predictions with the classifier:
y_pred = clf.predict(X_test)

print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict)) 

[[ 617  232]
 [ 188 1963]]
              precision    recall  f1-score   support

           0       0.77      0.73      0.75       849
           1       0.89      0.91      0.90      2151

    accuracy                           0.86      3000
   macro avg       0.83      0.82      0.82      3000
weighted avg       0.86      0.86      0.86      3000



In [13]:
evaluation_by_race(X_test, y_test, y_predict)

EVALUATION FOR BLACK GROUP
[[233   0]
 [122   4]]
              precision    recall  f1-score   support

           0       0.66      1.00      0.79       233
           1       1.00      0.03      0.06       126

    accuracy                           0.66       359
   macro avg       0.83      0.52      0.43       359
weighted avg       0.78      0.66      0.54       359

EVALUATION FOR WHITE GROUP
[[ 384  232]
 [  66 1959]]
              precision    recall  f1-score   support

           0       0.85      0.62      0.72       616
           1       0.89      0.97      0.93      2025

    accuracy                           0.89      2641
   macro avg       0.87      0.80      0.82      2641
weighted avg       0.88      0.89      0.88      2641



# Logistic Regression with Fairlearn

In [None]:
# TODO: add cells from above!!

In [14]:
# Reference: https://towardsdatascience.com/logistic-regression-using-python-sklearn-numpy-mnist-handwriting-recognition-matplotlib-a6b31e2b166a

# Instantiate classifier:
logisticRegr = LogisticRegression()

# Train the classifier:
logisticRegr.fit(X_train, y_train)

LogisticRegression()

In [15]:
# Make predictions with the classifier:
y_pred = logisticRegr.predict(X_test)

print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict)) 

[[ 617  232]
 [ 188 1963]]
              precision    recall  f1-score   support

           0       0.77      0.73      0.75       849
           1       0.89      0.91      0.90      2151

    accuracy                           0.86      3000
   macro avg       0.83      0.82      0.82      3000
weighted avg       0.86      0.86      0.86      3000



In [16]:
evaluation_by_race(X_test, y_test, y_predict)

EVALUATION FOR BLACK GROUP
[[233   0]
 [122   4]]
              precision    recall  f1-score   support

           0       0.66      1.00      0.79       233
           1       1.00      0.03      0.06       126

    accuracy                           0.66       359
   macro avg       0.83      0.52      0.43       359
weighted avg       0.78      0.66      0.54       359

EVALUATION FOR WHITE GROUP
[[ 384  232]
 [  66 1959]]
              precision    recall  f1-score   support

           0       0.85      0.62      0.72       616
           1       0.89      0.97      0.93      2025

    accuracy                           0.89      2641
   macro avg       0.87      0.80      0.82      2641
weighted avg       0.88      0.89      0.88      2641



# Support Vector Machines with Fairlearn

Reference: https://www.datacamp.com/community/tutorials/svm-classification-scikit-learn-python

In [None]:
# TODO: add cells from above!!
# TODO: try other svm kernels??

## 8.1 Linear Kernel

In [17]:
# Instantiate classifier:
clf = svm.SVC(kernel='linear')  # can try other kernels

#Train the model using the training sets
clf.fit(X_train, y_train)

SVC(kernel='linear')

In [18]:
# Make predictions
y_pred = clf.predict(X_test)

print(confusion_matrix(y_test, y_predict))
print(classification_report(y_test, y_predict))

[[ 617  232]
 [ 188 1963]]
              precision    recall  f1-score   support

           0       0.77      0.73      0.75       849
           1       0.89      0.91      0.90      2151

    accuracy                           0.86      3000
   macro avg       0.83      0.82      0.82      3000
weighted avg       0.86      0.86      0.86      3000



In [19]:
evaluation_by_race(X_test, y_test, y_predict)

EVALUATION FOR BLACK GROUP
[[233   0]
 [122   4]]
              precision    recall  f1-score   support

           0       0.66      1.00      0.79       233
           1       1.00      0.03      0.06       126

    accuracy                           0.66       359
   macro avg       0.83      0.52      0.43       359
weighted avg       0.78      0.66      0.54       359

EVALUATION FOR WHITE GROUP
[[ 384  232]
 [  66 1959]]
              precision    recall  f1-score   support

           0       0.85      0.62      0.72       616
           1       0.89      0.97      0.93      2025

    accuracy                           0.89      2641
   macro avg       0.87      0.80      0.82      2641
weighted avg       0.88      0.89      0.88      2641

