In [20]:
# import pandas and numpy

import pandas as pd
import numpy as np


# import sklearn

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV


# fairlearn metrics

import fairlearn
from fairlearn.metrics import MetricFrame
from fairlearn.metrics import selection_rate, demographic_parity_ratio, demographic_parity_difference
from fairlearn.metrics import false_negative_rate, equalized_odds_ratio, equalized_odds_difference


# fairlearn reductions

from fairlearn.reductions import DemographicParity, EqualizedOdds


# fairlearn postprocessing

from fairlearn.postprocessing import ThresholdOptimizer

In [2]:
# read the preprocessed pilot / non-pilot dataset from disk and preview its first 5 rows

data = pd.read_csv(filepath_or_buffer = "../Preprocessing/preprocessed_pilot_non-pilot_data.csv")
data.head(n = 5)

Unnamed: 0,Pilot,Age,Gender,PSS,JSS,MFI,GF,PF,RA,RM,MF
0,yes,21,male,0.464286,0.4,0.53,0.65,0.75,0.65,0.5,0.1
1,yes,19,female,0.410714,0.5,0.568421,0.5,0.933333,0.8,0.45,0.25
2,yes,21,male,0.357143,0.25,0.54,0.55,0.35,0.6,0.4,0.8
3,yes,19,male,0.160714,0.0,0.5,0.55,0.45,0.45,0.5,0.55
4,yes,18,female,0.392857,0.3,0.42,0.3,0.4,0.6,0.5,0.3


In [3]:
# count how many rows carry each label ("yes" / "no") in the Pilot column

data.loc[:, "Pilot"].value_counts()

Pilot
yes    28
no     19
Name: count, dtype: int64

In [4]:
# since there are fewer no values than yes values and both classes need the same number of rows for
# class label balance, a random sample of yes rows is drawn whose size equals the number of no rows
# (19 for this dataset); the sample size is derived from the data instead of being hard-coded so it
# stays correct if the input file changes

n_no = int((data["Pilot"] == "no").sum())

yes = data[data["Pilot"] == "yes"]
# fixed random_state keeps the selected rows identical between runs
yes_sample = yes.sample(n = n_no, random_state = 42)

In [5]:
# display the randomly sampled yes rows (the rows that will be combined with the no rows)

yes_sample

Unnamed: 0,Pilot,Age,Gender,PSS,JSS,MFI,GF,PF,RA,RM,MF
9,yes,18,male,0.285714,0.0,0.53,0.75,0.65,0.35,0.45,0.45
25,yes,19,female,0.625,1.0,0.5,0.45,0.55,0.7,0.4,0.4
8,yes,20,female,0.321429,0.2,0.57,0.5,0.55,0.55,0.6,0.65
21,yes,23,female,0.375,0.6,0.43,0.15,0.35,0.6,0.55,0.5
0,yes,21,male,0.464286,0.4,0.53,0.65,0.75,0.65,0.5,0.1
12,yes,19,female,0.482143,0.4,0.32,0.25,0.45,0.2,0.25,0.45
17,yes,21,male,0.642857,0.4,0.3,0.1,0.2,0.5,0.4,0.3
22,yes,21,male,0.428571,0.25,0.58,0.5,0.65,0.45,0.6,0.7
11,yes,18,male,0.392857,0.15,0.46,0.45,0.2,0.6,0.55,0.5
13,yes,26,male,0.25,0.2,0.67,0.65,0.7,0.7,0.65,0.65


In [6]:
# select every no row, then stack the sampled yes rows on top of the no rows in a single dataframe

is_no = data["Pilot"].eq("no")
no = data[is_no]
final_data = pd.concat([yes_sample, no], axis = 0)

In [7]:
# display the combined dataframe (sampled yes rows followed by all no rows)

final_data

Unnamed: 0,Pilot,Age,Gender,PSS,JSS,MFI,GF,PF,RA,RM,MF
9,yes,18,male,0.285714,0.0,0.53,0.75,0.65,0.35,0.45,0.45
25,yes,19,female,0.625,1.0,0.5,0.45,0.55,0.7,0.4,0.4
8,yes,20,female,0.321429,0.2,0.57,0.5,0.55,0.55,0.6,0.65
21,yes,23,female,0.375,0.6,0.43,0.15,0.35,0.6,0.55,0.5
0,yes,21,male,0.464286,0.4,0.53,0.65,0.75,0.65,0.5,0.1
12,yes,19,female,0.482143,0.4,0.32,0.25,0.45,0.2,0.25,0.45
17,yes,21,male,0.642857,0.4,0.3,0.1,0.2,0.5,0.4,0.3
22,yes,21,male,0.428571,0.25,0.58,0.5,0.65,0.45,0.6,0.7
11,yes,18,male,0.392857,0.15,0.46,0.45,0.2,0.6,0.55,0.5
13,yes,26,male,0.25,0.2,0.67,0.65,0.7,0.7,0.65,0.65


In [8]:
# one-hot encode the two categorical columns; pandas replaces each of them with one
# indicator column per category (Pilot_no / Pilot_yes and Gender_female / Gender_male)

categorical_columns = ["Pilot", "Gender"]
one_hot_encoded_data = pd.get_dummies(data = final_data, columns = categorical_columns)

In [9]:
# display the dataframe after one-hot encoding the Pilot and Gender columns
one_hot_encoded_data

Unnamed: 0,Age,PSS,JSS,MFI,GF,PF,RA,RM,MF,Pilot_no,Pilot_yes,Gender_female,Gender_male
9,18,0.285714,0.0,0.53,0.75,0.65,0.35,0.45,0.45,False,True,False,True
25,19,0.625,1.0,0.5,0.45,0.55,0.7,0.4,0.4,False,True,True,False
8,20,0.321429,0.2,0.57,0.5,0.55,0.55,0.6,0.65,False,True,True,False
21,23,0.375,0.6,0.43,0.15,0.35,0.6,0.55,0.5,False,True,True,False
0,21,0.464286,0.4,0.53,0.65,0.75,0.65,0.5,0.1,False,True,False,True
12,19,0.482143,0.4,0.32,0.25,0.45,0.2,0.25,0.45,False,True,True,False
17,21,0.642857,0.4,0.3,0.1,0.2,0.5,0.4,0.3,False,True,False,True
22,21,0.428571,0.25,0.58,0.5,0.65,0.45,0.6,0.7,False,True,False,True
11,18,0.392857,0.15,0.46,0.45,0.2,0.6,0.55,0.5,False,True,False,True
13,26,0.25,0.2,0.67,0.65,0.7,0.7,0.65,0.65,False,True,False,True


In [10]:
# each categorical column produced two complementary indicator columns; keep only one of
# each pair (Pilot_yes and Gender_male) by masking out the other

redundant_columns = ["Pilot_no", "Gender_female"]
keep_mask = ~one_hot_encoded_data.columns.isin(redundant_columns)
one_hot_encoded_data = one_hot_encoded_data.loc[:, keep_mask]
one_hot_encoded_data

Unnamed: 0,Age,PSS,JSS,MFI,GF,PF,RA,RM,MF,Pilot_yes,Gender_male
9,18,0.285714,0.0,0.53,0.75,0.65,0.35,0.45,0.45,True,True
25,19,0.625,1.0,0.5,0.45,0.55,0.7,0.4,0.4,True,False
8,20,0.321429,0.2,0.57,0.5,0.55,0.55,0.6,0.65,True,False
21,23,0.375,0.6,0.43,0.15,0.35,0.6,0.55,0.5,True,False
0,21,0.464286,0.4,0.53,0.65,0.75,0.65,0.5,0.1,True,True
12,19,0.482143,0.4,0.32,0.25,0.45,0.2,0.25,0.45,True,False
17,21,0.642857,0.4,0.3,0.1,0.2,0.5,0.4,0.3,True,True
22,21,0.428571,0.25,0.58,0.5,0.65,0.45,0.6,0.7,True,True
11,18,0.392857,0.15,0.46,0.45,0.2,0.6,0.55,0.5,True,True
13,26,0.25,0.2,0.67,0.65,0.7,0.7,0.65,0.65,True,True


In [12]:
# for Pilot, True = Yes and False = No
# for Gender, True = Male and False = Female
# the renamed frame is reassigned instead of renaming in place, because one_hot_encoded_data
# was produced by a .loc selection and should not be mutated after the fact

one_hot_encoded_data = one_hot_encoded_data.rename(columns = {"Pilot_yes" : "Pilot", "Gender_male" : "Gender"})

In [13]:
# display the dataframe after the indicator columns were renamed to Pilot and Gender
one_hot_encoded_data

Unnamed: 0,Age,PSS,JSS,MFI,GF,PF,RA,RM,MF,Pilot,Gender
9,18,0.285714,0.0,0.53,0.75,0.65,0.35,0.45,0.45,True,True
25,19,0.625,1.0,0.5,0.45,0.55,0.7,0.4,0.4,True,False
8,20,0.321429,0.2,0.57,0.5,0.55,0.55,0.6,0.65,True,False
21,23,0.375,0.6,0.43,0.15,0.35,0.6,0.55,0.5,True,False
0,21,0.464286,0.4,0.53,0.65,0.75,0.65,0.5,0.1,True,True
12,19,0.482143,0.4,0.32,0.25,0.45,0.2,0.25,0.45,True,False
17,21,0.642857,0.4,0.3,0.1,0.2,0.5,0.4,0.3,True,True
22,21,0.428571,0.25,0.58,0.5,0.65,0.45,0.6,0.7,True,True
11,18,0.392857,0.15,0.46,0.45,0.2,0.6,0.55,0.5,True,True
13,26,0.25,0.2,0.67,0.65,0.7,0.7,0.65,0.65,True,True


In [14]:
# y is the target variable (the Pilot column); X holds every remaining column as features

y = one_hot_encoded_data["Pilot"]

X = one_hot_encoded_data.drop(columns = "Pilot")

In [15]:
def grid_search(X, y, cv, random_state = None):
    """Tune a decision tree with an exhaustive, cross-validated grid search.

    The search covers the split criterion, the maximum depth (3 to 14), the minimum
    number of samples required to split a node and the minimum number of samples per
    leaf. The best parameters and the best (average cross-validation) score are printed.

    Parameters
    ----------
    X : features handed to GridSearchCV.fit
    y : target variable handed to GridSearchCV.fit
    cv : cross-validation setting forwarded to GridSearchCV (the notebook passes 5 folds)
    random_state : optional seed forwarded to DecisionTreeClassifier. The default None
        keeps the previous unseeded behaviour; pass an int to make the search repeatable.

    Returns
    -------
    The best_estimator_ of the fitted GridSearchCV, to use as the decision tree.
    """
    param_grid = {"criterion" : ["gini", "entropy"], "max_depth": np.arange(3, 15),
                  "min_samples_split": [2, 3, 4, 5], "min_samples_leaf": [2, 3, 4, 5]}

    # seeding the tree removes the run-to-run variation of the selected hyperparameters
    decision_tree = DecisionTreeClassifier(random_state = random_state)

    grid_search_cv = GridSearchCV(decision_tree, param_grid, cv = cv)
    grid_search_cv.fit(X, y)

    print("Best Parameters: ", grid_search_cv.best_params_)
    print("Best Score: ", grid_search_cv.best_score_)

    # return best estimator to use for the decision tree
    return grid_search_cv.best_estimator_

In [16]:
# accuracy of the tuned decision tree, measured on the same X, y that were given to grid_search
# first print: fraction of correctly classified samples, second print: number of correct samples
# (the recorded run shows approximately 82%, i.e. 31/38 samples classified correctly)

best_estimator = grid_search(X, y, 5)
y_pred = best_estimator.predict(X)

for as_fraction in (True, False):
    print(accuracy_score(y, y_pred, normalize = as_fraction))

Best Parameters:  {'criterion': 'gini', 'max_depth': 3, 'min_samples_leaf': 3, 'min_samples_split': 5}
Best Score:  0.6035714285714285
0.8157894736842105
31


In [45]:
# accumulators for the metric values measured BEFORE the mitigation algorithm, one entry per
# iteration of the 30 iterations
# for EQUALIZED ODDS, only looking at false negative rate, equalized odds ratio, and equalized odds
# difference

female_fnr_before, male_fnr_before = [], []

eor_before, eod_before = [], []

In [46]:
# accumulators for the metric values measured AFTER the mitigation algorithm, one entry per
# iteration of the 30 iterations
# for EQUALIZED ODDS, only looking at false negative rate, equalized odds ratio, and equalized odds
# difference

female_fnr_after, male_fnr_after = [], []

eor_after, eod_after = [], []

In [47]:
# run the experiment N_ITERATIONS (30) times
# in every iteration:
#   - tune a decision tree with grid search cv and get its y_pred values
#   - get metric values before mitigation algorithm
#   - fit the mitigation algorithm and get metric values after mitigation algorithm
# metric values: false negative rate, equalized odds ratio, equalized odds difference
# mitigation algorithm: threshold optimizer (use equalized_odds for constraint)
# NOTE(review): no seed is passed to grid_search or to ThresholdOptimizer.predict in this
# cell, so the recorded values presumably differ between runs - confirm this is intended

N_ITERATIONS = 30

# sensitive feature used for every metric; True = Male, False = Female
gender = X["Gender"]

# metrics that MetricFrame evaluates separately for each gender group
metrics = {"False Negative Rate" : false_negative_rate}


def _record_fairness_metrics(y_true, y_pred, sensitive_features, female_fnr, male_fnr, eor, eod):
    """Compute the equalized odds metrics for y_pred and append them to the given lists.

    Appends the female and male false negative rate, the equalized odds ratio and the
    equalized odds difference to female_fnr, male_fnr, eor and eod respectively.
    All values are computed first and appended afterwards, so an unexpected error
    cannot leave the four lists with different lengths.

    Returns the MetricFrame that holds the per-group false negative rates.
    """
    frame = MetricFrame(metrics = metrics, y_true = y_true, y_pred = y_pred,
                        sensitive_features = sensitive_features)
    fnr_by_group = frame.by_group["False Negative Rate"]

    try:
        ratio = equalized_odds_ratio(y_true = y_true, y_pred = y_pred,
                                     sensitive_features = sensitive_features,
                                     method = "between_groups")
    except ZeroDivisionError:
        # a ratio that cannot be computed is recorded as 0.0
        ratio = 0.0

    difference = equalized_odds_difference(y_true = y_true, y_pred = y_pred,
                                           sensitive_features = sensitive_features,
                                           method = "between_groups")

    # positional lookup: assumes by_group lists False (female) first and True (male) second
    female_fnr.append(fnr_by_group.iloc[0])
    male_fnr.append(fnr_by_group.iloc[1])
    eor.append(ratio)
    eod.append(difference)

    return frame


# empty the accumulators first so that re-running this cell does not append a second
# batch of results to the lists that were created in the earlier cells
for accumulator in (female_fnr_before, male_fnr_before, eor_before, eod_before,
                    female_fnr_after, male_fnr_after, eor_after, eod_after):
    accumulator.clear()


for i in range(1, N_ITERATIONS + 1):
    # get best estimator from grid search cv
    best_estimator = grid_search(X, y, 5)

    # get y_pred values
    y_pred = best_estimator.predict(X)

    print("ITERATION: ", i)

    # metrics before mitigation
    metric_frame = _record_fairness_metrics(y, y_pred, gender, female_fnr_before, male_fnr_before,
                                            eor_before, eod_before)

    # threshold optimizer with equalized odds
    threshold_optimizer = ThresholdOptimizer(estimator = best_estimator, constraints = "equalized_odds",
                                             predict_method = "predict_proba", prefit = False)

    # fit the model and get y_pred values
    threshold_optimizer.fit(X, y, sensitive_features = gender)
    y_pred_optimized = threshold_optimizer.predict(X, sensitive_features = gender)

    # metrics after mitigation
    metric_frame_optimized = _record_fairness_metrics(y, y_pred_optimized, gender, female_fnr_after,
                                                      male_fnr_after, eor_after, eod_after)

Best Parameters:  {'criterion': 'gini', 'max_depth': 8, 'min_samples_leaf': 3, 'min_samples_split': 4}
Best Score:  0.6035714285714285
ITERATION:  1
Best Parameters:  {'criterion': 'gini', 'max_depth': 6, 'min_samples_leaf': 3, 'min_samples_split': 3}
Best Score:  0.6035714285714285
ITERATION:  2
Best Parameters:  {'criterion': 'gini', 'max_depth': 3, 'min_samples_leaf': 4, 'min_samples_split': 2}
Best Score:  0.5821428571428571
ITERATION:  3
Best Parameters:  {'criterion': 'gini', 'max_depth': 6, 'min_samples_leaf': 3, 'min_samples_split': 4}
Best Score:  0.6035714285714285
ITERATION:  4
Best Parameters:  {'criterion': 'gini', 'max_depth': 13, 'min_samples_leaf': 3, 'min_samples_split': 4}
Best Score:  0.6035714285714285
ITERATION:  5
Best Parameters:  {'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 3, 'min_samples_split': 2}
Best Score:  0.6035714285714285
ITERATION:  6
Best Parameters:  {'criterion': 'gini', 'max_depth': 12, 'min_samples_leaf': 3, 'min_samples_split': 5}
B

In [48]:
# collect the per-iteration metric lists in one dataframe, one labelled column per list

column_labels = [
    "Female False Negative Rate Before",
    "Male False Negative Rate Before",
    "Female False Negative Rate After",
    "Male False Negative Rate After",
    "Equalized Odds Ratio Before",
    "Equalized Odds Ratio After",
    "Equalized Odds Difference Before",
    "Equalized Odds Difference After",
]

# same order as column_labels
column_values = [
    female_fnr_before,
    male_fnr_before,
    female_fnr_after,
    male_fnr_after,
    eor_before,
    eor_after,
    eod_before,
    eod_after,
]

results = dict(zip(column_labels, column_values))

metric_results = pd.DataFrame(data = results)
metric_results

Unnamed: 0,Female False Negative Rate Before,Male False Negative Rate Before,Female False Negative Rate After,Male False Negative Rate After,Equalized Odds Ratio Before,Equalized Odds Ratio After,Equalized Odds Difference Before,Equalized Odds Difference After
0,0.5,0.076923,0.333333,0.307692,0.0,0.0,0.423077,0.071429
1,0.5,0.076923,0.0,0.076923,0.0,0.923077,0.423077,0.076923
2,0.5,0.384615,0.0,0.076923,0.0,0.466667,0.115385,0.228571
3,0.5,0.076923,0.333333,0.307692,0.0,0.357143,0.423077,0.128571
4,0.333333,0.230769,0.166667,0.230769,0.357143,0.714286,0.128571,0.114286
5,0.5,0.076923,0.166667,0.230769,0.0,0.923077,0.423077,0.064103
6,0.333333,0.230769,0.0,0.0,0.357143,0.714286,0.128571,0.171429
7,0.5,0.384615,0.333333,0.307692,0.0,0.7,0.115385,0.085714
8,0.333333,0.384615,0.5,0.538462,0.0,0.0,0.2,0.038462
9,0.5,0.076923,0.0,0.076923,0.0,0.595238,0.423077,0.242857


In [49]:
# average every metric over all iterations and lay the result out as a one-row dataframe

metric_means = metric_results.mean()
averages = metric_means.to_frame().T
averages

Unnamed: 0,Female False Negative Rate Before,Male False Negative Rate Before,Female False Negative Rate After,Male False Negative Rate After,Equalized Odds Ratio Before,Equalized Odds Ratio After,Equalized Odds Difference Before,Equalized Odds Difference After
0,0.422222,0.246154,0.127778,0.164103,0.095238,0.562597,0.240769,0.136093


In [50]:
# write the metric_results and averages dataframes to csv files (without the index column)

for frame, csv_name in ((metric_results, "equalized_odds_metric_results.csv"),
                        (averages, "equalized_odds_averages.csv")):
    frame.to_csv(csv_name, index = False)