In [1]:
# import pandas, scipy.stats, and numpy

import pandas as pd
import scipy.stats as stats
import numpy as np

In [7]:
# read in average data

avg_data = pd.read_csv("../Data/equalized_odds_averages.csv")

In [8]:
# display data

avg_data

Unnamed: 0,Female False Negative Rate Before,Male False Negative Rate Before,Female False Negative Rate After,Male False Negative Rate After,Equalized Odds Ratio Before,Equalized Odds Ratio After,Equalized Odds Difference Before,Equalized Odds Difference After
0,0.2,0.038095,0.027778,0.042857,0.490981,0.74881,0.176984,0.080952


In [9]:
# average false negative rate before mitigation

avg_fnr_before = ((avg_data["Female False Negative Rate Before"] + 
                   avg_data["Male False Negative Rate Before"]) / 2)[0] * 100

print("Average False Negative Rate Before Mitigation: ", avg_fnr_before)


# average false negative rate after mitigation

avg_fnr_after = ((avg_data["Female False Negative Rate After"] + 
                  avg_data["Male False Negative Rate After"]) / 2)[0] * 100

print("Average False Negative Rate After Mitigation: ", avg_fnr_after)

Average False Negative Rate Before Mitigation:  11.904761904761902
Average False Negative Rate After Mitigation:  3.5317460317460245


In [10]:
# percent improvement in the equalized odds ratio from before mitigation to the equalized odds ratio after 
# mitigation

eor_before = avg_data["Equalized Odds Ratio Before"]
eor_after = avg_data["Equalized Odds Ratio After"]

eor_percent_improvement = (eor_after - eor_before) / eor_before

In [11]:
# There is a 52.51% improvement in the equalized odds ratio from before mitigation to the equalized odds ratio  
# after mitigation

abs(eor_percent_improvement[0] * 100)

52.51285819250553

In [12]:
# percent improvement in the equalized odds difference from before mitigation to the equalized odds difference  
# after mitigation

eod_before = avg_data["Equalized Odds Difference Before"]
eod_after = avg_data["Equalized Odds Difference After"]

eod_percent_improvement = (eod_after - eod_before) / eod_before

In [13]:
# There is a 54.26% improvement in the equalized odds difference from before mitigation to the equalized odds 
# difference after mitigation

abs(eod_percent_improvement[0] * 100)

54.26008968609869

In [14]:
# read in data

data = pd.read_csv("../Data/equalized_odds_metric_results.csv")

In [15]:
# dispplay data

data

Unnamed: 0,Female False Negative Rate Before,Male False Negative Rate Before,Female False Negative Rate After,Male False Negative Rate After,Equalized Odds Ratio Before,Equalized Odds Ratio After,Equalized Odds Difference Before,Equalized Odds Difference After
0,0.333333,0.071429,0.0,0.0,0.0,0.777778,0.261905,0.047619
1,0.333333,0.214286,0.333333,0.285714,0.848485,0.0,0.119048,0.142857
2,0.166667,0.071429,0.0,0.0,0.428571,0.777778,0.095238,0.047619
3,0.166667,0.0,0.0,0.142857,0.214286,0.857143,0.261905,0.142857
4,0.166667,0.0,0.0,0.0,0.642857,0.535714,0.166667,0.309524
5,0.166667,0.0,0.0,0.0,0.642857,0.933333,0.166667,0.02381
6,0.166667,0.142857,0.0,0.142857,0.428571,0.583333,0.095238,0.142857
7,0.166667,0.0,0.0,0.0,0.583333,0.933333,0.166667,0.02381
8,0.166667,0.071429,0.0,0.0,0.428571,0.777778,0.095238,0.047619
9,0.166667,0.142857,0.166667,0.142857,0.0,0.777778,0.142857,0.047619


In [17]:
# need to check variance of the two samples before performing the two sample t-test

# if the ratio of the sample with the larger variance to the sample with the smaller variance is less than 4, 
# we can state that the variances are approximately equal and use Student’s t-test. Otherwise, we have to use 
# Welch's t-test

print("Variance of 'Equalized Odds Difference Before': ", 
      np.var(data["Equalized Odds Difference Before"]))

print("Variance of 'Equalized Odds Difference After': ", 
      np.var(data["Equalized Odds Difference After"]))

print("Variance ratio: ", np.var(data["Equalized Odds Difference After"]) / 
      np.var(data["Equalized Odds Difference Before"]))

Variance of 'Equalized Odds Difference Before':  0.0027847064751826676
Variance of 'Equalized Odds Difference After':  0.0036885865457294
Variance ratio:  1.3245871974666346


In [18]:
# two sample t-test (Student's t-test) to determine whether or not the difference between the mean of the 
# equalized odds difference before mitigation is significantly different from the mean of the equalized odds  
# difference after mitigation

# degrees of freedom = 28

eod_t_statistic, eod_p_value = stats.ttest_ind(data["Equalized Odds Difference Before"], 
                                               data["Equalized Odds Difference After"], equal_var = True)

In [20]:
# The p-value is less than our alpha value of 0.05, so the mean of the equalized odds difference before mitigation 
# is significantly different from the mean of the equalized odds difference after mitigation

# The t-statistic, which is calculated as (mean of sample 1 - mean of sample 2) / standard error, is positive, as 
# the mean of the equalized odds difference before mitigation is larger than the mean of the equalized odds 
# difference after mitigation

if eod_p_value < 0.05:
    print("p-value: ", eod_p_value)
    print("t-statistic: ", eod_t_statistic)
    print("Degrees of freedom: 28")
    print("Reject the null hypothesis: The means are significantly different.")
else:
    print("p-value: ", eod_p_value)
    print("t-statistic: ", eod_t_statistic)
    print("Degrees of freedom: 28")
    print("Accept the null hypothesis: The means are NOT significantly different.")

p-value:  2.6620224326515427e-08
t-statistic:  6.427634764359828
Degrees of freedom: 28
Reject the null hypothesis: The means are significantly different.
