In [2]:
# import pandas, scipy.stats, and numpy

import pandas as pd
import scipy.stats as stats
import numpy as np

In [3]:
# read in average data

avg_data = pd.read_csv("../Results/equalized_odds_averages.csv")

In [4]:
# display data

avg_data

Unnamed: 0,Female False Negative Rate Before,Male False Negative Rate Before,Female False Negative Rate After,Male False Negative Rate After,Equalized Odds Ratio Before,Equalized Odds Ratio After,Equalized Odds Difference Before,Equalized Odds Difference After
0,0.422222,0.246154,0.127778,0.164103,0.095238,0.562597,0.240769,0.136093


In [5]:
# average false negative rate before mitigation

avg_fnr_before = ((avg_data["Female False Negative Rate Before"] + 
                   avg_data["Male False Negative Rate Before"]) / 2)[0] * 100

print("Average False Negative Rate Before Mitigation: ", avg_fnr_before)


# average false negative rate after mitigation

avg_fnr_after = ((avg_data["Female False Negative Rate After"] + 
                  avg_data["Male False Negative Rate After"]) / 2)[0] * 100

print("Average False Negative Rate After Mitigation: ", avg_fnr_after)

Average False Negative Rate Before Mitigation:  33.41880341880342
Average False Negative Rate After Mitigation:  14.594017094017092


In [6]:
# percent improvement in the equalized odds ratio from before mitigation to the equalized odds ratio after 
# mitigation

eor_before = avg_data["Equalized Odds Ratio Before"]
eor_after = avg_data["Equalized Odds Ratio After"]

eor_percent_improvement = (eor_after - eor_before) / eor_before

In [7]:
# There is a 490.73% improvement in the equalized odds ratio from before mitigation to the equalized odds ratio  
# after mitigation

abs(eor_percent_improvement[0] * 100)

490.72649572649595

In [8]:
# percent improvement in the equalized odds difference from before mitigation to the equalized odds difference  
# after mitigation

eod_before = avg_data["Equalized Odds Difference Before"]
eod_after = avg_data["Equalized Odds Difference After"]

eod_percent_improvement = (eod_after - eod_before) / eod_before

In [9]:
# There is a 43.48% improvement in the equalized odds difference from before mitigation to the equalized odds 
# difference after mitigation

abs(eod_percent_improvement[0] * 100)

43.47583548861505

In [10]:
# read in data

data = pd.read_csv("../Results/equalized_odds_metric_results.csv")

In [11]:
# dispplay data

data

Unnamed: 0,Female False Negative Rate Before,Male False Negative Rate Before,Female False Negative Rate After,Male False Negative Rate After,Equalized Odds Ratio Before,Equalized Odds Ratio After,Equalized Odds Difference Before,Equalized Odds Difference After
0,0.5,0.076923,0.333333,0.307692,0.0,0.0,0.423077,0.071429
1,0.5,0.076923,0.0,0.076923,0.0,0.923077,0.423077,0.076923
2,0.5,0.384615,0.0,0.076923,0.0,0.466667,0.115385,0.228571
3,0.5,0.076923,0.333333,0.307692,0.0,0.357143,0.423077,0.128571
4,0.333333,0.230769,0.166667,0.230769,0.357143,0.714286,0.128571,0.114286
5,0.5,0.076923,0.166667,0.230769,0.0,0.923077,0.423077,0.064103
6,0.333333,0.230769,0.0,0.0,0.357143,0.714286,0.128571,0.171429
7,0.5,0.384615,0.333333,0.307692,0.0,0.7,0.115385,0.085714
8,0.333333,0.384615,0.5,0.538462,0.0,0.0,0.2,0.038462
9,0.5,0.076923,0.0,0.076923,0.0,0.595238,0.423077,0.242857


In [12]:
# need to check variance of the two samples before performing the two sample t-test

# if the ratio of the sample with the larger variance to the sample with the smaller variance is less than 4, 
# we can state that the variances are approximately equal and use Student’s t-test. Otherwise, we have to use 
# Welch's t-test

print("Variance of 'Equalized Odds Difference Before': ", 
      np.var(data["Equalized Odds Difference Before"]))

print("Variance of 'Equalized Odds Difference After': ", 
      np.var(data["Equalized Odds Difference After"]))

print("Variance ratio: ", np.var(data["Equalized Odds Difference Before"]) / 
      np.var(data["Equalized Odds Difference After"]))

Variance of 'Equalized Odds Difference Before':  0.017548069878839116
Variance of 'Equalized Odds Difference After':  0.005377662684622393
Variance ratio:  3.263140681734913


In [13]:
# two sample t-test (Student's t-test) to determine whether or not the difference between the mean of the 
# equalized odds difference before mitigation is significantly different from the mean of the equalized odds  
# difference after mitigation

# degrees of freedom = 30

eod_t_statistic, eod_p_value = stats.ttest_ind(data["Equalized Odds Difference Before"], 
                                               data["Equalized Odds Difference After"], equal_var = True)

In [14]:
# The p-value is less than our alpha value of 0.05, so the mean of the equalized odds difference before mitigation 
# is significantly different from the mean of the equalized odds difference after mitigation

# The t-statistic, which is calculated as (mean of sample 1 - mean of sample 2) / standard error, is positive, as 
# the mean of the equalized odds difference before mitigation is larger than the mean of the equalized odds 
# difference after mitigation

if eod_p_value < 0.05:
    print("p-value: ", eod_p_value)
    print("t-statistic: ", eod_t_statistic)
    print("Degrees of freedom: 30")
    print("Reject the null hypothesis: The means are significantly different.")
else:
    print("p-value: ", eod_p_value)
    print("t-statistic: ", eod_t_statistic)
    print("Degrees of freedom: 30")
    print("Accept the null hypothesis: The means are NOT significantly different.")

p-value:  0.00044728364158658874
t-statistic:  3.7229423394508605
Degrees of freedom: 30
Reject the null hypothesis: The means are significantly different.
