In [1]:
# import pandas, scipy.stats, and numpy

import pandas as pd
import scipy.stats as stats
import numpy as np

In [3]:
# read in average data for gender

avg_gender_data = pd.read_csv("../Data/equalized_odds_averages_for_gender_coughvid_data.csv")

In [4]:
# display data

avg_gender_data

Unnamed: 0,Female False Negative Rate Before,Male False Negative Rate Before,Female False Negative Rate After,Male False Negative Rate After,Equalized Odds Ratio Gender Before,Equalized Odds Ratio Gender After,Equalized Odds Difference Gender Before,Equalized Odds Difference Gender After
0,0.348649,0.401642,0.261712,0.261576,0.723673,0.862529,0.06458,0.031737


In [22]:
# average false negative rate before mitigation for gender

avg_fnr_gender_before = ((avg_gender_data["Female False Negative Rate Before"] + 
                  avg_gender_data["Male False Negative Rate Before"]) / 2)[0] * 100

print("Average False Negative Rate Before Mitigation: ", avg_fnr_gender_before)


# average false negative rate after mitigation for gender

avg_fnr_gender_after = ((avg_gender_data["Female False Negative Rate After"] + 
                  avg_gender_data["Male False Negative Rate After"]) / 2)[0] * 100

print("Average False Negative Rate After Mitigation: ", avg_fnr_gender_after)

Average False Negative Rate Before Mitigation:  37.51453423867216
Average False Negative Rate After Mitigation:  26.164403319575737


In [5]:
# percent improvement in the equalized odds ratio from before mitigation for gender to the equalized odds ratio 
# after mitigation for gender

gender_eor_before = avg_gender_data["Equalized Odds Ratio Gender Before"]
gender_eor_after = avg_gender_data["Equalized Odds Ratio Gender After"]

gender_eor_percent_improvement = (gender_eor_after - gender_eor_before) / gender_eor_before

In [6]:
# There is a 19.19% improvement in the equalized odds ratio from before mitigation for gender to the equalized 
# odds ratio after mitigation for gender

abs(gender_eor_percent_improvement[0] * 100)

19.187774095931154

In [7]:
# percent improvement in the equalized odds difference from before mitigation for gender to the equalized odds 
# difference after mitigation for gender

gender_eod_before = avg_gender_data["Equalized Odds Difference Gender Before"]
gender_eod_after = avg_gender_data["Equalized Odds Difference Gender After"]

gender_eod_percent_improvement = (gender_eod_after - gender_eod_before) / gender_eod_before

In [9]:
# There is a 50.86% improvement in the equalized odds difference from before mitigation for gender to the 
# equalized odds difference after mitigation for gender

abs(gender_eod_percent_improvement[0] * 100)

50.85688590699943

In [2]:
# read in gender data

gender_data = pd.read_csv("../Data/equalized_odds_metric_results_by_gender_coughvid_data.csv")

In [11]:
# display data

gender_data

Unnamed: 0,Female False Negative Rate Before,Male False Negative Rate Before,Female False Negative Rate After,Male False Negative Rate After,Equalized Odds Ratio Gender Before,Equalized Odds Ratio Gender After,Equalized Odds Difference Gender Before,Equalized Odds Difference Gender After
0,0.412162,0.492611,0.209459,0.256158,0.863145,0.940929,0.080449,0.046698
1,0.391892,0.487685,0.27027,0.231527,0.355655,0.878661,0.095793,0.038743
2,0.195946,0.172414,0.168919,0.17734,0.553823,0.838329,0.043821,0.018941
3,0.418919,0.487685,0.27027,0.251232,0.881659,0.803347,0.068766,0.061454
4,0.25,0.221675,0.256757,0.295567,0.569038,0.803347,0.05387,0.03881
5,0.297297,0.349754,0.378378,0.344828,0.527197,0.527197,0.052456,0.033772
6,0.304054,0.344828,0.344595,0.334975,0.669456,0.71131,0.040774,0.010871
7,0.418919,0.487685,0.22973,0.256158,0.881659,0.96569,0.068766,0.026428
8,0.168919,0.187192,0.222973,0.216749,0.553823,0.937238,0.043821,0.006224
9,0.324324,0.330049,0.324324,0.339901,0.527197,0.843515,0.033772,0.015577


In [12]:
# need to check variance of the two samples before performing the two sample t-test

# if the ratio of the sample with the larger variance to the sample with the smaller variance is less than 4, we 
# can state that the variances are approximately equal and use Student’s t-test. Otherwise, we have to use Welch's 
# t-test

print("Variance of 'Equalized Odds Difference Gender Before': ", 
      np.var(gender_data["Equalized Odds Difference Gender Before"]))

print("Variance of 'Equalized Odds Difference Gender After': ", 
      np.var(gender_data["Equalized Odds Difference Gender After"]))

print("Variance ratio: ", np.var(gender_data["Equalized Odds Difference Gender Before"]) / 
      np.var(gender_data["Equalized Odds Difference Gender After"]))

Variance of 'Equalized Odds Difference Gender Before':  0.00046187036970673394
Variance of 'Equalized Odds Difference Gender After':  0.00036275322151080986
Variance ratio:  1.2732357490392967


In [3]:
# two sample t-test (Student's t-test) to determine whether or not the difference between the mean of the 
# equalized odds difference before mitigation for gender is significantly different from the mean of the equalized 
# odds difference after mitigation for gender

# degrees of freedom = 28

eod_gender_t_statistic, eod_gender_p_value = stats.ttest_ind(gender_data["Equalized Odds Difference Gender Before"], 
                                                             gender_data["Equalized Odds Difference Gender After"], 
                                                             equal_var = True)

In [4]:
# The p-value is less than our alpha value of 0.05, so the mean of the equalized odds difference before mitigation 
# for gender is significantly different from the mean of the equalized odds difference after mitigation for gender

# The t-statistic, which is calculated as (mean of sample 1 - mean of sample 2) / standard error, is positive, as 
# the mean of the equalized odds difference before mitigation for gender is larger than the mean of the equalized 
# odds difference after mitigation for gender

if eod_gender_p_value < 0.05:
    print("p-value: ", eod_gender_p_value)
    print("t-statistic: ", eod_gender_t_statistic)
    print("Degrees of freedom: 28")
    print("Reject the null hypothesis: The means are significantly different.")
else:
    print("p-value: ", eod_gender_p_value)
    print("t-statistic: ", eod_gender_t_statistic)
    print("Degrees of freedom: 28")
    print("Accept the null hypothesis: The means are NOT significantly different.")

p-value:  7.438135639369021e-08
t-statistic:  6.159126036249233
Degrees of freedom: 28
Reject the null hypothesis: The means are significantly different.


In [15]:
# read in average data for age

avg_age_data = pd.read_csv("../Data/equalized_odds_averages_for_age_coughvid_data.csv")

In [16]:
# display data

avg_age_data

Unnamed: 0,Young False Negative Rate Before,Old False Negative Rate Before,Young False Negative Rate After,Old False Negative Rate After,Equalized Odds Ratio Age Before,Equalized Odds Ratio Age After,Equalized Odds Difference Age Before,Equalized Odds Difference Age After
0,0.260398,0.574185,0.203823,0.201003,0.09016,0.866587,0.313788,0.018091


In [23]:
# average false negative rate before mitigation for age

avg_fnr_age_before = ((avg_age_data["Young False Negative Rate Before"] + 
                  avg_age_data["Old False Negative Rate Before"]) / 2)[0] * 100

print("Average False Negative Rate Before Mitigation: ", avg_fnr_age_before)


# average false negative rate after mitigation for age

avg_fnr_age_after = ((avg_age_data["Young False Negative Rate After"] + 
                  avg_age_data["Old False Negative Rate After"]) / 2)[0] * 100

print("Average False Negative Rate After Mitigation: ", avg_fnr_age_after)

Average False Negative Rate Before Mitigation:  41.72915085879838
Average False Negative Rate After Mitigation:  20.24125681175416


In [17]:
# percent improvement in the equalized odds ratio from before mitigation for age to the equalized odds ratio after 
# mitigation for age

age_eor_before = avg_age_data["Equalized Odds Ratio Age Before"]
age_eor_after = avg_age_data["Equalized Odds Ratio Age After"]

age_eor_percent_improvement = (age_eor_after - age_eor_before) / age_eor_before

In [18]:
# There is a 861.17% improvement in the equalized odds ratio from before mitigation for age to the equalized odds 
# ratio after mitigation for age

abs(age_eor_percent_improvement[0] * 100)

861.1706471723519

In [19]:
# percent improvement in the equalized odds difference from before mitigation for age to the equalized odds 
# difference after mitigation for age

age_eod_before = avg_age_data["Equalized Odds Difference Age Before"]
age_eod_after = avg_age_data["Equalized Odds Difference Age After"]

age_eod_percent_improvement = (age_eod_after - age_eod_before) / age_eod_before

In [21]:
# There is a 94.23% improvement in the equalized odds difference from before mitigation for age to the equalized 
# odds difference after mitigation for age

abs(age_eod_percent_improvement[0] * 100)

94.23455088273217

In [5]:
# read in age data

age_data = pd.read_csv("../Data/equalized_odds_metric_results_by_age_coughvid_data.csv")

In [26]:
# display data

age_data

Unnamed: 0,Young False Negative Rate Before,Old False Negative Rate Before,Young False Negative Rate After,Old False Negative Rate After,Equalized Odds Ratio Age Before,Equalized Odds Ratio Age After,Equalized Odds Difference Age Before,Equalized Odds Difference Age After
0,0.357798,0.62406,0.009174,0.022556,0.122881,0.972916,0.266262,0.015098
1,0.344037,0.616541,0.0,0.0,0.0,0.993185,0.272505,0.003799
2,0.03211,0.428571,0.43578,0.413534,0.042741,0.962068,0.396461,0.022246
3,0.357798,0.62406,0.041284,0.022556,0.122881,0.974856,0.266262,0.018728
4,0.077982,0.488722,0.522936,0.473684,0.032768,0.655367,0.41074,0.049252
5,0.206422,0.526316,0.504587,0.503759,0.061441,0.508621,0.319894,0.005552
6,0.201835,0.533835,0.545872,0.511278,0.061441,0.929217,0.332,0.034593
7,0.357798,0.62406,0.013761,0.022556,0.122881,0.974856,0.266262,0.013637
8,0.022936,0.43609,0.385321,0.413534,0.042741,0.655367,0.413154,0.028213
9,0.206422,0.526316,0.477064,0.511278,0.061441,0.655367,0.319894,0.034214


In [31]:
# need to check variance of the two samples before performing the two sample t-test

# if the ratio of the sample with the larger variance to the sample with the smaller variance is less than 4, we 
# can state that the variances are approximately equal and use Student’s t-test. Otherwise, we have to use Welch's 
# t-test

print("Variance of 'Equalized Odds Difference Age Before': ", 
      np.var(age_data["Equalized Odds Difference Age Before"]))

print("Variance of 'Equalized Odds Difference Age After': ", 
      np.var(age_data["Equalized Odds Difference Age After"]))

print("Variance ratio: ", np.var(age_data["Equalized Odds Difference Age Before"]) / 
      np.var(age_data["Equalized Odds Difference Age After"]))

Variance of 'Equalized Odds Difference Age Before':  0.002953127563577196
Variance of 'Equalized Odds Difference Age After':  0.00010296977838596657
Variance ratio:  28.679556369518888


In [6]:
# two sample t-test (Welch's t-test) to determine whether or not the difference between the mean of the equalized 
# odds difference before mitigation for age is significantly different from the mean of the equalized odds 
# difference after mitigation for age

# degrees of freedom = 28

eod_age_t_statistic, eod_age_p_value = stats.ttest_ind(age_data["Equalized Odds Difference Age Before"], 
                                                       age_data["Equalized Odds Difference Age After"], 
                                                       equal_var = False)

In [7]:
# The p-value is less than our alpha value of 0.05, so the mean of the equalized odds difference before mitigation 
# for age is significantly different from the mean of the equalized odds difference after mitigation for age

# The t-statistic, which is calculated as (mean of sample 1 - mean of sample 2) / standard error, is positive, as 
# the mean of the equalized odds difference before mitigation for age is larger than the mean of the equalized 
# odds difference after mitigation for age

if eod_age_p_value < 0.05:
    print("p-value: ", eod_age_p_value)
    print("t-statistic: ", eod_age_t_statistic)
    print("Degrees of freedom: 28")
    print("Reject the null hypothesis: The means are significantly different.")
else:
    print("p-value: ", eod_age_p_value)
    print("t-statistic: ", eod_age_t_statistic)
    print("Degrees of freedom: 28")
    print("Accept the null hypothesis: The means are NOT significantly different.")

p-value:  5.967268777970636e-24
t-statistic:  28.804595389875875
Degrees of freedom: 28
Reject the null hypothesis: The means are significantly different.
