In [1]:
# import pandas, scipy.stats, and numpy

import pandas as pd
import scipy.stats as stats
import numpy as np

In [2]:
# read in the average demographic parity data (the path is relative, so it is resolved against the
# current working directory)

avg_data = pd.read_csv("../Model Data/copd_covid_demographic_parity_averages.csv")

In [3]:
# display the average data: selection rates by sex plus the demographic parity ratio and difference,
# each before and after mitigation

avg_data

Unnamed: 0,Female Selection Rate Before,Male Selection Rate Before,Female Selection Rate After,Male Selection Rate After,Demographic Parity Ratio Before,Demographic Parity Ratio After,Demographic Parity Difference Before,Demographic Parity Difference After
0,0.676353,0.691465,0.714245,0.714994,0.926181,0.986423,0.048477,0.009002


In [4]:
# Mean of the female and male selection rates, expressed as a percentage, before and after mitigation.
# `[0]` picks the row labelled 0 of the averages table.

# before mitigation
female_sr_before = avg_data["Female Selection Rate Before"]
male_sr_before = avg_data["Male Selection Rate Before"]
avg_sr_before = ((female_sr_before + male_sr_before) / 2)[0] * 100

print("Average Selection Rate Before Mitigation: ", avg_sr_before)


# after mitigation
female_sr_after = avg_data["Female Selection Rate After"]
male_sr_after = avg_data["Male Selection Rate After"]
avg_sr_after = ((female_sr_after + male_sr_after) / 2)[0] * 100

print("Average Selection Rate After Mitigation: ", avg_sr_after)

Average Selection Rate Before Mitigation:  68.39090487879416
Average Selection Rate After Mitigation:  71.46196236161633


In [5]:
# Relative change in the demographic parity ratio from before mitigation to after mitigation.
# The result is kept as a fraction here; it is multiplied by 100 where it is displayed.

dpr_before, dpr_after = (
    avg_data["Demographic Parity Ratio Before"],
    avg_data["Demographic Parity Ratio After"],
)

# (after - before) / before, written with the explicit Series arithmetic methods
dpr_change = dpr_after.sub(dpr_before)
dpr_percent_improvement = dpr_change.div(dpr_before)

In [6]:
# Convert the fractional change to a percentage and report its magnitude: there is a 6.50% improvement
# in the demographic parity ratio from before mitigation to after mitigation

abs(dpr_percent_improvement[0] * 100)

6.504280429699478

In [14]:
# Relative change in the demographic parity difference from before mitigation to after mitigation.
# The result is kept as a fraction here; it is multiplied by 100 where it is displayed.

dpd_before, dpd_after = (
    avg_data["Demographic Parity Difference Before"],
    avg_data["Demographic Parity Difference After"],
)

# (after - before) / before, written with the explicit Series arithmetic methods
dpd_change = dpd_after.sub(dpd_before)
dpd_percent_improvement = dpd_change.div(dpd_before)

In [15]:
# There is an 81.43% improvement in the demographic parity difference from before mitigation to after
# mitigation. The raw change is negative (the difference shrank), so abs() is used to report its magnitude.

abs(dpd_percent_improvement[0] * 100)

81.42958820538901

In [16]:
# read in the demographic parity metric results (the path is relative, so it is resolved against the
# current working directory)

data = pd.read_csv("../Model Data/copd_covid_demographic_parity_metric_results.csv")

In [17]:
# display the metric results table (same columns as the averages table, one row per index)

data

Unnamed: 0,Female Selection Rate Before,Male Selection Rate Before,Female Selection Rate After,Male Selection Rate After,Demographic Parity Ratio Before,Demographic Parity Ratio After,Demographic Parity Difference Before,Demographic Parity Difference After
0,0.854701,0.84083,0.726496,0.695502,0.983772,0.957338,0.01387,0.030994
1,0.752137,0.740484,0.717949,0.726644,0.984508,0.988034,0.011652,0.008695
2,0.478632,0.633218,0.726496,0.709343,0.755873,0.976389,0.154586,0.017153
3,0.589744,0.529412,0.777778,0.778547,0.897698,0.999012,0.060332,0.000769
4,0.82906,0.743945,0.735043,0.740484,0.897335,0.992651,0.085115,0.005442
5,0.547009,0.50519,0.641026,0.650519,0.923551,0.985406,0.041818,0.009493
6,0.888889,0.768166,0.820513,0.820069,0.864187,0.999459,0.120723,0.000444
7,0.692308,0.678201,0.589744,0.595156,0.979623,0.990906,0.014107,0.005412
8,0.888889,0.875433,0.871795,0.882353,0.984862,0.988034,0.013456,0.010558
9,0.735043,0.792388,0.692308,0.695502,0.92763,0.995408,0.057345,0.003194


In [19]:
# Check the variances of the two samples before performing the two-sample t-test.
#
# Rule of thumb: if the ratio of the larger variance to the smaller variance is less than 4, the variances
# can be treated as approximately equal and Student's t-test is used. Otherwise Welch's t-test is used.

# Compute each variance once and reuse it below (np.var defaults to ddof=0, i.e. the population variance).
var_dpd_before = np.var(data["Demographic Parity Difference Before"])
var_dpd_after = np.var(data["Demographic Parity Difference After"])

print("Variance of 'Demographic Parity Difference Before': ", var_dpd_before)

print("Variance of 'Demographic Parity Difference After': ", var_dpd_after)

# Always divide the larger variance by the smaller one, as the rule above requires. A fixed
# "before / after" ratio would drop below 1 (and wrongly look "approximately equal") whenever the
# after-mitigation sample happened to be the more variable one.
print("Variance ratio: ", max(var_dpd_before, var_dpd_after) / min(var_dpd_before, var_dpd_after))

Variance of 'Demographic Parity Difference Before':  0.0018944470711586897
Variance of 'Demographic Parity Difference After':  5.887558516781068e-05
Variance ratio:  32.177125131903566


In [20]:
# Welch's two-sample t-test (equal_var=False): is the mean demographic parity difference before
# mitigation significantly different from the mean demographic parity difference after mitigation?
#
# Result: degrees of freedom = 30.80. The p-value is less than our alpha value of 0.05, so the mean of
# the demographic parity difference before mitigation is significantly different from the mean after
# mitigation.
#
# NOTE(review): both samples are columns of the same table. If each row is one run measured before
# and after mitigation, a paired test may be more appropriate than an independent-samples test - confirm.

stats.ttest_ind(
    data["Demographic Parity Difference Before"],
    data["Demographic Parity Difference After"],
    equal_var=False,
)

TtestResult(statistic=4.809873484526905, pvalue=3.754943500353525e-05, df=30.800783477468674)