In [7]:
import scipy
import numpy as np
import pandas as pd
from scipy.stats import ttest_ind # to perform the t-test 

In [4]:
def getCSV(var : str):
    """Read one compartment's aggregate CSV for every vaccination strategy.

    `var` is the compartment name (e.g. "D" or "ICU"); the file read from each
    strategy directory is `<directory><var>.csv`. The directories come from the
    module-level path variables `novax`, `standard_path`, `agevax_path`,
    `movement_path` and `spreading_path`.

    Returns a 5-tuple of DataFrames in the order
    (no vax, standard vax, age vax, movement vax, spreading vax).
    """
    file_name = var + ".csv"
    # Order matters: callers unpack the result positionally.
    strategy_dirs = (
        novax,           # no vaccination
        standard_path,   # standard vaccination
        agevax_path,     # age-based vaccination
        movement_path,   # movement-based vaccination
        spreading_path,  # spreading-radius-based vaccination
    )
    return tuple(pd.read_csv(folder + file_name) for folder in strategy_dirs)

In [5]:
# Common root directory shared by every strategy's aggregate results.
same_root = "~/dev/RxCovea/COVID-19_Simulation/Results/61days/Aggregates/"

# One sub-directory per vaccination strategy (each path ends with a slash so
# that a file name can be appended directly).
novax = f"{same_root}NoVax/"                 # no vaccination
standard_path = f"{same_root}StandardVax/"   # standard vaccination
agevax_path = f"{same_root}AgeVax/"          # age vaccination
movement_path = f"{same_root}MovementVax/"   # movement vaccination strategy
spreading_path = f"{same_root}SpreadingVax/" # spreading radius strategy

In [102]:
# Load the "D" (Death) aggregate CSV of every strategy. getCSV returns the
# frames in the order: no vax, standard vax, age vax, movement vax, spreading vax,
# so df1 is NoVax and df5 is SpreadingVax.
df1, df2, df3, df4, df5 = getCSV("D") # get the 'Death' CSV file

# Disabled debugging scan, kept for reference: it walks row 61 of every
# 'Run <k>' column of df5 and prints the index of each NaN entry.
#for i in range(1000):
#    x = df5['Run ' + str(i+1)].iloc[61]
#    # print(x)
#    if np.isnan(x):
#        print("Index of NaN: ", i)

# Spot check of a single cell of df5 (column 'Run 750', row 32).
print(df5['Run 750'].iloc[32])

nan


In [113]:
def getNumpyArray(df: pd.DataFrame, n_runs: int = 1000, day: int = 61):
    """Collect one row of every simulation run into a 1-D float array.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with one column per run, named 'Run 1', 'Run 2', ..., 'Run <n_runs>'.
        Other columns are ignored.
    n_runs : int, default 1000
        Number of run columns to read (previously hard-coded to 1000).
    day : int, default 61
        Positional, 0-based row index to take from each run column
        (previously hard-coded to 61).

    Returns
    -------
    np.ndarray
        float64 array of length `n_runs`; element i holds the value of column
        'Run <i+1>' at row `day`. NaN entries are kept as NaN.

    Raises
    ------
    KeyError
        If one of the expected 'Run <k>' columns is missing.
    IndexError
        If `day` is outside the frame's rows.
    """
    run_columns = ['Run ' + str(i + 1) for i in range(n_runs)]
    # Select the run columns in order, take the requested row in one shot and
    # convert to float, instead of copying the values one by one in a loop.
    return df[run_columns].iloc[day].to_numpy(dtype=float)
# Turn each strategy's frame (df1..df5) into a numpy array of per-run values.
arr1, arr2, arr3, arr4, arr5 = (
    getNumpyArray(frame) for frame in (df1, df2, df3, df4, df5)
)

# Remove the NaN entries from arr5; the other arrays are left untouched.
nan_array = np.isnan(arr5)                 # True where arr5 is NaN
not_nan_array = np.logical_not(nan_array)  # True where arr5 holds a real value
arr5 = arr5[not_nan_array]

# Display the mean of the cleaned arr5.
arr5.mean()

87.45845845845845

In [183]:
def conduct_ttest(arr1, arr2, type_of_test: str):
    """Run Welch's (unequal-variance) t-test on two independent samples.

    Parameters
    ----------
    arr1, arr2 : array-like
        The two samples to compare.
    type_of_test : str
        Forwarded to `ttest_ind` as `alternative` ('two-sided', 'less' or
        'greater'). With 'less' the alternative hypothesis is that the mean
        of `arr1` is less than the mean of `arr2`.

    Returns
    -------
    tuple
        (t statistic, p-value). The p-value is already the one for the
        requested alternative; nothing is halved here.
    """
    # Evaluate the test once and return both fields; previously the identical
    # test was computed twice, once per returned element.
    result = ttest_ind(arr1, arr2, equal_var=False, alternative=type_of_test)
    return (result[0], result[1])

In [194]:
# One-sided Welch test of arr3 against arr1; the cell displays the
# (t statistic, p-value) tuple returned by conduct_ttest.
conduct_ttest(arr3, arr1, 'less') # alternative hypothesis: mean(arr3) < mean(arr1)

(-6.4856031984237825, 5.5994918200319267e-11)

In [199]:
def significanceTable(nparrays):
    """Build the square table of pairwise one-sided p-values.

    Parameters
    ----------
    nparrays : iterable of array-like
        The samples to compare. Any number of samples is accepted (the
        result used to be pre-sized for exactly five).

    Returns
    -------
    list of list of float
        `table[i][j]` is the p-value returned by
        `conduct_ttest(nparrays[i], nparrays[j], 'less')`, i.e. the row sample
        is tested for having a smaller mean than the column sample. The table
        has one row and one column per input sample.
    """
    # Materialize once so the samples can be iterated for rows and columns.
    samples = list(nparrays)
    return [
        [conduct_ttest(row_sample, col_sample, 'less')[1] for col_sample in samples]
        for row_sample in samples
    ]

In [196]:
# Pairwise one-sided p-values for arr1..arr5 as a 2-D numpy array.
# NOTE(review): this cell's execution count (196) is lower than that of the
# visible significanceTable definition (199). The saved table shown after the
# next cell places the conduct_ttest(arr3, arr1, 'less') p-value (~5.6e-11) at
# row NoVax / column AgeVax, whereas the visible definition would place it at
# row AgeVax / column NoVax. The saved output looks stale (transposed) --
# confirm with Restart & Run All.
npversion = np.array(
    significanceTable([arr1, arr2, arr3, arr4, arr5])
)
npversion.shape

(5, 5)

In [197]:
# Strategy names, in the same order as arr1..arr5.
labels = [
    'NoVax',
    'StandardVax',
    'AgeVax',
    'MovementVax',
    'SpreadingVax',
]
# Label both axes of the p-value matrix with the strategy names.
sigDF = pd.DataFrame(data=npversion, index=labels, columns=labels)
sigDF

Unnamed: 0,NoVax,StandardVax,AgeVax,MovementVax,SpreadingVax
NoVax,0.5,0.098933,5.599492e-11,0.145067,0.282056
StandardVax,0.901067,0.5,6.185106e-08,0.596517,0.772698
AgeVax,1.0,1.0,0.5,1.0,1.0
MovementVax,0.854933,0.403483,1.216626e-08,0.5,0.693241
SpreadingVax,0.717944,0.227302,4.187775e-10,0.306759,0.5


In [198]:
# Emit the LaTeX source of the p-value table, including the row labels.
print(sigDF.to_latex(index=True))

\begin{tabular}{lrrrrr}
\toprule
{} &     NoVax &  StandardVax &        AgeVax &  MovementVax &  SpreadingVax \\
\midrule
NoVax        &  0.500000 &     0.098933 &  5.599492e-11 &     0.145067 &      0.282056 \\
StandardVax  &  0.901067 &     0.500000 &  6.185106e-08 &     0.596517 &      0.772698 \\
AgeVax       &  1.000000 &     1.000000 &  5.000000e-01 &     1.000000 &      1.000000 \\
MovementVax  &  0.854933 &     0.403483 &  1.216626e-08 &     0.500000 &      0.693241 \\
SpreadingVax &  0.717944 &     0.227302 &  4.187775e-10 &     0.306759 &      0.500000 \\
\bottomrule
\end{tabular}



Compute the pairwise one-sided Welch's t-test p-values for the ICU compartment

In [209]:
# Load the "ICU" aggregate CSV of every strategy and extract the per-run values.
# Note: this rebinds arr1..arr5, nan_array and not_nan_array from the earlier cells.
icu1, icu2, icu3, icu4, icu5 = getCSV("ICU")
arr1, arr2, arr3, arr4, arr5 = (
    getNumpyArray(frame) for frame in (icu1, icu2, icu3, icu4, icu5)
)

# Remove the NaN entries from arr5; the other arrays are left untouched.
nan_array = np.isnan(arr5)
not_nan_array = np.logical_not(nan_array)
arr5 = arr5[not_nan_array]

# Pairwise p-values as a numpy array, then as a labelled DataFrame.
npversionICU = np.array(
    significanceTable([arr1, arr2, arr3, arr4, arr5])
)
ICUsig = pd.DataFrame(data=npversionICU, index=labels, columns=labels)
ICUsig  # display the p-values

Unnamed: 0,NoVax,StandardVax,AgeVax,MovementVax,SpreadingVax
NoVax,0.5,0.9671282,1.0,0.972881,0.9773413
StandardVax,0.03287175,0.5,1.0,0.5292537,0.5557047
AgeVax,1.8065680000000002e-29,8.402528e-22,0.5,7.782143e-22,7.346498000000001e-22
MovementVax,0.02711897,0.4707463,1.0,0.5,0.5265964
SpreadingVax,0.02265869,0.4442953,1.0,0.4734036,0.5


In [210]:
# Emit the LaTeX source of the ICU p-value table, including the row labels.
print(ICUsig.to_latex(index=True)) # get the latex code for the table

\begin{tabular}{lrrrrr}
\toprule
{} &         NoVax &   StandardVax &  AgeVax &   MovementVax &  SpreadingVax \\
\midrule
NoVax        &  5.000000e-01 &  9.671282e-01 &     1.0 &  9.728810e-01 &  9.773413e-01 \\
StandardVax  &  3.287175e-02 &  5.000000e-01 &     1.0 &  5.292537e-01 &  5.557047e-01 \\
AgeVax       &  1.806568e-29 &  8.402528e-22 &     0.5 &  7.782143e-22 &  7.346498e-22 \\
MovementVax  &  2.711897e-02 &  4.707463e-01 &     1.0 &  5.000000e-01 &  5.265964e-01 \\
SpreadingVax &  2.265869e-02 &  4.442953e-01 &     1.0 &  4.734036e-01 &  5.000000e-01 \\
\bottomrule
\end{tabular}

