In [9]:
#import necessary libraries
import numpy as np
import pandas as pd
from scipy import stats

In [10]:
# Read the per-state CSV files into DataFrames.
georgia, indiana = (
    pd.read_csv(csv_name)
    for csv_name in ("georgia_cases_deaths.csv", "indiana_cases_deaths.csv")
)

# Extract the new-case and new-death columns as NumPy arrays.
georgia_cases, georgia_deaths = (
    georgia[column].to_numpy() for column in ("new_case", "new_death")
)

# Only Indiana's case column is cast to float before the conversion.
indiana_cases = indiana["new_case"].astype(float).to_numpy()
indiana_deaths = indiana["new_death"].to_numpy()


In [11]:
# Feb'21 and March'21 windows of the deaths and cases arrays for each state,
# selected by row position.
# NOTE(review): slice ends are exclusive, so row 363 (Georgia) and row 357
# (Indiana) fall in neither month's window - confirm against the dates in the
# CSV files that this gap is intended.

# Georgia
georgia_deaths_feb, georgia_cases_feb = georgia_deaths[343:363], georgia_cases[343:363]
georgia_deaths_march, georgia_cases_march = georgia_deaths[364:393], georgia_cases[364:393]

# Indiana
indiana_deaths_feb, indiana_cases_feb = indiana_deaths[331:357], indiana_cases[331:357]
indiana_deaths_march, indiana_cases_march = indiana_deaths[358:388], indiana_cases[358:388]

In [12]:
# Two sided Walds Test for single sample

def walds_single_sample(sample,prediction,hypothesis,z_critical=1.96):
    """Run a two-sided, one-sample Wald's test and print the statistic and verdict.

    The estimator is the sample mean (the MLE of a Poisson mean) and its
    estimated standard error is sqrt(mean / n).

    Parameters
    ----------
    sample : array-like of numbers
        Observations under test; must be non-empty with a positive mean.
    prediction : float
        Value of the mean under Ho.
    hypothesis : str
        Description of Ho, echoed in the printed verdict.
    z_critical : float, optional
        Two-sided critical value z_{alpha/2}. The default 1.96 corresponds to
        alpha = 0.05.

    Raises
    ------
    ValueError
        If the sample is empty or its mean is not positive; the standard error
        would be zero or undefined and the statistic would be NaN/inf.
    """
    n = len(sample)
    if n == 0:
        raise ValueError("sample must contain at least one observation")

    # Using MLE of Poisson which is the sample mean
    theta_estimate = np.mean(sample)

    # `not (x > 0)` also rejects NaN, which would otherwise fall through the
    # comparison below and be reported as "Accept Ho".
    if not theta_estimate > 0:
        raise ValueError("sample mean must be positive to estimate the standard error")

    #Computing the se(theta_estimate)
    se = np.sqrt(theta_estimate/n)

    # Computing walds statistic
    W = (theta_estimate - prediction)/se

    print("Walds Statistic",W)

    # Compare |W| against z_{alpha/2} to accept or reject
    if(abs(W) > z_critical):
        print("Reject Ho:",hypothesis)
    else:
        print("Accept Ho:",hypothesis)
    
# One-sample Wald's test of the March'21 data against the Feb'21 mean,
# for cases and deaths in each state.
print("For Georgia:")
walds_single_sample(
    sample=georgia_cases_march,
    prediction=np.mean(georgia_cases_feb),
    hypothesis="Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Georgia",
)
print("\n")
walds_single_sample(
    sample=georgia_deaths_march,
    prediction=np.mean(georgia_deaths_feb),
    hypothesis="Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Georgia",
)
print("\n")
print("For Indiana:")
walds_single_sample(
    sample=indiana_cases_march,
    prediction=np.mean(indiana_cases_feb),
    hypothesis="Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Indiana",
)
print("\n")
walds_single_sample(
    sample=indiana_deaths_march,
    prediction=np.mean(indiana_deaths_feb),
    hypothesis="Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Indiana",
)

For Georgia:
Walds Statistic -197.83594450021198
Reject Ho: Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Georgia


Walds Statistic -17.634729199836478
Reject Ho: Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Georgia


For Indiana:
Walds Statistic -89.04326703459927
Reject Ho: Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Indiana


Walds Statistic -32.04809414405769
Reject Ho: Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Indiana


In [13]:
# Two sided Z test for single sample

def z_test_single_sample(sample,prediction,entire_sample,hypothesis,z_critical=1.96):
    """Run a two-sided, one-sample Z test and print the statistic and verdict.

    The standard deviation of `entire_sample` (population form, ddof=0) is
    used as the known sigma.

    Parameters
    ----------
    sample : array-like of numbers
        Observations under test; must be non-empty.
    prediction : float
        Value of the mean under Ho.
    entire_sample : array-like of numbers
        Data from which sigma is computed; must have non-zero spread.
    hypothesis : str
        Description of Ho, echoed in the printed verdict.
    z_critical : float, optional
        Two-sided critical value z_{alpha/2}. The default 1.96 corresponds to
        alpha = 0.05.

    Raises
    ------
    ValueError
        If `sample` is empty or sigma is not positive; the statistic would be
        NaN/inf.
    """
    n = len(sample)
    if n == 0:
        raise ValueError("sample must contain at least one observation")

    #Computing the mean of the sample
    X_bar = np.mean(sample)

    # Using the std deviation of the entire sample
    sigma = np.std(entire_sample)

    # `not (x > 0)` also rejects NaN (e.g. an empty entire_sample), which would
    # otherwise be reported as "Accept Ho".
    if not sigma > 0:
        raise ValueError("entire_sample must have a positive standard deviation")

    # Computing the Z statistic
    Z = (X_bar - prediction)/(sigma/np.sqrt(n))

    print("Z statistic",Z)

    # Compare |Z| against z_{alpha/2} to accept or reject
    if(abs(Z) > z_critical):
        print("Reject Ho:",hypothesis)
    else:
        print("Accept Ho:",hypothesis)
    
    
# One-sample Z test of the March'21 data against the Feb'21 mean, with sigma
# taken from each state's full series.
print("For Georgia:")
z_test_single_sample(
    sample=georgia_cases_march,
    prediction=np.mean(georgia_cases_feb),
    entire_sample=georgia_cases,
    hypothesis="Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Georgia",
)
print("\n")
z_test_single_sample(
    sample=georgia_deaths_march,
    prediction=np.mean(georgia_deaths_feb),
    entire_sample=georgia_deaths,
    hypothesis="Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Georgia",
)
print("\n")
print("For Indiana:")
z_test_single_sample(
    sample=indiana_cases_march,
    prediction=np.mean(indiana_cases_feb),
    entire_sample=indiana_cases,
    hypothesis="Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Indiana",
)
print("\n")
z_test_single_sample(
    sample=indiana_deaths_march,
    prediction=np.mean(indiana_deaths_feb),
    entire_sample=indiana_deaths,
    hypothesis="Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Indiana",
)

For Georgia:
Z statistic -4.5129098617929255
Reject Ho: Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Georgia


Z statistic -3.813521351104936
Reject Ho: Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Georgia


For Indiana:
Z statistic -1.8692838881167924
Accept Ho: Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Indiana


Z statistic -6.343576828280494
Reject Ho: Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Indiana


In [14]:
#Two sided T test for single sample

def t_test_single_sample(sample,prediction,tvalue,hypothesis):
    """Run a two-sided, one-sample T test and print the statistic and verdict.

    Parameters
    ----------
    sample : array-like of numbers
        Observations under test; needs at least two values with non-zero
        spread so the corrected standard deviation is defined and positive.
    prediction : float
        Value of the mean under Ho.
    tvalue : float
        Critical value t_{n-1, alpha/2}, where n = len(sample).
    hypothesis : str
        Description of Ho, echoed in the printed verdict.

    Raises
    ------
    ValueError
        If the sample has fewer than two observations or zero variance; the
        statistic would be NaN/inf.
    """
    n = len(sample)
    if n < 2:
        raise ValueError("sample must contain at least two observations")

    #Computing the mean of the sample
    X_bar = np.mean(sample)

    #Computing the corrected std deviation of the sample
    s = np.std(sample,ddof=1)

    # `not (x > 0)` also rejects NaN, which would otherwise be reported as
    # "Accept Ho".
    if not s > 0:
        raise ValueError("sample must have a positive standard deviation")

    # Computing the T statistic
    T = (X_bar - prediction)/(s/np.sqrt(n))
    print("T statistic",T)

    # Using the value of tn-1,alpha/2 to accept or reject
    if(abs(T) > tvalue):
        print("Reject Ho:",hypothesis)
    else:
        print("Accept Ho:",hypothesis)
        
        
# One-sample T test of the March'21 data against the Feb'21 mean.
# The critical values are passed per state (2.048407 for Georgia, 2.04523 for Indiana).
print("For Georgia:")
t_test_single_sample(
    sample=georgia_cases_march,
    prediction=np.mean(georgia_cases_feb),
    tvalue=2.048407,
    hypothesis="Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Georgia",
)
print("\n")
t_test_single_sample(
    sample=georgia_deaths_march,
    prediction=np.mean(georgia_deaths_feb),
    tvalue=2.048407,
    hypothesis="Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Georgia",
)
print("\n")
print("For Indiana:")
t_test_single_sample(
    sample=indiana_cases_march,
    prediction=np.mean(indiana_cases_feb),
    tvalue=2.04523,
    hypothesis="Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Indiana",
)
print("\n")
t_test_single_sample(
    sample=indiana_deaths_march,
    prediction=np.mean(indiana_deaths_feb),
    tvalue=2.04523,
    hypothesis="Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Indiana",
)

For Georgia:
T statistic -14.554543830491523
Reject Ho: Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Georgia


T statistic -3.7751383617947973
Reject Ho: Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Georgia


For Indiana:
T statistic -12.81434557977271
Reject Ho: Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Indiana


T statistic -14.885660164497072
Reject Ho: Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Indiana


In [15]:
# Two sided Two sample walds test

def walds_two_sample_test(sample1,sample2,hypothesis,z_critical=1.96):
    """Run a two-sided, two-sample Wald's test and print the statistic and verdict.

    Each sample's mean is used as the MLE of a Poisson mean, and the standard
    error of the difference is sqrt(mean1/n + mean2/m).

    Parameters
    ----------
    sample1, sample2 : array-like of numbers
        The two groups being compared; both must be non-empty.
    hypothesis : str
        Description of Ho, echoed in the printed verdict.
    z_critical : float, optional
        Two-sided critical value z_{alpha/2}. The default 1.96 corresponds to
        alpha = 0.05.

    Raises
    ------
    ValueError
        If either sample is empty or the estimated variance of the difference
        is not positive; the statistic would be NaN/inf.
    """
    n = len(sample1)
    m = len(sample2)
    if n == 0 or m == 0:
        raise ValueError("both samples must contain at least one observation")

    # Using MLE of Poisson which is the sample mean
    theta_1  = np.mean(sample1)
    theta_2  = np.mean(sample2)

    #Calculating the se(theta_1 - theta_2)
    variance = (theta_1/n)+(theta_2/m)

    # `not (x > 0)` also rejects NaN, which would otherwise be reported as
    # "Accept Ho".
    if not variance > 0:
        raise ValueError("estimated variance of the difference must be positive")
    se = np.sqrt(variance)

    #Compute the Wald's Statistic
    W = (theta_1 - theta_2)/se
    print("Walds Statistic: ",W)

    # Compare |W| against z_{alpha/2} to accept or reject
    if(abs(W) > z_critical):
        print("Reject Ho:",hypothesis)
    else:
        print("Accept Ho:",hypothesis)
    
    
# Two-sample Wald's test comparing the March'21 and Feb'21 data,
# for cases and deaths in each state.
print("For Georgia:")
walds_two_sample_test(
    sample1=georgia_cases_march,
    sample2=georgia_cases_feb,
    hypothesis="Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Georgia",
)
print("\n")
walds_two_sample_test(
    sample1=georgia_deaths_march,
    sample2=georgia_deaths_feb,
    hypothesis="Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Georgia",
)
print("\n")
print("For Indiana:")
walds_two_sample_test(
    sample1=indiana_cases_march,
    sample2=indiana_cases_feb,
    hypothesis="Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Indiana",
)
print("\n")
walds_two_sample_test(
    sample1=indiana_deaths_march,
    sample2=indiana_deaths_feb,
    hypothesis="Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Indiana",
)

For Georgia:
Walds Statistic:  -102.46616574287205
Reject Ho: Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Georgia


Walds Statistic:  -10.06229058540575
Reject Ho: Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Georgia


For Indiana:
Walds Statistic:  -53.08793994967661
Reject Ho: Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Indiana


Walds Statistic:  -16.324483293500613
Reject Ho: Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Indiana


In [16]:
# Two sided unpaired T test

def t_test_unpaired(sample1,sample2,tvalue,hypothesis):
    """Run a two-sided, unpaired T test and print the statistic and verdict.

    The statistic is (mean1 - mean2) / sqrt(s1^2/n + s2^2/m), using the
    corrected (ddof=1) standard deviation of each sample.

    Parameters
    ----------
    sample1, sample2 : array-like of numbers
        The two groups being compared; each needs at least two observations.
    tvalue : float
        Critical value t_{n+m-2, alpha/2}, where n = len(sample1) and
        m = len(sample2).
    hypothesis : str
        Description of Ho, echoed in the printed verdict.

    Raises
    ------
    ValueError
        If either sample has fewer than two observations or the estimated
        variance of the difference is not positive; the statistic would be
        NaN/inf.
    """
    n = len(sample1)
    m = len(sample2)
    if n < 2 or m < 2:
        raise ValueError("both samples must contain at least two observations")

    # Computing the means of sample1 and sample2
    X_bar  = np.mean(sample1)
    Y_bar = np.mean(sample2)

    # Computing the corrected std deviation of sample1 and sample2
    s1 = np.std(sample1,ddof=1)
    s2 = np.std(sample2,ddof=1)

    variance = ((s1**2)/n)+((s2**2)/m)

    # `not (x > 0)` also rejects NaN, which would otherwise be reported as
    # "Accept Ho".
    if not variance > 0:
        raise ValueError("estimated variance of the difference must be positive")

    # Compute the T statistic
    T = (X_bar - Y_bar)/(np.sqrt(variance))
    print("T statistic",T)

    # Using the value of tn+m-2,0.025 where n and m is the number of datapoints in sample1 and sample2
    # to accept or reject
    if(abs(T) > tvalue):
        print("Reject Ho:",hypothesis)
    else:
        print("Accept Ho:",hypothesis)

        
def _unpaired_t_critical(sample1, sample2, alpha=0.05):
    """Return the two-sided critical value t_{n+m-2, alpha/2} for two samples."""
    return stats.t.ppf(1 - alpha/2, len(sample1) + len(sample2) - 2)


# The unpaired test needs t_{n+m-2, 0.025}. It is derived from the actual
# sample sizes rather than hard-coded, because the one-sample critical values
# (n-1 degrees of freedom) are not valid for the two-sample comparison.
print("For Georgia:")
t_test_unpaired(
    georgia_cases_march,
    georgia_cases_feb,
    _unpaired_t_critical(georgia_cases_march, georgia_cases_feb),
    "Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Georgia",
)
print("\n")
t_test_unpaired(
    georgia_deaths_march,
    georgia_deaths_feb,
    _unpaired_t_critical(georgia_deaths_march, georgia_deaths_feb),
    "Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Georgia",
)
print("\n")
print("For Indiana:")
t_test_unpaired(
    indiana_cases_march,
    indiana_cases_feb,
    _unpaired_t_critical(indiana_cases_march, indiana_cases_feb),
    "Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Indiana",
)
print("\n")
t_test_unpaired(
    indiana_deaths_march,
    indiana_deaths_feb,
    _unpaired_t_critical(indiana_deaths_march, indiana_deaths_feb),
    "Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Indiana",
)

For Georgia:
T statistic -6.670734804444858
Reject Ho: Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Georgia


T statistic -1.9788875675958115
Accept Ho: Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Georgia


For Indiana:
T statistic -4.414811766678156
Reject Ho: Mean of the number of COVID19 cases are same for Feb’21 and March’21 in Indiana


T statistic -5.776913802549136
Reject Ho: Mean of the number of COVID19 deaths are same for Feb’21 and March’21 in Indiana


## Usage of the Tests

Wald's test is applicable to both states, and to both cases and deaths, because its only requirement is an asymptotically normal estimator. We model the daily counts as Poisson and use the MLE of the Poisson mean (the sample mean), which is asymptotically normal, so the test is valid under that modelling assumption.

The Z test is applicable when the sample size is greater than 30. In our case the March sample has 29 datapoints for Georgia and 30 datapoints for Indiana, so neither sample strictly exceeds 30. The Z test also requires the true standard deviation to be known; we approximate it with the standard deviation of the entire dataset, which we can calculate. However, since the dataset is constantly updated, this standard deviation keeps changing, making the Z test impractical for a real-world scenario such as this.

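The T test does not require a large sample or a known population standard deviation, but it does assume that the data are approximately normally distributed. Provided that assumption is reasonable for the daily counts, it is a valid test for the purposes of hypothesis testing on our dataset.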