In [851]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import collections

from scipy.stats import poisson, binom, geom

states_df = pd.read_csv("22_cleaned.csv")

In [852]:
#Obtaining the data for TN and TX confirmed cases on a daily basis dated from 10/1/2020 to 12/28/2020
TN_cases = np.array(states_df['TN confirmed'])[251:312]
TX_cases = np.array(states_df['TX confirmed'])[251:312]

In [853]:
#Obtaining the data for TN and TX confirmed cases on a daily basis dated from 10/1/2020 to 12/28/2020
TN_deaths = np.array(states_df['TN deaths'])[251:312]
TX_deaths = np.array(states_df['TX deaths'])[251:312]

**For TN confirmed cases and TX confirmed cases:**

**Null hypothesis (H0):**

Distribution of TN's daily confirmed cases equals distribution of TX's daily confirmed cases from Oct-Dec 2020.

**Alternate hypothesis(H1):**

Distribution of TN's daily confirmed cases not equals distribution of TX's daily confirmed cases from Oct-Dec 2020.




**For TN deaths and TX deaths:**

**Null hypothesis (H0):**

Distribution of TN's daily deaths equals distribution of TX's daily deaths from Oct-Dec 2020.

**Alternate hypothesis(H1):**

Distribution of TN's daily deaths not equals distribution of TX's daily deaths from Oct-Dec 2020.

**PERMUTATION** **TEST**


In [854]:
def permutation_test(TN, TX):

  c = 0.05

  def permutation_sample(data1, data2):
      data = np.concatenate((data1, data2))   
      permuted_data = np.random.permutation(data)   
      perm_sample_1 = permuted_data[:len(data1)]
      perm_sample_2 = permuted_data[len(data1):]   
      return perm_sample_1, perm_sample_2

  def draw_perm_reps(data_1, data_2, size):
      perm_replicates = np.empty(size)    
      for i in range(size):
          perm_sample_1, perm_sample_2 = permutation_sample(data_1, data_2)        
          perm_replicates[i] = abs(np.mean(perm_sample_1) - np.mean(perm_sample_2))    
      return perm_replicates


  T_obs_b = abs(np.mean(TN) - np.mean(TX))
  T_obs_b

  print("T-Observed is:", T_obs_b)

  #Using 1000 permutations
  for n in [1000]:
      perm_replicates = draw_perm_reps(TN,TX,n)
      p_b = np.sum(perm_replicates >= T_obs_b)/len(perm_replicates)
      print("n:",n," p-value is:", p_b)
      

  if(p_b <= c):
          print("Hence, we REJECT the Null Hypothesis")
  else:
          print("Hence, we ACCEPT the Null Hypothesis")        


**Permutation Test - TN confirmed cases and TX confirmed cases**

In [855]:
permutation_test(TN_cases,TX_cases)

T-Observed is: 4556.377049180328
n: 1000  p-value is: 0.0
Hence, we REJECT the Null Hypothesis


*Since the obtained p-value is less than the threshold value of 0.05, we REJECT the Null Hypothesis.*

**Permutation Test - TN confirmed deaths and TX confirmed deaths**

In [856]:
permutation_test(TN_deaths,TX_deaths)

T-Observed is: 66.19672131147541
n: 1000  p-value is: 0.0
Hence, we REJECT the Null Hypothesis


*Since the obtained p-value is less than the threshold value of 0.05, we REJECT the Null Hypothesis.*

**2 SAMPLE K-S TEST**

In [857]:
# Getting the e-CDF
def eCDF(A):
    n = len(A)
    Sort = sorted(A)
    delta = .1
    A = []
    B = [0]
    for i in range(0,n):
        A = A + [Sort[i]]
        B = B + [B[len(B)-1]+(1/n)]
    B = B + [1]
        
    return A,B

In [858]:
def KS_2_sample(A1,B1, A2,B2):
    matrix = np.zeros((len(A1),6))
    max_total = -1
    for i in range(len(matrix)):
        matrix[i,0] = B1[i]
        matrix[i,1] = B1[i+1]
        index1 = [idx for idx, val in enumerate(A2) if val >= A1[i]]
        index2 = [idx for idx, val in enumerate(A2) if val < A1[i]]
        if index1 == []:
            matrix[i,3] = 1
        else :    
            matrix[i,3] = Y2[index1[0]]
        if index2 == []:
            matrix[i,2] = 0
        else:
            matrix[i,2] = Y2[index2[-1]]
        
        matrix[i,4] = abs( matrix[i,0] - matrix[i,2])
        matrix[i,5] = abs(matrix[i,1] - matrix[i,3])
        cmax = max(matrix[i,4], matrix[i,5])
        if cmax > max_total:
            max_total = cmax
            a1_max = A1[i]
            a1_max = matrix[i,0]
            y2_max = matrix[i,2]
    
    return max_total

**2 Sample K-S Test - TN confirmed cases and TX confirmed cases**

In [859]:
TN_cases = np.array(states_df['TN confirmed'])[251:312]
TX_cases = np.array(states_df['TX confirmed'])[251:312]

#Computing eCDF for TN confirmed cases and TX confirmed cases
A1, B1 = eCDF(TN_cases)
A2, B2 = eCDF(TX_cases)

KS_val = KS_2_sample(A1,B1, A2,B2)

print('KS statistic : ', KS_val)

c=0.05

if(KS_val > c):

  print("Hence, we REJECT the Null Hypothesis")
else:
  print("Hence, we ACCEPT the Null Hypothesis")  

KS statistic :  0.7049180327868858
Hence, we REJECT the Null Hypothesis


*Since the obtained KS statistic value is greater than the critical value of 0.05, we REJECT the Null Hypothesis.*

**2 Sample K-S Test - TN deaths and TX deaths**

In [860]:
TN_deaths = np.array(states_df['TN deaths'])[251:312]
TX_deaths = np.array(states_df['TX deaths'])[251:312]

#Computing eCDF for TN deaths and TX deaths
A1, B1 = eCDF(TN_deaths)
A2, B2 = eCDF(TX_deaths)

KS_val = KS_2_sample(A1,B1, A2,B2)

print('KS statistic : ', KS_val)

c=0.05

if(KS_val > c):

  print("Hence, we REJECT the Null Hypothesis")
else:
  print("Hence, we ACCEPT the Null Hypothesis")  

KS statistic :  0.6557377049180335
Hence, we REJECT the Null Hypothesis


*Since the obtained KS statistic value is greater than the critical value of 0.05, we REJECT the Null Hypothesis.*

**1 SAMPLE K-S TEST**

In [861]:
def KS_1_sample(A1,B1, CDF, parameter):
    max_total = -1
        
    matrix = np.zeros((len(A1),4))
    for i in range(len(matrix)):
        matrix[i,0] = B1[i]
        matrix[i,1] = B1[i+1]
        Fx = CDF(parameter, A1[i])
        matrix[i,2] = abs(matrix[i,0] - Fx)
        matrix[i,3] = abs(matrix[i,1] - Fx)
        cmax = max(matrix[i,2], matrix[i,3])
        if cmax > max_total:
            max_total = cmax
        
        
    return max_total

In [862]:
# Obtaining eCDF for TX confirmed cases
test_A, test_B = eCDF(TX_cases)

In [863]:
# Obtaining eCDF for TX deaths
test_P, test_Q = eCDF(TX_deaths)

**1 SAMPLE K-S TEST - POISSON DISTRIBUTION**



Obtaining parameters for Poisson distribution 

In [864]:
def Poisson_MME(X):
    poiss_mme = np.mean(X)
    return poiss_mme

def Poisson_CDF(lambda_, x):
    poiss_cdf = poisson.cdf(x, lambda_)
    return poiss_cdf

**TN confirmed cases and TX confirmed cases**



In [865]:
# Obtaining MME parametersfor TN confirmed cases
MME_TNconfirmedcases = Poisson_MME(TN_cases)
print('Poisson parameter(lambda) : ', MME_TNconfirmedcases)

# 1 sample KS-test on TX confirmed cases

KS_val = KS_1_sample(test_A, test_B, Poisson_CDF, MME_TNconfirmedcases )

print('KS statistic : ', KS_val)

c=0.05

if(KS_val > c):

  print("Hence, we REJECT the Null Hypothesis")
else:
  print("Hence, we ACCEPT the Null Hypothesis")  

Poisson parameter(lambda) :  2664.27868852459
KS statistic :  0.9344262295080024
Hence, we REJECT the Null Hypothesis


*Since the obtained KS statistic value is greater than the critical value of 0.05, we REJECT the Null Hypothesis.*

**TN deaths and TX deaths**

In [866]:
# Obtaining MME parameters for TN deaths
MME_TNdeaths = Poisson_MME(TN_deaths)
print('Poisson parameter(lambda) : ', MME_TNdeaths)

# 1 sample KS-test on TX deaths

KS_val = KS_1_sample(test_P, test_Q, Poisson_CDF, MME_TNdeaths )

print('KS statistic : ', KS_val)

c=0.05

if(KS_val > c):

  print("Hence, we REJECT the Null Hypothesis")
else:
  print("Hence, we ACCEPT the Null Hypothesis")  

Poisson parameter(lambda) :  35.49180327868852
KS statistic :  0.7851347244191668
Hence, we REJECT the Null Hypothesis


*Since the obtained KS statistic value is greater than the critical value of 0.05, we REJECT the Null Hypothesis.*

**1 SAMPLE K-S TEST - BINOMIAL DISTRIBUTION**



Obtaining parameters for Binomial distribution



In [867]:
def Binomial_param(X):
    mean = np.mean(X)
    n_estimate = np.square(mean)/(mean-np.var(X))
    p_estimate = mean/n_estimate
    return n_estimate,p_estimate

def Binomial_CDF(params,x):
    prob = binom.cdf(x, params[0], params[1])
    return prob

**TN confirmed cases and TX confirmed cases**



In [868]:
# Obtaining MME parameters for TN confirmed cases
n,p = Binomial_param(TN_cases)

print('Parameters of Binomial Distribution (n,p) : ', n,p)

# 1 sample KS-test on TX confirmed cases
KS_val = KS_1_sample(test_A, test_B, Binomial_CDF, [n,p] )

print('KS statistic : ', KS_val)

c=0.05

if(KS_val > c):

  print("Hence, we REJECT the Null Hypothesis")
else:
  print("Hence, we ACCEPT the Null Hypothesis")  

Parameters of Binomial Distribution (n,p) :  -4.833191310504729 -551.2462713267521
KS statistic :  1.0
Hence, we REJECT the Null Hypothesis


*Since the obtained KS statistic value is greater than the critical value of 0.05, we REJECT the Null Hypothesis.*

**TN deaths and TX deaths**


In [869]:
# Obtaining MME parameters for TN deaths

n,p = Binomial_param(TN_deaths)

print('Parameters of Binomial Distribution (n,p) : ', n,p)

# 1 sample KS-test on TX deaths
KS_val = KS_1_sample(test_P, test_Q, Binomial_CDF, [n,p] )

print('KS statistic : ', KS_val)

c=0.05

if(KS_val > c):

  print("Hence, we REJECT the Null Hypothesis")
else:
  print("Hence, we ACCEPT the Null Hypothesis")  

Parameters of Binomial Distribution (n,p) :  -2.675828670695916 -13.263854919925794
KS statistic :  1.0
Hence, we REJECT the Null Hypothesis


*Since the obtained KS statistic value is greater than the critical value of 0.05, we REJECT the Null Hypothesis.*

**1 SAMPLE K-S TEST - GEOMETRIC DISTRIBUTION**

Obtaining parameters for Geometric distribution

In [870]:
def Geometric_MME(X):
    sample_mean = np.mean(X)
    geo_mme = 1/sample_mean    
    return geo_mme

def Geometric_CDF(p,x):
    geo_cdf = geom.cdf(x, p)
    return geo_cdf

**TN confirmed cases and TX confirmed cases**

In [871]:
# Obtaining MME parameters for TN confirmed cases

p = Geometric_MME(TN_cases)

print('Geometric parameter : ', p)

# 1 sample KS-test on TX confirmed cases
KS_val = KS_1_sample(test_A, test_B, Geometric_CDF, p )

print('KS statistic : ', KS_val)

c=0.05

if(KS_val > c):

  print("Hence, we REJECT the Null Hypothesis")
else:
  print("Hence, we ACCEPT the Null Hypothesis")  

Geometric parameter :  0.000375336110410347
KS statistic :  0.6395840847515362
Hence, we REJECT the Null Hypothesis


*Since the obtained KS statistic value is greater than the critical value of 0.05, we REJECT the Null Hypothesis.*

**TN deaths and TX deaths**

In [872]:
# Obtaining MME parameters for TN deaths

p = Geometric_MME(TN_deaths)

print('Geometric parameter : ', p)

# 1 sample KS-test on TX deaths
KS_val = KS_1_sample(test_P, test_Q, Geometric_CDF, p )

print('KS statistic : ', KS_val)

c=0.05

if(KS_val > c):

  print("Hence, we REJECT the Null Hypothesis")
else:
  print("Hence, we ACCEPT the Null Hypothesis")  

Geometric parameter :  0.02817551963048499
KS statistic :  0.5645215235717812
Hence, we REJECT the Null Hypothesis


*Since the obtained KS statistic value is greater than the critical value of 0.05, we REJECT the Null Hypothesis.*