In [1]:
import os
import datetime
import random

from bokeh.io import show
from bokeh.plotting import figure
from bokeh.io import output_notebook
from bokeh.layouts import gridplot

import numpy as np
import math
import scipy.optimize as optim
import pandas as pd

# Effectiveness of Mask Mandates
In order to slow the spread of COVID-19, many states in the United States passed mask mandates. A mandate is a law. The mandates required all people to wear masks in public.

Did these mandates help slow the spread? To answer this question, you will do two types of analysis:

1. Compare the number of cases before the mandate and after the mandate, and determine if the difference is significant.
2. Compare the rt before the mandate and after the mandate.

I have included example code for one state. Use this code to do the same analysis for the other states. 

Note that each state has 3 periods: the period before the mandate, the incubation period, and the period after the mandate. The incubation period refers to the time the infection lives in the body before it shows symptoms. On average, this period is 5 days. We will ignore this period.


In [2]:
def make_bar(labels, nums, title = None, y_range = None, plot_width = 350, plot_height = 350):
    """Build a vertical bar chart with one bar per label.

    Parameters
    ----------
    labels : x positions of the bars (passed to ``vbar`` as ``x``).
    nums : bar heights (passed to ``vbar`` as ``top``).
    title : optional title forwarded to ``figure``.
    y_range : optional y-axis range forwarded to ``figure``.
    plot_width, plot_height : figure size forwarded to ``figure``.

    Returns
    -------
    The figure, with the bars drawn and the x-grid lines turned off.
    """
    figure_options = dict(
        title = title,
        y_range = y_range,
        plot_width = plot_width,
        plot_height = plot_height,
    )
    chart = figure(**figure_options)
    chart.vbar(x = labels, top = nums, width = 0.9)
    # Grid lines along the x axis add nothing to a bar chart, so hide them.
    chart.xgrid.grid_line_color = None
    return chart

In [3]:
def resample(l):
    """Return a bootstrap resample of ``l``.

    Picks ``len(l)`` items from ``l`` uniformly at random, with replacement,
    so the result has the same length as the input and may repeat items.
    An empty input yields an empty list.
    """
    return [random.choice(l) for _ in range(len(l))]

In [4]:
def exp_func(x, initial, ratio):
    """Exponential growth curve ``initial * ratio ** (x - 1)``.

    Parameters
    ----------
    x : scalar or array of positions along the curve.
    initial : value of the curve at ``x == 1``.
    ratio : multiplicative growth from one unit of ``x`` to the next.
    """
    steps_from_start = x - 1
    growth = np.power(ratio, steps_from_start)
    return initial * growth

In [5]:
def repeat_resample(sample_a, sample_b, num_iter = 1000):
    """Bootstrap the difference in means between two samples.

    For each of ``num_iter`` rounds, both samples are resampled with
    ``resample`` and the difference ``mean(a) - mean(b)`` of the two
    resamples is recorded.

    Returns
    -------
    list with ``num_iter`` differences, in the order they were drawn.
    """
    return [
        np.mean(resample(sample_a)) - np.mean(resample(sample_b))
        for _ in range(num_iter)
    ]


In [6]:
# Configure Bokeh so that the plots passed to show() below are rendered inline in the notebook.
output_notebook()

In [7]:
"""Read in the data"""
# Load data/mask.csv and convert its 'Date' column to pandas datetimes in one chained expression.
DF = pd.read_csv(os.path.join('data', 'mask.csv')).assign(
    Date = lambda frame: pd.to_datetime(frame['Date'])
)

In [8]:
# California rows labelled 'pre_mask' (the period before the mandate).
ca_pre = DF.loc[(DF['state'] == 'California') & (DF['period'] == 'pre_mask')]
# California rows labelled 'post_mask' (the period after the mandate).
ca_post = DF.loc[(DF['state'] == 'California') & (DF['period'] == 'post_mask')]


In [9]:
"""Get the number of infections before and after the masks, and graph them"""
cases_bef = ca_pre['cases']
cases_aft = ca_post['cases']
# Each bar is labelled with its position within the period (0, 1, 2, ...).
# Titles are set so the two side-by-side charts can be told apart.
# NOTE: no y_range is passed, so each chart picks its own y-axis scale;
# compare the axis values, not the raw bar heights.
p_bef = make_bar(labels = list(range(len(cases_bef))), nums = cases_bef,
                 title = 'California: cases before mandate')
p_aft = make_bar(labels = list(range(len(cases_aft))), nums = cases_aft,
                 title = 'California: cases after mandate')
show(gridplot([p_bef, p_aft], ncols = 2))


In [17]:
"""are the differences significant? """
# Observed difference in mean cases (after minus before).
print(np.mean(cases_aft) - np.mean(cases_bef))
"""
Cases after increased after the mask mandate. (Surprising, right? The state passed a law and there were *more* 
infections.)

The null hypothesis is the cases afterwards are *not* greater than the cases before. Remember, the null hypothesis
is conservative, and the opposite of what you want to prove. 
"""

# Seed the generator used by resample() so re-running this cell gives the same interval.
random.seed(42)

# When resampling, pass the sample with the bigger mean first. Then test that the diff is greater than 0.
# You can do it the other way (the smaller first, and test if the diff is less than 0), but this way is easier.
resamp_diff = repeat_resample(cases_aft.tolist(), cases_bef.tolist())
lower = np.percentile(resamp_diff, 5)
upper = np.percentile(resamp_diff, 95)
# The statistic is a difference in means, so "no difference" is 0 (1 would be the
# null value for a ratio). The null is rejected only when 0 falls outside the
# 5th-95th percentile interval of the resampled differences.
lies_inside =  lower < 0 < upper
if lies_inside:
    print('p value is not < .05; do not reject the null hypothesis')
else:
    print('p value is  < .05;  reject the null hypothesis')
lies_inside


3750.642857142857
p value is  < .05;  reject the null hypothesis


False

In [11]:
"""Let's get the rt value before and after the mask mandate"""
# Fit exp_func to each period. xdata is built from the number of rows in that
# period so the x and y arrays always have matching lengths.
popt_pre, pcov_pre = optim.curve_fit(f = exp_func, xdata = list(range(len(ca_pre))), ydata = ca_pre['cases'])
popt_post, pcov_post = optim.curve_fit(f = exp_func, xdata = list(range(len(ca_post))), ydata = ca_post['cases'])
# popt holds the fitted (initial, ratio) of exp_func; the ratio is what this notebook calls rt.
rt_bef, rt_aft = popt_pre[1], popt_post[1]
print(rt_bef, rt_aft)
"""The rt *increased* after the mask mandate """

1.0199781964101093 1.0400259829495748


'The rt *increased* after the mask mandate '

In [12]:
"""Conclusion: cases increased after the mandate. rt increased after the mandate. """

'Conclusion: cases increased after the mandate. rt increased after the mandate. '

In [13]:
"""Do the same analysis for the other 22 states"""

'Do the same analysis for the other 22 states'