In [1]:
import datetime
import os
from collections import OrderedDict


In [2]:
import pandas as pd

In [3]:
from bokeh.io import show
from bokeh.plotting import figure
from bokeh.io import output_notebook
from bokeh.io import reset_output
from bokeh.layouts import gridplot
from bokeh.models import Arrow, NormalHead, OpenHead, VeeHead
from bokeh.models import Label
from bokeh.models import Span
from bokeh.embed import components


In [4]:
import scipy.optimize as optim
import numpy as np
from scipy import stats
import random


In [5]:
# Configure Bokeh (bokeh.io.output_notebook) so that later show() calls render inside the notebook.
output_notebook()

In [6]:
def output_graphs(script, div, text,
                  home_page_dir='/home/henry/projects/covid19/home_page/'):
    """Write an embeddable plot and its caption into ``home_page_dir``.

    Three files are (over)written, in this order:

    * ``script``   -- receives ``script``
    * ``div``      -- receives ``div``
    * ``text.txt`` -- receives ``text``

    The directory must already exist; nothing is returned.
    """
    targets = (
        ('script', script),
        ('div', div),
        ('text.txt', text),
    )
    for file_name, payload in targets:
        destination = os.path.join(home_page_dir, file_name)
        with open(destination, 'w') as handle:
            handle.write(payload)


In [81]:
def get_state_mask_start():
    """Return ``(state name, mask start date)`` tuples, one per state, as a list.

    Every date is a ``datetime.datetime`` in 2020; the list is ordered
    alphabetically by state name.
    """
    # (state, month, day) -- the year is always 2020.
    # NOTE(review): Kentucky is July 10 here, while ratios_change() and
    # do_mask_mandates() in this notebook use May 11 -- confirm which is intended.
    month_day_by_state = (
        ('California', 6, 18),
        ('Connecticut', 4, 20),
        ('Delaware', 4, 28),
        ('Hawaii', 4, 20),
        ('Illinois', 5, 1),
        ('Kansas', 7, 3),
        ('Kentucky', 7, 10),
        ('Maine', 5, 1),
        ('Maryland', 4, 18),
        ('Massachusetts', 5, 6),
        ('Michigan', 6, 18),
        ('Nevada', 6, 24),
        ('New Jersey', 4, 8),
        ('New Mexico', 5, 16),
        ('New York', 4, 17),
        ('North Carolina', 6, 26),
        ('Oregon', 7, 1),
        ('Pennsylvania', 4, 19),
        ('Rhode Island', 5, 18),
        ('Texas', 7, 3),
        ('Virginia', 5, 29),
        ('Washington', 6, 26),
        ('West Virginia', 7, 6),
    )
    return [(name, datetime.datetime(2020, month, day))
            for name, month, day in month_day_by_state]

In [7]:
def make_df(state, start_date):
    """Load ``data/states.csv`` and keep one state's rows from ``start_date`` on.

    The ``date`` column is converted with ``pd.to_datetime`` before filtering;
    rows whose ``state`` equals ``state`` and whose ``date`` is on or after
    ``start_date`` are returned as a DataFrame.
    """
    frame = pd.read_csv('data/states.csv')
    frame['date'] = pd.to_datetime(frame['date'])
    is_state = frame['state'] == state
    is_recent = frame['date'] >= start_date
    return frame[is_state & is_recent]

In [8]:
def make_mask_graph(df):
    """Placeholder that ignores ``df`` and returns ``None``.

    A full ``make_mask_graph`` is defined further down in this same cell and
    replaces this one when the cell runs.
    """
    return None

def make_df(state, start_date):
    """Return the rows of ``data/states.csv`` for ``state`` dated ``start_date`` or later.

    ``date`` is parsed with ``pd.to_datetime`` first, so ``start_date`` can be
    a ``datetime.datetime``.  (Same behaviour as the ``make_df`` defined in the
    previous cell.)
    """
    parsed = pd.read_csv('data/states.csv')
    parsed = parsed.assign(date=pd.to_datetime(parsed['date']))
    wanted = (parsed['state'] == state) & (parsed['date'] >= start_date)
    return parsed.loc[wanted]

def make_mask_graph(df, mask_start, title = None, plot_height = 450, plot_width = 450,
                   incubation_period = 5, window = 3):
    """Bar chart of rolling-mean cases, coloured by period relative to ``mask_start``.

    Parameters
    ----------
    df : DataFrame with ``date`` and ``cases`` columns.
    mask_start : datetime; bars dated up to and including it are yellow.
    title, plot_height, plot_width : forwarded to ``figure``.
    incubation_period : number of days after ``mask_start`` drawn in red;
        later bars are orange.
    window : size of the rolling window averaged over ``cases``.

    Returns the Bokeh figure.
    """
    dates = df['date'].tolist()
    smoothed = df['cases'].rolling(window).mean()
    fig = figure(x_axis_type='datetime', title=title,
                 plot_width=plot_width, plot_height=plot_height, y_range=None)

    incubation_end = mask_start + datetime.timedelta(days=incubation_period)
    before, during, after = [], [], []
    for point in zip(dates, smoothed):
        day = point[0]
        # Three independent tests (not an if/elif chain) so each bucket is
        # selected by exactly its own condition.
        if day <= mask_start:
            before.append(point)
        if day > mask_start and day <= incubation_end:
            during.append(point)
        if day > incubation_end:
            after.append(point)

    for bucket, colour in ((before, 'yellow'), (during, 'red'), (after, 'orange')):
        fig.vbar(x=[pt[0] for pt in bucket], top=[pt[1] for pt in bucket],
                 line_width=5, width=.9, color=colour)

    # NOTE(review): no glyph above is given a legend label, so this presumably
    # has no legend to position -- confirm against the Bokeh version in use.
    fig.legend.location = "top_center"
    return fig

def do_mask_mandates(window = 3, plot_height = 450, 
                    plot_width = 450, ncols = 4):
    """Build a grid with one mask-mandate bar chart per state.

    For each (state, mask start) pair below, the state's rows from
    2020-04-01 onward are loaded with ``make_df`` and plotted with
    ``make_mask_graph``; each plot is titled with the state name.

    Parameters
    ----------
    window : rolling-mean window forwarded to ``make_mask_graph``.
    plot_height, plot_width : size of every individual plot.
    ncols : number of columns passed to ``gridplot``.

    Returns the object produced by ``gridplot``.
    """
    series_start = datetime.datetime(2020, 4, 1)
    # NOTE(review): Kentucky is May 11 here (and in ratios_change), whereas
    # get_state_mask_start() uses July 10 -- confirm which is intended.
    mandates = [
        ('California', datetime.datetime(2020, 6, 18)),
        ('Connecticut', datetime.datetime(2020, 4, 20)),
        ('Delaware', datetime.datetime(2020, 4, 28)),
        ('Hawaii', datetime.datetime(2020, 4, 20)),
        ('Illinois', datetime.datetime(2020, 5, 1)),
        ('Kansas', datetime.datetime(2020, 7, 3)),
        ('Kentucky', datetime.datetime(2020, 5, 11)),
        ('Maine', datetime.datetime(2020, 5, 1)),
        ('Maryland', datetime.datetime(2020, 4, 18)),
        ('Massachusetts', datetime.datetime(2020, 5, 6)),
        ('Michigan', datetime.datetime(2020, 6, 18)),
        ('Nevada', datetime.datetime(2020, 6, 24)),
        ('New Jersey', datetime.datetime(2020, 4, 8)),
        ('New Mexico', datetime.datetime(2020, 5, 16)),
        ('New York', datetime.datetime(2020, 4, 17)),
        ('North Carolina', datetime.datetime(2020, 6, 26)),
        ('Oregon', datetime.datetime(2020, 7, 1)),
        ('Pennsylvania', datetime.datetime(2020, 4, 19)),
        ('Rhode Island', datetime.datetime(2020, 5, 18)),
        ('Texas', datetime.datetime(2020, 7, 3)),
        ('Virginia', datetime.datetime(2020, 5, 29)),
        ('Washington', datetime.datetime(2020, 6, 26)),
        ('West Virginia', datetime.datetime(2020, 7, 6)),
    ]
    plots = [
        make_mask_graph(
            df=make_df(state=state, start_date=series_start),
            mask_start=mask_start,
            # The title is the state name itself, so it cannot drift out of
            # sync with the data being plotted.
            title=state,
            plot_height=plot_height,
            plot_width=plot_width,
            window=window,
        )
        for state, mask_start in mandates
    ]
    return gridplot(plots, ncols=ncols)

#g = do_mask_mandates(window = 7, plot_height = 250, plot_width = 250)

In [9]:
def exp_func(x, initial, ratio):
    """Geometric growth model: ``initial * ratio ** (x - 1)``.

    Evaluates to ``initial`` at ``x == 1`` and is multiplied by ``ratio`` for
    every unit increase in ``x``.  ``x`` may be a scalar or a NumPy array.
    """
    steps_elapsed = x - 1
    growth = np.power(ratio, steps_elapsed)
    return initial * growth

In [10]:
def get_rate(df_m, state, days, start, inc, pop_d):
    """Fit growth ratios before and after ``start`` for one state.

    Parameters
    ----------
    df_m : DataFrame with ``state``, ``date`` (datetime) and ``cases`` columns.
    state : value of the ``state`` column to analyse.
    days : length, in days, of each fitting window.
    start : datetime marking the event (the mask start date in this notebook).
    inc : number of days skipped after ``start`` before the "after" window.
    pop_d : mapping from state name to population.

    Returns
    -------
    (ratio_before, ratio_after, cases_7)
        ``ratio_before`` is the ``ratio`` parameter of ``exp_func`` fitted to
        the rows with ``start - days <= date < start``; ``ratio_after`` is the
        same for ``start + inc < date <= start + inc + days``.  ``cases_7`` is
        the mean of ``cases`` over the state's last seven rows of ``df_m``,
        expressed per 100,000 population.

    ``scipy.optimize.curve_fit`` raises if a window is empty or the fit does
    not converge; those errors propagate unchanged.
    """
    state_rows = df_m[df_m['state'] == state]
    dates = state_rows['date']
    window = datetime.timedelta(days=days)
    after_from = start + datetime.timedelta(days=inc)

    before_rows = state_rows[(dates < start) & (dates >= start - window)]
    after_rows = state_rows[(dates > after_from) & (dates <= after_from + window)]

    def fitted_ratio(rows):
        # x is 1, 2, ... n so that exp_func's ``initial`` corresponds to the
        # first observation in the window.
        y_values = np.array(rows['cases'].tolist())
        x_values = np.arange(1, len(y_values) + 1)
        params, _covariance = optim.curve_fit(f=exp_func, xdata=x_values,
                                              ydata=y_values)
        return params[1]

    ratio_bef_hat = fitted_ratio(before_rows)
    ratio_aft_hat = fitted_ratio(after_rows)

    # Slice ``[-7:]`` (the last seven rows), not index ``[-7]`` (one row):
    # the value is meant to be a seven-row average.
    # NOTE(review): "last" is in df_m row order -- assumes rows are in date order.
    last_week = state_rows['cases'].tolist()[-7:]
    cases_7 = np.mean(last_week) * 100000 / pop_d[state]
    return (ratio_bef_hat, ratio_aft_hat, cases_7)
    
def ratios_change(days, inc = 5):
    """Fit before/after growth ratios for every state in the mandate table.

    Reads ``data/states.csv`` (case rows) and ``data/seven_day_state.csv``
    (``state`` and ``population`` columns), then calls ``get_rate`` once per
    state.

    Parameters
    ----------
    days : window length in days, forwarded to ``get_rate``.
    inc : days skipped after the mandate date, forwarded to ``get_rate``.

    Returns a list of ``(state, ratio_before, ratio_after, adjusted_cases)``
    tuples in the order of the table below.
    """
    df_m = pd.read_csv('data/states.csv')

    def get_pop():
        # One entry per state.  If a state appears on several rows the last
        # row wins, which keeps the lookup deterministic from run to run.
        pop_df = pd.read_csv('data/seven_day_state.csv')
        return dict(zip(pop_df['state'], pop_df['population']))

    pop_d = get_pop()
    df_m['date'] = pd.to_datetime(df_m['date'])
    # NOTE(review): Kentucky is May 11 here, whereas get_state_mask_start()
    # uses July 10 -- confirm which is intended.
    mandates = [
        ('California', datetime.datetime(2020, 6, 18)),
        ('Connecticut', datetime.datetime(2020, 4, 20)),
        ('Delaware', datetime.datetime(2020, 4, 28)),
        ('Hawaii', datetime.datetime(2020, 4, 20)),
        ('Illinois', datetime.datetime(2020, 5, 1)),
        ('Kansas', datetime.datetime(2020, 7, 3)),
        ('Kentucky', datetime.datetime(2020, 5, 11)),
        ('Maine', datetime.datetime(2020, 5, 1)),
        ('Maryland', datetime.datetime(2020, 4, 18)),
        ('Massachusetts', datetime.datetime(2020, 5, 6)),
        ('Michigan', datetime.datetime(2020, 6, 18)),
        ('Nevada', datetime.datetime(2020, 6, 24)),
        ('New Jersey', datetime.datetime(2020, 4, 8)),
        ('New Mexico', datetime.datetime(2020, 5, 16)),
        ('New York', datetime.datetime(2020, 4, 17)),
        ('North Carolina', datetime.datetime(2020, 6, 26)),
        ('Oregon', datetime.datetime(2020, 7, 1)),
        ('Pennsylvania', datetime.datetime(2020, 4, 19)),
        ('Rhode Island', datetime.datetime(2020, 5, 18)),
        ('Texas', datetime.datetime(2020, 7, 3)),
        ('Virginia', datetime.datetime(2020, 5, 29)),
        ('Washington', datetime.datetime(2020, 6, 26)),
        ('West Virginia', datetime.datetime(2020, 7, 6)),
    ]
    results = []
    for state, start in mandates:
        bef, aft, adj_c = get_rate(df_m=df_m, state=state, days=days,
                                   start=start, inc=inc, pop_d=pop_d)
        results.append((state, bef, aft, adj_c))
    return results


In [11]:
def hist_old(x, density = True):
    """Draw a histogram of ``x`` as Bokeh quads and return the figure.

    Bin heights and edges come from ``np.histogram(x, density=density)`` with
    its default binning; the bars are drawn from 0 with 40% opacity.
    """
    heights, bin_edges = np.histogram(x, density=density)
    left_edges = bin_edges[:-1]
    right_edges = bin_edges[1:]
    fig = figure()
    fig.quad(top=heights, bottom=0, left=left_edges, right=right_edges, alpha=.4)
    return fig


In [12]:
# Sweep the fit window from 7 to 14 days.  For each length print the window,
# then: mean change in fitted ratio (after - before), the number of states
# whose change is >= 0, and the number of states.
for i in range(7, 15):
    print(i)
    x = ratios_change(days=i)
    diff = [after - before for _state, before, after, _adjusted in x]
    print(np.mean(diff), sum(1 for delta in diff if delta >= 0), len(diff))

7
-0.008288063747248886 11 23
8
-0.01045528866014853 11 23
9
-0.01382473941023788 10 23
10
-0.01393436317155237 11 23
11
-0.015269660082787547 10 23
12
-0.010162652577646277 11 23
13
0.019118929735184156 9 23
14
-0.007389889823953407 9 23


In [13]:
def make_bar(days, plot_height = 450, plot_width = 450):
    """Bar chart of each state's change in fitted ratio (after - before).

    ``ratios_change(days)`` supplies the ratios; states are ordered along the
    x axis from the smallest change to the largest.  Returns the Bokeh figure,
    titled ``'RT change <days>'``.
    """
    ranked = [(row[0], row[2] - row[1]) for row in ratios_change(days)]
    ranked.sort(key=lambda pair: pair[1])
    states = [name for name, _ in ranked]
    change = [delta for _, delta in ranked]

    fig = figure(x_range=states, plot_height=plot_height, plot_width=plot_width,
                 title='RT change {i}'.format(i=days))
    fig.vbar(x=states, top=change, width=0.9)
    fig.xgrid.grid_line_color = None
    fig.xaxis.major_label_orientation = "vertical"
    return fig

def all_bar_graphs():
    """Return one ``make_bar`` figure for each window length from 7 to 14 days."""
    return [make_bar(window_days) for window_days in range(7, 15)]
# Lay the eight bar charts (one per window length) out two per row and display them.
grid = gridplot(all_bar_graphs(), ncols = 2)
show(grid)


In [14]:
# reset_output() clears the output mode selected by output_notebook() earlier.
# NOTE(review): show(p) below therefore presumably renders outside the
# notebook (Bokeh's default target) -- confirm this is intended.
reset_output()
# Single bar chart for the 10-day fitting window.
p = make_bar(10, plot_width = 450, plot_height = 450)

show(p)


In [15]:
# Export the 10-day bar chart for the home page: components(p) yields the
# script and div pieces, and output_graphs() writes them plus the caption below
# to its default home_page_dir.
p = make_bar(10, plot_width = 650, plot_height = 650)
script, div = components(p)
text = """RT rate for states with mask mandates. For roughly half the states, the RT went down (improved) after 
the mandate. For the other half, the RT increased after the mandate. This is evidence that mask mandates by themselves do not change the growth of the pandemic."""
output_graphs(script, div, text = text)


In [16]:
# Switch Bokeh back to notebook output; reset_output() was called in an earlier cell.
output_notebook()

In [17]:
def resample_1(l, increase_by = 25, num_iter = 100):
    """Return ``num_iter`` resampled means of the list ``l``.

    ``l`` is repeated ``increase_by`` times to form a pool.  On every
    iteration the pool is shuffled in place with ``random.shuffle`` and the
    mean of its first ``len(l)`` items is recorded.  ``l`` itself is not
    modified.  Results depend on the state of the ``random`` module.
    """
    pool = l * increase_by
    sample_size = len(l)

    def draw_mean():
        # The same pool is reshuffled each time (shuffles are cumulative).
        random.shuffle(pool)
        return np.mean(pool[:sample_size])

    return [draw_mean() for _ in range(num_iter)]


In [18]:
def rates_changes():
    """Collect ratio changes over a grid of window lengths and incubation offsets.

    For every window length 6..14 and incubation offset 5..9 the per-state
    change (after - before) from ``ratios_change`` is appended to the result.
    When the mean change of a combination is not positive, a one-sample t-test
    against 0 is run and its p-value halved; combinations whose halved p-value
    is below .05 are printed as ``window incubation``.

    Returns the flat list of all changes.
    """
    all_d = []
    for window_days in range(6, 15):
        for inc in range(5, 10):
            deltas = [row[2] - row[1]
                      for row in ratios_change(window_days, inc = inc)]
            all_d.extend(deltas)
            if np.mean(deltas) > 0:
                # Only non-positive mean changes are tested.
                continue
            half_pvalue = stats.ttest_1samp(deltas, 0).pvalue / 2
            if half_pvalue < .05:
                print(window_days, inc)
    return all_d
# Run the full sweep (prints the significant window/incubation pairs) and
# display the mean change over every state and combination.
ll = rates_changes()
np.mean(ll)



9 9
11 7


-0.01223474417293313

In [19]:
def hist(x, density = True):
    """Return a Bokeh figure showing a histogram of ``x``.

    ``np.histogram(x, density=density)`` provides the bar heights and bin
    edges; each bin is drawn as a quad rising from 0 with alpha .4.
    """
    heights, bin_edges = np.histogram(x, density=density)
    fig = figure()
    fig.quad(top=heights, bottom=0,
             left=bin_edges[:-1], right=bin_edges[1:], alpha=.4)
    return fig
def junk():
    """Show a histogram of the 14-day ratio changes with a normal curve on top.

    The changes (after - before) come from ``ratios_change(14)``.  The overlaid
    line is the normal pdf with the sample mean and ``np.std`` of the changes,
    evaluated at 100 points from ``min - .05`` to ``max + .03``.
    """
    deltas = [row[2] - row[1] for row in ratios_change(14)]
    fig = hist(deltas, density = True)
    centre = np.mean(deltas)
    spread = np.std(deltas)
    grid_x = np.linspace(min(deltas) - .05, max(deltas) + .03, 100)
    grid_y = stats.norm.pdf(grid_x, loc=centre, scale=spread)
    fig.line(x = grid_x, y = grid_y)
    show(fig)

In [20]:
def get_sig(data, less_than = .01):
    """Return 1 minus the fraction of resampled means that fall below ``less_than``.

    The means come from ``resample_1(data, num_iter=100)``.
    """
    means = resample_1(data, num_iter = 100)
    below = sum(1 for value in means if value < less_than)
    return 1 - below / len(means)

def get_all_sig(less_than = 0):
    """Run ``get_sig`` for every window length 6..14 and incubation offset 5..9.

    Each combination's input is the per-state change (after - before) from
    ``ratios_change``.  Returns ``(window, incubation, sig)`` tuples, with the
    incubation offset varying fastest.
    """
    return [
        (window_days, inc,
         get_sig(data = [row[2] - row[1]
                         for row in ratios_change(window_days, inc = inc)],
                 less_than = less_than))
        for window_days in range(6, 15)
        for inc in range(5, 10)
    ]


In [21]:
# Select notebook output for Bokeh again.
# NOTE(review): output_notebook() was already called in an earlier cell with no
# reset_output() in between, so this looks redundant -- confirm.
output_notebook()

In [22]:
def get_quantiles(data):
    """Summarise each list in ``data`` for a box plot.

    ``data`` is a list of lists.  For every inner list the result holds a
    tuple ``(low_fence, high_fence, q1, q2, q3, outliers)`` where ``q1``,
    ``q2`` and ``q3`` are the 25th, 50th and 75th percentiles, the fences are
    ``q1 - 1.5 * IQR`` and ``q3 + 1.5 * IQR``, and ``outliers`` lists the
    values lying strictly outside the fences.
    """
    summaries = []
    for sample in data:
        q1, q2, q3 = (np.percentile(sample, pct) for pct in (25, 50, 75))
        reach = 1.5 * (q3 - q1)
        low_fence = q1 - reach
        high_fence = q3 + reach
        outliers = [value for value in sample
                    if value < low_fence or value > high_fence]
        summaries.append((low_fence, high_fence, q1, q2, q3, outliers))
    return summaries


def make_box_raw(p, cats, upper, lower, q1, q2, q3, out):
    """Draw box-plot glyphs for the categories ``cats`` on figure ``p``.

    Per category: a stem segment between ``lower`` and ``upper``, two stacked
    bars (``q1``-``q2`` and ``q2``-``q3``), a small cap rectangle at each stem
    end, and one circle per outlier.  ``out`` maps a category to its list of
    outlier values.  Returns ``p``.
    """
    p.segment(cats, lower, cats, upper)

    # The box is drawn as two bars that meet at q2.
    for edge_a, edge_b, fill in ((q1, q2, "#E08E79"), (q2, q3, "#3B8686")):
        p.vbar(cats, 0.7, edge_a, edge_b, fill_color=fill, line_color="black")

    # Caps at both ends of the stem.
    for level in (lower, upper):
        p.rect(cats, level, 0.2, 0.01, line_color="black")

    for category, points in out.items():
        for value in points:
            p.circle([category], [value], size=6, color="#F38630", fill_alpha=0.6)
    return p
  
def zip_data(cats, data):
    """Transpose per-category summaries into columns and collect the outliers.

    ``data`` holds one tuple per category (as produced by ``get_quantiles``),
    whose element 5 is that category's list of outliers.

    Returns ``(columns, outs)``: ``columns`` is ``list(zip(*data))`` and
    ``outs`` is an ``OrderedDict`` mapping each category that has at least one
    outlier to its outlier list, in ``cats`` order.
    """
    columns = list(zip(*data))
    outs = OrderedDict(
        (category, columns[5][position])
        for position, category in enumerate(cats)
        if len(columns[5][position]) != 0
    )
    return columns, outs
# Toy inputs with one category label per data list; both names are rebound to
# the real categories and data further down in this cell.
cats = ['a', 'b']
data = [[1, 2, 3], [4, 5, 6]]
assert len(cats) == len(data)

def state_ranges():
    """Map every ``(window, incubation)`` pair to its list of per-state changes.

    Windows run 6..14 and incubation offsets 5..9; each value is the list of
    (after - before) ratio changes from ``ratios_change`` for that pair.
    """
    return {
        (window_days, inc): [row[2] - row[1]
                             for row in ratios_change(window_days, inc = inc)]
        for window_days in range(6, 15)
        for inc in range(5, 10)
    }

# Build the box-plot inputs: one category per (days, inc) key, labelled with
# the key's string form, and the matching list of per-state changes.
data = []
cats = []
d = state_ranges()
for key in d.keys():
    data.append(d[key])
    cats.append(str(key))

p = figure(tools="save", background_fill_color="#EFE8E2", x_range=cats,
          plot_width = 900, plot_height= 450, title = "Change in RT Pre-Mask and Mask")

# f holds the transposed get_quantiles() columns:
# f[0] = low fence, f[1] = high fence, f[2..4] = q1, q2, q3.
f, outs = zip_data(cats, get_quantiles(data))
# f[0]/f[1] land in make_box_raw's `upper`/`lower` parameters respectively;
# both ends get the same segment and cap glyphs there.
p = make_box_raw(p, cats, f[0], f[1], f[2], f[3], f[4], outs)
# Horizontal reference line at zero change.
p.line(x = cats, y = [0 for x in cats], line_width = 2, color = 'black')
p.xaxis.major_label_orientation = "vertical"
# NOTE(review): the category strings are "(days, inc)", i.e. window length
# first and incubation second -- the reverse of this label's order.
p.xaxis.axis_label = 'Incubation/Time'
p.yaxis.axis_label = 'Change in RT'

show(p)


    

In [24]:
# Load data/state_poisson.csv and preview its first rows.
# NOTE(review): what "poisson" refers to is not visible in this notebook.
DF_POISSON = pd.read_csv('data/state_poisson.csv')
DF_POISSON.head()

Unnamed: 0,state,date,cases
0,Washington,2020-01-22,0
1,Washington,2020-01-23,0
2,Washington,2020-01-24,0
3,Washington,2020-01-25,0
4,Washington,2020-01-26,0


In [83]:
def states_poisson(df, state, mask_start, plot_width = 350, plot_height = 350, title = None):
    """Line chart of ``cases`` over ``date`` for one state, marked at ``mask_start``.

    Rows of ``df`` whose ``state`` equals ``state`` are plotted on a datetime
    x axis, and a dashed red ``Span`` (``dimension='height'``) is added at
    ``mask_start``.  Returns the Bokeh figure.
    """
    state_rows = df[df['state'] == state]
    fig = figure(x_axis_type='datetime', plot_width=plot_width,
                 plot_height=plot_height, title=title)
    marker = Span(location=mask_start,
                  dimension='height', line_color='red',
                  line_dash='dashed', line_width=1)
    fig.line(x=state_rows['date'], y=state_rows['cases'])
    fig.add_layout(marker)
    fig.yaxis.axis_label = 'cases'
    return fig


def all_states_poisson():
    """Return one ``states_poisson`` figure per entry of ``get_state_mask_start()``.

    ``data/state_poisson.csv`` is read once, its ``date`` column is converted
    with ``pd.to_datetime``, and each state is plotted with its own name as
    the title.
    """
    frame = pd.read_csv('data/state_poisson.csv')
    frame['date'] = pd.to_datetime(frame['date'])
    return [states_poisson(frame, state, mask_start, title=state)
            for state, mask_start in get_state_mask_start()]
# Build the per-state line charts and display them four per row.
grids =all_states_poisson()
grid = gridplot(grids, ncols = 4)
show(grid)
