In [1]:
import datetime

In [2]:
import pandas as pd

In [3]:
import statsmodels.api as sm
import numpy as np

In [4]:
from bokeh.plotting  import figure, show


In [5]:
def make_df(path='data/seven_day_county.csv'):
    """Load the county-level table and parse its ``date`` column.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. Defaults to ``data/seven_day_county.csv`` so that
        existing ``make_df()`` calls behave exactly as before.

    Returns
    -------
    pandas.DataFrame
        The CSV contents with the ``date`` column converted by
        ``pd.to_datetime``; all other columns are left as read.
    """
    df = pd.read_csv(path)
    df['date'] = pd.to_datetime(df['date'])
    return df
# Preview the first rows of the loaded table (cell output below).
make_df().head()


Unnamed: 0,state,county,date,new_deaths,new_cases,population,rucc_2013
0,Nevada,Clark,2020-04-03,5,154,2266715,1
1,Massachusetts,Middlesex,2020-04-04,0,266,1611699,1
2,Nevada,Clark,2020-04-04,2,139,2266715,1
3,Massachusetts,Middlesex,2020-04-05,3,164,1611699,1
4,Nevada,Clark,2020-04-05,0,101,2266715,1


In [6]:
def hist(nums):
    """Build a Bokeh figure holding a density-normalised histogram of ``nums``.

    Bins come from ``np.histogram`` with its default bin count and
    ``density=True``. Each bin is drawn as a semi-transparent quad from 0 up
    to its density. The figure is returned, not shown.
    """
    densities, bin_edges = np.histogram(nums, density=True)
    lefts = bin_edges[:-1]
    rights = bin_edges[1:]
    plot = figure()
    plot.quad(left=lefts, right=rights, bottom=0, top=densities, alpha=.4)
    return plot

In [7]:
def get_mask_states():
    """Return the list of ``(state_name, datetime)`` pairs used by the analysis.

    Every date falls in 2020, so the table below stores only (state, month,
    day) and the ``datetime.datetime`` objects are built on return. The order
    of the pairs matches the table order.
    """
    year = 2020
    month_day_by_state = (
        ('California', 6, 18),
        ('Connecticut', 4, 20),
        ('Delaware', 4, 28),
        ('Hawaii', 4, 20),
        ('Illinois', 5, 1),
        ('Kansas', 7, 3),
        ('Kentucky', 5, 11),
        ('Maine', 5, 1),
        ('Maryland', 4, 18),
        ('Massachusetts', 5, 6),
        ('Michigan', 6, 18),
        ('Nevada', 6, 24),
        ('New Jersey', 4, 8),
        ('New Mexico', 5, 16),
        ('New York', 4, 17),
        ('North Carolina', 6, 26),
        ('Oregon', 7, 1),
        ('Pennsylvania', 4, 19),
        ('Rhode Island', 5, 18),
        ('Texas', 7, 3),
        ('Virginia', 5, 29),
        ('Washington', 6, 26),
        ('West Virginia', 7, 6),
    )
    return [
        (name, datetime.datetime(year, month, day))
        for name, month, day in month_day_by_state
    ]

In [8]:
def _linear_trend(cases, expected_len):
    """Return ``(slope, p_value)`` of an OLS line fitted through ``cases``.

    ``cases`` is regressed on its positional index ``0..n-1`` plus an
    intercept. ``(None, None)`` is returned when the series does not contain
    exactly ``expected_len`` observations, i.e. the window is incomplete.
    """
    if len(cases) != expected_len:
        return None, None
    day_index = np.arange(expected_len).reshape(-1, 1)
    fit = sm.OLS(cases, sm.add_constant(day_index)).fit()
    # add_constant prepends the intercept column, so index 1 is the slope.
    return fit.params[1], fit.pvalues[1]


def get_rates(df, state, mask_start, window_days=14, lag_days=5):
    """Fit per-county linear trends of ``new_cases`` before and after a date.

    Two windows of ``window_days`` days are taken for every county of
    ``state``:

    * before: ``(mask_start - window_days, mask_start]``
    * after:  ``(mask_start + lag_days, mask_start + lag_days + window_days]``

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``state``, ``county``, ``date`` and
        ``new_cases`` (these are the only ones read here).
    state : str
        Value matched against ``df['state']``.
    mask_start : datetime.datetime
        Reference date separating the two windows (the notebook pairs each
        state with one in ``get_mask_states``).
    window_days : int, optional
        Length of each window; a county needs exactly this many rows in a
        window for a trend to be fitted. Default 14.
    lag_days : int, optional
        Gap between ``mask_start`` and the start of the after-window.
        Default 5.

    Returns
    -------
    dict
        ``{state: {county: {'rt_bef', 'p_bef', 'rt_aft', 'p_aft'}}}`` where
        ``rt_*`` is the fitted slope and ``p_*`` its p-value, or ``None`` for
        both when the window is incomplete. Only counties with at least one
        row in the before-window are listed, in order of first appearance.

    Raises
    ------
    ValueError
        If ``window_days`` is smaller than 3 (too few points to fit a line
        and test its slope).
    """
    if window_days < 3:
        raise ValueError('window_days must be at least 3 to fit a trend and test its slope')

    window = datetime.timedelta(days=window_days)
    after_start = mask_start + datetime.timedelta(days=lag_days)

    # The regression uses row position as the time axis, so rows must be in
    # chronological order; do not rely on the order of the input file.
    state_rows = df[df['state'] == state].sort_values('date', kind='mergesort')
    dates = state_rows['date']
    before = state_rows[(dates > mask_start - window) & (dates <= mask_start)]
    after = state_rows[(dates > after_start) & (dates <= after_start + window)]

    rates = {}
    # unique() keeps first-appearance order, giving a deterministic key order
    # (iterating a set() of names does not).
    for county in before['county'].unique():
        cases_before = before.loc[before['county'] == county, 'new_cases'].tolist()
        cases_after = after.loc[after['county'] == county, 'new_cases'].tolist()
        rt_bef, p_bef = _linear_trend(cases_before, window_days)
        rt_aft, p_aft = _linear_trend(cases_after, window_days)
        rates[county] = {'rt_bef': rt_bef, 'p_bef': p_bef, 'rt_aft': rt_aft, 'p_aft': p_aft}
    return {state: rates}
        

def get_all_rates(alpha=.05):
    """Collect, per state, the change in county case-trend slope around its date.

    For every ``(state, date)`` pair from ``get_mask_states`` the per-county
    slopes from ``get_rates`` are compared. A slope whose p-value is at or
    above ``alpha`` is treated as 0. Counties for which either window could
    not be fitted (slope ``None``) are skipped.

    Side effect: prints the median of the (thresholded) before-slopes over
    all retained counties.

    Parameters
    ----------
    alpha : float, optional
        Significance threshold applied to the slope p-values. Default ``.05``.

    Returns
    -------
    dict
        ``{state: [rt_aft - rt_bef, ...]}`` with one entry per retained county.
    """
    # Read the CSV once; get_rates only filters the frame, it never mutates it.
    df = make_df()
    bef = []
    diffs = {}
    for state, mask_start in get_mask_states():
        rates_by_state = get_rates(df, state, mask_start)
        for state_name, counties in rates_by_state.items():
            state_diffs = diffs.setdefault(state_name, [])
            for stats in counties.values():
                rt_bef = stats['rt_bef']
                rt_aft = stats['rt_aft']
                # Either window may be incomplete; comparing a None p-value
                # with a float would raise TypeError, so skip such counties.
                if rt_bef is None or rt_aft is None:
                    continue
                if stats['p_bef'] >= alpha:
                    rt_bef = 0
                if stats['p_aft'] >= alpha:
                    rt_aft = 0
                bef.append(rt_bef)
                state_diffs.append(rt_aft - rt_bef)
    print(np.median(bef))
    return diffs
diffs = get_all_rates()
# NOTE: earlier histogram/percentile experiments on `diffs` were dropped here;
# `diffs` is a dict keyed by state, so they would need a flat list of values.
# Report the mean per-county slope change for each state.
for state, state_diffs in diffs.items():
    print(state, np.mean(state_diffs))
    

0.0
California -0.0809716599190284
Connecticut 0.0
Delaware -0.8945054945054948
Hawaii 0.23461538461538461
Illinois -1.0106784519828003
Kansas -0.06347059533872722
Kentucky 0.008731268731268735
Maine 0.07413919413919418
Maryland 0.20357142857142865
Massachusetts 5.406750392464677
Michigan 0.24129120879120877
Nevada -0.8000000000000005
New Jersey 1.0051282051282036
New Mexico -0.019487179487179495
New York -5.915233042819249
North Carolina 0.07745054945054948
Oregon 0.06767937944408536
Pennsylvania 0.175316990701606
Rhode Island 0.0
Texas -0.8473768908915353
Virginia -0.28919695688926467
Washington -0.017698091382301967
West Virginia -0.06062616628654366


In [9]:
# Scratch check: dict.update merges b's entries into a in place.
a = dict(foo=1)
b = dict(bar=2)
a.update(b)
a

{'foo': 1, 'bar': 2}