In [65]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
from dateutil.relativedelta import relativedelta
from scipy.stats import ttest_rel 

In [66]:
## Reading data
RAW_COUNTIES_FILENAME = 'us-counties.csv'
POLICIES_FILENAME = 'earliestpolicy_08042020.csv'
raw_counties = pd.read_csv(RAW_COUNTIES_FILENAME)
raw_policies = pd.read_csv(POLICIES_FILENAME)[['fips','policy_date']]
raw_counties.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0


In [67]:
## Calculating R0 values
raw_counties['date'] = pd.to_datetime(raw_counties['date'])
sorted_counties = raw_counties.sort_values(by=['county','state','date'])
sorted_counties['cases'] = sorted_counties['cases'].rolling(window=7)
sorted_counties['new_cases'] = sorted_counties['cases'].diff() 
diff_mask = (sorted_counties['county'] != sorted_counties['county'].shift(1)) & (sorted_counties['state'] != sorted_counties['state'].shift(1))
sorted_counties['new_cases'] = sorted_counties['new_cases'].where(~diff_mask, np.nan)
sorted_counties['R0']= sorted_counties.new_cases / sorted_counties.new_cases.shift()
print(sorted_counties[sorted_counties['county']=='Virginia Beach city'])
sorted_counties = sorted_counties.drop(columns=['deaths','cases'])

NotImplementedError: See issue #11704 https://github.com/pandas-dev/pandas/issues/11704

In [50]:
## Adding Mandate Policy date
policy_dates = [dt.datetime.strptime(date,'%m-%d-%Y') if isinstance(date,str) else None for date in  raw_policies.policy_date ]
fips_2_dates = dict(zip(raw_policies.fips,policy_dates))
sorted_counties['policy_date']= sorted_counties['fips'].map(fips_2_dates)

In [52]:
## Adding county population data
pop_data = pd.read_csv('county_populations.csv')
pop_data['county'] = pop_data['county'].str.replace('County','')
all_counties = pd.merge(sorted_counties,pop_data,how='inner',on=['state','county'])
filtered_counties = all_counties[all_counties['population'] > 100000]

In [53]:
## Creating before and after mandate dataframes
filtered_counties = filtered_counties.dropna()
after_mandate = filtered_counties[filtered_counties.apply(lambda x: x['date'] > x['policy_date'] + relativedelta(days=+14) and x['date'] < x['policy_date'] + relativedelta(days=+44), axis=1)] \
.replace([np.inf, -np.inf], np.nan).dropna()
before_mandate = filtered_counties[filtered_counties.apply(lambda x: x['date'] < x['policy_date'] and x['date'] > x['policy_date'] + relativedelta(days=-30), axis=1)] \
.replace([np.inf, -np.inf], np.nan).dropna()


In [54]:
print(before_mandate)
print(after_mandate)

            date               county     state     fips  new_cases        R0  \
260   2020-04-30      Alexandria city  Virginia  51510.0       54.0  1.148936   
261   2020-05-01      Alexandria city  Virginia  51510.0       48.0  0.888889   
262   2020-05-02      Alexandria city  Virginia  51510.0       46.0  0.958333   
263   2020-05-03      Alexandria city  Virginia  51510.0       51.0  1.108696   
264   2020-05-04      Alexandria city  Virginia  51510.0       41.0  0.803922   
...          ...                  ...       ...      ...        ...       ...   
11289 2020-05-24  Virginia Beach city  Virginia  51810.0        5.0  0.714286   
11290 2020-05-25  Virginia Beach city  Virginia  51810.0       15.0  3.000000   
11291 2020-05-26  Virginia Beach city  Virginia  51810.0       20.0  1.333333   
11292 2020-05-27  Virginia Beach city  Virginia  51810.0        8.0  0.400000   
11293 2020-05-28  Virginia Beach city  Virginia  51810.0       16.0  2.000000   

      policy_date  populati

In [60]:
after_mandate_avg = after_mandate.drop(columns=['date','policy_date']).groupby(['state','county','fips']).mean()
before_mandate_avg = before_mandate.drop(columns=['date','policy_date']).groupby(['state','county','fips']).mean()

In [61]:
condensed = pd.merge(before_mandate_avg,after_mandate_avg,how='inner',on=['state','county','fips'],suffixes=('_b','_a')).dropna()
pd.options.display.max_colwidth = 500
condensed = condensed[(condensed['new_cases_b'] > 40) & (condensed['new_cases_a'] > 40)]

In [62]:
ttest_rel(condensed['R0_b'], condensed['R0_a'])

Ttest_relResult(statistic=0.9130014034588766, pvalue=0.4576191436506212)

In [63]:
print(condensed[['R0_b','R0_a']].mean())

R0_b    1.291558
R0_a    1.164865
dtype: float64
