<a href="https://colab.research.google.com/github/tanner-k/ECON488_Project/blob/main/DD_COVID19.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Import the libraries needed to do the analysis for the project
import pandas as pd
import numpy as np

In [2]:
#Read in the data for the project
df = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv')

In [3]:
df

Unnamed: 0,date,state,fips,cases,deaths
0,2020-01-21,Washington,53,1,0
1,2020-01-22,Washington,53,1,0
2,2020-01-23,Washington,53,1,0
3,2020-01-24,Illinois,17,1,0
4,2020-01-24,Washington,53,1,0
...,...,...,...,...,...
14474,2020-11-20,Virginia,51,213331,3912
14475,2020-11-20,Washington,53,147251,2738
14476,2020-11-20,West Virginia,54,38480,639
14477,2020-11-20,Wisconsin,55,365190,3106


In [4]:
#Create a variable for the list of states we want to include in our analysis. 
included_states = ['Washington', 'Illinois', 'California', 'Arizona', 'Massachusetts',
       'Wisconsin', 'Texas', 'Nebraska', 'Utah', 'Oregon', 'Florida',
       'New York', 'Rhode Island', 'Georgia', 'New Hampshire',
       'North Carolina', 'New Jersey', 'Colorado', 'Maryland', 'Nevada',
       'Tennessee', 'Indiana', 'Kentucky', 'Minnesota',
       'Oklahoma', 'Pennsylvania', 'South Carolina',
       'District of Columbia', 'Kansas', 'Missouri', 'Vermont',
       'Virginia', 'Connecticut', 'Iowa', 'Louisiana', 'Ohio', 'Michigan',
       'South Dakota', 'Arkansas', 'Delaware', 'Mississippi',
       'New Mexico', 'North Dakota', 'Wyoming', 'Maine',
       'Alabama', 'Idaho', 'Montana', 'West Virginia']

In [5]:
#Keep only the states we want to include in our analysis
df = df.loc[df.state.isin(included_states)]

#Organize the data by state and by date
df = df.sort_values(by=['state', 'date'],ascending=[True, True])

In [6]:
#Create daily counts of cases and deaths
df['daily_cases'] = df.groupby(by='state')['cases'].diff().fillna(0)
df['daily_deaths'] = df.groupby(by='state')['deaths'].diff().fillna(0)

#Eliminate negative values from transitioning from state to state
df[df['daily_cases'] < 0] = 0
df[df['daily_deaths'] < 0] = 0

df.loc[df.state=='New York'].describe()

Unnamed: 0,fips,cases,deaths,daily_cases,daily_deaths
count,261.0,261.0,261.0,261.0,261.0
mean,36.0,356702.360153,25844.260536,2247.521073,129.51341
std,0.0,154562.473031,11282.469648,2619.908351,244.197581
min,36.0,1.0,0.0,0.0,0.0
25%,36.0,326659.0,25028.0,667.0,9.0
50%,36.0,404207.0,31979.0,1006.0,20.0
75%,36.0,449658.0,32639.0,2715.0,97.0
max,36.0,589605.0,33696.0,12274.0,1036.0


In [7]:
#Create a week variable
df['date'] = pd.to_datetime(df['date'])
df['week_of_year'] = df.date.dt.strftime('%U').astype('int32')

In [8]:
#Collapse the data into counts per week
cases = df.groupby(['state', 'week_of_year'])['daily_cases','daily_deaths'].sum()
cases = pd.DataFrame(data=cases)
cases = cases.rename(columns={'daily_cases': 'weekly_cases', 'daily_deaths': 'weekly_deaths'})
cases = cases.reset_index(level=['state', 'week_of_year'])
cases = cases.drop(axis=0, index=0)
cases.loc[cases.state=='Arizona'].describe()

  


Unnamed: 0,week_of_year,weekly_cases,weekly_deaths
count,43.0,43.0,43.0
mean,25.0,6805.139535,149.186047
std,12.556539,7449.722595,145.743783
min,4.0,0.0,0.0
25%,14.5,1350.0,50.0
50%,25.0,3577.0,120.0
75%,35.5,9231.5,189.0
max,46.0,25375.0,575.0


In [9]:
#Import mask mandate data
mask = pd.read_csv('https://raw.githubusercontent.com/tanner-k/DD-COVID19/master/Mask_Mandate_Data.csv')
mask.head()

Unnamed: 0,State,Mask Mandate,Week Number
0,Alabama,0,1
1,Alabama,0,2
2,Alabama,0,3
3,Alabama,0,4
4,Alabama,0,5


In [10]:
#Make the mask data have the same columns as the case data
mask = mask.rename(columns={'State': 'state', 'Week Number': 'week_of_year', 'Mask Mandate': 'mask_mandate'})
mask['weeks'] = mask.week_of_year
mask['State'] = mask.state
mask

Unnamed: 0,state,mask_mandate,week_of_year,weeks,State
0,Alabama,0,1,1,Alabama
1,Alabama,0,2,2,Alabama
2,Alabama,0,3,3,Alabama
3,Alabama,0,4,4,Alabama
4,Alabama,0,5,5,Alabama
...,...,...,...,...,...
2543,Wyoming,0,48,48,Wyoming
2544,Wyoming,0,49,49,Wyoming
2545,Wyoming,0,50,50,Wyoming
2546,Wyoming,0,51,51,Wyoming


In [11]:
mask = mask.set_index(['state', 'week_of_year'])
cases = cases.set_index(['state', 'week_of_year'])

In [12]:
#Combine the mask mandate data with the cases data
cases = cases.combine_first(mask)
cases

Unnamed: 0_level_0,Unnamed: 1_level_0,State,mask_mandate,weekly_cases,weekly_deaths,weeks
state,week_of_year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alabama,1,Alabama,0.0,,,1.0
Alabama,2,Alabama,0.0,,,2.0
Alabama,3,Alabama,0.0,,,3.0
Alabama,4,Alabama,0.0,,,4.0
Alabama,5,Alabama,0.0,,,5.0
...,...,...,...,...,...,...
Wyoming,48,Wyoming,0.0,,,48.0
Wyoming,49,Wyoming,0.0,,,49.0
Wyoming,50,Wyoming,0.0,,,50.0
Wyoming,51,Wyoming,0.0,,,51.0


In [13]:
cases['mandate_week'] = 0
cases.loc[cases.mask_mandate==1, 'mandate_week'] = cases.loc[cases.mask_mandate==1, 'weeks']
cases.describe()

Unnamed: 0,mask_mandate,weekly_cases,weekly_deaths,weeks,mandate_week
count,2548.0,1883.0,1883.0,2548.0,2548.0
mean,0.013344,6292.813064,134.490175,26.5,0.350078
std,0.114765,10502.885483,328.524082,15.011277,3.104117
min,0.0,0.0,0.0,1.0,0.0
25%,0.0,674.0,12.0,13.75,0.0
50%,0.0,2878.0,52.0,26.5,0.0
75%,0.0,6880.0,130.5,39.25,0.0
max,1.0,85025.0,6624.0,52.0,46.0


In [14]:
for states, week in cases.groupby(level=0):
  cases.loc[cases.State==states, 'mandate_week'] = week.mandate_week.max()

cases = cases.dropna()

In [15]:
cases['week_diff'] = cases.loc[cases.mandate_week!=0, 'weeks'].subtract(cases.mandate_week)
cases.week_diff.fillna(0, inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  downcast=downcast,


In [16]:
#Import linearmodels library to do the two-way fixed effects regression
from linearmodels import PanelOLS

y = cases.weekly_cases
x = cases.week_diff

mod = PanelOLS(y, x, entity_effects=True, time_effects=True, drop_absorbed=True)
res = mod.fit(cov_type='clustered', cluster_entity=True)
print(res)

ModuleNotFoundError: ignored

In [None]:
#Create a graph that shows the variation of cases 
import seaborn as sns
import matplotlib.pyplot as plt

plt.figure(figsize=(10,10))
sns.lineplot(x=cases.loc[cases.mandate_week!=0,'week_diff'], y=y, data=cases, )

In [None]:
cases = cases[['weekly_cases', 'weekly_deaths', 'mask_mandate', 'week_diff']]

cases.describe()