In [1]:
import pandas as pd

# Dataset Collection 


## Case Data
- Source : [CDC](https://data.cdc.gov/Case-Surveillance/United-States-COVID-19-Cases-and-Deaths-by-State-o/9mfq-cb36/data)
- Updated : 5/21/2022
- Local : [United_States_COVID-19_Cases_and_Deaths_by_State_over_Time.csv](./data/United_States_COVID-19_Cases_and_Deaths_by_State_over_Time.csv)

In [15]:
# Load the local copy of the CDC per-state cases/deaths time series.
caseDF = pd.read_csv(
    filepath_or_buffer='./data/United_States_COVID-19_Cases_and_Deaths_by_State_over_Time.csv'
)

In [16]:
# Keep only the daily counts plus the two keys (date, state) used for
# grouping later on; every other column of the raw file is dropped.
caseDF = caseDF[
    [
        'new_case',
        'new_death',
        'submission_date',
        'state',
    ]
]

In [17]:
# Final working dataset: one row per (submission_date, state) pair with the
# summed new_case / new_death counts; both keys become the row MultiIndex.
# NOTE(review): the displayed result runs 01/01/2021 ... 12/31/2021, which
# suggests submission_date is still a MM/DD/YYYY string and is therefore
# sorted lexically rather than chronologically -- confirm before relying on
# row order.
caseDF = (
    caseDF
    .groupby(by=['submission_date', 'state'])
    .sum()
)
caseDF

Unnamed: 0_level_0,Unnamed: 1_level_0,new_case,new_death
submission_date,state,Unnamed: 2_level_1,Unnamed: 3_level_1
01/01/2021,AK,470,4
01/01/2021,AL,3621,82
01/01/2021,AR,4304,35
01/01/2021,AS,0,0
01/01/2021,AZ,10063,151
...,...,...,...
12/31/2021,VT,1301,3
12/31/2021,WA,10367,0
12/31/2021,WI,8010,22
12/31/2021,WV,0,0


## Census Data 
- Source : [US Census Bureau](https://www.census.gov)
- Updated : 12/21/2021
- Local : [census.csv](./data/census.csv)

> Collected using Selenium-based web scraping of the website dashboard.

In [21]:
!curl -l https://raw.githubusercontent.com/tigboatnc/Covid-Policy-Effectiveness/main/dataset/sam_censusdata_merged.csv -o data/census.csv

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  5617  100  5617    0     0   127k      0 --:--:-- --:--:-- --:--:--  127k


In [28]:
# Read the census table and, in the same expression, keep only the columns
# whose header does not start with "Unnamed" (stray columns from the CSV).
censusDF = (
    pd.read_csv('./data/census.csv')
    .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
)

In [29]:
# Preview the first five rows of the cleaned census table.
censusDF.head(n=5)

Unnamed: 0,stateName,stateCode,Total Population,Median Household Income,Bachelor's Degree or Higher,Employment Rate,Total Housing Units,Without Health Care Coverage,Total Employer Establishments,Total Households,Hispanic or Latino (of any race)
0,Alabama,AL,5024279,"$51,734",26.3%,54.7%,2288330,9.7%,100731,1897576,264047
1,Alaska,AK,733391,"$75,463",30.2%,59.3%,326200,12.2%,21399,252199,49824
2,Arizona,AZ,7151502,"$62,055",30.2%,56.8%,3082000,11.3%,147163,2670441,2192253
3,Arkansas,AR,3011524,"$48,952",23.3%,55.2%,1365265,9.1%,67243,1163647,256847
4,California,CA,39538223,"$80,440",35.0%,60.3%,14392140,7.7%,966224,13157873,15579652


## NHSPI Index 

- Source : [NHSPI Index](https://nhspi.org/tools-resources/)
- Xlsx Download Link : [NHSPI Index 2021 Resource List](https://nhspi.org/wp-content/uploads/2021/05/NHSPI_2021_Data_File_MMP_reviewed.xlsx) 

__Filtering__
- Only the Domains sheet of the full spreadsheet was kept, for brevity.
- Local : [nhspi index  - Domains.csv](./data/nhspi%20index%20%20-%20Domains.csv)


__Quick Recap__
- __HSS__ - HEALTH SECURITY SURVEILLANCE
    - Actions to monitor and detect health threats, and to identify where hazards start and spread so that they can be contained rapidly.
- __CPE__ - COMMUNITY PLANNING & ENGAGEMENT COORDINATION
    - Actions to develop and maintain supportive relationships among government agencies, community organizations, and individual households; and to develop shared plans for responding to disasters and emergencies.
- __IIM__ - INCIDENT & INFORMATION MANAGEMENT
    - Actions to deploy people, supplies, money and information to the locations where they are most effective in protecting health and safety.
- __HD__ - HEALTH CARE DELIVERY
- __CM__ - COUNTERMEASURE MANAGEMENT
- __EOH__ - ENVIRONMENTAL & OCCUPATIONAL HEALTH


In [30]:
# Load the NHSPI "Domains" sheet that was exported to a local CSV.
nhspiDF = pd.read_csv(
    filepath_or_buffer='./data/nhspi index  - Domains.csv'
)

In [31]:
# Display the NHSPI domain table (rows are keyed by STATE and YEAR; the
# rendered output below is truncated to the first and last rows/columns).
nhspiDF

Unnamed: 0,STATE,YEAR,HSS,CPE,IIM,HD,CM,EOH,fc_iim,fc_hss,...,Nat_HDub,HD_class,Nat_CM,Nat_CMlb,Nat_CMub,CM_class,Nat_EOH,Nat_EOHlb,Nat_EOHub,EOH_class
0,AK,2013,6.8,4.6,7.0,3.3,2.9,5.1,0.541271,0.226684,...,4.90,3,5.6,5.32,5.97,3,6.2,5.94,6.53,3
1,AL,2013,7.2,3.8,7.4,4.3,5.6,5.4,0.541271,0.226684,...,4.90,3,5.6,5.32,5.97,2,6.2,5.94,6.53,3
2,AR,2013,7.4,4.0,8.5,4.9,5.5,5.1,0.541271,0.226684,...,4.90,2,5.6,5.32,5.97,2,6.2,5.94,6.53,3
3,AZ,2013,7.3,4.5,7.9,3.5,4.4,5.7,0.541271,0.226684,...,4.90,3,5.6,5.32,5.97,3,6.2,5.94,6.53,3
4,CA,2013,6.9,4.4,8.6,4.2,4.3,7.2,0.541271,0.226684,...,4.90,3,5.6,5.32,5.97,3,6.2,5.94,6.53,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
403,VT,2020,9.0,6.7,8.9,6.0,6.0,7.4,0.541271,0.226684,...,5.25,1,6.2,5.89,6.58,2,6.9,6.65,7.17,1
404,WA,2020,8.3,5.6,8.4,4.2,6.2,8.0,0.541271,0.226684,...,5.25,3,6.2,5.89,6.58,2,6.9,6.65,7.17,1
405,WI,2020,8.8,5.7,9.0,5.2,7.3,7.0,0.541271,0.226684,...,5.25,2,6.2,5.89,6.58,1,6.9,6.65,7.17,2
406,WV,2020,7.2,4.8,8.4,5.3,7.0,4.8,0.541271,0.226684,...,5.25,1,6.2,5.89,6.58,1,6.9,6.65,7.17,3


# Experiment 1 