# Pandemic Preparedness Analysis Data Clean Up - Policy Table

In [1]:
#Dependencies
import pandas as pd
import requests
import json
import os

In [2]:
# Load every raw dataset up front; each path is read right after it is defined.

# CDC case/death counts: a relative path, resolved against the working directory.
cdc = "cdc_source.csv"
cdc_df = pd.read_csv(cdc)

# Google COVID-19 Open Data tables, read directly from their public URLs.
policy = "https://storage.googleapis.com/covid19-open-data/v3/oxford-government-response.csv"
policy_df = pd.read_csv(policy)

vaccine = "https://storage.googleapis.com/covid19-open-data/v3/vaccinations.csv"
vaccine_df = pd.read_csv(vaccine)

vaxsite = "https://storage.googleapis.com/covid19-open-data/covid19-vaccination-access/facility-boundary-us-all.csv"
# NOTE(review): vaccine_df and vaxsite_df are not referenced by any cell
# visible in this notebook — confirm they are still needed before keeping
# these two downloads.
vaxsite_df = pd.read_csv(vaxsite)

In [3]:
# Non-null value count per column of the raw CDC frame (quick completeness check).
cdc_df.count()

submission_date    60060
state              60060
tot_cases          60060
new_case           60060
tot_death          60060
new_death          60060
dtype: int64

In [4]:
# Column cleanup: swap the CDC source headers for the display names used
# by the remaining cells. The renamed frame is bound back to cdc_df.
cdc_df = cdc_df.rename(
    columns={
        'submission_date': 'Date',
        'state': 'State',
        'tot_cases': 'Total Cases',
        'new_case': 'Confirmed New C19 Case',
        'tot_death': 'Total Death',
        'new_death': 'Confirmed New C19 Death',
    }
)

In [5]:
# Per-column non-null counts again; the labels shown are cdc_df's current column names.
cdc_df.count()

Date                       60060
State                      60060
Total Cases                60060
Confirmed New C19 Case     60060
Total Death                60060
Confirmed New C19 Death    60060
dtype: int64

In [6]:
# Restrict the CDC data to the study window (dates strictly after 2020-02-29
# and strictly before 2021-04-01), then order it by state and date.
#
# Date is parsed to a real datetime first. Comparing and sorting the raw
# strings is lexicographic, which is only chronological when every value is
# ISO-formatted (YYYY-MM-DD); parsing removes that dependency and gives this
# column the same datetime dtype the policy table uses for its Date column.
# cdc_df itself is left untouched, so this cell can be re-run safely.
cdc_dated = cdc_df.assign(Date=pd.to_datetime(cdc_df['Date']))

cdc_in_window = (
    (cdc_dated['Date'] > pd.Timestamp('2020-02-29'))
    & (cdc_dated['Date'] < pd.Timestamp('2021-04-01'))
)

cdc_pre = cdc_dated.loc[cdc_in_window].sort_values(by=['State', 'Date'])

In [7]:
# Narrow the CDC table to the rows whose State code is 'NY' or 'TX'.
cdc_pre = cdc_pre.loc[cdc_pre['State'].isin(['NY', 'TX'])]

In [8]:
# Replace the row labels left over from filtering/sorting with a fresh 0..n-1 index
# (drop=True discards the old labels instead of keeping them as a column).
cdc_pre = cdc_pre.reset_index(drop=True)

## Policy Data Cleanup

In [9]:
# Show every column name in the raw policy table.
policy_df.columns.tolist()

['date',
 'location_key',
 'school_closing',
 'workplace_closing',
 'cancel_public_events',
 'restrictions_on_gatherings',
 'public_transport_closing',
 'stay_at_home_requirements',
 'restrictions_on_internal_movement',
 'international_travel_controls',
 'income_support',
 'debt_relief',
 'fiscal_measures',
 'international_support',
 'public_information_campaigns',
 'testing_policy',
 'contact_tracing',
 'emergency_investment_in_healthcare',
 'investment_in_vaccines',
 'facial_coverings',
 'vaccination_policy',
 'stringency_index']

In [10]:
# Keep only the three policy columns this analysis uses: the date, the
# location key, and the stay-at-home indicator.
#
# The columns are selected by name rather than by dropping every other
# column. The policy table is downloaded from a remote URL, so its schema
# can change: with a drop list, a removed/renamed column raises KeyError and
# a newly added column silently ends up in the exported table. Selecting the
# needed columns only depends on the three names actually used.
# .copy() gives pol_df its own data, independent of policy_df.
pol_df = policy_df.loc[
    :, ['date', 'location_key', 'stay_at_home_requirements']
].copy()



In [11]:
# Confirm which columns remain in the trimmed policy table.
pol_df.columns.tolist()

['date', 'location_key', 'stay_at_home_requirements']

In [12]:
# Rename the policy columns so Date/State match the CDC table's headers.
# The renamed frame is bound back to pol_df.
pol_df = pol_df.rename(
    columns={
        'date': 'Date',
        'location_key': 'State',
        'stay_at_home_requirements': 'Stay at Home Orders',
    }
)

In [13]:
# Confirm the policy table's column names after the rename.
pol_df.columns.tolist()

['Date', 'State', 'Stay at Home Orders']

In [14]:
# Keep only the rows whose location key is 'US_NY' or 'US_TX', then display
# the filtered table.
pol_df = pol_df[pol_df['State'].isin(['US_NY', 'US_TX'])]

pol_df

Unnamed: 0,Date,State,Stay at Home Orders
278007,2020-01-01,US_NY,0.0
278008,2020-01-02,US_NY,0.0
278009,2020-01-03,US_NY,0.0
278010,2020-01-04,US_NY,0.0
278011,2020-01-05,US_NY,0.0
...,...,...,...
287255,2022-07-04,US_TX,0.0
287256,2022-07-19,US_TX,0.0
287257,2022-07-20,US_TX,0.0
287258,2022-07-21,US_TX,0.0


In [15]:
# Convert the Date column from text to pandas datetimes so later date
# comparisons and sorting are chronological.
# pd.to_datetime is used instead of astype("datetime64"): casting to a
# unit-less "datetime64" dtype raises TypeError on pandas 2.x.
# assign() returns a new frame, so pol_df is rebound to the converted copy.
pol_df = pol_df.assign(Date=pd.to_datetime(pol_df['Date']))


In [16]:
# Shorten the location keys to the two-letter codes used in the CDC table.
pol_df = pol_df.assign(
    State=pol_df['State'].replace({'US_NY':'NY','US_TX':'TX'})
)

In [17]:
# Order the policy rows by state, then by date (both ascending, the default).
pol_df = pol_df.sort_values(['State', 'Date'])

In [18]:
# Trim the policy table to the study window: dates strictly after 2020-02-29
# and strictly before 2021-04-01.
policy_in_window = (pol_df['Date'] > '2020-02-29') & (pol_df['Date'] < '2021-04-01')
policy_fin = pol_df.loc[policy_in_window]

In [19]:
# Non-null value count per column of the windowed policy table.
policy_fin.count()

Date                   792
State                  792
Stay at Home Orders    792
dtype: int64

In [20]:
# Replace the row labels carried over from policy_df with a fresh 0..n-1 index
# (drop=True discards the old labels instead of keeping them as a column).
policy_fin = policy_fin.reset_index(drop=True)

In [21]:
# Write the cleaned CDC table (cdc_pre: NY/TX rows inside the study window)
# to CSV, with a header row and without the index column.
# policy_fin, the Google policy data from 3/2020 to 3/2021, is written out
# by its own export cell.
cdc_pre.to_csv("cdc_pre_vax.csv", index=False)

In [23]:
# Write the cleaned policy table (policy_fin) to CSV, with a header row and
# without the index column.
policy_fin.to_csv("policy_table.csv", index=False)