In [1]:
import pandas as pd
import numpy as np
import os 


In [2]:
cwd = os.getcwd()

In [4]:
ref = pd.read_csv(cwd + '/raw_data/state_fips_master.csv',
                 dtype = {'fips' : np.str_})
ref['fips'] = ref['fips'].str.zfill(2)
ref_1 = ref[['state_name', 'fips']]
ref = ref[['state_abbr','state_name', 'fips']]

### Hospitalization

In [5]:
df = pd.read_pickle(cwd + '/raw_data/confirmed_admissions_covid_1d_state.pickle')
df['geo_value'] = df['geo_value'].apply(str.upper)
df_hosp = df[['geo_value', 'signal', 'time_value', 'value']]
df_hosp = df_hosp.rename(columns = {'geo_value' : 'state_abbr'})
df_hosp = df_hosp.merge(ref)
df_hosp['Description'] = 'Sum of adult and pediatric confirmed COVID-19 hospital admissions occurring each day.'
df_hosp = df_hosp.rename(columns = {'state_abbr' : 'State'})
df_hosp = df_hosp.dropna().reset_index().drop(columns = ['index'])
df_hosp = df_hosp.rename(columns = {'signal' : 'Variable'})
df_hosp['Variable'] = df_hosp['Variable'].replace('confirmed_admissions_covid_1d', 'Confirmed COVID-19 Hospital Admissions')
df_hosp['Temporal_resolution'] = 'Daily'
df_hosp = df_hosp.rename(columns = {'time_value' : 'Date'})
df_hosp['Date'] = pd.to_datetime(df_hosp['Date'])

In [6]:
df_hosp.to_pickle(cwd +'/processed_data/hospitalization_daily_state.pkl')

### Vaccination

In [7]:
vacc_raw = pd.read_csv(cwd + '/raw_data/COVID-19_Vaccinations_in_the_United_States_Jurisdiction.csv')
vacc_raw = vacc_raw.rename(columns = {'Location' : 'state_abbr'})
vacc_raw = vacc_raw.drop(columns = ['MMWR_week'])
vacc_raw['Temporal_resolution'] = 'Weekly'
vacc_raw = vacc_raw.merge(ref)
vacc_raw['Date'] = pd.to_datetime(vacc_raw['Date'])
vacc_raw = vacc_raw.set_index(['Date', 'state_abbr', 'Temporal_resolution', 'state_name',
       'fips'])
vacc_raw = vacc_raw.drop(columns = {'Distributed', 'Distributed_Janssen', 'Distributed_Moderna',
       'Distributed_Pfizer', 'Distributed_Novavax',
       'Distributed_Unk_Manuf', 'Dist_Per_100K',
       'Distributed_Per_100k_5Plus', 'Distributed_Per_100k_12Plus',
       'Distributed_Per_100k_18Plus', 'Distributed_Per_100k_65Plus'})
vacc_select = vacc_raw[['Administered_Dose1_Pop_Pct',
       'Administered_Dose1_Recip_65PlusPop_Pct', 
       'Series_Complete_Pop_Pct',
       'Series_Complete_65PlusPop_Pct',
       'Additional_Doses_Vax_Pct',
       'Additional_Doses_65Plus_Vax_Pct']]
vacc_pro = vacc_select.stack().reset_index()
vacc_pro = vacc_pro.rename(columns = {'level_5' : 'Variable', 0 : 'value'})

index = vacc_pro[vacc_pro['Variable'] == 'Administered_Dose1_Pop_Pct'].index
vacc_pro.loc[index, 'Description'] = 'Percent of population with at least one dose based on the State where recipient lives'

index = vacc_pro[vacc_pro['Variable'] == 'Administered_Dose1_Recip_65PlusPop_Pct'].index
vacc_pro.loc[index, 'Description'] = 'Percent of population ages 65+ with at least one dose based on the State where recipient lives'

index = vacc_pro[vacc_pro['Variable'] == 'Series_Complete_Pop_Pct'].index
vacc_pro.loc[index, 'Description'] = 'Percent of people with a completed primary series (have second dose of a two-dose vaccine or one dose of a single-dose vaccine) based on the State where recipient lives'

index = vacc_pro[vacc_pro['Variable'] == 'Series_Complete_65PlusPop_Pct'].index
vacc_pro.loc[index, 'Description'] = 'Percent of people 65+ with a completed primary series (have second dose of a two-dose vaccine or one dose of a single-dose vaccine) based on the State where recipient lives'

index = vacc_pro[vacc_pro['Variable'] == 'Additional_Doses_Vax_Pct'].index
vacc_pro.loc[index, 'Description'] = 'Percent of people who completed a primary series and have received a booster (or additional) dose.'

index = vacc_pro[vacc_pro['Variable'] == 'Additional_Doses_65Plus_Vax_Pct'].index
vacc_pro.loc[index, 'Description'] = 'Percent of people 65+ who completed a primary series and have received a booster (or additional) dose.'


In [8]:
vacc_pro.to_pickle(cwd + '/processed_data/vaccination_weekly_state.pkl')

### Cases

In [9]:
#############################
######Cases data
#############################
##Read in cases data from Github
CSSE_url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
cases = pd.read_csv(CSSE_url, dtype = {'FIPS' : np.str_})
cases = ref_1.merge(cases.drop(columns = ['FIPS']), how = 'left', left_on = ['state_name'], right_on = ['Province_State'])# cases['FIPS'] = cases['FIPS'].str.zfill(5)
cases = cases.rename(columns = {'fips' : 'FIPS'}).groupby(['FIPS']).sum().iloc[:, 14:]

daily_cases = cases.iloc[:, 13:] - cases.iloc[:, 13:].shift(axis = 1)

daily_cases = daily_cases.dropna(axis = 1)
# daily_cases = daily_cases.iloc[:, 7:] 
daily_cases.columns = pd.to_datetime(daily_cases.columns).strftime('%Y-%m-%d')
daily_cases_state = daily_cases.groupby(daily_cases.index.str[:2]).sum()
# daily_cases_state = daily_cases_state.iloc[:-2, :]
daily_cases_state[daily_cases_state<0] = 0

  cases = cases.rename(columns = {'fips' : 'FIPS'}).groupby(['FIPS']).sum().iloc[:, 14:]


In [10]:
daily_cases_state = daily_cases_state.stack().reset_index().rename(columns = {'level_1' : 'Date', 0 : 'value'})
daily_cases_state['Date'] = pd.to_datetime(daily_cases_state['Date'])
daily_cases_state = daily_cases_state.rename(columns = {'FIPS' : 'fips'})
daily_cases_state = daily_cases_state.merge(ref)
daily_cases_state['Description'] = 'Number of individuals with a positive molecular test. The tests are designed for viral genetic material, such as a PCR or polymerase chain reaction test'

In [11]:
daily_cases_state.to_pickle(cwd + '/processed_data/confimed_cases_daily_state.pkl')

### Policy

In [18]:
df = pd.read_pickle(cwd + '/raw_data/policy.pkl')

In [19]:
df = df.drop(columns = ['Unnamed: 0'])
df['Date'] = pd.to_datetime(df['Date'].astype(str))
selected_policies = ['C1M_School closing', 'C2M_Workplace closing', 'C3E_Cancel public events',
                    'C4M_Restrictions on gatherings', 'C6M_Stay at home requirements',
                    'C7M_Restrictions on internal movement', 'C8NV_International travel controls',
                    'C8V_International travel controls', 'H1_Public information campaigns',
                    'H2_Testing policy', 'H3_Contact tracing', 'H6M_Facial Coverings',
                    'H8M_Protection of elderly people']
df_state = df[['RegionName', 'Date', 'C1M_School closing', 'C2M_Workplace closing', 'C3M_Cancel public events',
                    'C4M_Restrictions on gatherings', 'C6M_Stay at home requirements',
                    'C7M_Restrictions on internal movement', 'C8NV_International travel controls',
                    'C8V_International travel controls', 'H1_Public information campaigns',
                    'H2_Testing policy', 'H3_Contact tracing', 'H6M_Facial Coverings',
                    'H8M_Protection of elderly people']]
df_state = df_state.dropna(subset = ['RegionName']).reset_index().drop(columns = ['index'])
df_state = df_state.replace('no data', 'no measures')
df_state = df_state.rename(columns = {'RegionName' : 'state_name'})
df_state = ref.merge(df_state)


In [20]:
df_state.to_pickle(cwd + '/processed_data/selected_state_policy_daily.pkl')