# Project 1
## Epidemiological Study: US Vaccination Campaign (November 2020 - March 2021)
---
## Project Description/Outline
Determine the effectiveness of the US vaccination campaign in attending the population that is most affected by covid-19 disease. This is achieved by revising the total number of doses applied, vaccination coverage, population demographics such as gender, race, socioeconomic status, and education, versus epidemiologic variables: incidence, prevalence, hospitalization, UCI, death numbers.

In [1]:
## Dependencies
import pandas as pd
import requests
import time
import datetime
import matplotlib.pyplot as plt
import numpy as np

# Import API key
# from app_tokens import cdc_token

## 1. Covid-19 Vaccination Data

### 1.1 Data Retrieval

In [2]:
## Import data from CSV
hesitancy_df = pd.read_csv('input_data/Vaccine_Hesitancy_Covid19.csv', encoding='latin-1')

In [3]:
columns = [
    'FIPS Code',
    'Geographical Point',
    'Social Vulnerability Index (SVI)',
    'SVI Category',
    'Percent adults fully vaccinated against COVID-19',
    'Percent Hispanic',
    'Percent non-Hispanic American Indian/Alaska Native',
    'Percent non-Hispanic Asian',
    'Percent non-Hispanic Black',
    'Percent non-Hispanic Native Hawaiian/Pacific Islander',
    'Percent non-Hispanic White'
]

vaccination_df = hesitancy_df[columns].sort_values('FIPS Code')
vaccination_df.reset_index(inplace=True, drop=True)

vaccination_df['Percent non-Hispanic Other'] = 1 - vaccination_df.iloc[:,4:10].sum(axis=1)

vaccination_df.set_index('FIPS Code', drop=True, inplace=True)
vaccination_df.head()

# del hesitancy_df

Unnamed: 0_level_0,Geographical Point,Social Vulnerability Index (SVI),SVI Category,Percent adults fully vaccinated against COVID-19,Percent Hispanic,Percent non-Hispanic American Indian/Alaska Native,Percent non-Hispanic Asian,Percent non-Hispanic Black,Percent non-Hispanic Native Hawaiian/Pacific Islander,Percent non-Hispanic White,Percent non-Hispanic Other
FIPS Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1001,POINT (-86.844516 32.756889),0.44,Moderate Concern,0.114,0.0283,0.0025,0.0103,0.19,0.0001,0.746,0.6548
1003,POINT (-86.844516 32.756889),0.22,Low Concern,0.176,0.0456,0.0065,0.0092,0.0917,0.0,0.8307,0.671
1005,POINT (-86.844516 32.756889),1.0,Very High Concern,0.128,0.0436,0.0029,0.0048,0.4744,0.0,0.4581,0.3463
1007,POINT (-86.844516 32.756889),0.6,High Concern,0.115,0.0257,0.0013,0.0012,0.2214,0.0,0.7453,0.6354
1009,POINT (-86.844516 32.756889),0.42,Moderate Concern,0.095,0.0926,0.0007,0.0037,0.0153,0.0004,0.8689,0.7923


## 2. US Census Reference (2019): County

### 2.1 Data Retrieval

In [4]:
# In order to build a useful Census Data set, we need to import the original csv file.
# We used the encoding 'latin-1' to code the characters into the Latin alphabet. 

census_df = pd.read_csv(('input_data/US_Census2019_totals.csv'), encoding='latin-1')

In [5]:
# We now select the columns we will be using: 'STATE', 'COUNTY', 'STNAME', 'CTYNAME', 'POPESTIMATE2019'

census_2019 = census_df[['STATE', 'COUNTY', 'STNAME', 'CTYNAME', 'POPESTIMATE2019']]

In [6]:
# Once we have selected the columns, we need to clean the data set from empty values, NAN's.
# Since the data set has state totals, we create two dataframes, one with the total information by state
# and another with information by county. 

state_2019 = census_2019.loc[census_2019['COUNTY']==0]
state_2019.reset_index(inplace=True, drop=True)
# state_2019['POPESTIMATE2019'].sum()

In [7]:
# We also dropped the total row, since we want the information by county, not by state. 

county_2019 = census_2019.drop(census_2019.index[census_2019["COUNTY"]==0])

In [8]:
# We select the FIPS code to use as index and as the merging point.
# Since in the county dataframe, the fips is disaggregated, we need to go through the rows and create the FIPS code.

fips = []
for index, row in  county_2019.iterrows():
    fips.append(f'{row["STATE"]:>02}{row["COUNTY"]:>03}')

# We drop the columns we used to make the FIPS code and create another for the code. 

county_2019.insert(0, 'FIPS Code', fips)
county_2019.drop(labels=['STATE', 'COUNTY'], axis=1, inplace=True)

In [9]:
# Preview

county_2019

Unnamed: 0,FIPS Code,STNAME,CTYNAME,POPESTIMATE2019
1,01001,Alabama,Autauga County,55869
2,01003,Alabama,Baldwin County,223234
3,01005,Alabama,Barbour County,24686
4,01007,Alabama,Bibb County,22394
5,01009,Alabama,Blount County,57826
...,...,...,...,...
3188,56037,Wyoming,Sweetwater County,42343
3189,56039,Wyoming,Teton County,23464
3190,56041,Wyoming,Uinta County,20226
3191,56043,Wyoming,Washakie County,7805


In [10]:
county_2019.shape

(3142, 4)

## 3. US Census Reference (2019): Age

In [11]:
# We need the demographic information of the counties for comparing it with the vaccionation campaign.
# We import the census data set, also using the 'latin-1' encoding. 

all_data = pd.read_csv("input_data/cc-est2019-alldata.csv", encoding='latin-1')
all_data.head()

Unnamed: 0,SUMLEV,STATE,COUNTY,STNAME,CTYNAME,YEAR,AGEGRP,TOT_POP,TOT_MALE,TOT_FEMALE,...,HWAC_MALE,HWAC_FEMALE,HBAC_MALE,HBAC_FEMALE,HIAC_MALE,HIAC_FEMALE,HAAC_MALE,HAAC_FEMALE,HNAC_MALE,HNAC_FEMALE
0,50,1,1,Alabama,Autauga County,1,0,54571,26569,28002,...,607,538,57,48,26,32,9,11,19,10
1,50,1,1,Alabama,Autauga County,1,1,3579,1866,1713,...,77,56,9,5,4,1,0,0,2,1
2,50,1,1,Alabama,Autauga County,1,2,3991,2001,1990,...,64,66,2,3,2,7,2,3,2,0
3,50,1,1,Alabama,Autauga County,1,3,4290,2171,2119,...,51,57,13,7,5,5,2,1,1,1
4,50,1,1,Alabama,Autauga County,1,4,4290,2213,2077,...,48,44,7,5,0,2,2,1,3,1


In [12]:
# Since the age classification clashes with the other data sets, we are reclassifying it. 
# We use numpy select to creat a new column following the new age groups conditions. 

age_conditions = [
    (all_data['AGEGRP'] == 0),
    (all_data['AGEGRP'] >= 1) & (all_data['AGEGRP'] <= 4),
    (all_data['AGEGRP'] >= 5) & (all_data['AGEGRP'] <= 10),
    (all_data['AGEGRP'] >= 11) & (all_data['AGEGRP'] <= 13),
    (all_data['AGEGRP'] >= 14) & (all_data['AGEGRP'] <= 18) 
]

age_values = ['0', '0 - 17 years', '18 - 49 years', '50 - 64 years', '65 + years']

In [13]:
all_data['Age_group'] = np.select(age_conditions, age_values)

In [14]:
# If age group is 0, it means the the sum of all the age. We don't need it, so we drop it. 

all_data_tot = all_data.loc[all_data['AGEGRP']==0]

In [15]:
# Since we are focusing in the last year, we must select the year we need.
# We use a conditional loc. 

all_data_tot = all_data_tot.loc[all_data['YEAR']==12]
all_data_tot.reset_index(inplace=True, drop=True)

In [16]:
# We create the FIPS code again. 

fips_all_data = []
for index, row in  all_data_tot.iterrows():
    fips_all_data.append(f'{row["STATE"]:>02}{row["COUNTY"]:>03}')
    
all_data_tot.insert(0, 'FIPS Code', fips_all_data)
all_data_tot.drop(labels=['STATE', 'COUNTY'], axis=1, inplace=True)

In [17]:
all_data_tot

Unnamed: 0,FIPS Code,SUMLEV,STNAME,CTYNAME,YEAR,AGEGRP,TOT_POP,TOT_MALE,TOT_FEMALE,WA_MALE,...,HWAC_FEMALE,HBAC_MALE,HBAC_FEMALE,HIAC_MALE,HIAC_FEMALE,HAAC_MALE,HAAC_FEMALE,HNAC_MALE,HNAC_FEMALE,Age_group
0,01001,50,Alabama,Autauga County,12,0,55869,27092,28777,20878,...,687,89,93,40,27,15,19,16,11,0
1,01003,50,Alabama,Baldwin County,12,0,223234,108247,114987,94810,...,4646,268,281,264,197,69,65,55,35,0
2,01005,50,Alabama,Barbour County,12,0,24686,13064,11622,6389,...,408,63,50,61,26,1,0,14,8,0
3,01007,50,Alabama,Bibb County,12,0,22394,11929,10465,8766,...,253,32,19,6,15,5,1,17,3,0
4,01009,50,Alabama,Blount County,12,0,57826,28472,29354,27258,...,2516,76,58,67,66,18,21,34,21,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3137,56037,50,Wyoming,Sweetwater County,12,0,42343,21808,20535,20446,...,2997,83,73,212,196,33,36,16,9,0
3138,56039,50,Wyoming,Teton County,12,0,23464,12142,11322,11567,...,1578,25,23,105,81,16,15,12,7,0
3139,56041,50,Wyoming,Uinta County,12,0,20226,10224,10002,9753,...,840,17,23,82,111,3,12,8,2,0
3140,56043,50,Wyoming,Washakie County,12,0,7805,3963,3842,3759,...,489,7,9,54,59,7,8,4,2,0


In [18]:
# In order to merge the dataframe, we make the FIPS code our index.

county_2019.reset_index(inplace=True, drop=True)
county_2019

Unnamed: 0,FIPS Code,STNAME,CTYNAME,POPESTIMATE2019
0,01001,Alabama,Autauga County,55869
1,01003,Alabama,Baldwin County,223234
2,01005,Alabama,Barbour County,24686
3,01007,Alabama,Bibb County,22394
4,01009,Alabama,Blount County,57826
...,...,...,...,...
3137,56037,Wyoming,Sweetwater County,42343
3138,56039,Wyoming,Teton County,23464
3139,56041,Wyoming,Uinta County,20226
3140,56043,Wyoming,Washakie County,7805


In [19]:
# We select the demograhics we are goint to use from our census dataframe.

gender_tot_sub = all_data_tot[['FIPS Code', 'TOT_MALE','TOT_FEMALE']]
gender_tot_sub

Unnamed: 0,FIPS Code,TOT_MALE,TOT_FEMALE
0,01001,27092,28777
1,01003,108247,114987
2,01005,13064,11622
3,01007,11929,10465
4,01009,28472,29354
...,...,...,...
3137,56037,21808,20535
3138,56039,12142,11322
3139,56041,10224,10002
3140,56043,3963,3842


In [20]:
# We make the FIPS code the index and we reset it. 

census_2019_sex = gender_tot_sub.set_index('FIPS Code', drop=True)
census_2019_sex.reset_index(inplace=True, drop=True)

In [21]:
# Preview

census_2019_sex

Unnamed: 0,TOT_MALE,TOT_FEMALE
0,27092,28777
1,108247,114987
2,13064,11622
3,11929,10465
4,28472,29354
...,...,...
3137,21808,20535
3138,12142,11322
3139,10224,10002
3140,3963,3842


###### County_2019 dataframe with age group totals

In [22]:
# We create the datafram2 with the age group.

all_data_age = all_data[['STATE', 'COUNTY', 'YEAR', 'TOT_POP', 'Age_group']]
all_data_age

Unnamed: 0,STATE,COUNTY,YEAR,TOT_POP,Age_group
0,1,1,1,54571,0
1,1,1,1,3579,0 - 17 years
2,1,1,1,3991,0 - 17 years
3,1,1,1,4290,0 - 17 years
4,1,1,1,4290,0 - 17 years
...,...,...,...,...,...
716371,56,45,12,499,65 + years
716372,56,45,12,352,65 + years
716373,56,45,12,229,65 + years
716374,56,45,12,198,65 + years


In [23]:
# We drop the sum total row. 

all_data_age = all_data_age.drop(all_data_age.index[all_data_age["Age_group"]=='0'])

In [24]:
# We select the year again. 

all_data_age = all_data_age.loc[all_data_age['YEAR']==12]
all_data_age

Unnamed: 0,STATE,COUNTY,YEAR,TOT_POP,Age_group
210,1,1,12,3277,0 - 17 years
211,1,1,12,3465,0 - 17 years
212,1,1,12,3851,0 - 17 years
213,1,1,12,3659,0 - 17 years
214,1,1,12,3178,18 - 49 years
...,...,...,...,...,...
716371,56,45,12,499,65 + years
716372,56,45,12,352,65 + years
716373,56,45,12,229,65 + years
716374,56,45,12,198,65 + years


In [25]:
# We create the fips and drop the columns we no longer need.

fips_age = []
for index, row in  all_data_age.iterrows():
    fips_age.append(f'{row["STATE"]:>02}{row["COUNTY"]:>03}')
    
all_data_age.insert(0, 'FIPS Code', fips_age)
all_data_age.drop(labels=['STATE', 'COUNTY'], axis=1, inplace=True)

In [26]:
#Preview

all_data_age

Unnamed: 0,FIPS Code,YEAR,TOT_POP,Age_group
210,01001,12,3277,0 - 17 years
211,01001,12,3465,0 - 17 years
212,01001,12,3851,0 - 17 years
213,01001,12,3659,0 - 17 years
214,01001,12,3178,18 - 49 years
...,...,...,...,...
716371,56045,12,499,65 + years
716372,56045,12,352,65 + years
716373,56045,12,229,65 + years
716374,56045,12,198,65 + years


In [27]:
# We group the sum of people of certain age group by Fips code and age group.

age_group = pd.DataFrame(all_data_age.groupby(['FIPS Code','Age_group'])['TOT_POP'].sum())

In [28]:
# We can't really use the previous dataframe so we use unstack to have a dataframe we can merge.
# When we unstacked, we created a multiindex, so we use droplevel to retrieve the index we want. 

census_2019_age = age_group.unstack()
census_2019_age = census_2019_age.droplevel(level=0, axis=1)

In [29]:
# We reset the index.

census_2019_age.reset_index(inplace=True, drop=True)

In [30]:
# Preview

census_2019_age

Age_group,0 - 17 years,18 - 49 years,50 - 64 years,65 + years
0,14252,21652,11041,8924
1,52268,77402,46734,46830
2,5595,9477,4753,4861
3,4992,9233,4436,3733
4,14522,21002,11488,10814
...,...,...,...,...
3137,12049,16959,7846,5489
3138,4586,10694,4467,3717
3139,6215,7229,3757,3025
3140,1960,2506,1609,1730


In [31]:
# To creat a dataframe about ethinicity, we must select the information we need from the vaccination data set.
# We reset the inedex.

census_2019_ethnicity = vaccination_df[[
    'Percent Hispanic',
    'Percent non-Hispanic American Indian/Alaska Native',
    'Percent non-Hispanic Asian',
    'Percent non-Hispanic Black',
    'Percent non-Hispanic Native Hawaiian/Pacific Islander',
    'Percent non-Hispanic White',
    'Percent non-Hispanic Other'
]]
census_2019_ethnicity.reset_index(inplace=True)

In [32]:
# We also create the fips code. 

fips_ethnicity = []
for index, row in  census_2019_ethnicity.iterrows():
    row_string = f"{row['FIPS Code']:.0f}"
    fips_ethnicity.append(f"{row_string:>05}")

In [33]:
census_2019_ethnicity = census_2019_ethnicity.drop('FIPS Code', axis=1)

In [34]:
census_2019_sex

Unnamed: 0,TOT_MALE,TOT_FEMALE
0,27092,28777
1,108247,114987
2,13064,11622
3,11929,10465
4,28472,29354
...,...,...
3137,21808,20535
3138,12142,11322
3139,10224,10002
3140,3963,3842


In [35]:
census_2019_age

Age_group,0 - 17 years,18 - 49 years,50 - 64 years,65 + years
0,14252,21652,11041,8924
1,52268,77402,46734,46830
2,5595,9477,4753,4861
3,4992,9233,4436,3733
4,14522,21002,11488,10814
...,...,...,...,...
3137,12049,16959,7846,5489
3138,4586,10694,4467,3717
3139,6215,7229,3757,3025
3140,1960,2506,1609,1730


In [36]:
census_2019_ethnicity

Unnamed: 0,Percent Hispanic,Percent non-Hispanic American Indian/Alaska Native,Percent non-Hispanic Asian,Percent non-Hispanic Black,Percent non-Hispanic Native Hawaiian/Pacific Islander,Percent non-Hispanic White,Percent non-Hispanic Other
0,0.0283,0.0025,0.0103,0.1900,0.0001,0.7460,0.6548
1,0.0456,0.0065,0.0092,0.0917,0.0000,0.8307,0.6710
2,0.0436,0.0029,0.0048,0.4744,0.0000,0.4581,0.3463
3,0.0257,0.0013,0.0012,0.2214,0.0000,0.7453,0.6354
4,0.0926,0.0007,0.0037,0.0153,0.0004,0.8689,0.7923
...,...,...,...,...,...,...,...
3137,0.1588,0.0102,0.0074,0.0112,0.0003,0.7956,0.5841
3138,0.1503,0.0033,0.0125,0.0124,0.0012,0.8134,0.5153
3139,0.0913,0.0065,0.0016,0.0011,0.0000,0.8752,0.6935
3140,0.1423,0.0052,0.0000,0.0004,0.0000,0.8190,0.5881


In [37]:
# We save the dataframe as a csv to use in the analysis 

census_2019_ethnicity.to_csv('clean_data/census_2019_ethnicity.csv')

In [38]:
# We merge the sex and gender dataframes. 

census_2019_combined = county_2019
census_2019_combined = census_2019_combined.merge(census_2019_sex, how='inner', left_index=True, right_index=True)
census_2019_combined = census_2019_combined.merge(census_2019_age, how='inner', left_index=True, right_index=True)

In [39]:
# We merge our big census dataframe with the ethnicity dataframe. 

census_2019_combined = census_2019_combined.merge(census_2019_ethnicity, how='inner', left_index=True, right_index=True)

In [40]:
# Preview

census_2019_combined

Unnamed: 0,FIPS Code,STNAME,CTYNAME,POPESTIMATE2019,TOT_MALE,TOT_FEMALE,0 - 17 years,18 - 49 years,50 - 64 years,65 + years,Percent Hispanic,Percent non-Hispanic American Indian/Alaska Native,Percent non-Hispanic Asian,Percent non-Hispanic Black,Percent non-Hispanic Native Hawaiian/Pacific Islander,Percent non-Hispanic White,Percent non-Hispanic Other
0,01001,Alabama,Autauga County,55869,27092,28777,14252,21652,11041,8924,0.0283,0.0025,0.0103,0.1900,0.0001,0.7460,0.6548
1,01003,Alabama,Baldwin County,223234,108247,114987,52268,77402,46734,46830,0.0456,0.0065,0.0092,0.0917,0.0000,0.8307,0.6710
2,01005,Alabama,Barbour County,24686,13064,11622,5595,9477,4753,4861,0.0436,0.0029,0.0048,0.4744,0.0000,0.4581,0.3463
3,01007,Alabama,Bibb County,22394,11929,10465,4992,9233,4436,3733,0.0257,0.0013,0.0012,0.2214,0.0000,0.7453,0.6354
4,01009,Alabama,Blount County,57826,28472,29354,14522,21002,11488,10814,0.0926,0.0007,0.0037,0.0153,0.0004,0.8689,0.7923
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3137,56037,Wyoming,Sweetwater County,42343,21808,20535,12049,16959,7846,5489,0.1588,0.0102,0.0074,0.0112,0.0003,0.7956,0.5841
3138,56039,Wyoming,Teton County,23464,12142,11322,4586,10694,4467,3717,0.1503,0.0033,0.0125,0.0124,0.0012,0.8134,0.5153
3139,56041,Wyoming,Uinta County,20226,10224,10002,6215,7229,3757,3025,0.0913,0.0065,0.0016,0.0011,0.0000,0.8752,0.6935
3140,56043,Wyoming,Washakie County,7805,3963,3842,1960,2506,1609,1730,0.1423,0.0052,0.0000,0.0004,0.0000,0.8190,0.5881


In [41]:
# We save that census as a csv. 

census_2019_combined.to_csv('clean_data/census_2019_combined.csv')

In [42]:
geo_points = vaccination_df['Geographical Point']

In [43]:
geo_points[1001]

'POINT (-86.844516 32.756889)'

In [44]:
test = geo_points[1001].split(' ')
lat=test[2]
lng=test[1]
print(lat,lng)

32.756889) (-86.844516


In [45]:
lng[1:len(lng)]

'-86.844516'

In [46]:
# points = [geo_points[1001]]
lat = []
lng = []

for point in geo_points:
     (a, b, c) = point.split(' ')
     lat.append(c[0:len(c)-1])
     lng.append(b[1:len(b)])

In [47]:
len(lat)

3142

In [48]:
len(lng)

3142

In [49]:
census_2019_combined['Latitude'] = lat
census_2019_combined['Longitude'] = lng

In [50]:
census_2019_combined

Unnamed: 0,FIPS Code,STNAME,CTYNAME,POPESTIMATE2019,TOT_MALE,TOT_FEMALE,0 - 17 years,18 - 49 years,50 - 64 years,65 + years,Percent Hispanic,Percent non-Hispanic American Indian/Alaska Native,Percent non-Hispanic Asian,Percent non-Hispanic Black,Percent non-Hispanic Native Hawaiian/Pacific Islander,Percent non-Hispanic White,Percent non-Hispanic Other,Latitude,Longitude
0,01001,Alabama,Autauga County,55869,27092,28777,14252,21652,11041,8924,0.0283,0.0025,0.0103,0.1900,0.0001,0.7460,0.6548,32.756889,-86.844516
1,01003,Alabama,Baldwin County,223234,108247,114987,52268,77402,46734,46830,0.0456,0.0065,0.0092,0.0917,0.0000,0.8307,0.6710,32.756889,-86.844516
2,01005,Alabama,Barbour County,24686,13064,11622,5595,9477,4753,4861,0.0436,0.0029,0.0048,0.4744,0.0000,0.4581,0.3463,32.756889,-86.844516
3,01007,Alabama,Bibb County,22394,11929,10465,4992,9233,4436,3733,0.0257,0.0013,0.0012,0.2214,0.0000,0.7453,0.6354,32.756889,-86.844516
4,01009,Alabama,Blount County,57826,28472,29354,14522,21002,11488,10814,0.0926,0.0007,0.0037,0.0153,0.0004,0.8689,0.7923,32.756889,-86.844516
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3137,56037,Wyoming,Sweetwater County,42343,21808,20535,12049,16959,7846,5489,0.1588,0.0102,0.0074,0.0112,0.0003,0.7956,0.5841,42.999627,-107.55145
3138,56039,Wyoming,Teton County,23464,12142,11322,4586,10694,4467,3717,0.1503,0.0033,0.0125,0.0124,0.0012,0.8134,0.5153,42.999627,-107.55145
3139,56041,Wyoming,Uinta County,20226,10224,10002,6215,7229,3757,3025,0.0913,0.0065,0.0016,0.0011,0.0000,0.8752,0.6935,42.999627,-107.55145
3140,56043,Wyoming,Washakie County,7805,3963,3842,1960,2506,1609,1730,0.1423,0.0052,0.0000,0.0004,0.0000,0.8190,0.5881,42.999627,-107.55145


## Covid-19 Case Surveillance


In [51]:
months = ['2020-01', '2020-02', '2020-03', '2020-04', '2020-05',
          '2020-06', '2020-07', '2020-08', '2020-09', '2020-10',
          '2020-11', '2020-12', '2021-01', '2021-02', '2021-03']

# fields = 'case_month, county_fips_code, current_status, sex, age_group, race, ethnicity, hosp_yn, icu_yn, death_yn'
fields = 'case_month, county_fips_code, hosp_yn, icu_yn, death_yn'
fips.append('NA')

patients_df = pd.DataFrame(index=fips)
hospitalized_df = pd.DataFrame(index=fips)
icu_df = pd.DataFrame(index=fips)
death_df = pd.DataFrame(index=fips)

patients_df.index.rename('FIPS Code', inplace=True)
hospitalized_df.index.rename('FIPS Code', inplace=True)
icu_df.index.rename('FIPS Code', inplace=True)
death_df.index.rename('FIPS Code', inplace=True)

In [52]:
query_url = "https://data.cdc.gov/resource/n8mc-b4w4.json?"
params = {
    '$$app_token': cdc_token,
    '$limit': 25000000,
    '$offset': 0,
    '$select': fields
}

NameError: name 'cdc_token' is not defined

In [None]:
months = ['2020-11']

## Print Log Header
print("Beginning Data Retrieval")
print("------------------------------")

## Retrieve Loop
for month in months:
    
    ## Print Log Status
    print(f"Processing Month: {month} [{datetime.datetime.now().strftime('%H:%M:%S')}]")
    
    ## Set month query
    params['case_month'] = month
    ## Retrieve month data & Store in DataFrame
    response_month = requests.get(query_url, params=params).json()
    response_df = pd.DataFrame(response_month)

    patients_df[month] = response_df.groupby('county_fips_code')['case_month'].count()
    hospitalized_df[month] = response_df.loc[response_df['hosp_yn'] == 'Yes'].groupby('county_fips_code')['hosp_yn'].count()
    icu_df[month] = response_df.loc[response_df['icu_yn'] == 'Yes'].groupby('county_fips_code')['icu_yn'].count()
    death_df[month] = response_df.loc[response_df['death_yn'] == 'Yes'].groupby('county_fips_code')['death_yn'].count()

    if month != months[-1]:
        print("Sleeping...")
        time.sleep(60*30)

## Print Log Footer
print("------------------------------")        
print("Data Retrieval Complete")
print("------------------------------")

In [None]:
patients_df

In [None]:
patients_df.fillna(0, inplace=True)
hospitalized_df.fillna(0, inplace=True)
icu_df.fillna(0, inplace=True)
death_df.fillna(0, inplace=True)

patients_df.to_csv('clean_data/patients.csv')
hospitalized_df.to_csv('clean_data/hospitalized.csv')
icu_df.to_csv('clean_data/icu.csv')
death_df.to_csv('clean_data/death.csv')