### Import modules and set up the environment

In [1]:
import os
import pandas as pd

from dotenv import load_dotenv
from sodapy import Socrata

In [2]:
# Load variables from a local .env file (python-dotenv) into the process
# environment, then read the three credentials; a missing key yields None.
load_dotenv()

CDC_TOKEN, EMAIL_ID, PASSWORD = (
    os.getenv(key) for key in ('CDC_TOKEN', 'EMAIL_ID', 'PASSWORD')
)

### CDC Auth

In [3]:
# Socrata client for data.cdc.gov, built from the app token and account credentials.
cdc_client = Socrata(
    'data.cdc.gov',
    CDC_TOKEN,
    username=EMAIL_ID,
    password=PASSWORD,
)

### Pfizer

Updated every **Tuesday**

Collect data:

In [4]:
# Fetch the Pfizer allocation records (dataset 'saz5-9hgg').
# The request returns at most ROW_LIMIT rows, so a result that fills the limit
# may have been cut short; fail loudly instead of saving incomplete data.
ROW_LIMIT = 4000
res = cdc_client.get('saz5-9hgg', limit=ROW_LIMIT)
assert len(res) < ROW_LIMIT, 'Pfizer result hit the row limit and may be truncated; raise ROW_LIMIT'

Process data:

In [5]:
# Build the Pfizer frame from the fetched records and shorten the column names.
pfizer_columns = {
    'week_of_allocations': 'week',
    '_1st_dose_allocations': 'first_dose',
    '_2nd_dose_allocations': 'second_dose',
}
df_pfizer = pd.DataFrame.from_records(res).rename(columns=pfizer_columns)
df_pfizer.shape

(882, 4)

In [6]:
# Parse the 'week' column into datetime values.
df_pfizer = df_pfizer.assign(week=pd.to_datetime(df_pfizer['week']))

Some jurisdiction names contain '\*' and ',' characters that serve as footnote markers:
* \*: Jurisdictions that won't receive Pfizer vaccines due to cold storage logistics
* \*\*: Jurisdictions that will receive both doses simultaneously
* \*\*\*: Jurisdictions that will receive a "Sovereign Nation Supplement" for American Indian/Alaska Native populations that elected to receive vaccines through the state instead of the Indian Health Service
* \*\*\*\*: Federal Entities include the Bureau of Prisons, Dept. of Defense, Dept. of State, Indian Health Service, and Veterans Affairs, plus the Dept. of Homeland Security

Source: [CDC](https://data.cdc.gov/Vaccinations/COVID-19-Vaccine-Distribution-Allocations-by-Juris/saz5-9hgg)

In [7]:
# Remove the footnote markers ('*' and ',') from both ends of each jurisdiction name.
# To inspect the affected rows first:
#   df_pfizer[df_pfizer['jurisdiction'].str.contains('*', regex=False)]
# The vectorised .str accessor passes missing values through as NaN instead of
# raising, and also cleans a name whose only marker is a stray ','.
df_pfizer['jurisdiction'] = df_pfizer['jurisdiction'].str.strip('*,')

Note: Stats for Chicago, New York City, and Philadelphia aren't included in rows for IL, NY, and PA

In [8]:
# Preview the first five rows of the cleaned Pfizer frame.
df_pfizer.head(n=5)

Unnamed: 0,jurisdiction,week,first_dose,second_dose
0,Connecticut,2021-03-15,49140,49140
1,Maine,2021-03-15,18720,18720
2,Massachusetts,2021-03-15,93600,93600
3,New Hampshire,2021-03-15,18720,18720
4,Rhode Island,2021-03-15,15210,15210


In [9]:
# Save the Pfizer frame to Data/pfizer_cdc.csv under the current working directory.
data_dir = os.path.join(os.getcwd(), 'Data')
os.makedirs(data_dir, exist_ok=True)  # to_csv does not create missing directories
df_pfizer.to_csv(os.path.join(data_dir, 'pfizer_cdc.csv'))

### Moderna

Updated every **Tuesday**

Collect data:

In [10]:
# Fetch the Moderna allocation records (dataset 'b7pe-5nws').
# The request returns at most ROW_LIMIT rows, so a result that fills the limit
# may have been cut short; fail loudly instead of saving incomplete data.
ROW_LIMIT = 4000
res = cdc_client.get('b7pe-5nws', limit=ROW_LIMIT)
assert len(res) < ROW_LIMIT, 'Moderna result hit the row limit and may be truncated; raise ROW_LIMIT'

Process data:

In [11]:
# Build the Moderna frame from the fetched records and shorten the column names.
moderna_columns = {
    'week_of_allocations': 'week',
    '_1st_dose_allocations': 'first_dose',
    '_2nd_dose_allocations': 'second_dose',
}
df_modr = pd.DataFrame.from_records(res).rename(columns=moderna_columns)
df_modr.shape

(819, 4)

In [12]:
# Parse the 'week' column into datetime values.
df_modr = df_modr.assign(week=pd.to_datetime(df_modr['week']))

Some jurisdiction names contain '\*' and ',' characters that serve as footnote markers:
* \*: Jurisdictions that won't receive Pfizer vaccines due to cold storage logistics
* \*\*: Jurisdictions that will receive both doses simultaneously
* \*\*\*: Jurisdictions that will receive a "Sovereign Nation Supplement" for American Indian/Alaska Native populations that elected to receive vaccines through the state instead of the Indian Health Service
* \*\*\*\*: Federal Entities include the Bureau of Prisons, Dept. of Defense, Dept. of State, Indian Health Service, and Veterans Affairs, plus the Dept. of Homeland Security

Source: [CDC](https://data.cdc.gov/Vaccinations/COVID-19-Vaccine-Distribution-Allocations-by-Juris/b7pe-5nws)

In [13]:
# Remove the footnote markers ('*' and ',') from both ends of each jurisdiction name.
# To inspect the affected rows first:
#   df_modr[df_modr['jurisdiction'].str.contains('*', regex=False)]
# The vectorised .str accessor passes missing values through as NaN instead of
# raising, and also cleans a name whose only marker is a stray ','.
df_modr['jurisdiction'] = df_modr['jurisdiction'].str.strip('*,')

Note: Stats for Chicago, New York City, and Philadelphia aren't included in rows for IL, NY, and PA

In [14]:
# Preview the first five rows of the cleaned Moderna frame.
df_modr.head(n=5)

Unnamed: 0,jurisdiction,week,first_dose,second_dose
0,Connecticut,2021-03-15,35800,35800
1,Maine,2021-03-15,13700,13700
2,Massachusetts,2021-03-15,69000,69000
3,New Hampshire,2021-03-15,13700,13700
4,Rhode Island,2021-03-15,10800,10800


In [15]:
# Save the Moderna frame to Data/moderna_cdc.csv under the current working directory.
data_dir = os.path.join(os.getcwd(), 'Data')
os.makedirs(data_dir, exist_ok=True)  # to_csv does not create missing directories
df_modr.to_csv(os.path.join(data_dir, 'moderna_cdc.csv'))

### Janssen

Updated every **Tuesday**

Collect data:

In [16]:
# Fetch the Janssen allocation records (dataset 'w9zu-fywh').
# The request returns at most ROW_LIMIT rows, so a result that fills the limit
# may have been cut short; fail loudly instead of saving incomplete data.
ROW_LIMIT = 4000
res = cdc_client.get('w9zu-fywh', limit=ROW_LIMIT)
assert len(res) < ROW_LIMIT, 'Janssen result hit the row limit and may be truncated; raise ROW_LIMIT'

Process data:

In [17]:
# Build the Janssen frame from the fetched records and shorten the column names.
janssen_columns = {
    'week_of_allocations': 'week',
    '_1st_dose_allocations': 'first_dose',
}
df_jj = pd.DataFrame.from_records(res).rename(columns=janssen_columns)
df_jj.shape

(63, 3)

In [18]:
# Parse the 'week' column into datetime values.
df_jj = df_jj.assign(week=pd.to_datetime(df_jj['week']))

Some jurisdiction names contain '\*' and ',' characters that serve as footnote markers:
* \*: Jurisdictions that won't receive Pfizer vaccines due to cold storage logistics
* \*\*: Jurisdictions that will receive both doses simultaneously
* \*\*\*: Jurisdictions that will receive a "Sovereign Nation Supplement" for American Indian/Alaska Native populations that elected to receive vaccines through the state instead of the Indian Health Service
* \*\*\*\*: Federal Entities include the Bureau of Prisons, Dept. of Defense, Dept. of State, Indian Health Service, and Veterans Affairs, plus the Dept. of Homeland Security

Source: [CDC](https://data.cdc.gov/Vaccinations/COVID-19-Vaccine-Distribution-Allocations-by-Juris/w9zu-fywh)

In [19]:
# Remove the footnote markers ('*' and ',') from both ends of each jurisdiction name.
# To inspect the affected rows first:
#   df_jj[df_jj['jurisdiction'].str.contains('*', regex=False)]
# The vectorised .str accessor passes missing values through as NaN instead of
# raising, and also cleans a name whose only marker is a stray ','.
df_jj['jurisdiction'] = df_jj['jurisdiction'].str.strip('*,')

Note: Stats for Chicago, New York City, and Philadelphia aren't included in rows for IL, NY, and PA

In [20]:
# Preview the first five rows of the cleaned Janssen frame.
df_jj.head(n=5)

Unnamed: 0,jurisdiction,week,first_dose
0,Connecticut,2021-03-01,30200
1,Maine,2021-03-01,11500
2,Massachusetts,2021-03-01,58100
3,New Hampshire,2021-03-01,11600
4,Rhode Island,2021-03-01,9100


In [21]:
# Save the Janssen frame to Data/jansen_cdc.csv under the current working directory.
data_dir = os.path.join(os.getcwd(), 'Data')
os.makedirs(data_dir, exist_ok=True)  # to_csv does not create missing directories
df_jj.to_csv(os.path.join(data_dir, 'jansen_cdc.csv'))