In [85]:
# Dependencies
import pandas as pd
import json
import requests 
from sqlalchemy import create_engine
from config import sql_pw
import psycopg2

In [86]:
# Census URL
census_url = "https://api.census.gov/data/2019/pep/population?get=DATE_CODE,DATE_DESC,POP,NAME,STATE&for=state:*"

#Request data in json and store variable
census_response = requests.get(census_url)
census_data = census_response.json()

# Create list to store values for the census data
date_code = []
date_desc = []
pop = []
name = []
state = []

# Loop through data and append list
for items in census_data:
    date_code.append(items[0])
    date_desc.append(items[1])
    pop.append(items[2])
    name.append(items[3])
    state.append(items[4])

# Create dataframe from list
census_df = pd.DataFrame({"Date Code":date_code, "Date Desc":date_desc,
                        "Population":pop, "State":name, "State ID":state})
census_df.head()

Unnamed: 0,Date Code,Date Desc,Population,State,State ID
0,DATE_CODE,DATE_DESC,POP,NAME,STATE
1,1,4/1/2010 Census population,2967297,Mississippi,28
2,2,4/1/2010 population estimates base,2968130,Mississippi,28
3,3,7/1/2010 population estimate,2970548,Mississippi,28
4,4,7/1/2011 population estimate,2978731,Mississippi,28


In [87]:
# Pfizer URL
pfizer_url = "https://data.cdc.gov/resource/saz5-9hgg.json"

#Request data in json and store variable
pfizer_response = requests.get(pfizer_url)
pfizer_data = pfizer_response.json()

# Create list to store values for the census data
pfizer_jurisdiction = []
pfizer_wk_of_allo = []
pfizer_dose_1 = []
pfizer_dose_2 = []

# Loop through data and append list
for items in pfizer_data:
    pfizer_jurisdiction.append(items["jurisdiction"])
    pfizer_wk_of_allo.append(items["week_of_allocations"])
    pfizer_dose_1.append(items["_1st_dose_allocations"])
    pfizer_dose_2.append(items["_2nd_dose_allocations"])

# Create dataframe from list
pfizer_df = pd.DataFrame({"State":pfizer_jurisdiction, "Week of Allocation":pfizer_wk_of_allo,
                        "1st Dose Allocation":pfizer_dose_1, "2nd Dose Allocation":pfizer_dose_2})
pfizer_df.head()

Unnamed: 0,State,Week of Allocation,1st Dose Allocation,2nd Dose Allocation
0,Connecticut,2021-03-22T00:00:00.000,50310,50310
1,Maine,2021-03-22T00:00:00.000,19890,19890
2,Massachusetts,2021-03-22T00:00:00.000,95940,95940
3,New Hampshire,2021-03-22T00:00:00.000,19890,19890
4,Rhode Island,2021-03-22T00:00:00.000,15210,15210


In [88]:
# Moderna URL
moderna_url = "https://data.cdc.gov/resource/b7pe-5nws.json"

#Request data in json and store variable
moderna_response = requests.get(moderna_url)
moderna_data = moderna_response.json()

# Create list to store values for the census data
moderna_jurisdiction = []
moderna_wk_of_allo = []
moderna_dose_1 = []
moderna_dose_2 = []

# Loop through data and append list
for items in moderna_data:
    moderna_jurisdiction.append(items["jurisdiction"])
    moderna_wk_of_allo.append(items["week_of_allocations"])
    moderna_dose_1.append(items["_1st_dose_allocations"])
    moderna_dose_2.append(items["_2nd_dose_allocations"])

# Create dataframe from list
moderna_df = pd.DataFrame({"State":moderna_jurisdiction, "Week of Allocation":moderna_wk_of_allo,
                    "1st Dose Allocation":moderna_dose_1, "2nd Dose Allocation":moderna_dose_2})
moderna_df.head()

Unnamed: 0,State,Week of Allocation,1st Dose Allocation,2nd Dose Allocation
0,Connecticut,2021-03-22T00:00:00.000,35800,35800
1,Maine,2021-03-22T00:00:00.000,13700,13700
2,Massachusetts,2021-03-22T00:00:00.000,69000,69000
3,New Hampshire,2021-03-22T00:00:00.000,13700,13700
4,Rhode Island,2021-03-22T00:00:00.000,10800,10800


In [89]:
# Moderna URL
janssen_url = "https://data.cdc.gov/resource/w9zu-fywh.json"

#Request data in json and store variable
janssen_response = requests.get(janssen_url)
janssen_data = janssen_response.json()

# Create list to store values for the census data
janssen_jurisdiction = []
janssen_wk_of_allo = []
janssen_dose_1 = []
janssen_dose_2 = []

# Loop through data and append list
for items in janssen_data:
    janssen_jurisdiction.append(items["jurisdiction"])
    janssen_wk_of_allo.append(items["week_of_allocations"])
    janssen_dose_1.append(items["_1st_dose_allocations"])

# Create dataframe from list
janssen_df = pd.DataFrame({"State":janssen_jurisdiction, "Week of Allocation":janssen_wk_of_allo,
                    "1st Dose Allocation":janssen_dose_1})
janssen_df.head()

Unnamed: 0,State,Week of Allocation,1st Dose Allocation
0,Connecticut,2021-03-22T00:00:00.000,4200
1,Maine,2021-03-22T00:00:00.000,1600
2,Massachusetts,2021-03-22T00:00:00.000,8000
3,New Hampshire,2021-03-22T00:00:00.000,1600
4,Rhode Island,2021-03-22T00:00:00.000,1300


# Transform Data

In [90]:
# Create Vaccine DataFrame
vaccine_df  = pd.DataFrame({"id": [0,1,2], "vaccine": ['Pfizer','Moderna','Janssen'], 
                            "req_doses":[2,2,1], "days_between":[21,28,None], "min_age":[16,18,18]
                            })
vaccine_df

Unnamed: 0,id,vaccine,req_doses,days_between,min_age
0,0,Pfizer,2,21.0,16
1,1,Moderna,2,28.0,18
2,2,Janssen,1,,18


# Clean & Transform State Information

In [91]:
# Transform State DataFrame
state_df = census_df[census_df['State'] != 'NAME']
state_df = state_df.rename(columns={'State ID':'id','State':'state','Population':'population','Date Desc':'desc'})
state_df = state_df.set_index('id')
state_df['population_date'] = state_df['desc'].str[:8]
state_df[state_df['Date Code']=='12']['state'].nunique()
state_df = state_df[state_df['Date Code']=='12'][['state','population','population_date']]
state_df

Unnamed: 0_level_0,state,population,population_date
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
28,Mississippi,2976149,7/1/2019
29,Missouri,6137428,7/1/2019
30,Montana,1068778,7/1/2019
31,Nebraska,1934408,7/1/2019
32,Nevada,3080156,7/1/2019
33,New Hampshire,1359711,7/1/2019
34,New Jersey,8882190,7/1/2019
35,New Mexico,2096829,7/1/2019
36,New York,19453561,7/1/2019
37,North Carolina,10488084,7/1/2019


In [92]:
# Transform Allocation DataFrame
pfizer_df['vaccine']='Pfizer'
moderna_df['vaccine'] = 'Moderna'
janssen_df['vaccine']='Janssen'
janssen_df['2nd Dose Allocation']=None

allocation_df = pd.concat([janssen_df, moderna_df, pfizer_df], axis=0)
allocation_df.reset_index()
allocation_df.head()

Unnamed: 0,State,Week of Allocation,1st Dose Allocation,vaccine,2nd Dose Allocation
0,Connecticut,2021-03-22T00:00:00.000,4200,Janssen,
1,Maine,2021-03-22T00:00:00.000,1600,Janssen,
2,Massachusetts,2021-03-22T00:00:00.000,8000,Janssen,
3,New Hampshire,2021-03-22T00:00:00.000,1600,Janssen,
4,Rhode Island,2021-03-22T00:00:00.000,1300,Janssen,


In [93]:
df = state_df.reset_index()
df = df.set_index('state')
allocation_df = allocation_df.join(df, on='State', rsuffix='_state')

df = vaccine_df.reset_index()
df = vaccine_df.set_index('vaccine')
allocation_df = allocation_df.join(df, on='vaccine', rsuffix='_vaccine')
allocation_df = allocation_df.rename(columns={'Week of Allocation':'week'
                                              ,'1st Dose Allocation':'dose1'
                                              ,'2nd Dose Allocation':'dose2'
                                             ,'id':'state_id'
                                             ,'id_vaccine':'vaccine_id'})

allocation_df = allocation_df[['week','dose1','dose2','state_id','vaccine_id']]
allocation_df.rename_axis(index='id')
allocation_df.tail()

Unnamed: 0,week,dose1,dose2,state_id,vaccine_id
940,2020-12-14T00:00:00.000,72150,72150,51,0
941,2020-12-14T00:00:00.000,62400,62400,53,0
942,2020-12-14T00:00:00.000,16575,16575,54,0
943,2020-12-14T00:00:00.000,49725,49725,55,0
944,2020-12-14T00:00:00.000,4875,4875,56,0


In [94]:
engine = create_engine(f"postgresql+psycopg2://postgres:{sql_pw}@localhost:5432/covid_db")

In [95]:
# Establish Connection
connection = engine.connect()

In [96]:
vaccine_df.to_sql(name='vaccine', con=engine, if_exists='append', index=False)
state_df.to_sql(name='state', con=engine, if_exists='append', index=False)
allocation_df.to_sql(name='allocation', con=engine, if_exists='append', index=True)

In [97]:
pd.read_sql_query('select * from allocation', con=engine).head()

Unnamed: 0,index,week,dose1,dose2,state_id,vaccine_id
0,0,2021-03-22T00:00:00.000,4200,,9,2
1,1,2021-03-22T00:00:00.000,1600,,23,2
2,2,2021-03-22T00:00:00.000,8000,,25,2
3,3,2021-03-22T00:00:00.000,1600,,33,2
4,4,2021-03-22T00:00:00.000,1300,,44,2
