In [1]:
import pandas as pd
from sodapy import Socrata

In [2]:
client = Socrata("data.cdc.gov", None)
results = client.get("hk9y-quqm", limit=350000)

# Reduce the raw records to one COVID-19 death count per state.
doom_df = (
    pd.DataFrame.from_records(results)
    # Discard fields that are never used downstream.
    .drop(
        columns=[
            "data_as_of",
            "start_date",
            "end_date",
            "group",
            "icd10_codes",
            "flag",
            "number_of_mentions",
            "year",
            "month",
        ]
    )
    # Keep COVID-19 rows for individual states, all ages combined.
    .loc[
        lambda frame: (frame["condition_group"] == "COVID-19")
        & (frame["state"] != "United States")
        & (frame["age_group"] == "All Ages")
    ]
    # NOTE(review): only the first 53 matching rows are kept, so the result
    # depends on the order in which the API returns records -- confirm that
    # these are the intended per-state rows.
    .head(53)
    .reset_index(drop=True)
    # The filter columns are constant after the selection above.
    .drop(columns=["condition_group", "condition", "age_group"])
    .astype({"covid_19_deaths": int})
)



In [3]:
# Fetch dataset b7pe-5nws from data.cdc.gov and total its dose allocations
# per jurisdiction.
client = Socrata("data.cdc.gov", None)
results = client.get("b7pe-5nws", limit=350000)

moderna_df = (
    pd.DataFrame.from_records(results)
    # Cast the dose counts to int so they are added numerically.
    .astype({"_1st_dose_allocations": int, "_2nd_dose_allocations": int})
    # Aggregate only the dose columns. Since pandas 2.0 a bare
    # groupby(...).sum() no longer drops non-numeric columns, so any remaining
    # text column would be concatenated per group and leak into later merges.
    .groupby("jurisdiction")[["_1st_dose_allocations", "_2nd_dose_allocations"]]
    .sum()
    .reset_index()
    .rename(
        columns={
            "_1st_dose_allocations": "Moderna_1st_Dose",
            "_2nd_dose_allocations": "Moderna_2nd_Dose",
        }
    )
)



In [4]:
# Fetch dataset saz5-9hgg from data.cdc.gov and total its dose allocations
# per jurisdiction.
client = Socrata("data.cdc.gov", None)
results = client.get("saz5-9hgg", limit=350000)

pfizer_df = (
    pd.DataFrame.from_records(results)
    # Cast the dose counts to int so they are added numerically.
    .astype({"_1st_dose_allocations": int, "_2nd_dose_allocations": int})
    # Aggregate only the dose columns. Since pandas 2.0 a bare
    # groupby(...).sum() no longer drops non-numeric columns, so any remaining
    # text column would be concatenated per group and leak into later merges.
    .groupby("jurisdiction")[["_1st_dose_allocations", "_2nd_dose_allocations"]]
    .sum()
    .reset_index()
    .rename(
        columns={
            "_1st_dose_allocations": "Pfizer_1st_Dose",
            "_2nd_dose_allocations": "Pfizer_2nd_Dose",
        }
    )
)



In [5]:
# Fetch dataset w9zu-fywh from data.cdc.gov and total its dose allocations
# per jurisdiction (this dataset only has a first-dose column in use here).
client = Socrata("data.cdc.gov", None)
results = client.get("w9zu-fywh", limit=350000)

jnj_df = (
    pd.DataFrame.from_records(results)
    # Cast the dose counts to int so they are added numerically.
    .astype({"_1st_dose_allocations": int})
    # Aggregate only the dose column. Since pandas 2.0 a bare
    # groupby(...).sum() no longer drops non-numeric columns, so any remaining
    # text column would be concatenated per group and leak into later merges.
    .groupby("jurisdiction")[["_1st_dose_allocations"]]
    .sum()
    .reset_index()
    .rename(columns={"_1st_dose_allocations": "JnJ_1st_Dose"})
)



In [6]:
# Combine the three manufacturers' allocation tables into one row per
# jurisdiction; outer joins keep jurisdictions missing from any one table.
vaccine_df = (
    pfizer_df
    .merge(moderna_df, how="outer", on="jurisdiction")
    .merge(jnj_df, how="outer", on="jurisdiction")
    # Use the same key name as the other tables in this notebook.
    .rename(columns={"jurisdiction": "state"})
)

In [7]:
# Load the local state/territory reference table, discard the columns that
# are not used, and expose the name column under the shared "state" key.
states_df = (
    pd.read_csv("us-states-territories.csv", encoding="unicode_escape")
    .drop(columns=["Capital", "Population (2015)"])
    .rename(columns={"Name": "state"})
)

In [8]:
# Attach the allocation totals to the reference table; the inner join keeps
# only names present in both frames.
vaccine_df_by_state = (
    states_df
    .merge(vaccine_df, on="state", how="inner")
    .drop(columns=["Unnamed: 7", "Type"])
)

In [9]:
# Add the COVID-19 death counts; states missing from either frame are dropped.
final_data = pd.merge(vaccine_df_by_state, doom_df, on="state", how="inner")

In [10]:
client = Socrata("data.cdc.gov", None)
results = client.get("djj9-kh3p", limit=350000)

# Summarise the "estimated_hesitant" values of each state's records as
# mean / max / min.
hesitency_df = (
    pd.DataFrame.from_records(results)[
        [
            "state",
            "estimated_hesitant",
            "estimated_unsure_or_hesitant",
            "estimated_strongly_hesitant",
        ]
    ]
    .astype(
        {
            "estimated_hesitant": float,
            "estimated_unsure_or_hesitant": float,
            "estimated_strongly_hesitant": float,
        }
    )
    .groupby("state")["estimated_hesitant"]
    .agg(mean_hesitency="mean", max_hesitency="max", min_hesitency="min")
    .reset_index()
)



In [11]:
# Join the hesitancy summary onto the per-state table and derive density.
final_data_df = (
    final_data
    .merge(hesitency_df, on="state", how="left")
    # States without a hesitancy match (and any other gaps) become 0.
    .fillna(0)
    .astype({"population_2019": int, "area": int})
    .assign(
        density_per_sq_mile=lambda frame: (
            frame["population_2019"] / frame["area"]
        ).round(2)
    )
)


Unnamed: 0,state,Abbreviation,population_2019,area,Pfizer_1st_Dose,Pfizer_2nd_Dose,Moderna_1st_Dose,Moderna_2nd_Dose,JnJ_1st_Dose,covid_19_deaths,mean_hesitency,max_hesitency,min_hesitency,density_per_sq_mile
0,Alabama,AL,4903185,52420,1552740,1552740,1295260,1295260,179400,12225,16.485588,18.98,12.44,93.54
1,Alaska,AK,731545,665384,369960,358260,285860,285860,39900,409,18.826,23.61,15.24,1.1
2,Arizona,AZ,7278717,113990,2203260,2203260,1835960,1835960,253900,16957,14.272037,21.05,9.31,63.85
3,Arkansas,AR,3017804,53179,951600,951600,786220,786220,109000,7298,20.6265,22.79,17.15,56.75
4,California,CA,39512223,163695,12333360,12333360,10329700,10329700,1428400,68116,6.166,9.76,2.76,241.38


In [13]:
# Index the final table by state. The guard keeps the cell re-runnable: once
# "state" has become the index it is no longer a column, and calling
# set_index("state") a second time would raise KeyError.
if final_data_df.index.name != "state":
    final_data_df = final_data_df.set_index("state")
final_data_df.head()

Unnamed: 0_level_0,Abbreviation,population_2019,area,Pfizer_1st_Dose,Pfizer_2nd_Dose,Moderna_1st_Dose,Moderna_2nd_Dose,JnJ_1st_Dose,covid_19_deaths,mean_hesitency,max_hesitency,min_hesitency,density_per_sq_mile
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Alabama,AL,4903185,52420,1552740,1552740,1295260,1295260,179400,12225,16.485588,18.98,12.44,93.54
Alaska,AK,731545,665384,369960,358260,285860,285860,39900,409,18.826,23.61,15.24,1.1
Arizona,AZ,7278717,113990,2203260,2203260,1835960,1835960,253900,16957,14.272037,21.05,9.31,63.85
Arkansas,AR,3017804,53179,951600,951600,786220,786220,109000,7298,20.6265,22.79,17.15,56.75
California,CA,39512223,163695,12333360,12333360,10329700,10329700,1428400,68116,6.166,9.76,2.76,241.38


In [14]:
client = Socrata("data.cdc.gov", None)
results = client.get("8xkx-amqh", limit=1000000)

# Raw records with every missing value replaced by 0.
adminstration_df = pd.DataFrame.from_records(results).fillna(0)

# Keep the vaccination fields of interest, give them readable names, and
# reduce to one row per (state abbreviation, county) holding each column's
# maximum across that county's records.
admin_data = (
    adminstration_df[
        [
            "recip_county",
            "recip_state",
            "series_complete_pop_pct",
            "series_complete_yes",
            "administered_dose1_recip_65plus",
            "administered_dose1_recip_65pluspop_pct",
            "svi_ctgy",
        ]
    ]
    .rename(
        columns={
            "recip_state": "Abbreviation",
            "series_complete_pop_pct": "pct_pop_vax",
            "series_complete_yes": "total_pop_vax",
            "administered_dose1_recip_65plus": "age_65_plus_pop_vaxed",
            "administered_dose1_recip_65pluspop_pct": "pct_65_plus_vaxed",
            "svi_ctgy": "Social_Vulnerability_Index",
        }
    )
    .astype(
        {
            "pct_pop_vax": float,
            "total_pop_vax": int,
            "age_65_plus_pop_vaxed": int,
            "pct_65_plus_vaxed": float,
        }
    )
    .groupby(["Abbreviation", "recip_county"])
    .max()
    .reset_index()
)
admin_data



Unnamed: 0,Abbreviation,recip_county,pct_pop_vax,total_pop_vax,age_65_plus_pop_vaxed,pct_65_plus_vaxed,Social_Vulnerability_Index
0,AK,Aleutians East Borough,67.3,2247,166,47.3,Mod-High
1,AK,Aleutians West Census Area,52.7,2971,273,65.2,Low-Mod
2,AK,Anchorage Municipality,50.0,143950,30998,91.8,Low-Mod
3,AK,Bethel Census Area,54.3,9982,1251,86.4,High
4,AK,Bristol Bay Borough,87.6,732,123,90.4,Low-Mod
...,...,...,...,...,...,...,...
3277,WY,Teton County,77.2,18123,3467,93.3,Low
3278,WY,Uinta County,36.2,7327,2523,83.4,Low-Mod
3279,WY,Unknown County,0.0,8448,4208,0.0,0
3280,WY,Washakie County,32.9,2567,1315,76.0,Low-Mod


In [None]:
# DataFrame.set_index() requires a `keys` argument, so the bare call always
# raised TypeError and stopped a top-to-bottom run before the export cell.
# Keep the plain positional index instead, so "Abbreviation" and
# "recip_county" remain ordinary columns in the exported file.
# NOTE(review): if a keyed index was intended, pass the key columns
# explicitly and check the resulting JSON layout.
admin_data = admin_data.reset_index(drop=True)

In [15]:
# Persist both result tables as JSON files in the working directory.
admin_data.to_json(path_or_buf="administration.json")
final_data_df.to_json(path_or_buf="final_data.json")