In [40]:
import os

import pandas as pd

# Lift pandas' display truncation so previews show every column and every row.
for _display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(_display_option, None)

In [41]:
# read in all the datasets
def read_clean(filename, date_vars=("date",), data_dir="../Data/"):
    """Read one CSV from the data directory into a DataFrame.

    The ``fips`` column is always read as a string so that leading zeros in
    the codes are not lost to integer parsing.

    Parameters
    ----------
    filename : str
        Name of the CSV file inside ``data_dir``.
    date_vars : sequence of str, str, bool or None, default ("date",)
        Column name(s) to parse as dates. ``None`` disables date parsing;
        a single string is treated as one column name.
    data_dir : str, default "../Data/"
        Directory that holds the CSV files.

    Returns
    -------
    pandas.DataFrame
        The parsed file.
    """
    # pandas only accepts bool / list / dict for ``parse_dates``, so normalise
    # every other accepted spelling before handing it over.
    if date_vars is None:
        parse_dates = False
    elif isinstance(date_vars, str):
        parse_dates = [date_vars]
    elif isinstance(date_vars, (bool, dict)):
        parse_dates = date_vars
    else:
        parse_dates = list(date_vars)

    return pd.read_csv(os.path.join(data_dir, filename),
                       dtype={'fips': str},
                       parse_dates=parse_dates)

# Files whose date column is named "date" and that need no further trimming.
cases = read_clean("NYT_clean.csv")
masks = read_clean("Masks_clean.csv")
mobility = read_clean("Mobility Data.csv")
MO_vaccine = read_clean("MO_vaccine_clean.csv")

# The ACS file is read without any date parsing.
acs = read_clean("ACS Data.csv", date_vars=None)

# Hospital data: "collection_week" is dropped right after loading.
hospitals = read_clean("Hospitals_clean.csv")
hospitals = hospitals.drop(columns=["collection_week"])

# Illinois vaccine data: dates are in "Report_Date"; drop the "Unnamed: 0"
# column first, then remove rows that are exact duplicates.
IL_vaccine = read_clean("il_vaccine_clean.csv", date_vars=["Report_Date"])
IL_vaccine = IL_vaccine.drop(columns=["Unnamed: 0"]).drop_duplicates()

# Ohio vaccine data: only the "Unnamed: 0" column is removed.
OH_vaccine = read_clean("oh_vaccine_clean.csv")
OH_vaccine = OH_vaccine.drop(columns=["Unnamed: 0"])


In [42]:
# merge together all non-vaccine data
#
# `cases` is the left table of every join (how="left"), so each of its rows is
# kept. ACS is joined on fips alone; the other tables are joined on fips + date.
# Descriptive columns (state / county / country) are dropped from the
# right-hand tables before joining so only the copies from `cases` remain.
merged = (
    cases
    .merge(acs.drop(columns=["county", "state"]),
           how="left", on=["fips"])
    .merge(hospitals.drop(columns=["state"]),
           how="left", on=["fips", "date"])
    .merge(masks.drop(columns=["county", "state"]),
           how="left", on=["fips", "date"])
    .merge(mobility.drop(columns=["country", "state", "county"]),
           how="left", on=["fips", "date"])
)

In [43]:
# standardize vaccine data
#
# Every state table is brought to the same four columns so they can be stacked:
# the join keys ("fips", "date") plus "new doses" and "cumulative doses".
vaccine_keys = ["fips", "date"]
dose_cols = ["new doses", "cumulative doses"]

# Illinois: AdministeredCount is used as the running total and the per-row
# increment is derived from it.
IL_vaccine = IL_vaccine.rename(columns={"AdministeredCount": "cumulative doses",
                                        "Report_Date": "date"})
# diff() depends on row order, so order by date before differencing; the
# result is re-aligned to IL_vaccine by index on assignment. The first row of
# each county has no predecessor and stays NaN (it becomes 0 in the fillna below).
IL_vaccine['new doses'] = (
    IL_vaccine.sort_values(by="date", kind="stable")
              .groupby(["state", "CountyName", "county", "fips"])['cumulative doses']
              .diff()
)

# Ohio: per-row doses are the sum of started and completed vaccinations, and
# the running total is accumulated per county in date order.
OH_vaccine['new doses'] = OH_vaccine["vaccines_started"] + OH_vaccine["vaccines_completed"]
OH_vaccine["cumulative doses"] = (
    OH_vaccine.sort_values(by=["state", "county", "fips", "date"])
              .groupby(['state', "county", "fips"])['new doses']
              .cumsum()
)

# Missouri is used as loaded: it is expected to already carry both dose columns.

# stack vaccine data
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0)
vaccines = pd.concat([
    IL_vaccine[vaccine_keys + dose_cols],
    OH_vaccine[vaccine_keys + dose_cols],
    MO_vaccine[vaccine_keys + dose_cols],
])

# merge onto rest of the data
# Dropping any dose columns left over from a previous run keeps this cell
# re-runnable: otherwise a second merge would create "_x"/"_y" suffixed columns
# and the fillna below would fail with a KeyError.
merged = (
    merged.drop(columns=dose_cols, errors="ignore")
          .merge(vaccines, on=vaccine_keys, how="left")
)

# Rows with no matching vaccine record get 0 doses.
# NOTE(review): for "cumulative doses" a 0 on a date that falls inside or after
# a county's reported series understates the total - confirm the vaccine files
# have no date gaps, or forward-fill per fips before filling with 0.
merged[dose_cols] = merged[dose_cols].fillna(0)

In [44]:
# Preview the first rows dated after 2020-12-01, a period in which most of the
# merged columns are expected to be populated.
after_dec1 = merged["date"] > '2020-12-01'
merged[after_dec1].head()

Unnamed: 0,state,fips,county,date,cumulative_cases,cumulative_deaths,new_cases,new_deaths,new_cases_7avg,new_deaths_7avg,2weeksago_cases_7avg,2weeksago_deaths_7avg,total_pop,male,perc_male,female,perc_female,age_15_19,p_age_15_19,age_20_24,p_age_20_24,age_25_34,p_age_25_34,age_35_44,p_age_35_44,age_45_54,p_age_45_54,age_55_59,p_age_55_59,age_60_64,p_age_60_64,age_median,age_62over,p_age_62over,age_65over,p_age_65over,white,p_white,black,p_black,native,p_native,asian,p_asian,hawaiian,p_hawaiian,other_race,p_other_race,hispanic,p_hispanic,housing_units,below_50_pov,below_125_pov,below_150_pov,below_185_pov,below_200_pov,below_300_pov,below_400_pov,below_500_pov,below_pov,male_below_pov,female_below_pov,age_under14,p_under14,non_white,p_non_white,total_adult_hospitalizations,total_pediatric_hospitalizations,prev_day_adult_admit_7daysum,prev_day_adult_admit_18-19_7daysum,prev_day_adult_admit_20-29_7daysum,prev_day_adult_admit_30-39_7daysum,prev_day_adult_admit_40-49_7daysum,prev_day_adult_admit_50-59_7daysum,prev_day_adult_admit_60-69_7daysum,prev_day_adult_admit_70-79_7daysum,prev_day_adult_admit_80+_7daysum,prev_day_adult_admit_unknown_7daysum,mask_mandate,retail_rec,grocery_pharm,parks,transit,workplace,residential,new doses,cumulative doses
257,IL,17001,Adams,2020-12-02,4886,54.0,-15.0,4.0,33.0,1.0,114.0,1.0,66085,32276,48.8,33809,51.2,3679,5.6,3684,5.6,8060,12.2,7657,11.6,8105,12.3,4866,7.4,4251,6.4,41.5,15621,23.6,13186,20.0,61406,92.9,2731,4.1,297,0.4,527,0.8,14,0.0,113,0.2,1059,1.6,30192,3447,10417,13344,17583,19205,33041,43233,50182,8031,3605,4426,406544754057,6.26.86.1,4679,7.1,434.0,0.0,45.0,0.0,0.0,3.0,3.0,4.0,11.0,13.0,12.0,0.0,1.0,-14.0,-3.0,,-32.0,-17.0,7.0,0.0,0.0
258,IL,17001,Adams,2020-12-03,4996,55.0,110.0,1.0,49.0,1.0,124.0,1.0,66085,32276,48.8,33809,51.2,3679,5.6,3684,5.6,8060,12.2,7657,11.6,8105,12.3,4866,7.4,4251,6.4,41.5,15621,23.6,13186,20.0,61406,92.9,2731,4.1,297,0.4,527,0.8,14,0.0,113,0.2,1059,1.6,30192,3447,10417,13344,17583,19205,33041,43233,50182,8031,3605,4426,406544754057,6.26.86.1,4679,7.1,434.0,0.0,45.0,0.0,0.0,3.0,3.0,4.0,11.0,13.0,12.0,0.0,1.0,-18.0,5.0,,-43.0,-20.0,7.0,0.0,0.0
259,IL,17001,Adams,2020-12-04,5049,56.0,53.0,1.0,57.0,1.0,110.0,1.0,66085,32276,48.8,33809,51.2,3679,5.6,3684,5.6,8060,12.2,7657,11.6,8105,12.3,4866,7.4,4251,6.4,41.5,15621,23.6,13186,20.0,61406,92.9,2731,4.1,297,0.4,527,0.8,14,0.0,113,0.2,1059,1.6,30192,3447,10417,13344,17583,19205,33041,43233,50182,8031,3605,4426,406544754057,6.26.86.1,4679,7.1,340.0,0.0,45.0,0.0,3.0,3.0,0.0,3.0,12.0,20.0,7.0,0.0,1.0,-18.0,-6.0,,-37.0,-14.0,7.0,0.0,0.0
260,IL,17001,Adams,2020-12-05,5038,57.0,-11.0,1.0,44.0,1.0,124.0,1.0,66085,32276,48.8,33809,51.2,3679,5.6,3684,5.6,8060,12.2,7657,11.6,8105,12.3,4866,7.4,4251,6.4,41.5,15621,23.6,13186,20.0,61406,92.9,2731,4.1,297,0.4,527,0.8,14,0.0,113,0.2,1059,1.6,30192,3447,10417,13344,17583,19205,33041,43233,50182,8031,3605,4426,406544754057,6.26.86.1,4679,7.1,340.0,0.0,45.0,0.0,3.0,3.0,0.0,3.0,12.0,20.0,7.0,0.0,1.0,-26.0,0.0,,-40.0,0.0,3.0,0.0,0.0
261,IL,17001,Adams,2020-12-06,5074,57.0,36.0,0.0,44.0,1.0,110.0,1.0,66085,32276,48.8,33809,51.2,3679,5.6,3684,5.6,8060,12.2,7657,11.6,8105,12.3,4866,7.4,4251,6.4,41.5,15621,23.6,13186,20.0,61406,92.9,2731,4.1,297,0.4,527,0.8,14,0.0,113,0.2,1059,1.6,30192,3447,10417,13344,17583,19205,33041,43233,50182,8031,3605,4426,406544754057,6.26.86.1,4679,7.1,340.0,0.0,45.0,0.0,3.0,3.0,0.0,3.0,12.0,20.0,7.0,0.0,1.0,-18.0,0.0,,,-5.0,3.0,0.0,0.0


In [None]:
# export dataset

merged.to