In [167]:
import pandas as pd

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [168]:
# read in all the datasets
def read_clean(filename, date_vars = ["date"]):
    return pd.read_csv("../Data/" + filename, dtype={'fips': str}, parse_dates = date_vars)

cases = read_clean("NYT_clean.csv")
acs = read_clean("ACS Data.csv", None)
hospitals = read_clean("Hospitals_clean.csv").drop(columns=["collection_week"])
masks = read_clean("Masks_clean.csv")
mobility = read_clean("Mobility Data.csv")
IL_vaccine = read_clean("il_vaccine_clean.csv", ["Report_Date"]).drop(columns=["Unnamed: 0"]).drop_duplicates()
MO_vaccine = read_clean("MO_vaccine_clean.csv")
OH_vaccine = read_clean("oh_vaccine_clean.csv").drop(columns=["Unnamed: 0"])

In [169]:
# merge together all non-vaccine data
merged = cases.merge(acs.drop(columns=["county", "state"]), on=["fips"], how="left")\
              .merge(hospitals.drop(columns=["state"]), on=["fips", "date"], how="left")\
              .merge(masks.drop(columns=["county", "state"]), on=["fips", "date"], how="left")\
              .merge(mobility.drop(columns=["country", "state", "county"]), on=["fips", "date"], how="left")

In [170]:
# standardize vaccine data
IL_vaccine = IL_vaccine.rename(columns={"AdministeredCount": "cumulative doses",
                                        "Report_Date": "date"})
IL_vaccine['new doses'] = IL_vaccine.groupby(["state", "CountyName", "fips"])['cumulative doses'].diff()


OH_vaccine['new doses'] = OH_vaccine["vaccines_started"] + OH_vaccine["vaccines_completed"]
OH_vaccine['new doses'] = OH_vaccine["vaccines_started"] + OH_vaccine["vaccines_completed"]
OH_vaccine["cumulative doses"] = OH_vaccine.sort_values(by=["state", "county", "fips", "date"])\
                                                       .groupby(['state', "county", "fips"])['new doses']\
                                                       .cumsum(axis=0)


# stack vaccine data
vaccines = IL_vaccine[["fips", "date", "new doses", "cumulative doses"]]\
            .append(OH_vaccine[["fips", "date", "new doses", "cumulative doses"]])\
            .append(MO_vaccine[["fips", "date", "new doses", "cumulative doses"]])


# merge onto rest of the data
merged = merged.merge(vaccines, on=["fips", "date"], how="left")

merged["cumulative doses"] = merged.groupby(["fips"])['cumulative doses'].transform(lambda x: x.ffill())

merged[["new doses", "cumulative doses"]] = merged[["new doses", "cumulative doses"]].fillna(0)

In [172]:
# look at a subset that should have all the data valued basically
merged[merged["date"] > '2020-12-21'].head()

Unnamed: 0,state,fips,county,date,cumulative_cases,cumulative_deaths,new_cases,new_deaths,new_cases_7avg,new_deaths_7avg,2weeksago_cases_7avg,2weeksago_deaths_7avg,total_pop,male,perc_male,female,perc_female,age_15_19,p_age_15_19,age_20_24,p_age_20_24,age_25_34,p_age_25_34,age_35_44,p_age_35_44,age_45_54,p_age_45_54,age_55_59,p_age_55_59,age_60_64,p_age_60_64,age_median,age_62over,p_age_62over,age_65over,p_age_65over,white,p_white,black,p_black,native,p_native,asian,p_asian,hawaiian,p_hawaiian,other_race,p_other_race,hispanic,p_hispanic,housing_units,below_50_pov,below_125_pov,below_150_pov,below_185_pov,below_200_pov,below_300_pov,below_400_pov,below_500_pov,below_pov,male_below_pov,female_below_pov,age_under14,p_under14,non_white,p_non_white,total_adult_hospitalizations,total_pediatric_hospitalizations,prev_day_adult_admit_7daysum,prev_day_adult_admit_18-19_7daysum,prev_day_adult_admit_20-29_7daysum,prev_day_adult_admit_30-39_7daysum,prev_day_adult_admit_40-49_7daysum,prev_day_adult_admit_50-59_7daysum,prev_day_adult_admit_60-69_7daysum,prev_day_adult_admit_70-79_7daysum,prev_day_adult_admit_80+_7daysum,prev_day_adult_admit_unknown_7daysum,mask_mandate,retail_rec,grocery_pharm,parks,transit,workplace,residential,new doses,cumulative doses
277,IL,17001,Adams,2020-12-22,6040.0,72.0,118.0,3.0,79.0,1.0,71.0,1.0,66085,32276,48.8,33809,51.2,3679,5.6,3684,5.6,8060,12.2,7657,11.6,8105,12.3,4866,7.4,4251,6.4,41.5,15621,23.6,13186,20.0,61406,92.9,2731,4.1,297,0.4,527,0.8,14,0.0,113,0.2,1059,1.6,30192,3447,10417,13344,17583,19205,33041,43233,50182,8031,3605,4426,12597,19.1,4679,7.1,413.0,0.0,68.0,0.0,4.0,3.0,4.0,6.0,12.0,18.0,21.0,3.0,1.0,5.0,16.0,,-32.0,-28.0,7.0,0.0,9.0
278,IL,17001,Adams,2020-12-23,6051.0,72.0,11.0,0.0,60.0,1.0,64.0,1.0,66085,32276,48.8,33809,51.2,3679,5.6,3684,5.6,8060,12.2,7657,11.6,8105,12.3,4866,7.4,4251,6.4,41.5,15621,23.6,13186,20.0,61406,92.9,2731,4.1,297,0.4,527,0.8,14,0.0,113,0.2,1059,1.6,30192,3447,10417,13344,17583,19205,33041,43233,50182,8031,3605,4426,12597,19.1,4679,7.1,413.0,0.0,68.0,0.0,4.0,3.0,4.0,6.0,12.0,18.0,21.0,3.0,1.0,20.0,37.0,,-34.0,-30.0,7.0,536.0,545.0
279,IL,17001,Adams,2020-12-24,6162.0,73.0,111.0,1.0,59.0,1.0,52.0,1.0,66085,32276,48.8,33809,51.2,3679,5.6,3684,5.6,8060,12.2,7657,11.6,8105,12.3,4866,7.4,4251,6.4,41.5,15621,23.6,13186,20.0,61406,92.9,2731,4.1,297,0.4,527,0.8,14,0.0,113,0.2,1059,1.6,30192,3447,10417,13344,17583,19205,33041,43233,50182,8031,3605,4426,12597,19.1,4679,7.1,413.0,0.0,68.0,0.0,4.0,3.0,4.0,6.0,12.0,18.0,21.0,3.0,1.0,-18.0,33.0,,-44.0,-56.0,15.0,244.0,789.0
280,IL,17001,Adams,2020-12-25,6162.0,74.0,0.0,1.0,59.0,1.0,57.0,1.0,66085,32276,48.8,33809,51.2,3679,5.6,3684,5.6,8060,12.2,7657,11.6,8105,12.3,4866,7.4,4251,6.4,41.5,15621,23.6,13186,20.0,61406,92.9,2731,4.1,297,0.4,527,0.8,14,0.0,113,0.2,1059,1.6,30192,3447,10417,13344,17583,19205,33041,43233,50182,8031,3605,4426,12597,19.1,4679,7.1,424.0,0.0,61.0,0.0,4.0,3.0,3.0,6.0,12.0,14.0,22.0,0.0,1.0,-89.0,-75.0,,-54.0,-83.0,24.0,0.0,789.0
281,IL,17001,Adams,2020-12-26,6117.0,77.0,19.5,3.0,53.0,1.0,65.0,1.0,66085,32276,48.8,33809,51.2,3679,5.6,3684,5.6,8060,12.2,7657,11.6,8105,12.3,4866,7.4,4251,6.4,41.5,15621,23.6,13186,20.0,61406,92.9,2731,4.1,297,0.4,527,0.8,14,0.0,113,0.2,1059,1.6,30192,3447,10417,13344,17583,19205,33041,43233,50182,8031,3605,4426,12597,19.1,4679,7.1,424.0,0.0,61.0,0.0,4.0,3.0,3.0,6.0,12.0,14.0,22.0,0.0,1.0,-32.0,-13.0,,-54.0,-19.0,7.0,0.0,789.0


In [173]:
###############################
# Treat NAs for hospital data #
###############################

# filling na's with 0 for data points after 7-31-2020 because that's when the data should be complete
# most counties that had missing values just didn't have any hospitals in the dataset associated with it, and
# I spot-checked a few and it seemed to be counties without large hospitals.  Therefore, let's fill those in with 0's
# Also manually checked the two counties that had sporadic missings, but the surrounding dates (around the missing 
# dates) were all basically 0 so I feel okay filling in those with 0s
date_mask = (merged["date"]>='7-31-2020')
numerical_hosp_variables = hospitals.columns.tolist()[3:]
merged.loc[date_mask, numerical_hosp_variables] = merged.loc[date_mask, numerical_hosp_variables].fillna(0)

merged[date_mask].describe(datetime_is_numeric=True)

Unnamed: 0,date,cumulative_cases,cumulative_deaths,new_cases,new_deaths,new_cases_7avg,new_deaths_7avg,2weeksago_cases_7avg,2weeksago_deaths_7avg,total_pop,male,perc_male,female,perc_female,age_15_19,p_age_15_19,age_20_24,p_age_20_24,age_25_34,p_age_25_34,age_35_44,p_age_35_44,age_45_54,p_age_45_54,age_55_59,p_age_55_59,age_60_64,p_age_60_64,age_median,age_62over,p_age_62over,age_65over,p_age_65over,white,p_white,black,p_black,native,p_native,asian,p_asian,hawaiian,p_hawaiian,other_race,p_other_race,hispanic,p_hispanic,housing_units,below_50_pov,below_125_pov,below_150_pov,below_185_pov,below_200_pov,below_300_pov,below_400_pov,below_500_pov,below_pov,male_below_pov,female_below_pov,age_under14,p_under14,non_white,p_non_white,total_adult_hospitalizations,total_pediatric_hospitalizations,prev_day_adult_admit_7daysum,prev_day_adult_admit_18-19_7daysum,prev_day_adult_admit_20-29_7daysum,prev_day_adult_admit_30-39_7daysum,prev_day_adult_admit_40-49_7daysum,prev_day_adult_admit_50-59_7daysum,prev_day_adult_admit_60-69_7daysum,prev_day_adult_admit_70-79_7daysum,prev_day_adult_admit_80+_7daysum,prev_day_adult_admit_unknown_7daysum,mask_mandate,retail_rec,grocery_pharm,parks,transit,workplace,residential,new doses,cumulative doses
count,117111,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,117111.0,91371.0,63414.0,56228.0,17927.0,33013.0,97551.0,65760.0,117111.0,117111.0
mean,2020-12-28 04:33:37.409124608,5968.440522,117.095973,31.349601,0.506754,31.407528,0.450812,31.46841,0.444698,105247.2,51671.89,49.992793,53575.33,50.007721,6918.520839,6.417573,7123.782514,6.13498,13987.749648,11.457057,12842.195464,11.418598,13698.454159,12.859055,7360.588228,7.450544,6878.258994,7.170483,42.185661,21011.070198,23.483985,17023.871114,19.27242,81627.46,91.122325,13946.14,4.270402,357.139628,0.581623,3592.966263,1.003852,53.979157,0.050931,2764.147945,0.801762,8822.256,3.606252,46665.73,6256.414402,18246.184201,22479.46,28763.34,31434.1,48405.26,63097.01,74476.68,13932.418039,6185.031859,7747.386181,19413.801957,17.829321,23619.76,8.877675,122.765317,1.548992,16.77395,0.185585,1.071693,1.399057,1.853302,2.997703,3.899864,4.107735,3.753123,0.542759,0.699445,-4.253335,1.480792,33.898589,-5.662375,-18.568554,5.951551,186.51195,12607.54
min,2020-07-31 00:00:00,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2027.0,976.0,44.6,1022.0,36.7,77.0,2.9,84.0,2.3,158.0,5.4,197.0,7.4,210.0,8.3,174.0,4.0,159.0,4.1,27.6,612.0,11.2,500.0,8.3,1982.0,46.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1276.0,108.0,304.0,430.0,599.0,658.0,950.0,1365.0,1673.0,223.0,86.0,137.0,247.0,9.4,36.0,0.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,-91.0,-90.0,-89.0,-90.0,-8.0,0.0,0.0
25%,2020-10-14 00:00:00,461.0,7.0,1.0,0.0,2.0,0.0,2.0,0.0,15199.0,7756.0,49.1,7529.0,49.7,918.0,5.8,794.0,5.1,1677.0,10.5,1718.0,10.8,1930.0,12.3,1178.0,6.9,1138.0,6.5,39.8,3877.0,20.9,3186.0,17.0,14313.0,89.0,123.0,0.7,42.0,0.1,48.0,0.3,0.0,0.0,32.0,0.2,296.0,1.6,7913.0,856.0,2784.0,3604.0,4652.0,5097.0,8157.0,10519.0,12131.0,2068.0,903.0,1151.0,2705.0,16.8,714.0,3.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-15.0,-6.0,-12.0,-25.0,-25.0,3.0,0.0,0.0
50%,2020-12-28 00:00:00,1462.0,28.0,6.0,0.0,7.0,0.0,7.0,0.0,33122.0,16644.0,49.6,16657.0,50.4,2098.0,6.3,1876.0,5.6,3728.0,11.4,3820.0,11.5,4316.0,12.9,2438.0,7.4,2379.0,7.0,41.9,7810.0,22.8,6261.0,18.7,30937.0,94.2,607.0,1.7,121.0,0.3,180.0,0.6,7.0,0.0,138.0,0.4,740.0,2.3,16022.0,1742.0,5777.0,7431.0,9599.0,10569.0,16770.0,21713.0,25195.0,4335.0,1943.0,2440.0,5845.0,18.1,1890.0,5.8,6.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,-5.0,0.0,17.0,-4.0,-17.0,5.0,0.0,0.0
75%,2021-03-13 00:00:00,4063.0,74.0,20.0,0.0,22.0,0.0,22.0,0.0,75358.0,37322.0,50.3,37864.0,50.9,5011.0,6.7,5071.0,6.2,9154.0,12.3,8844.0,12.1,9588.0,13.4,5297.0,8.0,5038.0,7.7,44.3,15621.0,25.3,12713.0,20.9,64416.0,96.3,2602.0,5.5,285.0,0.6,627.0,0.9,30.0,0.1,655.0,0.8,3067.0,4.2,33950.0,3938.0,12285.0,15431.0,20539.0,22655.0,35378.0,46228.0,53308.0,8964.0,4068.0,5036.0,13516.0,19.2,5893.0,11.0,51.0,0.0,10.0,0.0,0.0,0.0,0.0,3.0,3.0,3.0,3.0,0.0,1.0,6.0,8.0,70.0,12.0,-11.0,8.0,41.0,3858.5
max,2021-05-27 00:00:00,552466.0,10816.0,6697.0,277.0,4654.0,70.0,4654.0,70.0,5198275.0,2522949.0,63.3,2675326.0,55.4,313876.0,14.4,342421.0,20.0,849085.0,19.9,701726.0,16.2,658679.0,15.6,330992.0,10.2,305924.0,11.8,58.6,917788.0,42.1,741170.0,35.8,2946314.0,99.3,1217416.0,46.4,14911.0,16.1,379444.0,11.9,2250.0,2.2,500069.0,14.0,1314796.0,31.9,2193338.0,332188.0,969847.0,1186523.0,1498788.0,1628328.0,2404378.0,3020235.0,3518555.0,734470.0,318968.0,415502.0,954402.0,26.1,2251961.0,53.5,16137.0,243.0,1641.0,24.0,117.0,145.0,180.0,278.0,369.0,326.0,300.0,634.0,1.0,213.0,109.0,544.0,252.0,48.0,35.0,37855.0,2335327.0
std,,22579.758274,493.033769,127.345613,2.930224,118.62711,2.01009,118.752927,2.002679,321679.8,156066.7,1.863091,165626.0,1.863282,19977.522037,1.200432,21440.152352,2.369498,50302.021807,1.701386,42179.854502,1.233577,41413.25896,1.010837,21177.932074,0.873948,19528.523519,1.157031,4.772756,58271.546786,4.609622,47022.43749,3.969432,196224.1,8.075748,77954.56,6.305703,970.787913,1.3459,21657.383587,1.45473,194.939791,0.161161,25961.330931,1.293597,68617.61,3.776063,137808.8,21349.622184,60813.452635,73818.87,92627.42,100460.5,148178.9,187482.3,219204.9,46763.682364,20583.845736,26191.294707,59646.478717,2.143732,130531.1,8.075748,528.529995,9.1642,61.797515,1.012519,4.553505,5.862928,7.200072,11.092336,13.404298,13.302207,12.399325,9.158023,0.458502,18.523839,14.111169,64.937196,27.555823,13.36092,4.886556,1005.664102,69354.11


In [174]:
###############################
# Treat NAs for mask data #
###############################

# since mask mandate data ends 3-22-2021 and our covid data goes on until 5-27-2021, let's forward
# fill NA's after 3-22-2021 (I've hand checked and the mask mandates have not changed for our three states)
mask_date_mask = (merged["date"] >= '3-22-2021')
merged.loc[mask_date_mask, 'mask_mandate'] = merged.loc[mask_date_mask, 'mask_mandate'].fillna(method='ffill', 
                                                                                               limit = 20)
merged.loc[mask_date_mask & merged["mask_mandate"].isnull()].head()

Unnamed: 0,state,fips,county,date,cumulative_cases,cumulative_deaths,new_cases,new_deaths,new_cases_7avg,new_deaths_7avg,2weeksago_cases_7avg,2weeksago_deaths_7avg,total_pop,male,perc_male,female,perc_female,age_15_19,p_age_15_19,age_20_24,p_age_20_24,age_25_34,p_age_25_34,age_35_44,p_age_35_44,age_45_54,p_age_45_54,age_55_59,p_age_55_59,age_60_64,p_age_60_64,age_median,age_62over,p_age_62over,age_65over,p_age_65over,white,p_white,black,p_black,native,p_native,asian,p_asian,hawaiian,p_hawaiian,other_race,p_other_race,hispanic,p_hispanic,housing_units,below_50_pov,below_125_pov,below_150_pov,below_185_pov,below_200_pov,below_300_pov,below_400_pov,below_500_pov,below_pov,male_below_pov,female_below_pov,age_under14,p_under14,non_white,p_non_white,total_adult_hospitalizations,total_pediatric_hospitalizations,prev_day_adult_admit_7daysum,prev_day_adult_admit_18-19_7daysum,prev_day_adult_admit_20-29_7daysum,prev_day_adult_admit_30-39_7daysum,prev_day_adult_admit_40-49_7daysum,prev_day_adult_admit_50-59_7daysum,prev_day_adult_admit_60-69_7daysum,prev_day_adult_admit_70-79_7daysum,prev_day_adult_admit_80+_7daysum,prev_day_adult_admit_unknown_7daysum,mask_mandate,retail_rec,grocery_pharm,parks,transit,workplace,residential,new doses,cumulative doses
388,IL,17001,Adams,2021-04-12,8131.0,145.0,3.0,0.0,12.0,0.0,4.0,0.0,66085,32276,48.8,33809,51.2,3679,5.6,3684,5.6,8060,12.2,7657,11.6,8105,12.3,4866,7.4,4251,6.4,41.5,15621,23.6,13186,20.0,61406,92.9,2731,4.1,297,0.4,527,0.8,14,0.0,113,0.2,1059,1.6,30192,3447,10417,13344,17583,19205,33041,43233,50182,8031,3605,4426,12597,19.1,4679,7.1,64.0,3.0,9.0,0.0,0.0,0.0,0.0,3.0,3.0,3.0,3.0,0.0,,-3.0,9.0,,-34.0,-16.0,3.0,27.0,41394.0
389,IL,17001,Adams,2021-04-13,8132.0,145.0,1.0,0.0,10.0,0.0,4.0,0.0,66085,32276,48.8,33809,51.2,3679,5.6,3684,5.6,8060,12.2,7657,11.6,8105,12.3,4866,7.4,4251,6.4,41.5,15621,23.6,13186,20.0,61406,92.9,2731,4.1,297,0.4,527,0.8,14,0.0,113,0.2,1059,1.6,30192,3447,10417,13344,17583,19205,33041,43233,50182,8031,3605,4426,12597,19.1,4679,7.1,64.0,3.0,9.0,0.0,0.0,0.0,0.0,3.0,3.0,3.0,3.0,0.0,,3.0,3.0,,,-17.0,3.0,18.0,41412.0
390,IL,17001,Adams,2021-04-14,8149.0,145.0,17.0,0.0,9.0,0.0,3.0,0.0,66085,32276,48.8,33809,51.2,3679,5.6,3684,5.6,8060,12.2,7657,11.6,8105,12.3,4866,7.4,4251,6.4,41.5,15621,23.6,13186,20.0,61406,92.9,2731,4.1,297,0.4,527,0.8,14,0.0,113,0.2,1059,1.6,30192,3447,10417,13344,17583,19205,33041,43233,50182,8031,3605,4426,12597,19.1,4679,7.1,64.0,3.0,9.0,0.0,0.0,0.0,0.0,3.0,3.0,3.0,3.0,0.0,,2.0,7.0,,-11.0,-16.0,2.0,133.0,41545.0
391,IL,17001,Adams,2021-04-15,8168.0,146.0,19.0,1.0,9.0,0.0,5.0,0.0,66085,32276,48.8,33809,51.2,3679,5.6,3684,5.6,8060,12.2,7657,11.6,8105,12.3,4866,7.4,4251,6.4,41.5,15621,23.6,13186,20.0,61406,92.9,2731,4.1,297,0.4,527,0.8,14,0.0,113,0.2,1059,1.6,30192,3447,10417,13344,17583,19205,33041,43233,50182,8031,3605,4426,12597,19.1,4679,7.1,64.0,3.0,9.0,0.0,0.0,0.0,0.0,3.0,3.0,3.0,3.0,0.0,,-5.0,11.0,,-33.0,-18.0,3.0,89.0,41634.0
392,IL,17001,Adams,2021-04-16,8200.0,146.0,32.0,0.0,12.0,0.0,5.0,0.0,66085,32276,48.8,33809,51.2,3679,5.6,3684,5.6,8060,12.2,7657,11.6,8105,12.3,4866,7.4,4251,6.4,41.5,15621,23.6,13186,20.0,61406,92.9,2731,4.1,297,0.4,527,0.8,14,0.0,113,0.2,1059,1.6,30192,3447,10417,13344,17583,19205,33041,43233,50182,8031,3605,4426,12597,19.1,4679,7.1,77.0,5.0,11.0,0.0,0.0,3.0,0.0,3.0,3.0,4.0,3.0,0.0,,-4.0,-3.0,,-3.0,-13.0,1.0,79.0,41713.0


In [175]:
# drop MI data for now since we don't have vaccination data
merged = merged[merged["state"]!="MI"]

In [176]:
# create cases_next_week variable that's the cases 7 days from now
merged["cases_next_week"]= merged.groupby(["state", "fips", "county"])['new_cases_7avg']\
                                            .shift(-7)

merged[["state", "fips", "county", "new_cases_7avg", "cases_next_week", "date"]].head(50)

Unnamed: 0,state,fips,county,new_cases_7avg,cases_next_week,date
0,IL,17001,Adams,1.0,0.0,2020-03-20
1,IL,17001,Adams,0.0,0.0,2020-03-21
2,IL,17001,Adams,0.0,0.0,2020-03-22
3,IL,17001,Adams,0.0,0.0,2020-03-23
4,IL,17001,Adams,0.0,0.0,2020-03-24
5,IL,17001,Adams,0.0,0.0,2020-03-25
6,IL,17001,Adams,0.0,0.0,2020-03-26
7,IL,17001,Adams,0.0,0.0,2020-03-27
8,IL,17001,Adams,0.0,0.0,2020-03-28
9,IL,17001,Adams,0.0,0.0,2020-03-29


In [177]:
# make sure there's no negative cases values
bool((merged.loc[:,"cumulative_cases":"2weeksago_deaths_7avg"] >= 0).all().mean())

True

In [178]:
# export dataset
merged.to_csv("../Data/merged.csv", index=False)