In [259]:
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 200)

In [184]:
covid = pd.read_csv('../capstone-data/monthly/covid-restrictions.csv')
cta = pd.read_csv('../capstone-data/monthly/cta-monthly-primary.csv')
auto = pd.read_excel('../capstone-data/monthly/agg_auto.xlsx')
crashes = pd.read_csv('../capstone-data/monthly/crash_fin.csv')
vax = pd.read_csv('../capstone-data/covid/COVID-19_Daily_Vaccinations_-_Chicago_Residents.csv')


# Prep CTA

In [185]:
cta.dtypes

date            object
bus            float64
rail           float64
total_rides    float64
dtype: object

In [186]:
cta['date']=pd.to_datetime(cta['date'])

In [187]:
cta.dtypes

date           datetime64[ns]
bus                   float64
rail                  float64
total_rides           float64
dtype: object

In [188]:
cta.set_index('date',inplace=True)
cta.sort_index(inplace=True)

# Prep Auto

In [189]:
auto.rename(columns={'Unnamed: 0':'date'},inplace=True)

In [190]:
auto.drop(columns={'Unnamed: 4'},inplace=True)

In [191]:
auto.dtypes

date                   datetime64[ns]
new_light_truck_reg           float64
new_car_reg                   float64
new_total_reg                 float64
dtype: object

In [192]:
auto.set_index('date',inplace=True)
auto.sort_index(inplace=True)

In [193]:
auto.head()

Unnamed: 0_level_0,new_light_truck_reg,new_car_reg,new_total_reg
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2006-01-01,13166.0,13961.0,27127.0
2006-02-01,11239.5,12597.0,23836.5
2006-03-01,11716.5,13620.5,25337.0
2006-04-01,11716.5,13620.5,25337.0
2006-05-01,11177.5,14052.5,25230.0


# Combine CTA & Auto

In [194]:
merged = cta.merge(auto, how='outer', left_index=True, right_index=True)

In [195]:
merged.shape

(252, 6)

In [196]:
merged.head()

Unnamed: 0_level_0,bus,rail,total_rides,new_light_truck_reg,new_car_reg,new_total_reg
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2001-01-01,25022908.0,14712334.0,39735242.0,,,
2001-02-01,23768892.0,13669269.0,37438161.0,,,
2001-03-01,26827165.0,15463578.0,42290743.0,,,
2001-04-01,25053791.0,14713336.0,39767127.0,,,
2001-05-01,26743744.0,15743730.0,42487474.0,,,


In [197]:
merged.tail()

Unnamed: 0_level_0,bus,rail,total_rides,new_light_truck_reg,new_car_reg,new_total_reg
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-08-01,10949681.0,8061195.0,19010876.0,16901.0,21676.0,21676.0
2021-09-01,12058765.0,8988251.0,21047016.0,14109.0,4021.0,18130.0
2021-10-01,11905629.0,9029274.0,20934903.0,17447.0,5041.0,22488.0
2021-11-01,10923926.0,8020686.0,18944612.0,13562.0,3328.0,16890.0
2021-12-01,,,,20876.0,4820.0,25696.0


# Prep Crashes

In [198]:
crashes.tail()

Unnamed: 0,crash_date,count
102,9/1/21,9897
103,10/1/21,10264
104,11/1/21,8763
105,12/1/21,8472
106,1/1/22,3781


In [199]:
crashes.dtypes

crash_date    object
count          int64
dtype: object

In [200]:
crashes.rename(columns={'crash_date':'date'},inplace=True)
crashes['date']=pd.to_datetime(crashes['date'])
crashes.dtypes

date     datetime64[ns]
count             int64
dtype: object

In [201]:
crashes.set_index('date',inplace=True)
crashes.sort_index(inplace=True)

In [202]:
crashes.head()

Unnamed: 0_level_0,count
date,Unnamed: 1_level_1
2013-03-01,1
2013-04-01,0
2013-05-01,0
2013-06-01,0
2013-07-01,0


# Merge in Crashes

In [203]:
merged = merged.merge(crashes, how='outer', left_index=True, right_index=True)

In [204]:
crashes.shape

(107, 1)

In [205]:
merged.shape

(253, 7)

In [206]:
merged.head()

Unnamed: 0_level_0,bus,rail,total_rides,new_light_truck_reg,new_car_reg,new_total_reg,count
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2001-01-01,25022908.0,14712334.0,39735242.0,,,,
2001-02-01,23768892.0,13669269.0,37438161.0,,,,
2001-03-01,26827165.0,15463578.0,42290743.0,,,,
2001-04-01,25053791.0,14713336.0,39767127.0,,,,
2001-05-01,26743744.0,15743730.0,42487474.0,,,,
2001-06-01,24994471.0,15586497.0,40580968.0,,,,
2001-07-01,24537097.0,16024798.0,40561895.0,,,,
2001-08-01,24647188.0,16014736.0,40661924.0,,,,
2001-09-01,24908952.0,14745685.0,39654637.0,,,,
2001-10-01,26768528.0,16663128.0,43431656.0,,,,


# Prep Covid

In [207]:
covid

Unnamed: 0,Date,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,E1_Income support,E2_Debt/contract relief,H1_Public information campaigns,H1_Flag,H2_Testing policy,H3_Contact tracing,H6_Facial Coverings,H7_Vaccination policy,H8_Protection of elderly people,ConfirmedCases,ConfirmedDeaths,StringencyIndex,StringencyIndexForDisplay,StringencyLegacyIndex,StringencyLegacyIndexForDisplay,GovernmentResponseIndex,GovernmentResponseIndexForDisplay,ContainmentHealthIndex,ContainmentHealthIndexForDisplay,EconomicSupportIndex,EconomicSupportIndexForDisplay
0,1/1/20,0,0,0,0,0,0,0,0,0,0,0,,0,0,0,0,0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2/1/20,0,0,0,0,0,0,0,0,0,0,1,0.0,1,1,0,0,0,2.0,0.0,2.78,2.78,4.76,4.76,6.77,6.77,7.74,7.74,0.0,0.0
2,3/1/20,0,0,0,0,0,0,0,0,0,0,1,1.0,1,1,0,0,0,3.0,0.0,5.56,5.56,9.52,9.52,8.33,8.33,9.52,9.52,0.0,0.0
3,4/1/20,3,3,2,4,0,2,2,3,2,2,2,1.0,1,1,0,0,1,6980.0,141.0,82.41,82.41,82.14,82.14,64.58,64.58,61.31,61.31,87.5,87.5
4,5/1/20,3,3,2,4,0,2,2,3,2,2,2,1.0,1,1,2,0,1,56055.0,2457.0,82.41,82.41,82.14,82.14,67.71,67.71,64.88,64.88,87.5,87.5
5,6/1/20,3,1,2,4,0,2,2,3,2,2,2,1.0,2,1,2,0,1,121234.0,5412.0,75.0,75.0,75.0,75.0,65.62,65.62,62.5,62.5,87.5,87.5
6,7/1/20,3,1,1,3,0,1,1,3,2,2,2,1.0,2,1,2,0,1,144013.0,6951.0,55.56,55.56,63.81,63.81,54.69,54.69,50.0,50.0,87.5,87.5
7,8/1/20,3,1,1,3,0,1,2,3,1,1,2,1.0,3,1,2,0,1,181754.0,7700.0,58.33,58.33,63.81,63.81,52.08,52.08,54.17,54.17,37.5,37.5
8,9/1/20,3,1,1,3,0,1,2,3,1,1,2,1.0,3,1,2,0,1,238216.0,8273.0,55.09,55.09,58.33,58.33,50.26,50.26,52.08,52.08,37.5,37.5
9,10/1/20,3,1,1,3,0,1,2,3,1,1,2,1.0,3,1,4,0,1,297884.0,8940.0,55.09,55.09,58.33,58.33,52.6,52.6,54.76,54.76,37.5,37.5


In [208]:
covid.dtypes

Date                                     object
C1_School closing                         int64
C2_Workplace closing                      int64
C3_Cancel public events                   int64
C4_Restrictions on gatherings             int64
C5_Close public transport                 int64
C6_Stay at home requirements              int64
C7_Restrictions on internal movement      int64
C8_International travel controls          int64
E1_Income support                         int64
E2_Debt/contract relief                   int64
H1_Public information campaigns           int64
H1_Flag                                 float64
H2_Testing policy                         int64
H3_Contact tracing                        int64
H6_Facial Coverings                       int64
H7_Vaccination policy                     int64
H8_Protection of elderly people           int64
ConfirmedCases                          float64
ConfirmedDeaths                         float64
StringencyIndex                         

In [209]:
covid.rename(columns={'Date':'date'},inplace=True)
covid['date']=pd.to_datetime(covid['date'])
covid.dtypes

date                                    datetime64[ns]
C1_School closing                                int64
C2_Workplace closing                             int64
C3_Cancel public events                          int64
C4_Restrictions on gatherings                    int64
C5_Close public transport                        int64
C6_Stay at home requirements                     int64
C7_Restrictions on internal movement             int64
C8_International travel controls                 int64
E1_Income support                                int64
E2_Debt/contract relief                          int64
H1_Public information campaigns                  int64
H1_Flag                                        float64
H2_Testing policy                                int64
H3_Contact tracing                               int64
H6_Facial Coverings                              int64
H7_Vaccination policy                            int64
H8_Protection of elderly people                  int64
ConfirmedC

In [210]:
covid.set_index('date',inplace=True)
covid.sort_index(inplace=True)

In [211]:
covid.drop(columns={'H1_Flag','StringencyIndexForDisplay','StringencyLegacyIndexForDisplay',
                    'GovernmentResponseIndexForDisplay','ContainmentHealthIndexForDisplay',
                   'EconomicSupportIndexForDisplay'},inplace=True)



In [212]:
merged = merged.merge(covid, how='outer', left_index=True, right_index=True)

In [213]:
merged.shape

(253, 30)

In [214]:
merged.isna().sum()

bus                                       2
rail                                      2
total_rides                               2
new_light_truck_reg                      61
new_car_reg                              61
new_total_reg                            61
count                                   146
C1_School closing                       229
C2_Workplace closing                    229
C3_Cancel public events                 229
C4_Restrictions on gatherings           229
C5_Close public transport               229
C6_Stay at home requirements            229
C7_Restrictions on internal movement    229
C8_International travel controls        229
E1_Income support                       229
E2_Debt/contract relief                 229
H1_Public information campaigns         229
H2_Testing policy                       229
H3_Contact tracing                      229
H6_Facial Coverings                     229
H7_Vaccination policy                   229
H8_Protection of elderly people 

In [215]:
merged.rename(columns={'count':'car_crashes'},inplace=True)

In [216]:
merged.loc[merged['total_rides'].isna() == False].head()

Unnamed: 0_level_0,bus,rail,total_rides,new_light_truck_reg,new_car_reg,new_total_reg,car_crashes,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,E1_Income support,E2_Debt/contract relief,H1_Public information campaigns,H2_Testing policy,H3_Contact tracing,H6_Facial Coverings,H7_Vaccination policy,H8_Protection of elderly people,ConfirmedCases,ConfirmedDeaths,StringencyIndex,StringencyLegacyIndex,GovernmentResponseIndex,ContainmentHealthIndex,EconomicSupportIndex
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1
2001-01-01,25022908.0,14712334.0,39735242.0,,,,,,,,,,,,,,,,,,,,,,,,,,,
2001-02-01,23768892.0,13669269.0,37438161.0,,,,,,,,,,,,,,,,,,,,,,,,,,,
2001-03-01,26827165.0,15463578.0,42290743.0,,,,,,,,,,,,,,,,,,,,,,,,,,,
2001-04-01,25053791.0,14713336.0,39767127.0,,,,,,,,,,,,,,,,,,,,,,,,,,,
2001-05-01,26743744.0,15743730.0,42487474.0,,,,,,,,,,,,,,,,,,,,,,,,,,,
2001-06-01,24994471.0,15586497.0,40580968.0,,,,,,,,,,,,,,,,,,,,,,,,,,,
2001-07-01,24537097.0,16024798.0,40561895.0,,,,,,,,,,,,,,,,,,,,,,,,,,,
2001-08-01,24647188.0,16014736.0,40661924.0,,,,,,,,,,,,,,,,,,,,,,,,,,,
2001-09-01,24908952.0,14745685.0,39654637.0,,,,,,,,,,,,,,,,,,,,,,,,,,,
2001-10-01,26768528.0,16663128.0,43431656.0,,,,,,,,,,,,,,,,,,,,,,,,,,,


## Vax Prep

In [217]:
vax.columns = vax.columns.str.lower()

In [218]:
vax['date'] = pd.to_datetime(vax['date'])
vax.set_index('date',inplace=True)
vax.sort_index(inplace=True)

In [219]:
vax.head()

Unnamed: 0_level_0,total doses - daily,total doses - cumulative,1st dose - daily,1st dose - cumulative,1st dose - percent population,vaccine series completed - daily,vaccine series completed - cumulative,vaccine series completed - percent population,total doses - daily - age 0-17,total doses - daily - age 18-29,total doses - daily - age 30-39,total doses - daily - age 40-49,total doses - daily - age 50-59,total doses - daily - age 60-69,total doses - daily - age 70-79,total doses - daily - age 80+,total doses - daily - age 5-11,total doses - daily - age 12-17,total doses - daily - age 5+,total doses - daily - age 12+,total doses - daily - age 18+,total doses - daily - age 65+,total doses - daily - age 75+,total doses - daily - unknown age,total doses - daily - female,total doses - daily - male,total doses - daily - unknown gender,total doses - daily - latinx,total doses - daily - asian non-latinx,total doses - daily - black non-latinx,total doses - daily - white non-latinx,total doses - daily - american indian/alaska native non-latinx,total doses - daily - pacific islander/hawaiian native non-latinx,total doses - daily - other non-latinx,total doses - daily - unknown race/ethnicity,1st dose - daily - age 0-17,1st dose - daily - age 18-29,1st dose - daily - age 30-39,1st dose - daily - age 40-49,1st dose - daily - age 50-59,1st dose - daily - age 60-69,1st dose - daily - age 70-79,1st dose - daily - age 80+,1st dose - daily - age 5-11,1st dose - daily - age 12-17,1st dose - daily - age 5+,1st dose - daily - age 12+,1st dose - daily - age 18+,1st dose - daily - age 65+,1st dose - daily - age 75+,1st dose - daily - unknown age,1st dose - daily - female,1st dose - daily - male,1st dose - daily - unknown gender,1st dose - daily - latinx,1st dose - daily - asian non-latinx,1st dose - daily - black non-latinx,1st dose - daily - white non-latinx,1st dose - daily - american indian/alaska native non-latinx,1st dose - daily - pacific islander/hawaiian native non-latinx,1st dose - daily - other race non-latinx,1st dose - daily - unknown race/ethnicity,vaccine series completed - daily - age 0-17,vaccine series completed - daily - age 18-29,vaccine series completed - daily - age 30-39,vaccine series completed - daily - age 40-49,vaccine series completed - daily - age 50-59,vaccine series completed - daily - age 60-69,vaccine series completed - daily - age 70-79,vaccine series completed - daily - age 80+,vaccine series completed - daily - age 5-11,vaccine series completed - daily - age 12-17,vaccine series completed - daily - age 5+,vaccine series completed - daily - age 12+,vaccine series completed - daily - age 18+,vaccine series completed - daily - age 65+,vaccine series completed - daily - age 75+,vaccine series completed - daily - unknown age,vaccine series completed - daily - female,vaccine series completed - daily - male,vaccine series completed - daily - unknown gender,vaccine series completed - daily - latinx,vaccine series completed - daily - asian non-latinx,vaccine series completed - daily - black non-latinx,vaccine series completed - daily - white non-latinx,vaccine series completed - daily - american indian/alaska native non-latinx,vaccine series completed - daily - pacific islander/hawaiian native non-latinx,vaccine series completed - daily - other race non-latinx,vaccine series completed - daily - unknown race/ethnicity
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1
2020-12-15,17,17,17,17,0.0,0,0,0.0,0,4,5,3,3,2,0,0,0,0,17,17,17,0,0,0,9,8,0,2,2,5,8,0,0,0,0,0,4,5,3,3,2,0,0,0,0,17,17,17,0,0,0,9,8,0,2,2,5,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2020-12-16,169,186,169,186,0.0,0,0,0.0,0,21,65,37,21,24,1,0,0,0,169,169,169,9,0,0,84,81,4,24,31,17,72,0,1,6,18,0,21,65,37,21,24,1,0,0,0,169,169,169,9,0,0,84,81,4,24,31,17,72,0,1,6,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2020-12-17,1989,2175,1989,2175,0.001,0,0,0.0,0,504,798,321,216,122,26,2,0,0,1989,1989,1989,69,8,0,1125,779,85,128,296,176,1112,10,9,91,167,0,504,798,321,216,122,26,2,0,0,1989,1989,1989,69,8,0,1125,779,85,128,296,176,1112,10,9,91,167,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2020-12-18,5729,7904,5729,7904,0.003,0,0,0.0,0,1443,2154,966,662,422,70,12,0,0,5729,5729,5729,231,34,0,3505,1996,228,431,921,391,3165,24,27,279,491,0,1443,2154,966,662,422,70,12,0,0,5729,5729,5729,231,34,0,3505,1996,228,431,921,391,3165,24,27,279,491,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2020-12-19,3513,11417,3513,11417,0.004,0,0,0.0,0,830,1350,599,425,273,32,4,0,0,3513,3513,3513,134,9,0,2223,1195,95,266,515,209,1964,15,7,215,322,0,830,1350,599,425,273,32,4,0,0,3513,3513,3513,134,9,0,2223,1195,95,266,515,209,1964,15,7,215,322,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [220]:
vax=vax[['total doses - daily','total doses - cumulative','1st dose - daily','1st dose - cumulative','1st dose - percent population','vaccine series completed - daily','vaccine series completed - cumulative','vaccine series completed - percent population']]

In [221]:
vax.rename(columns={'total doses - daily':'total_daily_doses',
           'total doses - cumulative':'total_daily_cum',
           '1st dose - daily':'first_dose_daily',
           '1st dose - cumulative':'first_dose_cum',
           '1st dose - percent population':'first_dose_percent_pop',
           'vaccine series completed - daily':'vax_series_completed_daily',
           'vaccine series completed - cumulative':'vax_series_cum',
           'vaccine series completed - percent population':'vax_series_percent'},inplace=True)



In [222]:
vax.isna().sum()

total_daily_doses             0
total_daily_cum               0
first_dose_daily              0
first_dose_cum                0
first_dose_percent_pop        0
vax_series_completed_daily    0
vax_series_cum                0
vax_series_percent            0
dtype: int64

In [223]:
vax.dtypes

total_daily_doses               int64
total_daily_cum                 int64
first_dose_daily                int64
first_dose_cum                  int64
first_dose_percent_pop        float64
vax_series_completed_daily      int64
vax_series_cum                  int64
vax_series_percent            float64
dtype: object

In [224]:
vax = vax.resample('MS').sum()

In [225]:
vax

Unnamed: 0_level_0,total_daily_doses,total_daily_cum,first_dose_daily,first_dose_cum,first_dose_percent_pop,vax_series_completed_daily,vax_series_cum,vax_series_percent
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-12-01,37800,321658,37800,321658,0.119,0,0,0.0
2021-01-01,179208,3507100,128715,2760638,1.024,50493,746462,0.277
2021-02-01,346512,10957678,211901,7893289,2.93,134614,3064393,1.136
2021-03-01,667046,27128440,439238,17803097,6.61,251818,9638626,3.579
2021-04-01,854762,50564013,406915,31543514,11.709,483203,20528629,7.623
2021-05-01,453139,72702185,179496,40958570,15.204,288745,33853003,12.565
2021-06-01,234556,80365681,87390,43622455,16.194,156120,39125380,14.525
2021-07-01,124646,87931749,64864,47180163,17.513,65199,43425410,16.121
2021-08-01,145992,92069365,78152,49535540,18.389,64025,45291093,16.813
2021-09-01,141744,93346830,57255,49959116,18.545,59062,45759922,16.984


In [226]:
merged = merged.merge(df, how='outer', left_index=True, right_index=True)

In [229]:
merged.tail()

Unnamed: 0_level_0,bus,rail,total_rides,new_light_truck_reg,new_car_reg,new_total_reg,car_crashes,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,E1_Income support,E2_Debt/contract relief,H1_Public information campaigns,H2_Testing policy,H3_Contact tracing,H6_Facial Coverings,H7_Vaccination policy,H8_Protection of elderly people,ConfirmedCases,ConfirmedDeaths,StringencyIndex,StringencyLegacyIndex,GovernmentResponseIndex,ContainmentHealthIndex,EconomicSupportIndex,total_daily_doses,total_daily_cum,first_dose_daily,first_dose_cum,first_dose_percent_pop,vax_series_completed_daily,vax_series_cum,vax_series_percent
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
2021-09-01,12058765.0,8988251.0,21047016.0,14109.0,4021.0,18130.0,9897.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,2.0,2.0,3.0,1.0,2.0,5.0,1.0,1528120.0,26412.0,25.0,34.52,42.71,39.88,62.5,141744.0,93346830.0,57255.0,49959116.0,18.545,59062.0,45759922.0,16.984
2021-10-01,11905629.0,9029274.0,20934903.0,17447.0,5041.0,22488.0,10264.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,1.0,2.0,5.0,2.0,1630864.0,27450.0,25.0,34.52,41.67,42.26,37.5,220280.0,102023840.0,47613.0,53225022.0,19.756,50765.0,48958837.0,18.174
2021-11-01,10923926.0,8020686.0,18944612.0,13562.0,3328.0,16890.0,8763.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,1701649.0,28267.0,25.0,34.52,44.79,45.83,37.5,362350.0,107880947.0,101956.0,53864013.0,19.992,53908.0,48908612.0,18.154
2021-12-01,,,,20876.0,4820.0,25696.0,8472.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,1815994.0,28892.0,25.0,34.52,44.79,45.83,37.5,422185.0,124220244.0,91367.0,58514803.0,21.721,84468.0,53061072.0,19.697
2022-01-01,,,,,,,3781.0,,,,,,,,,,,,,,,,,,,,,,,,131890.0,55336422.0,47117.0,25432963.0,9.439,22732.0,22799292.0,8.463


In [230]:
test=merged.copy()

In [231]:
test.isna().sum()

bus                                       2
rail                                      2
total_rides                               2
new_light_truck_reg                      61
new_car_reg                              61
new_total_reg                            61
car_crashes                             146
C1_School closing                       229
C2_Workplace closing                    229
C3_Cancel public events                 229
C4_Restrictions on gatherings           229
C5_Close public transport               229
C6_Stay at home requirements            229
C7_Restrictions on internal movement    229
C8_International travel controls        229
E1_Income support                       229
E2_Debt/contract relief                 229
H1_Public information campaigns         229
H2_Testing policy                       229
H3_Contact tracing                      229
H6_Facial Coverings                     229
H7_Vaccination policy                   229
H8_Protection of elderly people 

In [232]:
test.iloc[:220,7:].fillna(0,inplace=True)

In [233]:
test.loc[test['total_daily_doses'].isna()==True]

Unnamed: 0_level_0,bus,rail,total_rides,new_light_truck_reg,new_car_reg,new_total_reg,car_crashes,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,E1_Income support,E2_Debt/contract relief,H1_Public information campaigns,H2_Testing policy,H3_Contact tracing,H6_Facial Coverings,H7_Vaccination policy,H8_Protection of elderly people,ConfirmedCases,ConfirmedDeaths,StringencyIndex,StringencyLegacyIndex,GovernmentResponseIndex,ContainmentHealthIndex,EconomicSupportIndex,total_daily_doses,total_daily_cum,first_dose_daily,first_dose_cum,first_dose_percent_pop,vax_series_completed_daily,vax_series_cum,vax_series_percent
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
2019-05-01,21144868.0,19206970.0,40351838.0,21328.0,9540.0,30868.0,10708.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2019-06-01,19794294.0,18783743.0,38578037.0,17616.0,7188.0,24804.0,10706.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2019-07-01,19903917.0,19103898.0,39007815.0,18860.0,7694.0,26554.0,10659.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2019-08-01,19940708.0,19408958.0,39349666.0,21170.0,8698.0,29868.0,9940.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2019-09-01,20857679.0,19178218.0,40035897.0,21812.0,8391.0,30203.0,9815.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2019-10-01,21800400.0,20582746.0,42383146.0,22384.0,8869.0,31253.0,9938.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2019-11-01,19304408.0,17448364.0,36752772.0,19978.0,7133.0,27111.0,9640.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2019-12-01,18593554.0,15923640.0,34517194.0,23699.0,7592.0,31291.0,9417.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2020-01-01,19482387.0,17181153.0,36663540.0,20744.0,6652.0,27396.0,8680.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0,0.0,0.0,0.0,0.0,,,,,,,,
2020-02-01,19030115.0,16298769.0,35328884.0,14423.0,5483.0,19906.0,9019.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,2.78,4.76,6.77,7.74,0.0,,,,,,,,


In [246]:
test.iloc[:,-8:]=test.iloc[:,-8:].fillna(0)

In [None]:
#merge_limited = merged.loc[merged['new_total_reg'].isna()==False]

In [256]:
test.iloc[:,-31:-8] = test.iloc[:,-31:-8].fillna(0)

In [257]:
test.isna().sum()

bus                                       2
rail                                      2
total_rides                               2
new_light_truck_reg                      61
new_car_reg                              61
new_total_reg                            61
car_crashes                             146
C1_School closing                         0
C2_Workplace closing                      0
C3_Cancel public events                   0
C4_Restrictions on gatherings             0
C5_Close public transport                 0
C6_Stay at home requirements              0
C7_Restrictions on internal movement      0
C8_International travel controls          0
E1_Income support                         0
E2_Debt/contract relief                   0
H1_Public information campaigns           0
H2_Testing policy                         0
H3_Contact tracing                        0
H6_Facial Coverings                       0
H7_Vaccination policy                     0
H8_Protection of elderly people 

In [260]:
test

Unnamed: 0_level_0,bus,rail,total_rides,new_light_truck_reg,new_car_reg,new_total_reg,car_crashes,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,E1_Income support,E2_Debt/contract relief,H1_Public information campaigns,H2_Testing policy,H3_Contact tracing,H6_Facial Coverings,H7_Vaccination policy,H8_Protection of elderly people,ConfirmedCases,ConfirmedDeaths,StringencyIndex,StringencyLegacyIndex,GovernmentResponseIndex,ContainmentHealthIndex,EconomicSupportIndex,total_daily_doses,total_daily_cum,first_dose_daily,first_dose_cum,first_dose_percent_pop,vax_series_completed_daily,vax_series_cum,vax_series_percent
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1
2001-01-01,25022908.0,14712334.0,39735242.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0,0,0,0,0,0,0,0
2001-02-01,23768892.0,13669269.0,37438161.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0,0,0,0,0,0,0,0
2001-03-01,26827165.0,15463578.0,42290743.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0,0,0,0,0,0,0,0
2001-04-01,25053791.0,14713336.0,39767127.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0,0,0,0,0,0,0,0
2001-05-01,26743744.0,15743730.0,42487474.0,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-09-01,12058765.0,8988251.0,21047016.0,14109.0,4021.0,18130.0,9897.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,2.0,2.0,3.0,1.0,2.0,5.0,1.0,1528120.0,26412.0,25.0,34.52,42.71,39.88,62.5,0,0,0,0,0,0,0,0
2021-10-01,11905629.0,9029274.0,20934903.0,17447.0,5041.0,22488.0,10264.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,1.0,2.0,5.0,2.0,1630864.0,27450.0,25.0,34.52,41.67,42.26,37.5,0,0,0,0,0,0,0,0
2021-11-01,10923926.0,8020686.0,18944612.0,13562.0,3328.0,16890.0,8763.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,1701649.0,28267.0,25.0,34.52,44.79,45.83,37.5,0,0,0,0,0,0,0,0
2021-12-01,,,,20876.0,4820.0,25696.0,8472.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,1815994.0,28892.0,25.0,34.52,44.79,45.83,37.5,0,0,0,0,0,0,0,0


In [261]:
test.to_csv('../capstone-data/monthly-merged.csv')
#merge_limited.to_csv('../capstone-data/merge_limited.csv')