In [1]:
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 200)

In [2]:
cta = pd.read_csv('../capstone-data/daily/CTA_-_Ridership_-_Daily_Boarding_Totals.csv')
gas = pd.read_csv('../capstone-data/weekly/Weekly_Chicago_Regular_All_Formulations_Retail_Gasoline_Prices.csv')
vax= pd.read_csv('../capstone-data/covid/COVID-19_Daily_Vaccinations_-_Chicago_Residents.csv')
weather = pd.read_csv('../capstone-data/daily/weather.csv')
curve = pd.read_excel('../capstone-data/daily/treasury_yield_curve.xlsx')
crash = pd.read_csv('../capstone-data/monthly/Traffic_Crashes_-_Crashes.csv')
covid = pd.read_csv('../capstone-data/daily/covid_total(d).csv')


In [3]:
cta.head()

Unnamed: 0,service_date,day_type,bus,rail_boardings,total_rides
0,01/01/2001,U,297192,126455,423647
1,01/02/2001,W,780827,501952,1282779
2,01/03/2001,W,824923,536432,1361355
3,01/04/2001,W,870021,550011,1420032
4,01/05/2001,W,890426,557917,1448343


In [4]:
cta.shape

(7701, 5)

In [5]:
cta.dtypes

service_date      object
day_type          object
bus                int64
rail_boardings     int64
total_rides        int64
dtype: object

In [6]:
cta.drop(columns={'day_type'},inplace=True)

In [7]:
cta.rename(columns={'service_date':'date'},inplace=True)
cta['date']= pd.to_datetime(cta['date'])

In [8]:
cta.head()

Unnamed: 0,date,bus,rail_boardings,total_rides
0,2001-01-01,297192,126455,423647
1,2001-01-02,780827,501952,1282779
2,2001-01-03,824923,536432,1361355
3,2001-01-04,870021,550011,1420032
4,2001-01-05,890426,557917,1448343


In [9]:
cta.set_index('date',inplace=True) 
cta.sort_index(inplace=True)

In [10]:
#df1= df.groupby(df['date']).agg('sum')
weekly = cta.resample('W',closed='left').sum()
weekly.index = weekly.index - pd.DateOffset(days=6)

In [11]:
weekly.shape

(1092, 3)

In [12]:
weekly

Unnamed: 0_level_0,bus,rail_boardings,total_rides
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2001-01-01,4240790,2528123,6768913
2001-01-08,5930650,3445299,9375949
2001-01-15,5591301,3295995,8887296
2001-01-22,5956378,3463065,9419443
2001-01-29,5936801,3436499,9373300
...,...,...,...
2021-11-01,2747792,2013070,4760862
2021-11-08,2604481,1951484,4555965
2021-11-15,2647447,2007509,4654956
2021-11-22,2071230,1505622,3576852


## Prep Gas

In [13]:
gas = gas.rename(columns={'Week of':'date', 'Weekly Chicago Regular All Formulations Retail Gasoline Prices Dollars per Gallon':'price'})

In [14]:
gas['date']=pd.to_datetime(gas['date'])

In [15]:
gas.set_index('date',inplace=True)
gas.sort_index(inplace=True)
gas

Unnamed: 0_level_0,price
date,Unnamed: 1_level_1
2000-06-05,1.949
2000-06-12,2.091
2000-06-19,2.108
2000-06-26,1.989
2000-07-03,1.786
...,...
2021-12-27,3.340
2022-01-03,3.320
2022-01-10,3.457
2022-01-17,3.404


### Gas Merge

In [16]:
merged = weekly.merge(gas, how='outer', left_index=True, right_index=True)

In [17]:
merged.isna().sum()

bus               38
rail_boardings    38
total_rides       38
price              0
dtype: int64

In [18]:
merged.dropna(inplace=True)

In [19]:
merged.shape

(1092, 4)

In [20]:
merged.rename(columns={'price':'gas_price'},inplace=True)

## Weather

In [21]:
def weekly_weather(weather):
    """
    A function that cleans the weather dataset, which comes as daily. 
    Commented out is optional code that can be edited to suit the needed format,
    ex. ...resample('MS').sum() for month starting on first day of that month.
    """
    weather.columns
    weather.columns = weather.columns.str.lower()
    weather.drop(columns='station',inplace=True)
    weather['date'] = pd.to_datetime(weather['date'])
    # weather.set_index('date',inplace=True)
    weather = weather.groupby('date').agg('mean')
    weather.sort_index(inplace=True)
    weather = weather.fillna(0)
    
    
    weather = weather.resample('W',closed='left').mean()
    weather.index = weather.index - pd.DateOffset(days=6)
    
    
    return weather


In [22]:
weather = weekly_weather(weather)

In [23]:
weather

Unnamed: 0_level_0,prcp,snow,snwd,tmax,tmin
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2001-01-01,0.000000,0.000000,14.000000,30.000000,15.666667
2001-01-08,0.000000,0.000000,10.428571,34.142857,22.571429
2001-01-15,0.037143,0.014286,3.857143,31.571429,23.428571
2001-01-22,0.015714,0.200000,3.285714,30.000000,17.428571
2001-01-29,0.127143,0.157143,1.857143,32.714286,17.714286
...,...,...,...,...,...
2021-12-27,0.038571,0.714286,0.571429,41.142857,32.428571
2022-01-03,0.002857,0.142857,1.571429,27.714286,12.142857
2022-01-10,0.000000,0.114286,0.714286,34.857143,19.142857
2022-01-17,0.000000,0.057143,0.285714,33.285714,16.142857


### Merge Weather

In [24]:
merged = merged.merge(weather, how='outer', left_index=True, right_index=True)

In [25]:
merged.drop(merged.tail(8).index,inplace=True)

In [26]:
merged.isna().sum()

bus               0
rail_boardings    0
total_rides       0
gas_price         0
prcp              0
snow              0
snwd              0
tmax              0
tmin              0
dtype: int64

In [27]:
merged

Unnamed: 0_level_0,bus,rail_boardings,total_rides,gas_price,prcp,snow,snwd,tmax,tmin
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2001-01-01,4240790.0,2528123.0,6768913.0,1.487,0.000000,0.000000,14.000000,30.000000,15.666667
2001-01-08,5930650.0,3445299.0,9375949.0,1.477,0.000000,0.000000,10.428571,34.142857,22.571429
2001-01-15,5591301.0,3295995.0,8887296.0,1.569,0.037143,0.014286,3.857143,31.571429,23.428571
2001-01-22,5956378.0,3463065.0,9419443.0,1.546,0.015714,0.200000,3.285714,30.000000,17.428571
2001-01-29,5936801.0,3436499.0,9373300.0,1.541,0.127143,0.157143,1.857143,32.714286,17.714286
...,...,...,...,...,...,...,...,...,...
2021-11-01,2747792.0,2013070.0,4760862.0,3.539,0.000000,0.000000,0.000000,51.000000,33.285714
2021-11-08,2604481.0,1951484.0,4555965.0,3.616,0.000000,0.000000,0.000000,55.571429,39.714286
2021-11-15,2647447.0,2007509.0,4654956.0,3.557,0.000000,0.000000,0.000000,46.285714,33.142857
2021-11-22,2071230.0,1505622.0,3576852.0,3.565,0.001429,0.000000,0.000000,44.142857,27.000000


## Crash Prep

In [28]:
crash = crash[['CRASH_DATE','DAMAGE']].copy()

In [29]:
crash['crash_occurrences'] = 1

In [30]:
crash['CRASH_DATE'] = pd.to_datetime(crash['CRASH_DATE']).dt.date
crash['CRASH_DATE'] = pd.to_datetime(crash['CRASH_DATE'])

In [31]:
crash.columns = crash.columns.str.lower()

In [32]:
crash['damage'] = crash['damage'].replace('OVER $1,500', 3)
crash['damage'] = crash['damage'].replace('$501 - $1,500', 2)
crash['damage'] = crash['damage'].replace('$500 OR LESS', 1)

In [33]:
crash.rename(columns= {'crash_date':'date','damage':'damage_indicator'},inplace=True)

In [34]:
crash.sort_values('date')

Unnamed: 0,date,damage_indicator,crash_occurrences
407046,2013-03-03,3,1
559917,2014-01-18,3,1
370698,2014-01-21,3,1
133716,2014-02-24,2,1
372340,2014-06-25,2,1
...,...,...,...
73874,2022-01-17,3,1
73709,2022-01-17,3,1
73848,2022-01-17,3,1
73749,2022-01-17,3,1


In [35]:
crash = crash.groupby('date').agg({'crash_occurrences':'sum','damage_indicator':'mean'})
crash.sort_index(inplace=True)
crash = crash.resample('W',closed='left').agg({'crash_occurrences': 'sum', 'damage_indicator': 'mean'})
crash.index = crash.index - pd.DateOffset(days=6)

In [36]:
crash.isna().sum()

crash_occurrences      0
damage_indicator     104
dtype: int64

In [37]:
crash.fillna(0.0,inplace=True)

### Merge Crash

In [38]:
merged = merged.merge(crash, how='outer', left_index=True, right_index=True)

In [39]:
drag = merged.isna().sum()[0]
drag

7

In [40]:
merged.drop(merged.tail(drag).index,inplace=True)

In [41]:
merged.isna().sum()

bus                    0
rail_boardings         0
total_rides            0
gas_price              0
prcp                   0
snow                   0
snwd                   0
tmax                   0
tmin                   0
crash_occurrences    635
damage_indicator     635
dtype: int64

In [42]:
merged.fillna(0.0,inplace=True)

## Prep Curve

In [43]:
def clean_curve(curve):
    curve.columns = curve.columns.str.lower()

    curve.set_index('date',inplace=True)

    curve.sort_index(inplace=True)
    curve.columns = curve.columns.str.replace(' ','_')
    curve.drop(columns={'1_mo','2_mo','30_yr'},inplace=True)
    return curve

In [44]:
curve= clean_curve(curve)

In [45]:
curve.head()

Unnamed: 0_level_0,3_mo,6_mo,1_yr,2_yr,3_yr,5_yr,7_yr,10_yr,20_yr
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2001-01-02,5.87,5.58,5.11,4.87,4.82,4.76,4.97,4.92,5.46
2001-01-03,5.69,5.44,5.04,4.92,4.92,4.94,5.18,5.14,5.62
2001-01-04,5.37,5.2,4.82,4.77,4.78,4.82,5.07,5.03,5.56
2001-01-05,5.12,4.98,4.6,4.56,4.57,4.66,4.93,4.93,5.5
2001-01-08,5.19,5.03,4.61,4.54,4.55,4.65,4.94,4.94,5.52


In [46]:
curve.bfill(inplace=True)

In [47]:
curve = curve.resample('W',closed='left').mean()
curve.index = curve.index - pd.DateOffset(days=6)

### Merge Curve

In [48]:
merged = merged.merge(curve, how='outer', left_index=True, right_index=True)

In [49]:
drag = merged.isna().sum()[0]
drag

9

In [50]:
merged.drop(merged.tail(drag).index,inplace=True)

In [51]:
merged.isna().sum()

bus                  0
rail_boardings       0
total_rides          0
gas_price            0
prcp                 0
snow                 0
snwd                 0
tmax                 0
tmin                 0
crash_occurrences    0
damage_indicator     0
3_mo                 0
6_mo                 0
1_yr                 0
2_yr                 0
3_yr                 0
5_yr                 0
7_yr                 0
10_yr                0
20_yr                0
dtype: int64

## Vax Prep

In [52]:
def vax_cleaning(vax):
    vax.columns = vax.columns.str.lower()
    vax['date'] = pd.to_datetime(vax['date'])
    vax.set_index('date',inplace=True)
    vax.sort_index(inplace=True)
    
    vax=vax[['total doses - daily','total doses - cumulative','1st dose - daily',
             '1st dose - cumulative','1st dose - percent population','vaccine series completed - daily',
             'vaccine series completed - cumulative','vaccine series completed - percent population']]
    
    vax = vax.rename(columns={'total doses - daily':'total_daily_doses',
           'total doses - cumulative':'total_daily_cum',
           '1st dose - daily':'first_dose_daily',
           '1st dose - cumulative':'first_dose_cum',
           '1st dose - percent population':'first_dose_percent_pop',
           'vaccine series completed - daily':'vax_series_completed_daily',
           'vaccine series completed - cumulative':'vax_series_cum',
           'vaccine series completed - percent population':'vax_series_percent'})
    return vax


In [53]:
vax = vax_cleaning(vax)

In [54]:
vax.isna().sum()

total_daily_doses             0
total_daily_cum               0
first_dose_daily              0
first_dose_cum                0
first_dose_percent_pop        0
vax_series_completed_daily    0
vax_series_cum                0
vax_series_percent            0
dtype: int64

In [55]:
vax

Unnamed: 0_level_0,total_daily_doses,total_daily_cum,first_dose_daily,first_dose_cum,first_dose_percent_pop,vax_series_completed_daily,vax_series_cum,vax_series_percent
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-12-15,17,17,17,17,0.000,0,0,0.000
2020-12-16,169,186,169,186,0.000,0,0,0.000
2020-12-17,1989,2175,1989,2175,0.001,0,0,0.000
2020-12-18,5729,7904,5729,7904,0.003,0,0,0.000
2020-12-19,3513,11417,3513,11417,0.004,0,0,0.000
...,...,...,...,...,...,...,...,...
2022-01-09,4623,4277770,1260,1963754,0.729,880,1757428,0.652
2022-01-10,11610,4289380,3705,1967459,0.730,1919,1759347,0.653
2022-01-11,13429,4302809,5114,1972573,0.732,2274,1761621,0.654
2022-01-12,13091,4315900,4857,1977430,0.734,2305,1763926,0.655


In [56]:
vax = vax.resample('W',closed='left').agg({'total_daily_doses': 'sum', 'total_daily_cum': 'sum',
                                          'first_dose_daily': 'sum', 'first_dose_cum': 'sum',
                                          'first_dose_percent_pop': 'mean', 'vax_series_completed_daily': 'sum',
                                          'vax_series_cum': 'sum', 'vax_series_percent': 'mean'})
vax.index = vax.index - pd.DateOffset(days=6)

In [57]:
vax.head()

Unnamed: 0_level_0,total_daily_doses,total_daily_cum,first_dose_daily,first_dose_cum,first_dose_percent_pop,vax_series_completed_daily,vax_series_cum,vax_series_percent
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-12-14,11417,21699,11417,21699,0.0016,0,0,0.0
2020-12-21,12964,144304,12964,144304,0.007714,0,0,0.0
2020-12-28,15216,233334,15216,233334,0.012286,0,0,0.0
2021-01-04,33044,387708,20346,358085,0.018857,12698,29623,0.001714
2021-01-11,33273,642146,21420,496654,0.026286,11853,145492,0.007571


### Merge Vax

In [58]:
merged = merged.merge(vax, how='outer', left_index=True, right_index=True)
merged

Unnamed: 0_level_0,bus,rail_boardings,total_rides,gas_price,prcp,snow,snwd,tmax,tmin,crash_occurrences,damage_indicator,3_mo,6_mo,1_yr,2_yr,3_yr,5_yr,7_yr,10_yr,20_yr,total_daily_doses,total_daily_cum,first_dose_daily,first_dose_cum,first_dose_percent_pop,vax_series_completed_daily,vax_series_cum,vax_series_percent
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
2001-01-01,4240790.0,2528123.0,6768913.0,1.487,0.000000,0.000000,14.000000,30.000000,15.666667,0.0,0.0,5.5125,5.300,4.8925,4.780,4.7725,4.7950,5.0375,5.005,5.5350,,,,,,,,
2001-01-08,5930650.0,3445299.0,9375949.0,1.477,0.000000,0.000000,10.428571,34.142857,22.571429,0.0,0.0,5.2720,5.142,4.7880,4.722,4.7340,4.8060,5.0740,5.082,5.6120,,,,,,,,
2001-01-15,5591301.0,3295995.0,8887296.0,1.569,0.037143,0.014286,3.857143,31.571429,23.428571,0.0,0.0,5.3150,5.190,4.8525,4.800,4.8050,4.8525,5.1400,5.185,5.6475,,,,,,,,
2001-01-22,5956378.0,3463065.0,9419443.0,1.546,0.015714,0.200000,3.285714,30.000000,17.428571,0.0,0.0,5.2400,5.116,4.8260,4.788,4.8120,4.9360,5.2320,5.292,5.7460,,,,,,,,
2001-01-29,5936801.0,3436499.0,9373300.0,1.541,0.127143,0.157143,1.857143,32.714286,17.714286,0.0,0.0,5.0360,4.908,4.6580,4.658,4.7040,4.8740,5.1200,5.204,5.6520,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-13,,,,,,,,,,,,,,,,,,,,,98720.0,28013440.0,20501.0,13189421.0,0.699429,16647.0,11997668.0,0.636000
2021-12-20,,,,,,,,,,,,,,,,,,,,,75956.0,28681630.0,18351.0,13340870.0,0.707429,10693.0,12096945.0,0.641571
2021-12-27,,,,,,,,,,,,,,,,,,,,,67792.0,29161228.0,20652.0,13474900.0,0.714429,10700.0,12170314.0,0.645429
2022-01-03,,,,,,,,,,,,,,,,,,,,,82620.0,29637926.0,29687.0,13639161.0,0.723143,14072.0,12249342.0,0.649571


In [59]:
merged.isna().sum()[0]

6

In [60]:
merged = merged.drop(merged.tail(6).index)

In [61]:
merged.fillna(0.0, inplace=True)

In [62]:
merged.isna().sum()

bus                           0
rail_boardings                0
total_rides                   0
gas_price                     0
prcp                          0
snow                          0
snwd                          0
tmax                          0
tmin                          0
crash_occurrences             0
damage_indicator              0
3_mo                          0
6_mo                          0
1_yr                          0
2_yr                          0
3_yr                          0
5_yr                          0
7_yr                          0
10_yr                         0
20_yr                         0
total_daily_doses             0
total_daily_cum               0
first_dose_daily              0
first_dose_cum                0
first_dose_percent_pop        0
vax_series_completed_daily    0
vax_series_cum                0
vax_series_percent            0
dtype: int64

## Covid Prep

In [89]:
covid = pd.read_csv('../capstone-data/daily/covid_total(d).csv')

In [90]:
covid.columns = covid.columns.str.lower()

In [91]:
covid['date'] = pd.to_datetime(covid['date'])
covid

Unnamed: 0,date,c1_school closing,c2_workplace closing,c3_cancel public events,c4_restrictions on gatherings,c5_close public transport,c6_stay at home requirements,c7_restrictions on internal movement,c8_international travel controls,e1_income support,e2_debt/contract relief,h1_public information campaigns,h2_testing policy,h3_contact tracing,h6_facial coverings,h7_vaccination policy,h8_protection of elderly people,confirmedcases,confirmeddeaths,stringencyindex,stringencylegacyindex,governmentresponseindex,containmenthealthindex,economicsupportindex
0,2020-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0
1,2020-01-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0
2,2020-01-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0
3,2020-01-04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0
4,2020-01-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
726,2021-12-27,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,2077260.0,30021.0,25.0,34.52,44.79,45.83,37.5
727,2021-12-28,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,2098064.0,30117.0,25.0,34.52,44.79,45.83,37.5
728,2021-12-29,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,2119162.0,30167.0,25.0,34.52,44.79,45.83,37.5
729,2021-12-30,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,2149548.0,30254.0,25.0,34.52,44.79,45.83,37.5


In [92]:
covid.set_index('date',inplace=True)
covid.sort_index(inplace=True)

covid = covid.resample('W',closed='left').agg({'c1_school closing':'mean', 'c2_workplace closing':'mean', 'c3_cancel public events':'mean',
       'c4_restrictions on gatherings':'mean', 'c5_close public transport':'mean',
       'c6_stay at home requirements':'mean', 'c7_restrictions on internal movement':'mean',
       'c8_international travel controls':'mean', 'e1_income support':'mean',
       'e2_debt/contract relief':'mean', 'h1_public information campaigns':'mean',
       'h2_testing policy':'mean', 'h3_contact tracing':'mean', 'h6_facial coverings':'mean',
       'h7_vaccination policy':'mean', 'h8_protection of elderly people':'mean',
        'confirmedcases':'sum', 'confirmeddeaths':'sum','stringencyindex':'mean',
       'stringencylegacyindex':'mean', 'governmentresponseindex':'mean',
       'containmenthealthindex':'mean', 'economicsupportindex':'mean'})
covid.index = covid.index - pd.DateOffset(days=6)

In [94]:
covid.tail()

Unnamed: 0_level_0,c1_school closing,c2_workplace closing,c3_cancel public events,c4_restrictions on gatherings,c5_close public transport,c6_stay at home requirements,c7_restrictions on internal movement,c8_international travel controls,e1_income support,e2_debt/contract relief,h1_public information campaigns,h2_testing policy,h3_contact tracing,h6_facial coverings,h7_vaccination policy,h8_protection of elderly people,confirmedcases,confirmeddeaths,stringencyindex,stringencylegacyindex,governmentresponseindex,containmenthealthindex,economicsupportindex
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2021-11-29,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,12712600.0,202177.0,25.0,34.52,44.79,45.83,37.5
2021-12-06,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,13065128.0,203964.0,25.0,34.52,44.79,45.83,37.5
2021-12-13,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,13443488.0,205984.0,25.0,34.52,44.79,45.83,37.5
2021-12-20,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,13971608.0,208329.0,25.0,34.52,44.79,45.83,37.5
2021-12-27,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,12614884.0,180681.0,25.0,34.52,44.79,45.83,37.5


In [95]:
covid.isna().sum()

c1_school closing                       0
c2_workplace closing                    0
c3_cancel public events                 0
c4_restrictions on gatherings           0
c5_close public transport               0
c6_stay at home requirements            0
c7_restrictions on internal movement    0
c8_international travel controls        0
e1_income support                       0
e2_debt/contract relief                 0
h1_public information campaigns         0
h2_testing policy                       0
h3_contact tracing                      0
h6_facial coverings                     0
h7_vaccination policy                   0
h8_protection of elderly people         0
confirmedcases                          0
confirmeddeaths                         0
stringencyindex                         0
stringencylegacyindex                   0
governmentresponseindex                 0
containmenthealthindex                  0
economicsupportindex                    0
dtype: int64

### Covid Merge

In [96]:
merged = merged.merge(covid, how='outer', left_index=True, right_index=True)
merged

Unnamed: 0_level_0,bus,rail_boardings,total_rides,gas_price,prcp,snow,snwd,tmax,tmin,crash_occurrences,damage_indicator,3_mo,6_mo,1_yr,2_yr,3_yr,5_yr,7_yr,10_yr,20_yr,total_daily_doses,total_daily_cum,first_dose_daily,first_dose_cum,first_dose_percent_pop,vax_series_completed_daily,vax_series_cum,vax_series_percent,c1_school closing,c2_workplace closing,c3_cancel public events,c4_restrictions on gatherings,c5_close public transport,c6_stay at home requirements,c7_restrictions on internal movement,c8_international travel controls,e1_income support,e2_debt/contract relief,h1_public information campaigns,h2_testing policy,h3_contact tracing,h6_facial coverings,h7_vaccination policy,h8_protection of elderly people,confirmedcases,confirmeddeaths,stringencyindex,stringencylegacyindex,governmentresponseindex,containmenthealthindex,economicsupportindex
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
2001-01-01,4240790.0,2528123.0,6768913.0,1.487,0.000000,0.000000,14.000000,30.000000,15.666667,0.0,0.000000,5.5125,5.300,4.8925,4.780,4.7725,4.7950,5.0375,5.005,5.5350,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,,,,,,,,,,,,,,,,,,,,,,,
2001-01-08,5930650.0,3445299.0,9375949.0,1.477,0.000000,0.000000,10.428571,34.142857,22.571429,0.0,0.000000,5.2720,5.142,4.7880,4.722,4.7340,4.8060,5.0740,5.082,5.6120,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,,,,,,,,,,,,,,,,,,,,,,,
2001-01-15,5591301.0,3295995.0,8887296.0,1.569,0.037143,0.014286,3.857143,31.571429,23.428571,0.0,0.000000,5.3150,5.190,4.8525,4.800,4.8050,4.8525,5.1400,5.185,5.6475,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,,,,,,,,,,,,,,,,,,,,,,,
2001-01-22,5956378.0,3463065.0,9419443.0,1.546,0.015714,0.200000,3.285714,30.000000,17.428571,0.0,0.000000,5.2400,5.116,4.8260,4.788,4.8120,4.9360,5.2320,5.292,5.7460,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,,,,,,,,,,,,,,,,,,,,,,,
2001-01-29,5936801.0,3436499.0,9373300.0,1.541,0.127143,0.157143,1.857143,32.714286,17.714286,0.0,0.000000,5.0360,4.908,4.6580,4.658,4.7040,4.8740,5.1200,5.204,5.6520,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0.000000,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-29,1056369.0,726269.0,1782638.0,3.501,0.000000,0.000000,0.000000,48.428571,33.428571,1965.0,2.578659,0.0560,0.096,0.2460,0.564,0.8500,1.1620,1.3600,1.434,1.8520,109951.0,26513849.0,18169.0,12910683.0,0.684571,32413.0,11645570.0,0.617571,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,12712600.0,202177.0,25.0,34.52,44.79,45.83,37.5
2021-12-06,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,13065128.0,203964.0,25.0,34.52,44.79,45.83,37.5
2021-12-13,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,13443488.0,205984.0,25.0,34.52,44.79,45.83,37.5
2021-12-20,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,13971608.0,208329.0,25.0,34.52,44.79,45.83,37.5


In [97]:
merged.isna().sum()[0]

4

In [98]:
merged = merged.drop(merged.tail(4).index)

In [100]:
merged.fillna(0.0, inplace=True)

In [101]:
merged.isna().sum()

bus                                     0
rail_boardings                          0
total_rides                             0
gas_price                               0
prcp                                    0
snow                                    0
snwd                                    0
tmax                                    0
tmin                                    0
crash_occurrences                       0
damage_indicator                        0
3_mo                                    0
6_mo                                    0
1_yr                                    0
2_yr                                    0
3_yr                                    0
5_yr                                    0
7_yr                                    0
10_yr                                   0
20_yr                                   0
total_daily_doses                       0
total_daily_cum                         0
first_dose_daily                        0
first_dose_cum                    

In [103]:
#merged.to_csv('../capstone-data/weekly-merged.csv')