In [1]:
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 200)

In [2]:
covid = pd.read_csv('../capstone-data/daily/covid_total(d).csv')
cta = pd.read_csv('../capstone-data/monthly/cta-monthly-primary.csv')
auto = pd.read_excel('../capstone-data/monthly/agg_auto.xlsx')
crashes = pd.read_csv('../capstone-data/monthly/crash_fin.csv')
vax = pd.read_csv('../capstone-data/covid/COVID-19_Daily_Vaccinations_-_Chicago_Residents.csv')
gas = pd.read_csv('../capstone-data/weekly/Weekly_Chicago_Regular_All_Formulations_Retail_Gasoline_Prices.csv')
weather = pd.read_csv('../capstone-data/daily/weather.csv')
curve = pd.read_excel('../capstone-data/daily/treasury_yield_curve.xlsx')


# Prep CTA

In [3]:
cta.dtypes

date            object
bus            float64
rail           float64
total_rides    float64
dtype: object

In [4]:
cta['date']=pd.to_datetime(cta['date'])

In [5]:
cta.dtypes

date           datetime64[ns]
bus                   float64
rail                  float64
total_rides           float64
dtype: object

In [6]:
cta.set_index('date',inplace=True)
cta.sort_index(inplace=True)

# Prep Auto

In [7]:
auto.rename(columns={'Unnamed: 0':'date'},inplace=True)

In [8]:
auto.drop(columns={'Unnamed: 4'},inplace=True)

In [9]:
auto.dtypes

date                   datetime64[ns]
new_light_truck_reg           float64
new_car_reg                   float64
new_total_reg                 float64
dtype: object

In [10]:
auto.set_index('date',inplace=True)
auto.sort_index(inplace=True)

In [11]:
auto.head()

Unnamed: 0_level_0,new_light_truck_reg,new_car_reg,new_total_reg
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2006-01-01,13166.0,13961.0,27127.0
2006-02-01,11239.5,12597.0,23836.5
2006-03-01,11716.5,13620.5,25337.0
2006-04-01,11716.5,13620.5,25337.0
2006-05-01,11177.5,14052.5,25230.0


## Merge CTA & Auto

In [12]:
merged = cta.merge(auto, how='outer', left_index=True, right_index=True)

In [13]:
merged.shape

(252, 6)

In [14]:
merged.head()

Unnamed: 0_level_0,bus,rail,total_rides,new_light_truck_reg,new_car_reg,new_total_reg
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2001-01-01,25022908.0,14712334.0,39735242.0,,,
2001-02-01,23768892.0,13669269.0,37438161.0,,,
2001-03-01,26827165.0,15463578.0,42290743.0,,,
2001-04-01,25053791.0,14713336.0,39767127.0,,,
2001-05-01,26743744.0,15743730.0,42487474.0,,,


In [15]:
merged.tail()

Unnamed: 0_level_0,bus,rail,total_rides,new_light_truck_reg,new_car_reg,new_total_reg
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-08-01,10949681.0,8061195.0,19010876.0,16901.0,21676.0,21676.0
2021-09-01,12058765.0,8988251.0,21047016.0,14109.0,4021.0,18130.0
2021-10-01,11905629.0,9029274.0,20934903.0,17447.0,5041.0,22488.0
2021-11-01,10923926.0,8020686.0,18944612.0,13562.0,3328.0,16890.0
2021-12-01,,,,20876.0,4820.0,25696.0


In [16]:
merged.isna().sum()

bus                     1
rail                    1
total_rides             1
new_light_truck_reg    60
new_car_reg            60
new_total_reg          60
dtype: int64

In [17]:
merged.dropna(inplace=True)

# Prep Crashes

In [18]:
# crashes.tail()

# crashes.dtypes

# crashes['date']=pd.to_datetime(crashes['date'])
# crashes.dtypes

# crashes.set_index('date',inplace=True)
# crashes.sort_index(inplace=True)

# crashes.head()

# Merge in Crashes

In [19]:
# merged = merged.merge(crashes, how='outer', left_index=True, right_index=True)

# crashes.shape

# merged.shape

# merged.head()

# merged.isna().sum()

# merged.drop(merged.tail(2).index,inplace=True)

# merged.loc[merged['crash_occurrences'].isna()==False]



# Prep Covid

In [20]:
covid.columns=covid.columns.str.lower()
covid['date'] = pd.to_datetime(covid['date'])
covid.set_index('date',inplace=True)
covid.sort_index(inplace=True)

In [21]:
covid= covid.resample('MS',closed='left').agg({'c1_school closing':'mean', 'c2_workplace closing':'mean', 'c3_cancel public events':'mean',
       'c4_restrictions on gatherings':'mean', 'c5_close public transport':'mean',
       'c6_stay at home requirements':'mean', 'c7_restrictions on internal movement':'mean',
       'c8_international travel controls':'mean', 'e1_income support':'mean',
       'e2_debt/contract relief':'mean', 'h1_public information campaigns':'mean',
       'h2_testing policy':'mean', 'h3_contact tracing':'mean', 'h6_facial coverings':'mean',
       'h7_vaccination policy':'mean', 'h8_protection of elderly people':'mean',
        'confirmedcases':'sum', 'confirmeddeaths':'sum','stringencyindex':'mean',
       'stringencylegacyindex':'mean', 'governmentresponseindex':'mean',
       'containmenthealthindex':'mean', 'economicsupportindex':'mean'})

### Covid Merge

In [22]:
merged = merged.merge(covid, how='outer', left_index=True, right_index=True)

In [23]:
merged.shape

(192, 29)

In [24]:
merged.isna().sum()

bus                                       1
rail                                      1
total_rides                               1
new_light_truck_reg                       1
new_car_reg                               1
new_total_reg                             1
c1_school closing                       168
c2_workplace closing                    168
c3_cancel public events                 168
c4_restrictions on gatherings           168
c5_close public transport               168
c6_stay at home requirements            168
c7_restrictions on internal movement    168
c8_international travel controls        168
e1_income support                       168
e2_debt/contract relief                 168
h1_public information campaigns         168
h2_testing policy                       168
h3_contact tracing                      168
h6_facial coverings                     168
h7_vaccination policy                   168
h8_protection of elderly people         168
confirmedcases                  

In [25]:
merged = merged.drop(merged.tail(1).index)

In [26]:
merged.fillna(0.0,inplace=True)

In [27]:
merged.isna().sum()

bus                                     0
rail                                    0
total_rides                             0
new_light_truck_reg                     0
new_car_reg                             0
new_total_reg                           0
c1_school closing                       0
c2_workplace closing                    0
c3_cancel public events                 0
c4_restrictions on gatherings           0
c5_close public transport               0
c6_stay at home requirements            0
c7_restrictions on internal movement    0
c8_international travel controls        0
e1_income support                       0
e2_debt/contract relief                 0
h1_public information campaigns         0
h2_testing policy                       0
h3_contact tracing                      0
h6_facial coverings                     0
h7_vaccination policy                   0
h8_protection of elderly people         0
confirmedcases                          0
confirmeddeaths                   

# Vax Prep

In [28]:
vax= pd.read_csv('../capstone-data/covid/COVID-19_Daily_Vaccinations_-_Chicago_Residents.csv')


In [29]:
def vax_cleaning(vax):
    vax.columns = vax.columns.str.lower()
    vax['date'] = pd.to_datetime(vax['date'])
    vax.set_index('date',inplace=True)
    vax.sort_index(inplace=True)
    
    vax1=vax[['total doses - daily','total doses - cumulative','1st dose - daily',
             '1st dose - cumulative','1st dose - percent population','vaccine series completed - daily',
             'vaccine series completed - cumulative','vaccine series completed - percent population']]
    
    vax1 = vax1.rename(columns={'total doses - daily':'total_daily_doses',
           'total doses - cumulative':'total_daily_cum',
           '1st dose - daily':'first_dose_daily',
           '1st dose - cumulative':'first_dose_cum',
           '1st dose - percent population':'first_dose_percent_pop',
           'vaccine series completed - daily':'vax_series_completed_daily',
           'vaccine series completed - cumulative':'vax_series_cum',
           'vaccine series completed - percent population':'vax_series_percent'})
    return vax1



In [30]:
vax = vax_cleaning(vax)

In [31]:
vax

Unnamed: 0_level_0,total_daily_doses,total_daily_cum,first_dose_daily,first_dose_cum,first_dose_percent_pop,vax_series_completed_daily,vax_series_cum,vax_series_percent
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-12-15,17,17,17,17,0.000,0,0,0.000
2020-12-16,169,186,169,186,0.000,0,0,0.000
2020-12-17,1989,2175,1989,2175,0.001,0,0,0.000
2020-12-18,5729,7904,5729,7904,0.003,0,0,0.000
2020-12-19,3513,11417,3513,11417,0.004,0,0,0.000
...,...,...,...,...,...,...,...,...
2022-01-09,4623,4277770,1260,1963754,0.729,880,1757428,0.652
2022-01-10,11610,4289380,3705,1967459,0.730,1919,1759347,0.653
2022-01-11,13429,4302809,5114,1972573,0.732,2274,1761621,0.654
2022-01-12,13091,4315900,4857,1977430,0.734,2305,1763926,0.655


In [32]:
vax.isna().sum()

total_daily_doses             0
total_daily_cum               0
first_dose_daily              0
first_dose_cum                0
first_dose_percent_pop        0
vax_series_completed_daily    0
vax_series_cum                0
vax_series_percent            0
dtype: int64

In [33]:
vax.dtypes

total_daily_doses               int64
total_daily_cum                 int64
first_dose_daily                int64
first_dose_cum                  int64
first_dose_percent_pop        float64
vax_series_completed_daily      int64
vax_series_cum                  int64
vax_series_percent            float64
dtype: object

In [34]:
vax = vax.resample('MS',closed='left').agg({'total_daily_doses': 'sum', 'total_daily_cum': 'sum',
                                          'first_dose_daily': 'sum', 'first_dose_cum': 'sum',
                                          'first_dose_percent_pop': 'mean', 'vax_series_completed_daily': 'sum',
                                          'vax_series_cum': 'sum', 'vax_series_percent': 'mean'})

In [35]:
vax

Unnamed: 0_level_0,total_daily_doses,total_daily_cum,first_dose_daily,first_dose_cum,first_dose_percent_pop,vax_series_completed_daily,vax_series_cum,vax_series_percent
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-12-01,37800,321658,37800,321658,0.007,0,0,0.0
2021-01-01,179208,3507100,128715,2760638,0.033032,50493,746462,0.008935
2021-02-01,346512,10957678,211901,7893289,0.104643,134614,3064393,0.040571
2021-03-01,667046,27128440,439238,17803097,0.213226,251818,9638626,0.115452
2021-04-01,854762,50564013,406915,31543514,0.3903,483203,20528629,0.2541
2021-05-01,453139,72702185,179496,40958570,0.490452,288745,33853003,0.405323
2021-06-01,234556,80365681,87390,43622455,0.5398,156120,39125380,0.484167
2021-07-01,124646,87931749,64864,47180163,0.564935,65199,43425410,0.520032
2021-08-01,145992,92069365,78152,49535540,0.593194,64025,45291093,0.542355
2021-09-01,141744,93346830,57255,49959116,0.618167,59062,45759922,0.566133


## Merge Vax

In [36]:
merged = merged.merge(vax, how='outer', left_index=True, right_index=True)

In [37]:
merged.tail()

Unnamed: 0_level_0,bus,rail,total_rides,new_light_truck_reg,new_car_reg,new_total_reg,c1_school closing,c2_workplace closing,c3_cancel public events,c4_restrictions on gatherings,c5_close public transport,c6_stay at home requirements,c7_restrictions on internal movement,c8_international travel controls,e1_income support,e2_debt/contract relief,h1_public information campaigns,h2_testing policy,h3_contact tracing,h6_facial coverings,h7_vaccination policy,h8_protection of elderly people,confirmedcases,confirmeddeaths,stringencyindex,stringencylegacyindex,governmentresponseindex,containmenthealthindex,economicsupportindex,total_daily_doses,total_daily_cum,first_dose_daily,first_dose_cum,first_dose_percent_pop,vax_series_completed_daily,vax_series_cum,vax_series_percent
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1
2021-09-01,12058765.0,8988251.0,21047016.0,14109.0,4021.0,18130.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.9,2.0,3.0,1.633333,2.0,5.0,1.1,47405296.0,806700.0,25.0,34.52,44.582,42.379,60.0,141744.0,93346830.0,57255.0,49959116.0,0.618167,59062.0,45759922.0,0.566133
2021-10-01,11905629.0,9029274.0,20934903.0,17447.0,5041.0,22488.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,1.0,1.0,2.0,3.0,1.935484,2.0,5.0,2.0,51628096.0,863020.0,25.0,34.52,44.58871,45.599677,37.5,220280.0,102023840.0,47613.0,53225022.0,0.63729,50765.0,48958837.0,0.586258
2021-11-01,10923926.0,8020686.0,18944612.0,13562.0,3328.0,16890.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.733333,1.0,1.0,2.0,3.0,2.0,2.0,5.0,2.0,52453318.0,856857.0,21.478667,29.998,42.814,43.569,37.5,362350.0,107880947.0,101956.0,53864013.0,0.6664,53908.0,48908612.0,0.605133
2021-12-01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,422185.0,124220244.0,91367.0,58514803.0,0.700677,84468.0,53061072.0,0.635387
2022-01-01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,131890.0,55336422.0,47117.0,25432963.0,0.726077,22732.0,22799292.0,0.651


In [38]:
merged.isna().sum()

bus                                       2
rail                                      2
total_rides                               2
new_light_truck_reg                       2
new_car_reg                               2
new_total_reg                             2
c1_school closing                         2
c2_workplace closing                      2
c3_cancel public events                   2
c4_restrictions on gatherings             2
c5_close public transport                 2
c6_stay at home requirements              2
c7_restrictions on internal movement      2
c8_international travel controls          2
e1_income support                         2
e2_debt/contract relief                   2
h1_public information campaigns           2
h2_testing policy                         2
h3_contact tracing                        2
h6_facial coverings                       2
h7_vaccination policy                     2
h8_protection of elderly people           2
confirmedcases                  

In [39]:
merged = merged.drop(merged.tail(2).index)
merged.fillna(0.0,inplace=True)

In [40]:
merged.dtypes

bus                                     float64
rail                                    float64
total_rides                             float64
new_light_truck_reg                     float64
new_car_reg                             float64
new_total_reg                           float64
c1_school closing                       float64
c2_workplace closing                    float64
c3_cancel public events                 float64
c4_restrictions on gatherings           float64
c5_close public transport               float64
c6_stay at home requirements            float64
c7_restrictions on internal movement    float64
c8_international travel controls        float64
e1_income support                       float64
e2_debt/contract relief                 float64
h1_public information campaigns         float64
h2_testing policy                       float64
h3_contact tracing                      float64
h6_facial coverings                     float64
h7_vaccination policy                   

# Prep Gasoline

In [41]:
gas = gas.rename(columns={'Week of':'date', 'Weekly Chicago Regular All Formulations Retail Gasoline Prices Dollars per Gallon':'gas_price'})

In [42]:
gas['date']=pd.to_datetime(gas['date'])

In [43]:
gas.set_index('date',inplace=True)
gas.sort_index(inplace=True)
gas

Unnamed: 0_level_0,gas_price
date,Unnamed: 1_level_1
2000-06-05,1.949
2000-06-12,2.091
2000-06-19,2.108
2000-06-26,1.989
2000-07-03,1.786
...,...
2021-12-27,3.340
2022-01-03,3.320
2022-01-10,3.457
2022-01-17,3.404


In [44]:
gas = gas.resample('MS',closed='left').mean()

In [45]:
gas.isna().sum()

gas_price    0
dtype: int64

## Gas Merge

In [46]:
merged = merged.merge(gas, how='outer', left_index=True, right_index=True)

In [47]:
merged.isna().sum()

bus                                     69
rail                                    69
total_rides                             69
new_light_truck_reg                     69
new_car_reg                             69
new_total_reg                           69
c1_school closing                       69
c2_workplace closing                    69
c3_cancel public events                 69
c4_restrictions on gatherings           69
c5_close public transport               69
c6_stay at home requirements            69
c7_restrictions on internal movement    69
c8_international travel controls        69
e1_income support                       69
e2_debt/contract relief                 69
h1_public information campaigns         69
h2_testing policy                       69
h3_contact tracing                      69
h6_facial coverings                     69
h7_vaccination policy                   69
h8_protection of elderly people         69
confirmedcases                          69
confirmedde

In [48]:
merged = merged.dropna()


# Prep Weather

In [49]:
def monthly_weather(weather):
    """
    A function that cleans the weather dataset, which comes as daily. 
    Commented out is optional code that can be edited to suit the needed format,
    ex. ...resample('W') for weekly
    """
    weather.columns
    weather.columns = weather.columns.str.lower()
    weather.drop(columns='station',inplace=True)
    weather['date'] = pd.to_datetime(weather['date'])
    # weather.set_index('date',inplace=True)
    weather = weather.groupby('date').agg('mean')
    weather.sort_index(inplace=True)
    weather = weather.fillna(0)
    
    weather = weather.resample('MS',closed='left').mean()    
    
    return weather

In [50]:
weather = monthly_weather(weather)

In [51]:
weather.isna().sum()

prcp    0
snow    0
snwd    0
tmax    0
tmin    0
dtype: int64

## Merge Weather

In [52]:
merged = merged.merge(weather, how='outer', left_index=True, right_index=True)

In [53]:
merged.isna().sum()

bus                                     62
rail                                    62
total_rides                             62
new_light_truck_reg                     62
new_car_reg                             62
new_total_reg                           62
c1_school closing                       62
c2_workplace closing                    62
c3_cancel public events                 62
c4_restrictions on gatherings           62
c5_close public transport               62
c6_stay at home requirements            62
c7_restrictions on internal movement    62
c8_international travel controls        62
e1_income support                       62
e2_debt/contract relief                 62
h1_public information campaigns         62
h2_testing policy                       62
h3_contact tracing                      62
h6_facial coverings                     62
h7_vaccination policy                   62
h8_protection of elderly people         62
confirmedcases                          62
confirmedde

In [54]:
merged= merged.dropna()

# Prep Yield Curve

In [55]:
def clean_curve(curve):
    curve.columns = curve.columns.str.lower()

    curve.set_index('date',inplace=True)

    curve.sort_index(inplace=True)
    curve.columns = curve.columns.str.replace(' ','_')
    curve.drop(columns={'1_mo','2_mo','30_yr'},inplace=True)
    curve.bfill(inplace=True)
    return curve

In [56]:
curve= clean_curve(curve)

In [57]:
curve.head()

Unnamed: 0_level_0,3_mo,6_mo,1_yr,2_yr,3_yr,5_yr,7_yr,10_yr,20_yr
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2001-01-02,5.87,5.58,5.11,4.87,4.82,4.76,4.97,4.92,5.46
2001-01-03,5.69,5.44,5.04,4.92,4.92,4.94,5.18,5.14,5.62
2001-01-04,5.37,5.2,4.82,4.77,4.78,4.82,5.07,5.03,5.56
2001-01-05,5.12,4.98,4.6,4.56,4.57,4.66,4.93,4.93,5.5
2001-01-08,5.19,5.03,4.61,4.54,4.55,4.65,4.94,4.94,5.52


In [58]:
curve = curve.resample('MS',closed='left').mean()
curve

Unnamed: 0_level_0,3_mo,6_mo,1_yr,2_yr,3_yr,5_yr,7_yr,10_yr,20_yr
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2001-01-01,5.285238,5.146190,4.814762,4.760000,4.774286,4.858571,5.130476,5.160952,5.649048
2001-02-01,5.005263,4.888947,4.684211,4.656842,4.707895,4.886316,5.097368,5.098947,5.617895
2001-03-01,4.535455,4.435455,4.298636,4.342273,4.428182,4.642727,4.882727,4.885455,5.493636
2001-04-01,3.966000,3.985500,3.976500,4.234000,4.423000,4.763500,5.034500,5.141000,5.779000
2001-05-01,3.703182,3.738182,3.781364,4.260000,4.507273,4.927727,5.240000,5.391364,5.923182
...,...,...,...,...,...,...,...,...,...
2021-09-01,0.041905,0.052381,0.077143,0.240952,0.471905,0.863810,1.163810,1.374762,1.872857
2021-10-01,0.052000,0.061500,0.113000,0.392500,0.670000,1.106000,1.402000,1.582500,2.028500
2021-11-01,0.051500,0.073000,0.179000,0.508000,0.819500,1.202500,1.448000,1.559500,1.973500
2021-12-01,0.058636,0.145455,0.299091,0.680000,0.952727,1.229545,1.395909,1.465000,1.899545


### Merge Curve

In [60]:
merged = merged.merge(curve, how='outer', left_index=True, right_index=True)

In [63]:
merged.drop(merged.tail(2).index,inplace=True)

In [66]:
merged=merged.dropna()

In [69]:
merged.isna().sum()

bus                                     0
rail                                    0
total_rides                             0
new_light_truck_reg                     0
new_car_reg                             0
new_total_reg                           0
c1_school closing                       0
c2_workplace closing                    0
c3_cancel public events                 0
c4_restrictions on gatherings           0
c5_close public transport               0
c6_stay at home requirements            0
c7_restrictions on internal movement    0
c8_international travel controls        0
e1_income support                       0
e2_debt/contract relief                 0
h1_public information campaigns         0
h2_testing policy                       0
h3_contact tracing                      0
h6_facial coverings                     0
h7_vaccination policy                   0
h8_protection of elderly people         0
confirmedcases                          0
confirmeddeaths                   

In [70]:
merged

Unnamed: 0_level_0,bus,rail,total_rides,new_light_truck_reg,new_car_reg,new_total_reg,c1_school closing,c2_workplace closing,c3_cancel public events,c4_restrictions on gatherings,c5_close public transport,c6_stay at home requirements,c7_restrictions on internal movement,c8_international travel controls,e1_income support,e2_debt/contract relief,h1_public information campaigns,h2_testing policy,h3_contact tracing,h6_facial coverings,h7_vaccination policy,h8_protection of elderly people,confirmedcases,confirmeddeaths,stringencyindex,stringencylegacyindex,governmentresponseindex,containmenthealthindex,economicsupportindex,total_daily_doses,total_daily_cum,first_dose_daily,first_dose_cum,first_dose_percent_pop,vax_series_completed_daily,vax_series_cum,vax_series_percent,gas_price,prcp,snow,snwd,tmax,tmin,3_mo,6_mo,1_yr,2_yr,3_yr,5_yr,7_yr,10_yr,20_yr
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1
2006-01-01,23256316.0,14950854.0,38207170.0,13166.0,13961.0,27127.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.3524,0.112258,0.154839,0.451613,42.193548,30.774194,4.336,4.4695,4.445,4.3955,4.3515,4.3455,4.365,4.416,4.6515
2006-02-01,23084695.0,14326407.0,37411102.0,11239.5,12597.0,23836.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.27875,0.0475,0.132143,0.214286,36.892857,21.392857,4.539474,4.691579,4.684737,4.668421,4.637368,4.572105,4.564211,4.568947,4.734211
2006-03-01,26587686.0,16520883.0,43108569.0,11716.5,13620.5,25337.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.50025,0.101935,0.122581,0.16129,46.774194,32.0,4.627826,4.792174,4.773478,4.733913,4.738261,4.716087,4.713913,4.723913,4.913478
2006-04-01,23909312.0,15606925.0,39516237.0,11716.5,13620.5,25337.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.80725,0.129,0.0,0.0,63.833333,44.9,4.721579,4.9,4.897368,4.888947,4.885263,4.902105,4.935263,4.990526,5.224211
2006-05-01,26436976.0,16977479.0,43414455.0,11177.5,14052.5,25230.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.9402,0.102903,0.0,0.0,69.548387,51.935484,4.836364,5.01,4.995,4.968182,4.974545,4.997727,5.032273,5.11,5.346364
2006-06-01,24874712.0,16871696.0,41746408.0,12249.0,16954.5,29203.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.001,0.129,0.0,0.0,78.2,60.7,4.917727,5.172727,5.155,5.121818,5.088182,5.067273,5.075455,5.106364,5.292273
2006-07-01,23597397.0,17049203.0,40646600.0,12249.0,16954.5,29203.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.1916,0.136774,0.0,0.0,86.129032,70.032258,5.0765,5.2665,5.2175,5.1185,5.07,5.04,5.0485,5.0875,5.251
2006-08-01,24691332.0,17405119.0,42096451.0,11917.25,16392.75,28310.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.1695,0.229677,0.0,0.0,82.096774,67.806452,5.090435,5.172609,5.082609,4.903478,4.846087,4.822174,4.828696,4.876522,5.078696
2006-09-01,25932978.0,17188545.0,43121523.0,11585.5,15831.0,27416.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.75175,0.144,0.0,0.0,71.733333,56.366667,4.93,5.0785,4.9745,4.769,4.6925,4.6675,4.676,4.719,4.9275
2006-10-01,27061706.0,17771287.0,44832993.0,11585.5,15831.0,27416.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.304,0.213871,0.009677,0.0,59.225806,43.0,5.04619,5.119048,5.01,4.795714,4.721905,4.686667,4.69,4.729048,4.935714


In [71]:
merged.to_csv('../capstone-data/monthly-merged.csv')
#merge_limited.to_csv('../capstone-data/merge_limited.csv')