In [1]:
import numpy as np
import pandas as pd
from collections import Counter
import datetime

import matplotlib.pyplot as plt

In [2]:
c_weather = pd.read_csv('../data/weather_cleaned.csv')

In [3]:
pd.set_option('display.max_columns', 100)
c_weather.columns

Index(['Station', 'CalendarDate', 'Tmax', 'Tmin', 'Tavg', 'Depart', 'DewPoint',
       'WetBulb', 'Heat', 'Cool', 'Sunrise', 'Sunset', 'CodeSum', 'Depth',
       'SnowFall', 'PrecipTotal', 'StnPressure', 'SeaLevel', 'ResultSpeed',
       'ResultDir', 'AvgSpeed', 'Year', 'Month', 'Date', 'CodeSum_BCFG',
       'CodeSum_BR', 'CodeSum_DZ', 'CodeSum_FG', 'CodeSum_FG+', 'CodeSum_FU',
       'CodeSum_GR', 'CodeSum_HZ', 'CodeSum_MIFG', 'CodeSum_RA', 'CodeSum_SN',
       'CodeSum_SQ', 'CodeSum_TS', 'CodeSum_TSRA', 'CodeSum_VCFG',
       'CodeSum_VCTS', 'DaylightDurationMinutes'],
      dtype='object')

In [4]:
def format_columns(df):
    df['date'] = pd.to_datetime(df['CalendarDate'])
    df['Sunrise'] = pd.to_datetime(df['Sunrise'])
    df['Sunset'] = pd.to_datetime(df['Sunset'])
    df['daylight'] = df.Sunset - df.Sunrise
    df['year'] = df.date.dt.year
    df['month'] = df.date.dt.month
    df['week'] = df.date.dt.week
#    df['day_of_year'] = df.date.dt.day
    df.set_index('date', inplace=True)

    return df

In [5]:
c_weather.shape

(2944, 41)

In [6]:
c_weather = format_columns(c_weather)

In [7]:
c_weather.columns

Index(['Station', 'CalendarDate', 'Tmax', 'Tmin', 'Tavg', 'Depart', 'DewPoint',
       'WetBulb', 'Heat', 'Cool', 'Sunrise', 'Sunset', 'CodeSum', 'Depth',
       'SnowFall', 'PrecipTotal', 'StnPressure', 'SeaLevel', 'ResultSpeed',
       'ResultDir', 'AvgSpeed', 'Year', 'Month', 'Date', 'CodeSum_BCFG',
       'CodeSum_BR', 'CodeSum_DZ', 'CodeSum_FG', 'CodeSum_FG+', 'CodeSum_FU',
       'CodeSum_GR', 'CodeSum_HZ', 'CodeSum_MIFG', 'CodeSum_RA', 'CodeSum_SN',
       'CodeSum_SQ', 'CodeSum_TS', 'CodeSum_TSRA', 'CodeSum_VCFG',
       'CodeSum_VCTS', 'DaylightDurationMinutes', 'daylight', 'year', 'month',
       'week'],
      dtype='object')

In [8]:
# Select useful features and place under a new df name
c_weather = c_weather[['Station','year','month','week','daylight','Tmax','Tmin','Tavg','Depart','DewPoint','WetBulb','PrecipTotal','StnPressure', 'SeaLevel','ResultSpeed','ResultDir','AvgSpeed']]

In [9]:
## Seperate the df by station
stn1 = c_weather[c_weather.Station == 1].drop(['Station'], axis=1)
stn2 = c_weather[c_weather.Station ==2].drop(['Station'], axis=1)
print(c_weather.shape)
print(stn1.shape)
print(stn2.shape)

(2944, 17)
(1472, 16)
(1472, 16)


## Rolling Average AND lag

In [10]:
def add_suffix(list, suffix):
    return {i: i + "_" + suffix for i in list}

### Station 1

In [11]:
# Create rolling average with window size of 7 day (weekly average) 
new_col_names = add_suffix(stn1.columns.tolist(),"wkly")
col_weekly = stn1.drop(['year','month','week'],axis=1).rolling(7).mean().rename(columns = new_col_names)

# Create lag 7 of the rolling average 
new_col_names = add_suffix(col_weekly.columns.tolist(),"lag7")
col_lag_7 = col_weekly.shift(7).rename(columns = new_col_names)

# Create lag 14 of the rolling average
new_col_names = add_suffix(col_weekly.columns.tolist(),"lag14")
col_lag_14 = col_weekly.shift(14).rename(columns = new_col_names)

# Create lag 21 of the rolling average
new_col_names = add_suffix(col_weekly.columns.tolist(),"lag21")
col_lag_21 = col_weekly.shift(21).rename(columns = new_col_names)
#col_lag_21.head(30)

In [12]:
# Merged station 1 columns
stn1_comb = pd.concat([stn1, col_weekly, col_lag_7, col_lag_14, col_lag_21], axis=1)
stn1_comb.head(2)

Unnamed: 0_level_0,year,month,week,daylight,Tmax,Tmin,Tavg,Depart,DewPoint,WetBulb,PrecipTotal,StnPressure,SeaLevel,ResultSpeed,ResultDir,AvgSpeed,Tmax_wkly,Tmin_wkly,Tavg_wkly,Depart_wkly,DewPoint_wkly,WetBulb_wkly,PrecipTotal_wkly,StnPressure_wkly,SeaLevel_wkly,ResultSpeed_wkly,ResultDir_wkly,AvgSpeed_wkly,Tmax_wkly_lag7,Tmin_wkly_lag7,Tavg_wkly_lag7,Depart_wkly_lag7,DewPoint_wkly_lag7,WetBulb_wkly_lag7,PrecipTotal_wkly_lag7,StnPressure_wkly_lag7,SeaLevel_wkly_lag7,ResultSpeed_wkly_lag7,ResultDir_wkly_lag7,AvgSpeed_wkly_lag7,Tmax_wkly_lag14,Tmin_wkly_lag14,Tavg_wkly_lag14,Depart_wkly_lag14,DewPoint_wkly_lag14,WetBulb_wkly_lag14,PrecipTotal_wkly_lag14,StnPressure_wkly_lag14,SeaLevel_wkly_lag14,ResultSpeed_wkly_lag14,ResultDir_wkly_lag14,AvgSpeed_wkly_lag14,Tmax_wkly_lag21,Tmin_wkly_lag21,Tavg_wkly_lag21,Depart_wkly_lag21,DewPoint_wkly_lag21,WetBulb_wkly_lag21,PrecipTotal_wkly_lag21,StnPressure_wkly_lag21,SeaLevel_wkly_lag21,ResultSpeed_wkly_lag21,ResultDir_wkly_lag21,AvgSpeed_wkly_lag21
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1
2007-05-01,2007,5,18,14:01:00,83,50,67,14,51,56,0.0,29.1,29.82,1.7,27,9.2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2007-05-02,2007,5,18,14:03:00,59,42,51,-3,42,47,0.0,29.38,30.09,13.0,4,13.4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [13]:
# Put Station ID back
stn1_comb['Station'] = 1
stn1_comb.head(30)

Unnamed: 0_level_0,year,month,week,daylight,Tmax,Tmin,Tavg,Depart,DewPoint,WetBulb,PrecipTotal,StnPressure,SeaLevel,ResultSpeed,ResultDir,AvgSpeed,Tmax_wkly,Tmin_wkly,Tavg_wkly,Depart_wkly,DewPoint_wkly,WetBulb_wkly,PrecipTotal_wkly,StnPressure_wkly,SeaLevel_wkly,ResultSpeed_wkly,ResultDir_wkly,AvgSpeed_wkly,Tmax_wkly_lag7,Tmin_wkly_lag7,Tavg_wkly_lag7,Depart_wkly_lag7,DewPoint_wkly_lag7,WetBulb_wkly_lag7,PrecipTotal_wkly_lag7,StnPressure_wkly_lag7,SeaLevel_wkly_lag7,ResultSpeed_wkly_lag7,ResultDir_wkly_lag7,AvgSpeed_wkly_lag7,Tmax_wkly_lag14,Tmin_wkly_lag14,Tavg_wkly_lag14,Depart_wkly_lag14,DewPoint_wkly_lag14,WetBulb_wkly_lag14,PrecipTotal_wkly_lag14,StnPressure_wkly_lag14,SeaLevel_wkly_lag14,ResultSpeed_wkly_lag14,ResultDir_wkly_lag14,AvgSpeed_wkly_lag14,Tmax_wkly_lag21,Tmin_wkly_lag21,Tavg_wkly_lag21,Depart_wkly_lag21,DewPoint_wkly_lag21,WetBulb_wkly_lag21,PrecipTotal_wkly_lag21,StnPressure_wkly_lag21,SeaLevel_wkly_lag21,ResultSpeed_wkly_lag21,ResultDir_wkly_lag21,AvgSpeed_wkly_lag21,Station
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1
2007-05-01,2007,5,18,14:01:00,83,50,67,14,51,56,0.0,29.1,29.82,1.7,27,9.2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2007-05-02,2007,5,18,14:03:00,59,42,51,-3,42,47,0.0,29.38,30.09,13.0,4,13.4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2007-05-03,2007,5,18,14:05:00,66,46,56,2,40,48,0.0,29.39,30.12,11.7,7,11.9,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2007-05-04,2007,5,18,14:08:00,66,49,58,4,41,50,0.0,29.31,30.05,10.4,8,10.8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2007-05-05,2007,5,18,14:10:00,66,53,60,5,38,49,0.0,29.4,30.1,11.7,7,12.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2007-05-06,2007,5,18,14:13:00,68,49,59,4,30,46,0.0,29.57,30.29,14.4,11,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2007-05-07,2007,5,19,14:15:00,83,47,65,10,41,54,0.0,29.38,30.12,8.6,18,10.5,70.142857,48.0,59.428571,5.142857,40.428571,50.0,0.0,29.361429,30.084286,10.214286,11.714286,11.828571,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2007-05-08,2007,5,19,14:18:00,82,54,68,12,58,62,0.0,29.29,30.03,2.7,11,5.8,70.0,48.571429,59.571429,4.857143,41.428571,50.857143,0.0,29.388571,30.114286,10.357143,9.428571,11.342857,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2007-05-09,2007,5,19,14:20:00,77,61,69,13,59,63,0.13,29.21,29.94,3.9,9,6.2,72.571429,51.285714,62.142857,7.142857,43.857143,53.142857,0.018571,29.364286,30.092857,9.057143,10.142857,10.314286,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2007-05-10,2007,5,19,14:22:00,84,56,70,14,52,60,0.0,29.2,29.92,0.7,17,4.1,75.142857,52.714286,64.142857,8.857143,45.571429,54.857143,0.018571,29.337143,30.064286,7.485714,11.571429,9.2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1


### Station 2

In [14]:
# Create rolling average with window size of 7 day (weekly average) 
new_col_names = add_suffix(stn1.columns.tolist(),"wkly")
col_7d_ave = stn2.drop(['year','month','week'],axis=1).rolling(7).mean().rename(columns = new_col_names)

# Create lag 7 of the rolling average 
new_col_names = add_suffix(col_7d_ave.columns.tolist(),"lag7")
col_lag_7 = col_weekly.shift(7).rename(columns = new_col_names)

# Create lag 14 of the rolling average
new_col_names = add_suffix(col_7d_ave.columns.tolist(),"lag14")
col_lag_14 = col_weekly.shift(14).rename(columns = new_col_names)

# Create lag 21 of the rolling average
new_col_names = add_suffix(col_7d_ave.columns.tolist(),"lag21")
col_lag_21 = col_weekly.shift(21).rename(columns = new_col_names)
#col_lag_21.head(30)

In [15]:
# Merged station 1 columns
stn2_comb = pd.concat([stn2, col_weekly, col_lag_7, col_lag_14, col_lag_21], axis=1)
stn2_comb.head(30)

Unnamed: 0_level_0,year,month,week,daylight,Tmax,Tmin,Tavg,Depart,DewPoint,WetBulb,PrecipTotal,StnPressure,SeaLevel,ResultSpeed,ResultDir,AvgSpeed,Tmax_wkly,Tmin_wkly,Tavg_wkly,Depart_wkly,DewPoint_wkly,WetBulb_wkly,PrecipTotal_wkly,StnPressure_wkly,SeaLevel_wkly,ResultSpeed_wkly,ResultDir_wkly,AvgSpeed_wkly,Tmax_wkly_lag7,Tmin_wkly_lag7,Tavg_wkly_lag7,Depart_wkly_lag7,DewPoint_wkly_lag7,WetBulb_wkly_lag7,PrecipTotal_wkly_lag7,StnPressure_wkly_lag7,SeaLevel_wkly_lag7,ResultSpeed_wkly_lag7,ResultDir_wkly_lag7,AvgSpeed_wkly_lag7,Tmax_wkly_lag14,Tmin_wkly_lag14,Tavg_wkly_lag14,Depart_wkly_lag14,DewPoint_wkly_lag14,WetBulb_wkly_lag14,PrecipTotal_wkly_lag14,StnPressure_wkly_lag14,SeaLevel_wkly_lag14,ResultSpeed_wkly_lag14,ResultDir_wkly_lag14,AvgSpeed_wkly_lag14,Tmax_wkly_lag21,Tmin_wkly_lag21,Tavg_wkly_lag21,Depart_wkly_lag21,DewPoint_wkly_lag21,WetBulb_wkly_lag21,PrecipTotal_wkly_lag21,StnPressure_wkly_lag21,SeaLevel_wkly_lag21,ResultSpeed_wkly_lag21,ResultDir_wkly_lag21,AvgSpeed_wkly_lag21
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1
2007-05-01,2007,5,18,14:01:00,84,52,68,15,51,57,0.0,29.18,29.82,2.7,25,9.6,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2007-05-02,2007,5,18,14:03:00,60,43,52,-2,42,47,0.0,29.44,30.08,13.3,2,13.4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2007-05-03,2007,5,18,14:05:00,67,48,58,4,40,50,0.0,29.46,30.12,12.9,6,13.2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2007-05-04,2007,5,18,14:08:00,78,51,65,11,42,50,0.0,29.36,30.04,10.1,7,10.4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2007-05-05,2007,5,18,14:10:00,66,54,60,5,39,50,0.0,29.46,30.09,11.2,7,11.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2007-05-06,2007,5,18,14:13:00,68,52,60,5,30,46,0.0,29.62,30.28,13.8,10,14.5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2007-05-07,2007,5,19,14:15:00,84,50,67,12,39,53,0.0,29.44,30.12,8.5,17,9.9,70.142857,48.0,59.428571,5.142857,40.428571,50.0,0.0,29.361429,30.084286,10.214286,11.714286,11.828571,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2007-05-08,2007,5,19,14:18:00,80,60,70,14,57,63,0.0,29.36,30.02,2.5,8,5.4,70.0,48.571429,59.571429,4.857143,41.428571,50.857143,0.0,29.388571,30.114286,10.357143,9.428571,11.342857,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2007-05-09,2007,5,19,14:20:00,76,63,70,14,60,63,0.02,29.28,29.93,3.9,7,5.9,72.571429,51.285714,62.142857,7.142857,43.857143,53.142857,0.018571,29.364286,30.092857,9.057143,10.142857,10.314286,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2007-05-10,2007,5,19,14:22:00,83,59,71,15,52,61,0.0,29.26,29.91,2.0,9,3.9,75.142857,52.714286,64.142857,8.857143,45.571429,54.857143,0.018571,29.337143,30.064286,7.485714,11.571429,9.2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [16]:
# Put Station ID back
stn2_comb['Station'] = 2
stn2_comb.head(1)

Unnamed: 0_level_0,year,month,week,daylight,Tmax,Tmin,Tavg,Depart,DewPoint,WetBulb,PrecipTotal,StnPressure,SeaLevel,ResultSpeed,ResultDir,AvgSpeed,Tmax_wkly,Tmin_wkly,Tavg_wkly,Depart_wkly,DewPoint_wkly,WetBulb_wkly,PrecipTotal_wkly,StnPressure_wkly,SeaLevel_wkly,ResultSpeed_wkly,ResultDir_wkly,AvgSpeed_wkly,Tmax_wkly_lag7,Tmin_wkly_lag7,Tavg_wkly_lag7,Depart_wkly_lag7,DewPoint_wkly_lag7,WetBulb_wkly_lag7,PrecipTotal_wkly_lag7,StnPressure_wkly_lag7,SeaLevel_wkly_lag7,ResultSpeed_wkly_lag7,ResultDir_wkly_lag7,AvgSpeed_wkly_lag7,Tmax_wkly_lag14,Tmin_wkly_lag14,Tavg_wkly_lag14,Depart_wkly_lag14,DewPoint_wkly_lag14,WetBulb_wkly_lag14,PrecipTotal_wkly_lag14,StnPressure_wkly_lag14,SeaLevel_wkly_lag14,ResultSpeed_wkly_lag14,ResultDir_wkly_lag14,AvgSpeed_wkly_lag14,Tmax_wkly_lag21,Tmin_wkly_lag21,Tavg_wkly_lag21,Depart_wkly_lag21,DewPoint_wkly_lag21,WetBulb_wkly_lag21,PrecipTotal_wkly_lag21,StnPressure_wkly_lag21,SeaLevel_wkly_lag21,ResultSpeed_wkly_lag21,ResultDir_wkly_lag21,AvgSpeed_wkly_lag21,Station
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1
2007-05-01,2007,5,18,14:01:00,84,52,68,15,51,57,0.0,29.18,29.82,2.7,25,9.6,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2


### Merge data from two station back to one

In [17]:
weather_engr = pd.concat([stn1_comb,stn2_comb], axis=0)
weather_engr.shape

(2944, 65)

In [18]:
weather_engr.head(5)

Unnamed: 0_level_0,year,month,week,daylight,Tmax,Tmin,Tavg,Depart,DewPoint,WetBulb,PrecipTotal,StnPressure,SeaLevel,ResultSpeed,ResultDir,AvgSpeed,Tmax_wkly,Tmin_wkly,Tavg_wkly,Depart_wkly,DewPoint_wkly,WetBulb_wkly,PrecipTotal_wkly,StnPressure_wkly,SeaLevel_wkly,ResultSpeed_wkly,ResultDir_wkly,AvgSpeed_wkly,Tmax_wkly_lag7,Tmin_wkly_lag7,Tavg_wkly_lag7,Depart_wkly_lag7,DewPoint_wkly_lag7,WetBulb_wkly_lag7,PrecipTotal_wkly_lag7,StnPressure_wkly_lag7,SeaLevel_wkly_lag7,ResultSpeed_wkly_lag7,ResultDir_wkly_lag7,AvgSpeed_wkly_lag7,Tmax_wkly_lag14,Tmin_wkly_lag14,Tavg_wkly_lag14,Depart_wkly_lag14,DewPoint_wkly_lag14,WetBulb_wkly_lag14,PrecipTotal_wkly_lag14,StnPressure_wkly_lag14,SeaLevel_wkly_lag14,ResultSpeed_wkly_lag14,ResultDir_wkly_lag14,AvgSpeed_wkly_lag14,Tmax_wkly_lag21,Tmin_wkly_lag21,Tavg_wkly_lag21,Depart_wkly_lag21,DewPoint_wkly_lag21,WetBulb_wkly_lag21,PrecipTotal_wkly_lag21,StnPressure_wkly_lag21,SeaLevel_wkly_lag21,ResultSpeed_wkly_lag21,ResultDir_wkly_lag21,AvgSpeed_wkly_lag21,Station
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1
2007-05-01,2007,5,18,14:01:00,83,50,67,14,51,56,0.0,29.1,29.82,1.7,27,9.2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2007-05-02,2007,5,18,14:03:00,59,42,51,-3,42,47,0.0,29.38,30.09,13.0,4,13.4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2007-05-03,2007,5,18,14:05:00,66,46,56,2,40,48,0.0,29.39,30.12,11.7,7,11.9,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2007-05-04,2007,5,18,14:08:00,66,49,58,4,41,50,0.0,29.31,30.05,10.4,8,10.8,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1
2007-05-05,2007,5,18,14:10:00,66,53,60,5,38,49,0.0,29.4,30.1,11.7,7,12.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1


In [19]:
weather_engr.to_csv('../data/weather_engr.csv')