BIKE

https://www.kaggle.com/competitions/bike-sharing-demand/
```
datetime - hourly date + timestamp  
season -  1 = spring, 2 = summer, 3 = fall, 4 = winter 
holiday - whether the day is considered a holiday
workingday - whether the day is neither a weekend nor holiday
weather - 1: Clear, Few clouds, Partly cloudy, Partly cloudy
2: Mist + Cloudy, Mist + Broken clouds, Mist + Few clouds, Mist
3: Light Snow, Light Rain + Thunderstorm + Scattered clouds, Light Rain + Scattered clouds
4: Heavy Rain + Ice Pallets + Thunderstorm + Mist, Snow + Fog 
temp - temperature in Celsius
atemp - "feels like" temperature in Celsius
humidity - relative humidity
windspeed - wind speed
casual - number of non-registered user rentals initiated
registered - number of registered user rentals initiated
count - number of total rentals
```

In [81]:
import warnings
warnings.filterwarnings(action='ignore')
import re
from datetime import datetime, date, time, timedelta
from dateutil.relativedelta import relativedelta

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
plt.rcParams['font.family']= 'Malgun Gothic'
plt.rcParams['axes.unicode_minus'] = False
# plt.rcParams['figure.figsize'] = [6.4, 4.8]

from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor, VotingRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

<pre>
workingday  holiday
1           0          7412  [1]일하는날
0           0          3163  [0]주말
            1           311  [2]공휴일

In [80]:
test[test['weather'].values==4]

Unnamed: 0_level_0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,y,m,d,h,w,date_type
regdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2011-01-26 16:00:00,1,0,1,4,9.02,9.85,93,22.0028,2011,1,26,16,2,1
2012-01-21 01:00:00,1,0,0,4,5.74,6.82,86,12.998,2012,1,21,1,5,0


In [79]:
train = pd.read_csv('./train.csv', parse_dates=['datetime'])
test = pd.read_csv('./test.csv', parse_dates=['datetime'])

df_list = [train, test]
for df in df_list:
    df.rename(columns={'datetime':'regdate'}, inplace=True)
    df['y'] = df['regdate'].dt.year
    df['m'] = df['regdate'].dt.month
    df['d'] = df['regdate'].dt.day
    df['h'] = df['regdate'].dt.hour
    df['w'] = df['regdate'].dt.dayofweek
    df.set_index('regdate', inplace=True)
    
    df['date_type'] = 0
    df['date_type'] = np.where( (df['holiday']==0) & (df['workingday'] == 1), 1, df['date_type'] )
    df['date_type'] = np.where( (df['holiday']==1) & (df['workingday'] == 0), 2, df['date_type'] )
    
    df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 10886 entries, 2011-01-01 00:00:00 to 2012-12-19 23:00:00
Data columns (total 17 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   season      10886 non-null  int64  
 1   holiday     10886 non-null  int64  
 2   workingday  10886 non-null  int64  
 3   weather     10886 non-null  int64  
 4   temp        10886 non-null  float64
 5   atemp       10886 non-null  float64
 6   humidity    10886 non-null  int64  
 7   windspeed   10886 non-null  float64
 8   casual      10886 non-null  int64  
 9   registered  10886 non-null  int64  
 10  count       10886 non-null  int64  
 11  y           10886 non-null  int64  
 12  m           10886 non-null  int64  
 13  d           10886 non-null  int64  
 14  h           10886 non-null  int64  
 15  w           10886 non-null  int64  
 16  date_type   10886 non-null  int64  
dtypes: float64(3), int64(14)
memory usage: 1.5 MB
<class 'pandas.core.frame.

In [3]:
train[train['weather']==4]

Unnamed: 0_level_0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count,y,m,d,h,w,date_type
regdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2012-01-09 18:00:00,1,0,1,4,8.2,11.365,86,6.0032,6,158,164,2012,1,9,18,0,1


In [4]:
test[test['weather']==4]

Unnamed: 0_level_0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,y,m,d,h,w,date_type
regdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2011-01-26 16:00:00,1,0,1,4,9.02,9.85,93,22.0028,2011,1,26,16,2,1
2012-01-21 01:00:00,1,0,0,4,5.74,6.82,86,12.998,2012,1,21,1,5,0


## 이상치 제거

In [5]:
del_idx_list = []
idx = train[train['windspeed']>=50].index
del_idx_list.extend(idx)
del_idx_list

[Timestamp('2011-02-15 01:00:00'),
 Timestamp('2011-02-19 15:00:00'),
 Timestamp('2011-07-03 17:00:00'),
 Timestamp('2011-07-03 18:00:00')]

In [6]:
idx = train[train['weather']==4].index
del_idx_list.extend(idx)
del_idx_list

[Timestamp('2011-02-15 01:00:00'),
 Timestamp('2011-02-19 15:00:00'),
 Timestamp('2011-07-03 17:00:00'),
 Timestamp('2011-07-03 18:00:00'),
 Timestamp('2012-01-09 18:00:00')]

In [7]:
idx = train[train['temp']>=40].index
del_idx_list.extend(idx)
del_idx_list

[Timestamp('2011-02-15 01:00:00'),
 Timestamp('2011-02-19 15:00:00'),
 Timestamp('2011-07-03 17:00:00'),
 Timestamp('2011-07-03 18:00:00'),
 Timestamp('2012-01-09 18:00:00'),
 Timestamp('2012-07-07 16:00:00')]

In [8]:
print(train.shape)
train = train.drop(del_idx_list, axis=0)
print(train.shape)

(10886, 17)
(10880, 17)


In [9]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [10]:
ws0 = train[train['windspeed']==0]
ws1 = train[train['windspeed']!=0]
ws0.shape, ws1.shape

((1313, 17), (9567, 17))

In [11]:
train_test_w0 = pd.concat([train[train['windspeed']==0], test[test['windspeed']==0]], axis=0)
train_test_w0.drop(['windspeed', 'casual', 'registered', 'count'], axis=1)

Unnamed: 0_level_0,season,holiday,workingday,weather,temp,atemp,humidity,y,m,d,h,w,date_type
regdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2011-01-01 00:00:00,1,0,0,1,9.84,14.395,81,2011,1,1,0,5,0
2011-01-01 01:00:00,1,0,0,1,9.02,13.635,80,2011,1,1,1,5,0
2011-01-01 02:00:00,1,0,0,1,9.02,13.635,80,2011,1,1,2,5,0
2011-01-01 03:00:00,1,0,0,1,9.84,14.395,75,2011,1,1,3,5,0
2011-01-01 04:00:00,1,0,0,1,9.84,14.395,75,2011,1,1,4,5,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2012-12-29 12:00:00,1,0,0,3,8.20,12.120,100,2012,12,29,12,5,0
2012-12-29 13:00:00,1,0,0,3,8.20,12.120,100,2012,12,29,13,5,0
2012-12-29 15:00:00,1,0,0,2,9.84,12.120,87,2012,12,29,15,5,0
2012-12-29 23:00:00,1,0,0,2,10.66,12.120,60,2012,12,29,23,5,0


In [78]:
test[test['weather'].values==4]

Unnamed: 0_level_0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,y,m,d,h,w,date_type
regdate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2011-01-26 16:00:00,1,0,1,4,9.02,9.85,93,22.0028,2011,1,26,16,2,1
2012-01-21 01:00:00,1,0,0,4,5.74,6.82,86,12.998,2012,1,21,1,5,0


In [21]:
train_test_w1 = pd.concat([train[train['windspeed']!=0], test[test['windspeed']!=0]], axis=0)
train_test_w0 = pd.concat([train[train['windspeed']==0], test[test['windspeed']==0]], axis=0)

X_train = train_test_w1.drop(['windspeed', 'casual', 'registered', 'count'], axis=1)
y_train = train_test_w1['windspeed']
X_test = train_test_w0.drop(['windspeed', 'casual', 'registered', 'count'], axis=1)
y_test_idx = train_test_w0['windspeed'].index

model = RandomForestRegressor(random_state=0)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
# train_test_w0['windspeed'] = y_pred


In [68]:
test.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 6493 entries, 2011-01-20 00:00:00 to 2012-12-31 23:00:00
Data columns (total 14 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   season      6493 non-null   int64  
 1   holiday     6493 non-null   int64  
 2   workingday  6493 non-null   int64  
 3   weather     6493 non-null   int64  
 4   temp        6493 non-null   float64
 5   atemp       6493 non-null   float64
 6   humidity    6493 non-null   int64  
 7   windspeed   6493 non-null   float64
 8   y           6493 non-null   int64  
 9   m           6493 non-null   int64  
 10  d           6493 non-null   int64  
 11  h           6493 non-null   int64  
 12  w           6493 non-null   int64  
 13  date_type   6493 non-null   int64  
dtypes: float64(3), int64(11)
memory usage: 1018.9 KB


In [22]:
y_test_idx

DatetimeIndex(['2011-01-01 00:00:00', '2011-01-01 01:00:00',
               '2011-01-01 02:00:00', '2011-01-01 03:00:00',
               '2011-01-01 04:00:00', '2011-01-01 06:00:00',
               '2011-01-01 07:00:00', '2011-01-01 08:00:00',
               '2011-01-01 09:00:00', '2011-01-03 19:00:00',
               ...
               '2012-12-29 00:00:00', '2012-12-29 02:00:00',
               '2012-12-29 03:00:00', '2012-12-29 08:00:00',
               '2012-12-29 09:00:00', '2012-12-29 12:00:00',
               '2012-12-29 13:00:00', '2012-12-29 15:00:00',
               '2012-12-29 23:00:00', '2012-12-30 19:00:00'],
              dtype='datetime64[ns]', name='regdate', length=2180, freq=None)

In [27]:
train.index

DatetimeIndex(['2011-01-01 00:00:00', '2011-01-01 01:00:00',
               '2011-01-01 02:00:00', '2011-01-01 03:00:00',
               '2011-01-01 04:00:00', '2011-01-01 05:00:00',
               '2011-01-01 06:00:00', '2011-01-01 07:00:00',
               '2011-01-01 08:00:00', '2011-01-01 09:00:00',
               ...
               '2012-12-19 14:00:00', '2012-12-19 15:00:00',
               '2012-12-19 16:00:00', '2012-12-19 17:00:00',
               '2012-12-19 18:00:00', '2012-12-19 19:00:00',
               '2012-12-19 20:00:00', '2012-12-19 21:00:00',
               '2012-12-19 22:00:00', '2012-12-19 23:00:00'],
              dtype='datetime64[ns]', name='regdate', length=10880, freq=None)

In [24]:
train.loc[]

KeyError: "[Timestamp('2011-01-20 01:00:00'), Timestamp('2011-01-20 02:00:00'), Timestamp('2011-01-20 12:00:00'), Timestamp('2011-01-22 13:00:00'), Timestamp('2011-01-22 15:00:00'), Timestamp('2011-01-22 17:00:00'), Timestamp('2011-01-22 22:00:00'), Timestamp('2011-01-24 07:00:00'), Timestamp('2011-01-24 08:00:00'), Timestamp('2011-01-24 09:00:00'), Timestamp('2011-01-24 10:00:00'), Timestamp('2011-01-25 14:00:00'), Timestamp('2011-01-25 15:00:00'), Timestamp('2011-01-25 16:00:00'), Timestamp('2011-01-25 17:00:00'), Timestamp('2011-01-28 14:00:00'), Timestamp('2011-01-28 15:00:00'), Timestamp('2011-01-28 16:00:00'), Timestamp('2011-01-29 17:00:00'), Timestamp('2011-01-30 01:00:00'), Timestamp('2011-01-30 02:00:00'), Timestamp('2011-01-30 03:00:00'), Timestamp('2011-01-30 04:00:00'), Timestamp('2011-01-30 05:00:00'), Timestamp('2011-01-30 07:00:00'), Timestamp('2011-01-30 08:00:00'), Timestamp('2011-01-30 09:00:00'), Timestamp('2011-01-30 10:00:00'), Timestamp('2011-01-30 11:00:00'), Timestamp('2011-01-30 15:00:00'), Timestamp('2011-01-30 16:00:00'), Timestamp('2011-01-31 13:00:00'), Timestamp('2011-02-20 13:00:00'), Timestamp('2011-02-20 20:00:00'), Timestamp('2011-02-20 21:00:00'), Timestamp('2011-02-21 05:00:00'), Timestamp('2011-02-22 10:00:00'), Timestamp('2011-02-22 11:00:00'), Timestamp('2011-02-23 10:00:00'), Timestamp('2011-02-23 16:00:00'), Timestamp('2011-02-23 19:00:00'), Timestamp('2011-02-23 21:00:00'), Timestamp('2011-02-23 22:00:00'), Timestamp('2011-02-25 00:00:00'), Timestamp('2011-02-25 01:00:00'), Timestamp('2011-02-25 02:00:00'), Timestamp('2011-02-26 06:00:00'), Timestamp('2011-02-26 08:00:00'), Timestamp('2011-02-26 11:00:00'), Timestamp('2011-02-27 06:00:00'), Timestamp('2011-02-27 08:00:00'), Timestamp('2011-02-27 13:00:00'), Timestamp('2011-02-28 05:00:00'), Timestamp('2011-03-20 10:00:00'), Timestamp('2011-03-20 13:00:00'), Timestamp('2011-03-20 14:00:00'), Timestamp('2011-03-20 16:00:00'), Timestamp('2011-03-23 23:00:00'), Timestamp('2011-03-25 10:00:00'), Timestamp('2011-03-27 16:00:00'), Timestamp('2011-03-28 12:00:00'), Timestamp('2011-03-29 21:00:00'), Timestamp('2011-03-29 22:00:00'), Timestamp('2011-03-30 00:00:00'), Timestamp('2011-03-30 01:00:00'), Timestamp('2011-03-30 02:00:00'), Timestamp('2011-03-30 03:00:00'), Timestamp('2011-03-30 04:00:00'), Timestamp('2011-04-20 03:00:00'), Timestamp('2011-04-20 07:00:00'), Timestamp('2011-04-23 04:00:00'), Timestamp('2011-04-23 05:00:00'), Timestamp('2011-04-24 06:00:00'), Timestamp('2011-04-24 07:00:00'), Timestamp('2011-04-25 00:00:00'), Timestamp('2011-04-25 04:00:00'), Timestamp('2011-04-25 05:00:00'), Timestamp('2011-04-25 06:00:00'), Timestamp('2011-05-20 01:00:00'), Timestamp('2011-05-20 03:00:00'), Timestamp('2011-05-20 08:00:00'), Timestamp('2011-05-20 17:00:00'), Timestamp('2011-05-20 22:00:00'), Timestamp('2011-05-21 01:00:00'), Timestamp('2011-05-21 12:00:00'), Timestamp('2011-05-21 13:00:00'), Timestamp('2011-05-21 14:00:00'), Timestamp('2011-05-21 20:00:00'), Timestamp('2011-05-22 04:00:00'), Timestamp('2011-05-23 04:00:00'), Timestamp('2011-05-23 05:00:00'), Timestamp('2011-05-23 06:00:00'), Timestamp('2011-05-25 02:00:00'), Timestamp('2011-05-26 04:00:00'), Timestamp('2011-05-30 07:00:00'), Timestamp('2011-05-30 11:00:00'), Timestamp('2011-05-30 14:00:00'), Timestamp('2011-05-30 20:00:00'), Timestamp('2011-05-31 03:00:00'), Timestamp('2011-05-31 11:00:00'), Timestamp('2011-05-31 14:00:00'), Timestamp('2011-06-20 03:00:00'), Timestamp('2011-06-20 17:00:00'), Timestamp('2011-06-21 07:00:00'), Timestamp('2011-06-22 02:00:00'), Timestamp('2011-06-22 04:00:00'), Timestamp('2011-06-22 05:00:00'), Timestamp('2011-06-22 06:00:00'), Timestamp('2011-06-22 07:00:00'), Timestamp('2011-06-24 22:00:00'), Timestamp('2011-06-25 11:00:00'), Timestamp('2011-06-25 15:00:00'), Timestamp('2011-06-26 00:00:00'), Timestamp('2011-06-26 05:00:00'), Timestamp('2011-06-26 06:00:00'), Timestamp('2011-06-26 07:00:00'), Timestamp('2011-06-26 13:00:00'), Timestamp('2011-06-26 15:00:00'), Timestamp('2011-06-26 22:00:00'), Timestamp('2011-06-27 09:00:00'), Timestamp('2011-06-27 11:00:00'), Timestamp('2011-06-27 12:00:00'), Timestamp('2011-06-27 15:00:00'), Timestamp('2011-06-27 18:00:00'), Timestamp('2011-06-27 21:00:00'), Timestamp('2011-06-27 22:00:00'), Timestamp('2011-06-27 23:00:00'), Timestamp('2011-06-28 01:00:00'), Timestamp('2011-06-28 02:00:00'), Timestamp('2011-06-28 03:00:00'), Timestamp('2011-06-28 04:00:00'), Timestamp('2011-06-28 05:00:00'), Timestamp('2011-06-28 09:00:00'), Timestamp('2011-06-28 11:00:00'), Timestamp('2011-06-28 13:00:00'), Timestamp('2011-06-28 15:00:00'), Timestamp('2011-06-28 20:00:00'), Timestamp('2011-06-30 23:00:00'), Timestamp('2011-07-20 01:00:00'), Timestamp('2011-07-20 02:00:00'), Timestamp('2011-07-20 04:00:00'), Timestamp('2011-07-20 08:00:00'), Timestamp('2011-07-20 10:00:00'), Timestamp('2011-07-20 12:00:00'), Timestamp('2011-07-20 16:00:00'), Timestamp('2011-07-22 01:00:00'), Timestamp('2011-07-22 20:00:00'), Timestamp('2011-07-23 00:00:00'), Timestamp('2011-07-23 02:00:00'), Timestamp('2011-07-23 14:00:00'), Timestamp('2011-07-23 15:00:00'), Timestamp('2011-07-24 22:00:00'), Timestamp('2011-07-25 00:00:00'), Timestamp('2011-07-25 03:00:00'), Timestamp('2011-07-25 07:00:00'), Timestamp('2011-07-25 08:00:00'), Timestamp('2011-07-25 09:00:00'), Timestamp('2011-07-25 10:00:00'), Timestamp('2011-07-25 17:00:00'), Timestamp('2011-07-25 21:00:00'), Timestamp('2011-07-25 22:00:00'), Timestamp('2011-07-26 11:00:00'), Timestamp('2011-07-26 12:00:00'), Timestamp('2011-07-27 10:00:00'), Timestamp('2011-07-27 11:00:00'), Timestamp('2011-07-27 22:00:00'), Timestamp('2011-07-28 01:00:00'), Timestamp('2011-07-28 03:00:00'), Timestamp('2011-07-29 02:00:00'), Timestamp('2011-07-29 03:00:00'), Timestamp('2011-07-29 05:00:00'), Timestamp('2011-07-29 06:00:00'), Timestamp('2011-07-29 08:00:00'), Timestamp('2011-07-29 13:00:00'), Timestamp('2011-07-30 13:00:00'), Timestamp('2011-07-30 14:00:00'), Timestamp('2011-07-31 03:00:00'), Timestamp('2011-07-31 04:00:00'), Timestamp('2011-07-31 05:00:00'), Timestamp('2011-07-31 06:00:00'), Timestamp('2011-07-31 09:00:00'), Timestamp('2011-08-20 00:00:00'), Timestamp('2011-08-20 01:00:00'), Timestamp('2011-08-20 02:00:00'), Timestamp('2011-08-20 03:00:00'), Timestamp('2011-08-20 04:00:00'), Timestamp('2011-08-20 05:00:00'), Timestamp('2011-08-20 07:00:00'), Timestamp('2011-08-20 08:00:00'), Timestamp('2011-08-20 09:00:00'), Timestamp('2011-08-20 11:00:00'), Timestamp('2011-08-23 08:00:00'), Timestamp('2011-08-23 09:00:00'), Timestamp('2011-08-23 10:00:00'), Timestamp('2011-08-23 21:00:00'), Timestamp('2011-08-24 06:00:00'), Timestamp('2011-08-25 11:00:00'), Timestamp('2011-08-25 12:00:00'), Timestamp('2011-08-25 14:00:00'), Timestamp('2011-08-25 15:00:00'), Timestamp('2011-08-25 16:00:00'), Timestamp('2011-08-25 17:00:00'), Timestamp('2011-08-25 22:00:00'), Timestamp('2011-08-26 01:00:00'), Timestamp('2011-08-26 04:00:00'), Timestamp('2011-08-26 05:00:00'), Timestamp('2011-08-26 06:00:00'), Timestamp('2011-08-26 07:00:00'), Timestamp('2011-08-26 09:00:00'), Timestamp('2011-08-26 11:00:00'), Timestamp('2011-08-26 14:00:00'), Timestamp('2011-08-29 13:00:00'), Timestamp('2011-08-29 14:00:00'), Timestamp('2011-08-30 12:00:00'), Timestamp('2011-08-30 20:00:00'), Timestamp('2011-08-30 22:00:00'), Timestamp('2011-08-30 23:00:00'), Timestamp('2011-08-31 00:00:00'), Timestamp('2011-08-31 01:00:00'), Timestamp('2011-08-31 02:00:00'), Timestamp('2011-08-31 03:00:00'), Timestamp('2011-08-31 04:00:00'), Timestamp('2011-08-31 06:00:00'), Timestamp('2011-08-31 07:00:00'), Timestamp('2011-08-31 08:00:00'), Timestamp('2011-08-31 22:00:00'), Timestamp('2011-09-20 19:00:00'), Timestamp('2011-09-20 20:00:00'), Timestamp('2011-09-20 21:00:00'), Timestamp('2011-09-20 22:00:00'), Timestamp('2011-09-20 23:00:00'), Timestamp('2011-09-21 00:00:00'), Timestamp('2011-09-21 01:00:00'), Timestamp('2011-09-21 02:00:00'), Timestamp('2011-09-21 10:00:00'), Timestamp('2011-09-21 19:00:00'), Timestamp('2011-09-21 20:00:00'), Timestamp('2011-09-21 22:00:00'), Timestamp('2011-09-22 23:00:00'), Timestamp('2011-09-23 01:00:00'), Timestamp('2011-09-23 03:00:00'), Timestamp('2011-09-23 04:00:00'), Timestamp('2011-09-23 05:00:00'), Timestamp('2011-09-23 16:00:00'), Timestamp('2011-09-23 17:00:00'), Timestamp('2011-09-23 18:00:00'), Timestamp('2011-09-23 20:00:00'), Timestamp('2011-09-24 03:00:00'), Timestamp('2011-09-24 13:00:00'), Timestamp('2011-09-24 19:00:00'), Timestamp('2011-09-24 20:00:00'), Timestamp('2011-09-24 22:00:00'), Timestamp('2011-09-25 00:00:00'), Timestamp('2011-09-25 02:00:00'), Timestamp('2011-09-25 03:00:00'), Timestamp('2011-09-25 08:00:00'), Timestamp('2011-09-25 09:00:00'), Timestamp('2011-09-25 10:00:00'), Timestamp('2011-09-25 12:00:00'), Timestamp('2011-09-25 13:00:00'), Timestamp('2011-09-25 14:00:00'), Timestamp('2011-09-25 15:00:00'), Timestamp('2011-09-25 16:00:00'), Timestamp('2011-09-25 17:00:00'), Timestamp('2011-09-25 18:00:00'), Timestamp('2011-09-26 00:00:00'), Timestamp('2011-09-26 03:00:00'), Timestamp('2011-09-26 05:00:00'), Timestamp('2011-09-26 07:00:00'), Timestamp('2011-09-26 08:00:00'), Timestamp('2011-09-27 03:00:00'), Timestamp('2011-09-27 20:00:00'), Timestamp('2011-09-27 21:00:00'), Timestamp('2011-09-28 00:00:00'), Timestamp('2011-09-28 01:00:00'), Timestamp('2011-09-28 04:00:00'), Timestamp('2011-09-28 05:00:00'), Timestamp('2011-09-28 10:00:00'), Timestamp('2011-09-28 12:00:00'), Timestamp('2011-09-29 08:00:00'), Timestamp('2011-09-29 21:00:00'), Timestamp('2011-09-29 22:00:00'), Timestamp('2011-09-30 10:00:00'), Timestamp('2011-10-22 05:00:00'), Timestamp('2011-10-22 06:00:00'), Timestamp('2011-10-22 08:00:00'), Timestamp('2011-10-22 13:00:00'), Timestamp('2011-10-22 19:00:00'), Timestamp('2011-10-22 20:00:00'), Timestamp('2011-10-22 21:00:00'), Timestamp('2011-10-22 22:00:00'), Timestamp('2011-10-22 23:00:00'), Timestamp('2011-10-23 00:00:00'), Timestamp('2011-10-23 01:00:00'), Timestamp('2011-10-23 03:00:00'), Timestamp('2011-10-23 04:00:00'), Timestamp('2011-10-23 05:00:00'), Timestamp('2011-10-23 09:00:00'), Timestamp('2011-10-23 19:00:00'), Timestamp('2011-10-24 00:00:00'), Timestamp('2011-10-24 04:00:00'), Timestamp('2011-10-24 05:00:00'), Timestamp('2011-10-24 19:00:00'), Timestamp('2011-10-25 03:00:00'), Timestamp('2011-10-25 08:00:00'), Timestamp('2011-10-26 14:00:00'), Timestamp('2011-10-26 15:00:00'), Timestamp('2011-10-26 19:00:00'), Timestamp('2011-10-26 20:00:00'), Timestamp('2011-10-26 21:00:00'), Timestamp('2011-10-27 00:00:00'), Timestamp('2011-10-27 01:00:00'), Timestamp('2011-10-27 02:00:00'), Timestamp('2011-10-27 03:00:00'), Timestamp('2011-10-27 04:00:00'), Timestamp('2011-10-27 06:00:00'), Timestamp('2011-10-27 14:00:00'), Timestamp('2011-10-30 07:00:00'), Timestamp('2011-10-30 20:00:00'), Timestamp('2011-10-30 21:00:00'), Timestamp('2011-10-30 22:00:00'), Timestamp('2011-10-30 23:00:00'), Timestamp('2011-10-31 00:00:00'), Timestamp('2011-10-31 01:00:00'), Timestamp('2011-10-31 02:00:00'), Timestamp('2011-10-31 04:00:00'), Timestamp('2011-10-31 05:00:00'), Timestamp('2011-10-31 06:00:00'), Timestamp('2011-10-31 07:00:00'), Timestamp('2011-10-31 08:00:00'), Timestamp('2011-11-21 00:00:00'), Timestamp('2011-11-21 05:00:00'), Timestamp('2011-11-21 06:00:00'), Timestamp('2011-11-21 08:00:00'), Timestamp('2011-11-21 10:00:00'), Timestamp('2011-11-22 09:00:00'), Timestamp('2011-11-22 10:00:00'), Timestamp('2011-11-22 15:00:00'), Timestamp('2011-11-22 16:00:00'), Timestamp('2011-11-22 18:00:00'), Timestamp('2011-11-23 06:00:00'), Timestamp('2011-11-24 09:00:00'), Timestamp('2011-11-24 10:00:00'), Timestamp('2011-11-25 01:00:00'), Timestamp('2011-11-25 05:00:00'), Timestamp('2011-11-25 07:00:00'), Timestamp('2011-11-25 09:00:00'), Timestamp('2011-11-25 19:00:00'), Timestamp('2011-11-25 21:00:00'), Timestamp('2011-11-26 03:00:00'), Timestamp('2011-11-26 04:00:00'), Timestamp('2011-11-26 10:00:00'), Timestamp('2011-11-26 12:00:00'), Timestamp('2011-11-26 13:00:00'), Timestamp('2011-11-26 20:00:00'), Timestamp('2011-11-26 21:00:00'), Timestamp('2011-11-26 22:00:00'), Timestamp('2011-11-26 23:00:00'), Timestamp('2011-11-27 00:00:00'), Timestamp('2011-11-27 01:00:00'), Timestamp('2011-11-27 02:00:00'), Timestamp('2011-11-27 03:00:00'), Timestamp('2011-11-28 05:00:00'), Timestamp('2011-11-28 07:00:00'), Timestamp('2011-11-28 08:00:00'), Timestamp('2011-11-28 13:00:00'), Timestamp('2011-11-28 14:00:00'), Timestamp('2011-11-29 00:00:00'), Timestamp('2011-11-29 02:00:00'), Timestamp('2011-11-29 03:00:00'), Timestamp('2011-12-20 01:00:00'), Timestamp('2011-12-20 02:00:00'), Timestamp('2011-12-20 03:00:00'), Timestamp('2011-12-20 05:00:00'), Timestamp('2011-12-20 06:00:00'), Timestamp('2011-12-20 07:00:00'), Timestamp('2011-12-20 09:00:00'), Timestamp('2011-12-20 15:00:00'), Timestamp('2011-12-20 16:00:00'), Timestamp('2011-12-20 19:00:00'), Timestamp('2011-12-20 20:00:00'), Timestamp('2011-12-20 22:00:00'), Timestamp('2011-12-20 23:00:00'), Timestamp('2011-12-21 02:00:00'), Timestamp('2011-12-21 03:00:00'), Timestamp('2011-12-21 04:00:00'), Timestamp('2011-12-21 06:00:00'), Timestamp('2011-12-21 08:00:00'), Timestamp('2011-12-22 05:00:00'), Timestamp('2011-12-22 06:00:00'), Timestamp('2011-12-22 07:00:00'), Timestamp('2011-12-22 08:00:00'), Timestamp('2011-12-22 11:00:00'), Timestamp('2011-12-22 12:00:00'), Timestamp('2011-12-22 13:00:00'), Timestamp('2011-12-22 14:00:00'), Timestamp('2011-12-22 15:00:00'), Timestamp('2011-12-22 16:00:00'), Timestamp('2011-12-22 19:00:00'), Timestamp('2011-12-22 20:00:00'), Timestamp('2011-12-22 21:00:00'), Timestamp('2011-12-22 23:00:00'), Timestamp('2011-12-24 01:00:00'), Timestamp('2011-12-24 22:00:00'), Timestamp('2011-12-24 23:00:00'), Timestamp('2011-12-25 03:00:00'), Timestamp('2011-12-26 21:00:00'), Timestamp('2011-12-26 22:00:00'), Timestamp('2011-12-29 06:00:00'), Timestamp('2011-12-29 08:00:00'), Timestamp('2011-12-29 14:00:00'), Timestamp('2011-12-30 09:00:00'), Timestamp('2011-12-30 15:00:00'), Timestamp('2011-12-31 07:00:00'), Timestamp('2011-12-31 09:00:00'), Timestamp('2011-12-31 23:00:00'), Timestamp('2012-01-21 02:00:00'), Timestamp('2012-01-23 00:00:00'), Timestamp('2012-01-23 03:00:00'), Timestamp('2012-01-23 04:00:00'), Timestamp('2012-01-23 05:00:00'), Timestamp('2012-01-23 06:00:00'), Timestamp('2012-01-23 07:00:00'), Timestamp('2012-01-23 10:00:00'), Timestamp('2012-01-23 12:00:00'), Timestamp('2012-01-24 20:00:00'), Timestamp('2012-01-24 21:00:00'), Timestamp('2012-01-24 22:00:00'), Timestamp('2012-01-24 23:00:00'), Timestamp('2012-01-25 00:00:00'), Timestamp('2012-01-25 15:00:00'), Timestamp('2012-01-25 19:00:00'), Timestamp('2012-01-25 21:00:00'), Timestamp('2012-01-25 23:00:00'), Timestamp('2012-01-26 01:00:00'), Timestamp('2012-01-26 04:00:00'), Timestamp('2012-01-26 06:00:00'), Timestamp('2012-01-26 09:00:00'), Timestamp('2012-01-26 13:00:00'), Timestamp('2012-01-26 17:00:00'), Timestamp('2012-01-26 18:00:00'), Timestamp('2012-01-26 23:00:00'), Timestamp('2012-01-28 02:00:00'), Timestamp('2012-01-28 06:00:00'), Timestamp('2012-01-28 07:00:00'), Timestamp('2012-01-28 09:00:00'), Timestamp('2012-01-29 04:00:00'), Timestamp('2012-01-30 11:00:00'), Timestamp('2012-02-20 23:00:00'), Timestamp('2012-02-21 00:00:00'), Timestamp('2012-02-21 01:00:00'), Timestamp('2012-02-21 02:00:00'), Timestamp('2012-02-21 04:00:00'), Timestamp('2012-02-21 05:00:00'), Timestamp('2012-02-21 06:00:00'), Timestamp('2012-02-21 07:00:00'), Timestamp('2012-02-23 16:00:00'), Timestamp('2012-02-23 23:00:00'), Timestamp('2012-02-24 18:00:00'), Timestamp('2012-02-26 13:00:00'), Timestamp('2012-02-26 14:00:00'), Timestamp('2012-02-26 15:00:00'), Timestamp('2012-02-27 08:00:00'), Timestamp('2012-02-28 19:00:00'), Timestamp('2012-02-29 18:00:00'), Timestamp('2012-03-20 01:00:00'), Timestamp('2012-03-20 03:00:00'), Timestamp('2012-03-20 05:00:00'), Timestamp('2012-03-20 21:00:00'), Timestamp('2012-03-20 22:00:00'), Timestamp('2012-03-20 23:00:00'), Timestamp('2012-03-21 00:00:00'), Timestamp('2012-03-21 12:00:00'), Timestamp('2012-03-21 13:00:00'), Timestamp('2012-03-21 14:00:00'), Timestamp('2012-03-21 15:00:00'), Timestamp('2012-03-21 16:00:00'), Timestamp('2012-03-21 18:00:00'), Timestamp('2012-03-21 19:00:00'), Timestamp('2012-03-21 23:00:00'), Timestamp('2012-03-22 11:00:00'), Timestamp('2012-03-22 19:00:00'), Timestamp('2012-03-23 00:00:00'), Timestamp('2012-03-23 01:00:00'), Timestamp('2012-03-23 05:00:00'), Timestamp('2012-03-23 06:00:00'), Timestamp('2012-03-23 13:00:00'), Timestamp('2012-03-24 00:00:00'), Timestamp('2012-03-25 16:00:00'), Timestamp('2012-03-27 13:00:00'), Timestamp('2012-03-27 15:00:00'), Timestamp('2012-03-27 16:00:00'), Timestamp('2012-03-27 17:00:00'), Timestamp('2012-03-27 19:00:00'), Timestamp('2012-03-30 11:00:00'), Timestamp('2012-03-30 12:00:00'), Timestamp('2012-03-30 13:00:00'), Timestamp('2012-03-30 14:00:00'), Timestamp('2012-03-30 15:00:00'), Timestamp('2012-03-30 23:00:00'), Timestamp('2012-03-31 12:00:00'), Timestamp('2012-04-20 05:00:00'), Timestamp('2012-04-20 06:00:00'), Timestamp('2012-04-20 07:00:00'), Timestamp('2012-04-20 08:00:00'), Timestamp('2012-04-24 18:00:00'), Timestamp('2012-04-25 00:00:00'), Timestamp('2012-04-25 01:00:00'), Timestamp('2012-04-25 02:00:00'), Timestamp('2012-04-25 05:00:00'), Timestamp('2012-04-25 06:00:00'), Timestamp('2012-04-25 07:00:00'), Timestamp('2012-04-25 09:00:00'), Timestamp('2012-04-25 10:00:00'), Timestamp('2012-04-25 22:00:00'), Timestamp('2012-04-26 00:00:00'), Timestamp('2012-04-27 08:00:00'), Timestamp('2012-04-28 09:00:00'), Timestamp('2012-04-28 12:00:00'), Timestamp('2012-04-28 13:00:00'), Timestamp('2012-04-28 18:00:00'), Timestamp('2012-04-28 20:00:00'), Timestamp('2012-04-29 05:00:00'), Timestamp('2012-04-29 11:00:00'), Timestamp('2012-04-29 14:00:00'), Timestamp('2012-04-29 22:00:00'), Timestamp('2012-04-29 23:00:00'), Timestamp('2012-05-20 01:00:00'), Timestamp('2012-05-20 02:00:00'), Timestamp('2012-05-22 11:00:00'), Timestamp('2012-05-22 13:00:00'), Timestamp('2012-05-22 17:00:00'), Timestamp('2012-05-23 07:00:00'), Timestamp('2012-05-23 08:00:00'), Timestamp('2012-05-23 09:00:00'), Timestamp('2012-05-23 11:00:00'), Timestamp('2012-05-23 15:00:00'), Timestamp('2012-05-23 21:00:00'), Timestamp('2012-05-24 02:00:00'), Timestamp('2012-05-25 02:00:00'), Timestamp('2012-05-25 04:00:00'), Timestamp('2012-05-27 22:00:00'), Timestamp('2012-05-28 08:00:00'), Timestamp('2012-05-28 09:00:00'), Timestamp('2012-05-28 10:00:00'), Timestamp('2012-05-29 23:00:00'), Timestamp('2012-05-30 12:00:00'), Timestamp('2012-05-30 13:00:00'), Timestamp('2012-05-31 04:00:00'), Timestamp('2012-06-20 07:00:00'), Timestamp('2012-06-20 09:00:00'), Timestamp('2012-06-20 15:00:00'), Timestamp('2012-06-20 17:00:00'), Timestamp('2012-06-20 18:00:00'), Timestamp('2012-06-21 03:00:00'), Timestamp('2012-06-21 08:00:00'), Timestamp('2012-06-21 13:00:00'), Timestamp('2012-06-21 17:00:00'), Timestamp('2012-06-22 02:00:00'), Timestamp('2012-06-22 03:00:00'), Timestamp('2012-06-22 04:00:00'), Timestamp('2012-06-22 06:00:00'), Timestamp('2012-06-22 23:00:00'), Timestamp('2012-06-23 00:00:00'), Timestamp('2012-06-23 23:00:00'), Timestamp('2012-06-24 00:00:00'), Timestamp('2012-06-24 02:00:00'), Timestamp('2012-06-24 04:00:00'), Timestamp('2012-06-24 08:00:00'), Timestamp('2012-06-24 09:00:00'), Timestamp('2012-06-24 10:00:00'), Timestamp('2012-06-28 01:00:00'), Timestamp('2012-06-29 15:00:00'), Timestamp('2012-06-29 16:00:00'), Timestamp('2012-06-29 18:00:00'), Timestamp('2012-06-30 02:00:00'), Timestamp('2012-06-30 03:00:00'), Timestamp('2012-06-30 04:00:00'), Timestamp('2012-06-30 18:00:00'), Timestamp('2012-07-21 23:00:00'), Timestamp('2012-07-22 04:00:00'), Timestamp('2012-07-22 05:00:00'), Timestamp('2012-07-22 07:00:00'), Timestamp('2012-07-22 08:00:00'), Timestamp('2012-07-22 09:00:00'), Timestamp('2012-07-22 10:00:00'), Timestamp('2012-07-22 11:00:00'), Timestamp('2012-07-23 10:00:00'), Timestamp('2012-07-24 09:00:00'), Timestamp('2012-07-25 10:00:00'), Timestamp('2012-07-25 11:00:00'), Timestamp('2012-07-25 12:00:00'), Timestamp('2012-07-25 17:00:00'), Timestamp('2012-07-27 00:00:00'), Timestamp('2012-07-27 04:00:00'), Timestamp('2012-07-27 05:00:00'), Timestamp('2012-07-27 06:00:00'), Timestamp('2012-07-27 10:00:00'), Timestamp('2012-07-27 23:00:00'), Timestamp('2012-07-28 02:00:00'), Timestamp('2012-07-28 03:00:00'), Timestamp('2012-07-28 19:00:00'), Timestamp('2012-07-29 10:00:00'), Timestamp('2012-07-29 23:00:00'), Timestamp('2012-07-30 01:00:00'), Timestamp('2012-07-30 02:00:00'), Timestamp('2012-07-30 03:00:00'), Timestamp('2012-07-30 04:00:00'), Timestamp('2012-07-30 09:00:00'), Timestamp('2012-07-31 10:00:00'), Timestamp('2012-07-31 11:00:00'), Timestamp('2012-07-31 12:00:00'), Timestamp('2012-08-20 05:00:00'), Timestamp('2012-08-20 09:00:00'), Timestamp('2012-08-20 21:00:00'), Timestamp('2012-08-20 22:00:00'), Timestamp('2012-08-20 23:00:00'), Timestamp('2012-08-21 03:00:00'), Timestamp('2012-08-21 04:00:00'), Timestamp('2012-08-21 09:00:00'), Timestamp('2012-08-21 11:00:00'), Timestamp('2012-08-21 12:00:00'), Timestamp('2012-08-21 13:00:00'), Timestamp('2012-08-21 15:00:00'), Timestamp('2012-08-21 16:00:00'), Timestamp('2012-08-21 20:00:00'), Timestamp('2012-08-21 21:00:00'), Timestamp('2012-08-21 22:00:00'), Timestamp('2012-08-21 23:00:00'), Timestamp('2012-08-22 01:00:00'), Timestamp('2012-08-22 02:00:00'), Timestamp('2012-08-22 03:00:00'), Timestamp('2012-08-22 05:00:00'), Timestamp('2012-08-22 06:00:00'), Timestamp('2012-08-22 07:00:00'), Timestamp('2012-08-22 09:00:00'), Timestamp('2012-08-22 12:00:00'), Timestamp('2012-08-22 19:00:00'), Timestamp('2012-08-22 21:00:00'), Timestamp('2012-08-22 22:00:00'), Timestamp('2012-08-22 23:00:00'), Timestamp('2012-08-23 00:00:00'), Timestamp('2012-08-23 01:00:00'), Timestamp('2012-08-23 02:00:00'), Timestamp('2012-08-23 04:00:00'), Timestamp('2012-08-23 05:00:00'), Timestamp('2012-08-23 09:00:00'), Timestamp('2012-08-23 12:00:00'), Timestamp('2012-08-23 23:00:00'), Timestamp('2012-08-24 00:00:00'), Timestamp('2012-08-24 01:00:00'), Timestamp('2012-08-24 02:00:00'), Timestamp('2012-08-24 03:00:00'), Timestamp('2012-08-24 04:00:00'), Timestamp('2012-08-24 05:00:00'), Timestamp('2012-08-24 06:00:00'), Timestamp('2012-08-24 07:00:00'), Timestamp('2012-08-24 08:00:00'), Timestamp('2012-08-24 09:00:00'), Timestamp('2012-08-24 10:00:00'), Timestamp('2012-08-24 22:00:00'), Timestamp('2012-08-25 00:00:00'), Timestamp('2012-08-25 01:00:00'), Timestamp('2012-08-27 00:00:00'), Timestamp('2012-08-27 04:00:00'), Timestamp('2012-08-27 05:00:00'), Timestamp('2012-08-27 06:00:00'), Timestamp('2012-08-27 07:00:00'), Timestamp('2012-08-27 12:00:00'), Timestamp('2012-08-28 22:00:00'), Timestamp('2012-08-28 23:00:00'), Timestamp('2012-08-29 00:00:00'), Timestamp('2012-08-29 11:00:00'), Timestamp('2012-08-29 12:00:00'), Timestamp('2012-08-29 13:00:00'), Timestamp('2012-08-29 20:00:00'), Timestamp('2012-08-29 22:00:00'), Timestamp('2012-08-29 23:00:00'), Timestamp('2012-08-30 00:00:00'), Timestamp('2012-08-30 01:00:00'), Timestamp('2012-08-30 02:00:00'), Timestamp('2012-08-30 03:00:00'), Timestamp('2012-08-30 04:00:00'), Timestamp('2012-08-30 05:00:00'), Timestamp('2012-08-30 07:00:00'), Timestamp('2012-08-30 08:00:00'), Timestamp('2012-08-30 09:00:00'), Timestamp('2012-08-30 10:00:00'), Timestamp('2012-08-30 22:00:00'), Timestamp('2012-08-31 08:00:00'), Timestamp('2012-08-31 09:00:00'), Timestamp('2012-09-20 00:00:00'), Timestamp('2012-09-20 03:00:00'), Timestamp('2012-09-20 06:00:00'), Timestamp('2012-09-20 11:00:00'), Timestamp('2012-09-21 00:00:00'), Timestamp('2012-09-21 01:00:00'), Timestamp('2012-09-21 02:00:00'), Timestamp('2012-09-21 04:00:00'), Timestamp('2012-09-21 05:00:00'), Timestamp('2012-09-21 07:00:00'), Timestamp('2012-09-23 22:00:00'), Timestamp('2012-09-24 05:00:00'), Timestamp('2012-09-24 06:00:00'), Timestamp('2012-09-24 14:00:00'), Timestamp('2012-09-24 15:00:00'), Timestamp('2012-09-24 17:00:00'), Timestamp('2012-09-27 03:00:00'), Timestamp('2012-09-27 05:00:00'), Timestamp('2012-09-27 07:00:00'), Timestamp('2012-09-27 09:00:00'), Timestamp('2012-09-27 13:00:00'), Timestamp('2012-09-27 15:00:00'), Timestamp('2012-09-28 01:00:00'), Timestamp('2012-09-28 05:00:00'), Timestamp('2012-09-28 07:00:00'), Timestamp('2012-09-28 11:00:00'), Timestamp('2012-09-30 00:00:00'), Timestamp('2012-09-30 01:00:00'), Timestamp('2012-09-30 02:00:00'), Timestamp('2012-09-30 05:00:00'), Timestamp('2012-09-30 07:00:00'), Timestamp('2012-09-30 21:00:00'), Timestamp('2012-09-30 22:00:00'), Timestamp('2012-10-20 04:00:00'), Timestamp('2012-10-20 05:00:00'), Timestamp('2012-10-20 06:00:00'), Timestamp('2012-10-20 07:00:00'), Timestamp('2012-10-20 14:00:00'), Timestamp('2012-10-20 15:00:00'), Timestamp('2012-10-20 23:00:00'), Timestamp('2012-10-21 06:00:00'), Timestamp('2012-10-21 07:00:00'), Timestamp('2012-10-21 08:00:00'), Timestamp('2012-10-21 22:00:00'), Timestamp('2012-10-21 23:00:00'), Timestamp('2012-10-22 00:00:00'), Timestamp('2012-10-22 01:00:00'), Timestamp('2012-10-22 02:00:00'), Timestamp('2012-10-22 03:00:00'), Timestamp('2012-10-22 04:00:00'), Timestamp('2012-10-22 08:00:00'), Timestamp('2012-10-22 10:00:00'), Timestamp('2012-10-22 12:00:00'), Timestamp('2012-10-23 10:00:00'), Timestamp('2012-10-23 11:00:00'), Timestamp('2012-10-23 13:00:00'), Timestamp('2012-10-23 17:00:00'), Timestamp('2012-10-23 21:00:00'), Timestamp('2012-10-24 01:00:00'), Timestamp('2012-10-24 02:00:00'), Timestamp('2012-10-24 05:00:00'), Timestamp('2012-10-24 10:00:00'), Timestamp('2012-10-24 11:00:00'), Timestamp('2012-10-24 13:00:00'), Timestamp('2012-10-24 17:00:00'), Timestamp('2012-10-24 18:00:00'), Timestamp('2012-10-25 01:00:00'), Timestamp('2012-10-25 02:00:00'), Timestamp('2012-10-25 13:00:00'), Timestamp('2012-10-26 00:00:00'), Timestamp('2012-11-20 09:00:00'), Timestamp('2012-11-20 12:00:00'), Timestamp('2012-11-20 13:00:00'), Timestamp('2012-11-20 19:00:00'), Timestamp('2012-11-20 20:00:00'), Timestamp('2012-11-20 21:00:00'), Timestamp('2012-11-20 22:00:00'), Timestamp('2012-11-21 00:00:00'), Timestamp('2012-11-21 10:00:00'), Timestamp('2012-11-21 21:00:00'), Timestamp('2012-11-21 22:00:00'), Timestamp('2012-11-21 23:00:00'), Timestamp('2012-11-22 09:00:00'), Timestamp('2012-11-22 12:00:00'), Timestamp('2012-11-22 13:00:00'), Timestamp('2012-11-22 14:00:00'), Timestamp('2012-11-22 15:00:00'), Timestamp('2012-11-22 16:00:00'), Timestamp('2012-11-22 17:00:00'), Timestamp('2012-11-22 18:00:00'), Timestamp('2012-11-22 19:00:00'), Timestamp('2012-11-22 20:00:00'), Timestamp('2012-11-22 22:00:00'), Timestamp('2012-11-22 23:00:00'), Timestamp('2012-11-23 00:00:00'), Timestamp('2012-11-23 03:00:00'), Timestamp('2012-11-23 05:00:00'), Timestamp('2012-11-23 06:00:00'), Timestamp('2012-11-23 07:00:00'), Timestamp('2012-11-23 18:00:00'), Timestamp('2012-11-25 03:00:00'), Timestamp('2012-11-25 04:00:00'), Timestamp('2012-11-25 19:00:00'), Timestamp('2012-11-25 20:00:00'), Timestamp('2012-11-25 21:00:00'), Timestamp('2012-11-25 22:00:00'), Timestamp('2012-11-26 00:00:00'), Timestamp('2012-11-26 01:00:00'), Timestamp('2012-11-26 02:00:00'), Timestamp('2012-11-26 03:00:00'), Timestamp('2012-11-26 04:00:00'), Timestamp('2012-11-26 06:00:00'), Timestamp('2012-11-26 07:00:00'), Timestamp('2012-11-26 08:00:00'), Timestamp('2012-11-26 10:00:00'), Timestamp('2012-11-26 14:00:00'), Timestamp('2012-11-26 16:00:00'), Timestamp('2012-11-26 17:00:00'), Timestamp('2012-11-26 19:00:00'), Timestamp('2012-11-26 20:00:00'), Timestamp('2012-11-26 21:00:00'), Timestamp('2012-11-26 22:00:00'), Timestamp('2012-11-29 02:00:00'), Timestamp('2012-11-29 05:00:00'), Timestamp('2012-11-29 06:00:00'), Timestamp('2012-11-29 07:00:00'), Timestamp('2012-11-29 09:00:00'), Timestamp('2012-11-29 23:00:00'), Timestamp('2012-11-30 02:00:00'), Timestamp('2012-11-30 03:00:00'), Timestamp('2012-11-30 04:00:00'), Timestamp('2012-11-30 05:00:00'), Timestamp('2012-11-30 06:00:00'), Timestamp('2012-11-30 07:00:00'), Timestamp('2012-11-30 10:00:00'), Timestamp('2012-11-30 12:00:00'), Timestamp('2012-11-30 18:00:00'), Timestamp('2012-11-30 19:00:00'), Timestamp('2012-11-30 20:00:00'), Timestamp('2012-11-30 22:00:00'), Timestamp('2012-12-20 01:00:00'), Timestamp('2012-12-20 02:00:00'), Timestamp('2012-12-20 06:00:00'), Timestamp('2012-12-20 07:00:00'), Timestamp('2012-12-20 08:00:00'), Timestamp('2012-12-20 09:00:00'), Timestamp('2012-12-20 10:00:00'), Timestamp('2012-12-23 05:00:00'), Timestamp('2012-12-23 19:00:00'), Timestamp('2012-12-23 20:00:00'), Timestamp('2012-12-23 21:00:00'), Timestamp('2012-12-23 23:00:00'), Timestamp('2012-12-24 00:00:00'), Timestamp('2012-12-24 01:00:00'), Timestamp('2012-12-24 02:00:00'), Timestamp('2012-12-24 03:00:00'), Timestamp('2012-12-24 06:00:00'), Timestamp('2012-12-24 07:00:00'), Timestamp('2012-12-24 08:00:00'), Timestamp('2012-12-24 10:00:00'), Timestamp('2012-12-24 22:00:00'), Timestamp('2012-12-24 23:00:00'), Timestamp('2012-12-25 08:00:00'), Timestamp('2012-12-25 09:00:00'), Timestamp('2012-12-25 10:00:00'), Timestamp('2012-12-28 15:00:00'), Timestamp('2012-12-29 00:00:00'), Timestamp('2012-12-29 02:00:00'), Timestamp('2012-12-29 03:00:00'), Timestamp('2012-12-29 08:00:00'), Timestamp('2012-12-29 09:00:00'), Timestamp('2012-12-29 12:00:00'), Timestamp('2012-12-29 13:00:00'), Timestamp('2012-12-29 15:00:00'), Timestamp('2012-12-29 23:00:00'), Timestamp('2012-12-30 19:00:00')] not in index"

In [20]:
train_test_w0['windspeed'].index

DatetimeIndex(['2011-01-01 00:00:00', '2011-01-01 01:00:00',
               '2011-01-01 02:00:00', '2011-01-01 03:00:00',
               '2011-01-01 04:00:00', '2011-01-01 06:00:00',
               '2011-01-01 07:00:00', '2011-01-01 08:00:00',
               '2011-01-01 09:00:00', '2011-01-03 19:00:00',
               ...
               '2012-12-29 00:00:00', '2012-12-29 02:00:00',
               '2012-12-29 03:00:00', '2012-12-29 08:00:00',
               '2012-12-29 09:00:00', '2012-12-29 12:00:00',
               '2012-12-29 13:00:00', '2012-12-29 15:00:00',
               '2012-12-29 23:00:00', '2012-12-30 19:00:00'],
              dtype='datetime64[ns]', name='regdate', length=2180, freq=None)

랜덤포레스트 굿

In [None]:
X_train = ws1.drop(['windspeed', 'casual', 'registered', 'regcount'], axis=1)
X_test  = ws0.drop(['windspeed', 'casual', 'registered', 'regcount'], axis=1)
y_train = ws1['windspeed']
print(X_train.shape, X_test.shape, y_train.shape)

model = RandomForestRegressor(random_state=0)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_pred.shape

In [None]:
plt.hist(y_pred)
plt.show()

In [None]:
ws0['windspeed'] = y_pred
train = pd.concat([ws0,ws1]).sort_index()
train

# 최종 컬럼 선정
<pre>
타겟(1): 'casual' + 'registered'
   or
타겟(2): 'regcount'

삭제 컬럼: 'd',     'holiday', 'workingday',      'atemp',  'season'

시간 컬럼: 'y', 'm', 'h', 'w'

날짜 컬럼: 'day_type'

날씨 컬럼: 'season', 'weather', 'temp', 'humidity', 'windspeed'
       
       
다중공선: 분석피쳐중 상관계수가 높은 컬럼
'registered' <-> 'regcount' : regcount 삭제
'temp' <-> 'atemp' : atemp 삭제

'w' <-> 'day_type' : 나중에 고려대상
'season' <-> 'm' : season 삭제

결론
['casual', 'registered']
['m', 'h', 'y', 'w', 'weather', 'temp', 'humidity', 'windspeed', 'day_type']

# 학습 ㄱㄱ

https://www.latex4technics.com/?note=CMEL1Z

*  $ MSE = \frac{\mathrm{1} }{\mathrm{n}} \sum\nolimits_{i=1}^{n} (Y_i - \hat{Y_i})^{2}$

*  $ MAE = \frac{\mathrm{1} }{\mathrm{n}} \sum\nolimits_{i=1}^{n} |Y_i - \hat{Y_i}|$

*  $ RMSE = \sqrt{\frac{1}{n}\Sigma_{i=1}^{n}{\Big(\frac{Y_i - \hat{Y_i}}{N}\Big)^2}}$

*  $ RMSLE = \sqrt{\frac{1}{n}\Sigma_{i=1}^{n}{\Big(\frac{log(Y_i+1) - log(\hat{Y_i}+1)}{N}\Big)^2}}$

In [None]:
# !pip install xgboost
# !pip install lightgmb

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, mean_squared_error

# 추가
from sklearn.ensemble import AdaBoostRegressor, VotingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

In [None]:
X = train[['m', 'h', 'y', 'w', 'weather', 'temp', 'humidity', 'windspeed', 'day_type']]
Y = train[['regcount', 'registered', 'casual']]

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0, test_size=0.2)
dtr = DecisionTreeRegressor(random_state=0)
dtr.fit(X_train, Y_train)
dtr_pred = dtr.predict(X_test)

dtr_mse_regcount = mean_squared_error(Y_test['regcount'], dtr_pred.T[0])
dtr_mse_registered = mean_squared_error(Y_test['registered'], dtr_pred.T[1])
dtr_mse_casual = mean_squared_error(Y_test['casual'], dtr_pred.T[2])

print(dtr_mse_regcount, dtr_mse_registered, dtr_mse_casual)

In [None]:
X = train[['m', 'h', 'y', 'w', 'weather', 'temp', 'humidity', 'windspeed', 'day_type']]
y_list = [train['regcount'], train['registered'], train['casual']]

for i, y in enumerate(y_list):
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.2)
    dtr = DecisionTreeRegressor(random_state=0)
    dtr.fit(X_train, y_train)
    y_pred = dtr.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    
    print(y_list[i].name)
    print('MSE', mse)
    print('RMSE', np.sqrt(mse))
    print()

In [None]:
X = train[['m', 'h', 'y', 'w', 'weather', 'temp', 'humidity', 'windspeed', 'day_type']]
y_list = [train['regcount'], train['registered'], train['casual']]
model_list = [
    ("DTR"   , DecisionTreeRegressor(random_state=0)),
    ("LR"    , LinearRegression()                   ),
    ("ABOOST", AdaBoostRegressor()                  ),
    ("XGB"   , XGBRegressor()                       ),  
    ("LGBM"  , LGBMRegressor()                      ),
    ("VR"    , VotingRegressor([("XGB", XGBRegressor()), ("LGBM", LGBMRegressor())]) ) 
]

res_list = []
for model_tuple in model_list:
    # print('model :', model_tuple[0])
    for i, y in enumerate(y_list):
        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.2)
        model = model_tuple[1]
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        res_list.append([model_tuple[0], y_list[i].name, mse, np.sqrt(mse)])
    #     print(y_list[i].name)
    #     print('MSE   : ', mse)
    #     print('RMSE  : ', np.sqrt(mse))
    #     print()
    # print('-----------------------------')
res_df = pd.DataFrame(res_list, columns=['model', 'target', 'MSE', 'RMSE'])
res_df

In [None]:
res_df.pivot_table(index=['model', 'target'], values=['MSE', 'RMSE'])

In [None]:
plt.figure(figsize=(14,6))

plt.subplot(1,2,1)
sns.barplot(res_df, x='model', y='MSE', hue='target')
plt.title('MSE')

plt.subplot(1,2,2)
sns.barplot(res_df, x='model', y='RMSE', hue='target')
plt.title('RMSE')

plt.show()

In [None]:
sc_list = [
    ('Standard', StandardScaler()),
    ('MinMax', MinMaxScaler()),
    ('Robust', RobustScaler())
]
X = train[['m', 'h', 'y', 'w', 'weather', 'temp', 'humidity', 'windspeed', 'day_type']]
y_list = [train['regcount'], train['registered'], train['casual']]
model_list = [
    ("DTR"   , DecisionTreeRegressor(random_state=0)),
    ("LR"    , LinearRegression()                   ),
    ("ABOOST", AdaBoostRegressor()                  ),
    ("XGB"   , XGBRegressor()                       ),  
    ("LGBM"  , LGBMRegressor()                      ),
    ("VR"    , VotingRegressor([("XGB", XGBRegressor()), ("LGBM", LGBMRegressor())]) ) 
]

res_list = []
for j, sc in enumerate(sc_list):
    sc[1].fit(X)
    X = sc[1].transform(X)
    for model_tuple in model_list:
        # print('model :', model_tuple[0])
        for i, y in enumerate(y_list):
            X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.2)
            model = model_tuple[1]
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            mse = mean_squared_error(y_test, y_pred)
            res_list.append([sc[0], model_tuple[0], y_list[i].name, mse, np.sqrt(mse)])
        #     print(y_list[i].name)
        #     print('MSE   : ', mse)
        #     print('RMSE  : ', np.sqrt(mse))
        #     print()
        # print('-----------------------------')
res_df = pd.DataFrame(res_list, columns=['Scaler', 'model', 'target', 'MSE', 'RMSE'])
res_df

In [None]:
res_df.sort_values('MSE')

In [None]:
plt.figure(figsize=(14,6))

plt.subplot(1,2,1)
sns.barplot(res_df, x='model', y='MSE', hue='target')
plt.title('MSE')

plt.subplot(1,2,2)
sns.barplot(res_df, x='model', y='RMSE', hue='target')
plt.title('RMSE')

plt.show()