## 事前準備

### ライブラリのインポート

In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# train_test_split lives in sklearn.model_selection since scikit-learn 0.18;
# sklearn.cross_validation was removed in 0.20, so it is only a fallback for
# very old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

### ファイルの読み込み

In [2]:
# Load the power-consumption CSV (the file is Shift_JIS encoded) and preview
# the first five rows.
df = pd.read_csv('電力消費量.csv', encoding='shift_jis')
df.head()

Unnamed: 0,日時,家電使用量,電灯使用量,温度_キッチン,湿度_キッチン,温度_居間,湿度_居間,温度_洗濯室,湿度_洗濯室,温度_執務室,...,温度_親部屋,湿度_親部屋,温度_外気,気圧,湿度_外気,風速,視程,露点温度,乱数1,乱数2
0,2016/1/11 17:00,60,30,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,...,17.033333,45.53,6.6,733.5,92.0,7.0,63.0,5.3,13.275433,13.275433
1,2016/1/11 17:10,60,30,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,...,17.066667,45.56,6.48,733.6,92.0,6.666667,59.166667,5.2,18.606195,18.606195
2,2016/1/11 17:20,50,30,19.89,46.3,19.2,44.626667,19.79,44.933333,18.926667,...,17.0,45.5,6.37,733.7,92.0,6.333333,55.333333,5.1,28.642668,28.642668
3,2016/1/11 17:30,50,40,19.89,46.066667,19.2,44.59,19.79,45.0,18.89,...,17.0,45.4,6.25,733.8,92.0,6.0,51.5,5.0,45.41039,45.41039
4,2016/1/11 17:40,60,40,19.89,46.333333,19.2,44.53,19.79,45.0,18.89,...,17.0,45.4,6.13,733.9,92.0,5.666667,47.666667,4.9,10.084097,10.084097


## データ分割

### 目的変数と説明変数をそれぞれ変数に代入

In [34]:
# Feature matrix: every column from position 3 through the last one, selected
# purely by position. `DataFrame.ix` was deprecated in pandas 0.20 and removed
# in 1.0; `.iloc` is the positional replacement.
# NOTE(review): which column sits at position 3 depends on whether the CSV
# carries a leading 'Unnamed: 0' column — verify that neither target column
# ends up inside X.
X = df.iloc[:, 3:].values
# Each target is selected with a one-element column list, so `.values` yields
# a 2-D array with a single column rather than a flat vector.
y1 = df[['家電使用量']].values
y2 = df[['電灯使用量']].values

### ホールドアウト検証で分割

In [35]:
# Hold out 20% of the rows as the test set. The split shuffles rows randomly,
# so the seed is pinned (same value the random forest below uses) to keep the
# partition, and every score computed from it, reproducible across re-runs.
X_train, X_test, y1_train, y1_test = train_test_split(
    X, y1, test_size=0.2, random_state=1
)

## 学習

### 線形回帰（正則化なし）

In [36]:
# Plain linear regression (no regularization), constructed with default settings.
reg_lr = LinearRegression()

In [37]:
# Fit the unregularized linear model on the training split.
reg_lr.fit(X_train, y1_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [38]:
# Show the fitted coefficients of the unregularized model.
reg_lr.coef_

array([[  2.04258993e+00,   1.68016576e+01,  -2.14479782e+01,
         -1.46856338e+01,   2.46873291e+01,   4.26848885e+00,
          2.71465157e+00,   1.98891975e+00,   9.01597207e-01,
          1.84119612e-01,   7.58874379e+00,   4.30317048e-01,
          3.68397836e-01,  -1.79848567e+00,   1.02782053e+01,
         -5.73088882e+00,  -2.12475286e+01,  -1.63384156e+00,
         -1.04855277e+01,   2.65178230e-01,  -9.96853770e-01,
          2.32912324e+00,   1.45993393e-01,   4.78528186e+00,
         -8.16078300e-03,  -8.16078300e-03]])

### LASSO回帰（L1正則化）

In [39]:
# Lasso (L1-regularized) regression; alpha = 20 sets the penalty strength.
reg_l1 = Lasso(alpha = 20)

In [40]:
# Fit the L1-regularized model on the training split.
reg_l1.fit(X_train, y1_train)

Lasso(alpha=20, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [41]:
# Show the fitted Lasso coefficients (to see which ones the L1 penalty zeroes out).
reg_l1.coef_

array([ 0.        ,  3.88193808,  0.        , -0.        ,  0.        ,
        0.        , -0.        ,  0.        , -0.        ,  0.        ,
        0.        ,  0.17987577, -0.        , -0.        ,  0.        ,
       -2.30399743, -0.        , -0.        , -0.        , -0.        ,
       -1.09589872,  0.        ,  0.        , -0.        , -0.        , -0.        ])

### リッジ回帰（L2正則化）

In [42]:
# Ridge (L2-regularized) regression; alpha = 20 sets the penalty strength.
reg_l2 = Ridge(alpha = 20)

In [43]:
# Fit the L2-regularized model on the training split.
reg_l2.fit(X_train, y1_train)

Ridge(alpha=20, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)

In [44]:
# Show the fitted Ridge coefficients for comparison with the unregularized model.
reg_l2.coef_

array([[  1.95995780e+00,   1.67397722e+01,  -2.12226513e+01,
         -1.45863884e+01,   2.45976518e+01,   4.26501797e+00,
          2.66847073e+00,   1.97155983e+00,   8.42560319e-01,
          1.84829274e-01,   7.53626361e+00,   4.28865465e-01,
          3.14983266e-01,  -1.79779494e+00,   1.02449734e+01,
         -5.72606252e+00,  -2.10710485e+01,  -1.64154434e+00,
         -1.03557893e+01,   2.65757086e-01,  -9.79537827e-01,
          2.33419870e+00,   1.45752008e-01,   4.68344100e+00,
         -8.19572859e-03,  -8.19572859e-03]])

### ランダムフォレスト回帰

In [45]:
# Random forest with 500 trees; n_jobs=-1 requests all available cores and
# random_state=1 fixes the seed so the fitted forest is repeatable.
reg_rf = RandomForestRegressor(n_estimators=500, n_jobs=-1, random_state=1)

In [46]:
# y1_train is a 2-D single-column array; ravel() flattens it to the 1-D
# target vector before fitting the forest.
reg_rf.fit(X_train, y1_train.ravel())

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=500, n_jobs=-1, oob_score=False, random_state=1,
           verbose=0, warm_start=False)

## 評価

### 線形回帰（正則化なし）

学習用データ

In [51]:
# RMSE of the unregularized linear model on the training split.
reg_lr_train_pred = reg_lr.predict(X_train)
reg_lr_train_mse = mean_squared_error(y1_train, reg_lr_train_pred)
np.sqrt(reg_lr_train_mse)

94.607650496168105

テスト用データ

In [52]:
# RMSE of the unregularized linear model on the held-out test split.
reg_lr_test_pred = reg_lr.predict(X_test)
reg_lr_test_mse = mean_squared_error(y1_test, reg_lr_test_pred)
np.sqrt(reg_lr_test_mse)

95.291991193841994

### LASSO回帰（L1正則化）

学習用データ

In [53]:
# RMSE of the Lasso model on the training split.
reg_l1_train_pred = reg_l1.predict(X_train)
reg_l1_train_mse = mean_squared_error(y1_train, reg_l1_train_pred)
np.sqrt(reg_l1_train_mse)

99.278987947232054

テスト用データ

In [54]:
# RMSE of the Lasso model on the held-out test split.
reg_l1_test_pred = reg_l1.predict(X_test)
reg_l1_test_mse = mean_squared_error(y1_test, reg_l1_test_pred)
np.sqrt(reg_l1_test_mse)

100.32020289730055

### リッジ回帰（L2正則化）

学習用データ

In [55]:
# RMSE of the Ridge model on the training split.
reg_l2_train_pred = reg_l2.predict(X_train)
reg_l2_train_mse = mean_squared_error(y1_train, reg_l2_train_pred)
np.sqrt(reg_l2_train_mse)

94.607758747003942

テスト用データ

In [56]:
# RMSE of the Ridge model on the held-out test split.
reg_l2_test_pred = reg_l2.predict(X_test)
reg_l2_test_mse = mean_squared_error(y1_test, reg_l2_test_pred)
np.sqrt(reg_l2_test_mse)

95.291847995174578

### ランダムフォレスト回帰

学習用データ

In [57]:
# RMSE of the random forest on the training split.
reg_rf_train_pred = reg_rf.predict(X_train)
reg_rf_train_mse = mean_squared_error(y1_train, reg_rf_train_pred)
np.sqrt(reg_rf_train_mse)

24.777547668855203

テスト用データ

In [58]:
# RMSE of the random forest on the held-out test split.
reg_rf_test_pred = reg_rf.predict(X_test)
reg_rf_test_mse = mean_squared_error(y1_test, reg_rf_test_pred)
np.sqrt(reg_rf_test_mse)

68.437048267560357