# Sprint アンサンブル学習

In [1]:
import os

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

In [2]:
data_raw = pd.read_csv("/Users/morishuuya/Desktop/dataset/kaggle/HousePrice/train.csv")

In [3]:
train_data = data_raw.copy()

In [4]:
X = train_data.loc[:, ["GrLivArea", "YearBuilt"]]
t = train_data.loc[:, "SalePrice"]

In [5]:
X_train, X_test, t_train, t_test = train_test_split(X, t, test_size=0.2)

## 【問題1】ブレンディングのスクラッチ実装
ブレンディング をスクラッチ実装し、単一モデルより精度があがる例を 最低3つ 示してください。精度があがるとは、検証用データに対する平均二乗誤差（MSE）が小さくなることを指します。

### 1回目

In [6]:
lr = LinearRegression()
dt = DecisionTreeRegressor()
svr = SVR(gamma="scale")

In [7]:
lr.fit(X_train, t_train)
dt.fit(X_train, t_train)
svr.fit(X_train, t_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [8]:
y_lr = lr.predict(X_test)
y_dt = dt.predict(X_test)
y_svr = svr.predict(X_test)
blending_1 = (y_lr + y_dt + y_svr) / 3

In [90]:
print("LogisticRegression\n",mean_squared_error(t_test, y_lr))
print("DecisionTree\n", mean_squared_error(t_test, y_dt))
print("SVR\n", mean_squared_error(t_test, y_svr))
print("Blend\n",mean_squared_error(t_test, blending_1))

LogisticRegression
 2490336490.8824496
DecisionTree
 2894135253.592466
SVR
 7222329801.952959
Blend
 2549034230.212108


### 2回目

In [10]:
lr2 = LinearRegression(normalize = True)
dt2 = DecisionTreeRegressor(max_depth=5)
svr2 = SVR(gamma="scale", kernel="linear")

In [11]:
lr2.fit(X_train, t_train)
dt2.fit(X_train, t_train)
svr2.fit(X_train, t_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [82]:
y_lr2 = lr2.predict(X_test)
y_dt2 = dt2.predict(X_test)
y_svr2 = svr2.predict(X_test)
blending_2 = ((y_lr2) + (y_dt2) + (y_svr2)) / 3

In [83]:
print(mean_squared_error(t_test, y_lr2))
print(mean_squared_error(t_test, y_dt2))
print(mean_squared_error(t_test, y_svr2))
print(mean_squared_error(t_test, blending_2))

2490336490.8824506
2201063572.6127987
2515649538.6875553
2086003299.5508206


### 3回目

In [30]:
sc = StandardScaler()
X_train_sc = sc.fit_transform(X_train)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [31]:
lr3_sc = LinearRegression()
dt3_sc = DecisionTreeRegressor(max_depth=3)
svr3_sc = SVR(gamma="scale", kernel="linear")

In [32]:
lr3 = LinearRegression()
dt3 = DecisionTreeRegressor(max_depth=3)
svr3 = SVR(gamma="scale", kernel="linear")

In [33]:
lr3_sc.fit(X_train_sc, t_train)
dt3_sc.fit(X_train_sc, t_train)
svr3_sc.fit(X_train_sc, t_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [34]:
lr3.fit(X_train, t_train)
dt3.fit(X_train, t_train)
svr3.fit(X_train, t_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [35]:
y_lr3_sc = lr3_sc.predict(X_test) 
y_dt3_sc = dt3_sc.predict(X_test) 
y_svr3_sc = svr3_sc.predict(X_test)

blending_3 = ((y_lr3_sc*0.0001) + (y_dt3_sc*0.9) + (y_svr3_sc*0.0009)) / 3

In [36]:
print(mean_squared_error(t_test, y_lr3_sc))
print(mean_squared_error(t_test, y_dt3_sc))
print(mean_squared_error(t_test, y_svr3_sc))

print(mean_squared_error(t_test, blending_3))

1.9734662534743764e+16
68950911460.19217
5360243488764.264
8930109409.286076


ブレンディングし、重みをつけると数値が下がった

## 【問題2】バギングのスクラッチ実装
バギング をスクラッチ実装し、単一モデルより精度があがる例を 最低1つ 示してください。

In [37]:
X_train_b, X_test_b, t_train_b, t_test_b = train_test_split(X, t, test_size=0.2, shuffle=True)

In [38]:
X_train_b_1 = X_train_b.iloc[:292, :]
t_train_b_1 = t_train_b.iloc[:292]

X_train_b_2 = X_train_b.iloc[292:584 , :]
t_train_b_2 = t_train_b.iloc[292:584]

X_train_b_3 = X_train_b.iloc[584:876 , :]
t_train_b_3 = t_train_b.iloc[584:876]

X_train_b_4 = X_train_b.iloc[876:1168 , :]
t_train_b_4 = t_train_b.iloc[876:1168]

In [39]:
dt_b_1 = DecisionTreeRegressor(max_depth=5)
dt_b_2 = DecisionTreeRegressor(max_depth=5)
dt_b_3 = DecisionTreeRegressor(max_depth=5)
dt_b_4 = DecisionTreeRegressor(max_depth=5)

In [40]:
dt_b_1.fit(X_train_b_1, t_train_b_1)
dt_b_2.fit(X_train_b_2, t_train_b_2)
dt_b_3.fit(X_train_b_3, t_train_b_3)
dt_b_4.fit(X_train_b_4, t_train_b_4)

DecisionTreeRegressor(criterion='mse', max_depth=5, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')

In [41]:
y_dt_b_1 = dt_b_1.predict(X_test_b)
y_dt_b_2 = dt_b_2.predict(X_test_b)
y_dt_b_3 = dt_b_3.predict(X_test_b)
y_dt_b_4 = dt_b_4.predict(X_test_b)

baging_1 = (y_dt_b_1+y_dt_b_2+y_dt_b_3+y_dt_b_4) / 4

In [42]:
print(mean_squared_error(t_test_b, y_dt_b_1))
print(mean_squared_error(t_test_b, y_dt_b_2))
print(mean_squared_error(t_test_b, y_dt_b_3))
print(mean_squared_error(t_test_b, y_dt_b_4))

print(mean_squared_error(t_test_b, baging_1))

1677612420.788353
2357848369.7927437
1821633571.3817184
2078780255.6058931
1374581904.5114648


バギングすると数値が下がった

## 【問題3】スタッキングのスクラッチ実装
スタッキング をスクラッチ実装し、単一モデルより精度があがる例を 最低1つ 示してください。

### 学習パート　1回目

#### データを分割しデータセットを3つ作る

In [44]:
X_train_1 = X_train.iloc[:389, :].values
t_train_1 =  t_train.iloc[:389].values

X_train_2 = X_train.iloc[389:778, :].values
t_train_2 =  t_train.iloc[389:778].values

X_train_3 = X_train.iloc[778:, :].values
t_train_3 =  t_train.iloc[778:].values

In [45]:
train_dataset_1_x = np.concatenate([X_train_2, X_train_3], axis=0)
train_dataset_1_t = np.concatenate([t_train_2, t_train_3], axis=0)

train_dataset_2_x = np.concatenate([X_train_1, X_train_3], axis=0)
train_dataset_2_t = np.concatenate([t_train_1, t_train_3], axis=0)

train_dataset_3_x = np.concatenate([X_train_1, X_train_2], axis=0)
train_dataset_3_t = np.concatenate([t_train_1, t_train_2], axis=0)

#### 1回目の学習、推定、ブレンドデータの作成

In [91]:
DTR_1 = DecisionTreeRegressor(max_depth=5)
DTR_2 = DecisionTreeRegressor(max_depth=5)
DTR_3 = DecisionTreeRegressor(max_depth=5)

DTR_1.fit(train_dataset_1_x, train_dataset_1_t)
DTR_2.fit(train_dataset_2_x, train_dataset_2_t)
DTR_3.fit(train_dataset_3_x, train_dataset_3_t)

y_DTR_1 = DTR_1.predict(X_train_1)
y_DTR_2 = DTR_2.predict(X_train_2)
y_DTR_3 = DTR_3.predict(X_train_3)

DTR_blend_data = np.concatenate([y_DTR_1, y_DTR_2, y_DTR_3], axis=0)
DTR_blend_data .shape

(1168,)

In [92]:
SVR_1 = SVR(gamma="scale")
SVR_2 = SVR(gamma="scale")
SVR_3 = SVR(gamma="scale")

SVR_1.fit(train_dataset_1_x, train_dataset_1_t)
SVR_2.fit(train_dataset_2_x, train_dataset_2_t)
SVR_3.fit(train_dataset_3_x, train_dataset_3_t)

y_SVR_1 = SVR_1.predict(X_train_1)
y_SVR_2 = SVR_2.predict(X_train_2)
y_SVR_3 = SVR_3.predict(X_train_3)

SVR_blend_data = np.concatenate([y_SVR_1, y_SVR_2, y_SVR_3], axis=0)
SVR_blend_data.shape

(1168,)

In [93]:
DTR_SVR_blend = np.stack([DTR_blend_data , SVR_blend_data], 1)
DTR_SVR_blend.shape

(1168, 2)

### 学習パート　2回目

#### データを分割しデータセットを3つ作る

In [94]:
DTSVR_train_1 = DTR_SVR_blend[:389, :]
DTSVR_train_2 = DTR_SVR_blend[389:778, :]
DTSVR_train_3 = DTR_SVR_blend[778:, :]

train_dataset_2_1_x = np.concatenate([DTSVR_train_2, DTSVR_train_3], axis=0)
train_dataset_2_2_x = np.concatenate([DTSVR_train_1, DTSVR_train_3], axis=0)
train_dataset_2_3_x = np.concatenate([DTSVR_train_1, DTSVR_train_2], axis=0)

#### 1回目の学習、推定、ブレンドデータの作成

In [95]:
DTR_2_1 = DecisionTreeRegressor(max_depth=5)
DTR_2_2 = DecisionTreeRegressor(max_depth=5)
DTR_2_3 = DecisionTreeRegressor(max_depth=5)

DTR_2_1.fit(train_dataset_2_1_x, train_dataset_1_t)
DTR_2_2.fit(train_dataset_2_2_x, train_dataset_2_t)
DTR_2_3.fit(train_dataset_2_3_x, train_dataset_3_t)

y_DTR_2_1 = DTR_2_1.predict(X_train_1)
y_DTR_2_2 = DTR_2_2.predict(X_train_2)
y_DTR_2_3 = DTR_2_3.predict(X_train_3)

DTR_blend_data_2 = np.concatenate([y_DTR_2_1, y_DTR_2_2, y_DTR_2_3], axis=0)
DTR_blend_data_2.shape

(1168,)

In [96]:
SVR_2_1 = SVR(gamma="scale")
SVR_2_2 = SVR(gamma="scale")
SVR_2_3 = SVR(gamma="scale")

SVR_2_1.fit(train_dataset_2_1_x, train_dataset_1_t)
SVR_2_2.fit(train_dataset_2_2_x, train_dataset_2_t)
SVR_2_3.fit(train_dataset_2_3_x, train_dataset_3_t)

y_SVR_2_1 = SVR_2_1.predict(X_train_1)
y_SVR_2_2 = SVR_2_2.predict(X_train_2)
y_SVR_2_3 = SVR_2_3.predict(X_train_3)

SVR_blend_data_2 = np.concatenate([y_SVR_2_1, y_SVR_2_2, y_SVR_2_3], axis=0)
SVR_blend_data_2.shape

(1168,)

In [97]:
DTR_SVR_blend_2 = np.stack([DTR_blend_data_2 , SVR_blend_data_2], 1)
DTR_SVR_blend_2.shape

(1168, 2)

### 学習パート 最終

In [98]:
last_SVR = SVR(gamma="scale")

In [99]:
last_SVR.fit(DTR_SVR_blend_2, t_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='scale',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

### 推定 

#### 推定パート　1回目

In [105]:
#X_test = X_test.values

In [106]:
y_test_DTR_1 = DTR_1.predict(X_test)
y_test_DTR_2 = DTR_2.predict(X_test)
y_test_DTR_3 = DTR_3.predict(X_test)

y_test_SVR_1 = SVR_1.predict(X_test)
y_test_SVR_2 = SVR_2.predict(X_test)
y_test_SVR_3 = SVR_3.predict(X_test)

blend_test_DTR = np.stack([y_test_DTR_1, y_test_DTR_2, y_test_DTR_3], 1)
blend_test_DTR_av = np.average(blend_test_DTR , 1)
blend_test_DTR_av.shape

(292,)

In [107]:
blend_test_SVR = np.stack([y_test_SVR_1, y_test_SVR_2, y_test_SVR_3], 1)
blend_test_SVR_av = np.average(blend_test_SVR , 1)
blend_test_SVR_av.shape

(292,)

In [108]:
blend_test_DTSVR = np.stack([blend_test_DTR_av, blend_test_SVR_av], 1)
blend_test_DTSVR.shape

(292, 2)

#### 推定パート　2回目

In [109]:
y_test_DTR_2_1 = DTR_2_1.predict(blend_test_DTSVR)
y_test_DTR_2_2 = DTR_2_2.predict(blend_test_DTSVR)
y_test_DTR_2_3 = DTR_2_3.predict(blend_test_DTSVR)

y_test_SVR_2_1 = SVR_2_1.predict(blend_test_DTSVR)
y_test_SVR_2_2 = SVR_2_2.predict(blend_test_DTSVR)
y_test_SVR_2_3 = SVR_2_3.predict(blend_test_DTSVR)

In [110]:
blend_test_DTR_2 = np.stack([y_test_DTR_2_1, y_test_DTR_2_2, y_test_DTR_2_3], 1)
blend_test_DTR_2_av = np.average(blend_test_DTR_2 , 1)
blend_test_DTR_2_av.shape

(292,)

In [111]:
blend_test_SVR_2 = np.stack([y_test_SVR_2_1, y_test_SVR_2_2, y_test_SVR_2_3], 1)
blend_test_SVR_2_av = np.average(blend_test_SVR_2 , 1)
blend_test_SVR_2_av.shape

(292,)

In [112]:
blend_test_DTSVR_2 = np.stack([blend_test_DTR_2_av, blend_test_SVR_2_av], 1)
blend_test_DTSVR_2.shape

(292, 2)

#### 推定　最終

In [113]:
last_y = last_SVR.predict(blend_test_DTSVR_2)

In [114]:
print(mean_squared_error(t_test, last_y))

7235973955.262264
