## 練習時間
試著使用 sklearn datasets 的其他資料集 (boston, ...)，來訓練自己的線性迴歸模型，並加上適當的正則化來觀察訓練情形。

In [8]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score

import warnings
warnings.simplefilter('ignore')

# Linear Regression

In [14]:
# Load the Boston housing dataset bundled with scikit-learn.
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn release to run as written.
boston = datasets.load_boston()

# Features and target as DataFrames. Y stays a one-column frame (2-D target),
# which is why the fitted coef_ below is printed as a 2-D array.
X = pd.DataFrame(data=boston.data, columns=boston.feature_names)
Y = pd.DataFrame(data=boston.target, columns=['target'])

# Hold out 30% of the rows for testing; random_state=0 keeps the split fixed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=0,
)

# Ordinary least squares baseline with no regularization
# (fit() returns the estimator itself).
model = LinearRegression().fit(X_train, Y_train)

# Score the held-out rows and report coefficients, R^2 and mean squared error.
Y_pred = model.predict(X_test)
print('Coefficients: ', model.coef_)
print('R2: %.12f' % r2_score(Y_test, Y_pred))
print('MSE: %.12f' % mean_squared_error(Y_test, Y_pred))

Coefficients:  [[-1.21310401e-01  4.44664254e-02  1.13416945e-02  2.51124642e+00
  -1.62312529e+01  3.85906801e+00 -9.98516565e-03 -1.50026956e+00
   2.42143466e-01 -1.10716124e-02 -1.01775264e+00  6.81446545e-03
  -4.86738066e-01]]
R2: 0.673382550640
MSE: 27.195965766883


# Lasso

In [15]:
# Load the Boston housing dataset bundled with scikit-learn.
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn release to run as written.
boston = datasets.load_boston()

# Features and target as DataFrames (Y is a one-column frame named 'target').
X = pd.DataFrame(data=boston.data, columns=boston.feature_names)
Y = pd.DataFrame(data=boston.target, columns=['target'])

# Hold out 30% of the rows for testing; random_state=0 keeps the split fixed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=0,
)

# Lasso (L1-penalized linear regression) with penalty strength alpha=1.0
# (fit() returns the estimator itself).
model = Lasso(alpha=1.0).fit(X_train, Y_train)

# Score the held-out rows and report coefficients, R^2 and mean squared error.
Y_pred = model.predict(X_test)
print('Coefficients: ', model.coef_)
print('R2: %.12f' % r2_score(Y_test, Y_pred))
print('MSE: %.12f' % mean_squared_error(Y_test, Y_pred))

Coefficients:  [-0.06586193  0.04832933 -0.          0.         -0.          0.86898466
  0.01217999 -0.75109378  0.2000743  -0.01395062 -0.84602363  0.00668818
 -0.73266568]
R2: 0.611543335960
MSE: 32.345038998569


# Ridge

In [16]:
# Load the Boston housing dataset bundled with scikit-learn.
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn release to run as written.
boston = datasets.load_boston()

# Features and target as DataFrames. Y stays a one-column frame (2-D target),
# which is why the fitted coef_ below is printed as a 2-D array.
X = pd.DataFrame(data=boston.data, columns=boston.feature_names)
Y = pd.DataFrame(data=boston.target, columns=['target'])

# Hold out 30% of the rows for testing; random_state=0 keeps the split fixed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=0,
)

# Ridge (L2-penalized linear regression) with penalty strength alpha=1.0
# (fit() returns the estimator itself).
model = Ridge(alpha=1.0).fit(X_train, Y_train)

# Score the held-out rows and report coefficients, R^2 and mean squared error.
Y_pred = model.predict(X_test)
print('Coefficients: ', model.coef_)
print('R2: %.12f' % r2_score(Y_test, Y_pred))
print('MSE: %.12f' % mean_squared_error(Y_test, Y_pred))

Coefficients:  [[-1.18308575e-01  4.61259764e-02 -2.08626416e-02  2.45868617e+00
  -8.25958494e+00  3.89748516e+00 -1.79140171e-02 -1.39737175e+00
   2.18432298e-01 -1.16338128e-02 -9.31711410e-01  7.26996266e-03
  -4.94046539e-01]]
R2: 0.666581909149
MSE: 27.762224592167
