<a href="https://colab.research.google.com/github/semishen/ML100Days/blob/master/Day_040_HW.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## [作業重點]
使用 Sklearn 中的 Lasso, Ridge 模型，來訓練各種資料集，務必了解送進去模型訓練的**資料型態**為何，也請了解模型中各項參數的意義。

機器學習的模型非常多種，但要訓練的資料多半有固定的格式，確保你了解訓練資料的格式為何，這樣在應用新模型時，就能夠最快的上手開始訓練！

## 練習時間
試著使用 sklearn datasets 的其他資料集 (boston, ...)，來訓練自己的線性迴歸模型，並加上適當的正則化來觀察訓練情形。

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd

In [14]:
# Load the Boston housing data and wrap it in a DataFrame for a quick sanity check.
# NOTE: `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell requires scikit-learn < 1.2 to run as written.
boston_dset = datasets.load_boston()
df = pd.DataFrame(boston_dset.data, columns=boston_dset.feature_names)
df['Target'] = boston_dset.target

# Summary of column dtypes (printed), followed by the per-column count of
# missing values (displayed as the cell's result). `isna()` already yields a
# boolean frame, so it can be summed directly.
print(df.dtypes.value_counts())
df.isna().sum()

float64    14
dtype: int64


CRIM       0
ZN         0
INDUS      0
CHAS       0
NOX        0
RM         0
AGE        0
DIS        0
RAD        0
TAX        0
PTRATIO    0
B          0
LSTAT      0
Target     0
dtype: int64

In [17]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps the
# split identical on every run.
x_train, x_test, y_train, y_test = train_test_split(
    boston_dset.data,
    boston_dset.target,
    test_size=0.2,
    random_state=5,
)

# Two regularized models (Ridge and Lasso, both with alpha=0.1) and an
# unregularized linear regression to compare them against.
ridge = linear_model.Ridge(alpha=0.1)
lasso = linear_model.Lasso(alpha=0.1)
linear = linear_model.LinearRegression()


In [22]:
# Baseline: plain linear regression, evaluated on the held-out test split.

# Show arrays with 4 decimals and no scientific notation.
# This is a global NumPy setting, so it also applies to later cells.
np.set_printoptions(precision=4, suppress=True)

# fit() returns the estimator itself, so prediction can be chained onto it.
y_pred = linear.fit(x_train, y_train).predict(x_test)
r2 = r2_score(y_test, y_pred)
loss = mean_squared_error(y_test, y_pred)

print('params: ', linear.coef_)
print('loss: ', loss)
print('r2 score: ', r2)

params:  [ -0.1308   0.0494   0.0011   2.7054 -15.9571   3.414    0.0011  -1.4931
   0.3644  -0.0132  -0.9524   0.0117  -0.5941]
loss:  20.869292183770906
r2 score:  0.7334492147453064


In [24]:
# Lasso with alpha = 0.1, evaluated on the held-out test split.
# fit() returns the estimator itself, so prediction can be chained onto it.
y_pred = lasso.fit(x_train, y_train).predict(x_test)
r2 = r2_score(y_test, y_pred)
loss = mean_squared_error(y_test, y_pred)

print('=== lasso, alpha= 0.1 ===')
# At this alpha one coefficient is driven to zero, i.e. one feature is dropped.
print('params: ', lasso.coef_)
print('loss: ', loss)
print('r2 score: ', r2)

=== lasso, alpha= 0.1 ===
params:  [-0.1229  0.0542 -0.0418  0.8275 -0.      3.1984 -0.0049 -1.196   0.3406
 -0.0156 -0.8148  0.0121 -0.6498]
loss:  23.40636423156822
r2 score:  0.7010447354446618


In [25]:
# Ridge with alpha = 0.1, evaluated on the held-out test split.
# fit() returns the estimator itself, so prediction can be chained onto it.
y_pred = ridge.fit(x_train, y_train).predict(x_test)
r2 = r2_score(y_test, y_pred)
loss = mean_squared_error(y_test, y_pred)

print('=== ridge, alpha= 0.1 ===')
print('params: ', ridge.coef_)
print('loss: ', loss)
print('r2 score: ', r2)

=== ridge, alpha= 0.1 ===
params:  [ -0.1303   0.0497  -0.0043   2.6748 -14.667    3.4182   0.0002  -1.4736
   0.3617  -0.0133  -0.9396   0.0118  -0.5965]
loss:  20.95972224078456
r2 score:  0.7322942065880886


In [27]:
# Lasso: refit with several regularization strengths and compare test metrics.
alphas = [0.01, 0.03, 0.05, 0.07, 0.10]
for alpha in alphas:
    # A fresh model per alpha; this rebinds the notebook-level name `lasso`.
    lasso = linear_model.Lasso(alpha=alpha)
    y_pred = lasso.fit(x_train, y_train).predict(x_test)
    r2 = r2_score(y_test, y_pred)
    loss = mean_squared_error(y_test, y_pred)

    print('=== lasso, alpha= {:.2f} ==='.format(alpha))
    # Larger alphas may drive some coefficients to exactly zero.
    print('params: ', lasso.coef_)
    print('loss: ', loss)
    print('r2 score: ', r2)
    print('======\n')

=== lasso, alpha= 0.01 ===
params:  [ -0.1293   0.0503  -0.0119   2.4865 -12.3087   3.404   -0.001   -1.4324
   0.3575  -0.0136  -0.9178   0.0119  -0.603 ]
loss:  21.167936856245262
r2 score:  0.729634807852193

=== lasso, alpha= 0.03 ===
params:  [-0.1262  0.0521 -0.0378  2.0487 -5.0126  3.3841 -0.0052 -1.3112  0.3435
 -0.0144 -0.8487  0.0121 -0.6209]
loss:  22.128918967788717
r2 score:  0.7173607674011797

=== lasso, alpha= 0.05 ===
params:  [-0.1239  0.0534 -0.0541  1.6455 -0.      3.3513 -0.0078 -1.2243  0.3347
 -0.0151 -0.8027  0.0122 -0.6351]
loss:  23.10019010181038
r2 score:  0.7049553115194502

=== lasso, alpha= 0.07 ===
params:  [-0.1235  0.0537 -0.0492  1.3183 -0.      3.2901 -0.0066 -1.213   0.337
 -0.0153 -0.8075  0.0122 -0.641 ]
loss:  23.20865637974607
r2 score:  0.7035699376743375

=== lasso, alpha= 0.10 ===
params:  [-0.1229  0.0542 -0.0418  0.8275 -0.      3.1984 -0.0049 -1.196   0.3406
 -0.0156 -0.8148  0.0121 -0.6498]
loss:  23.40636423156822
r2 score:  0.7010447354

In [30]:
# Ridge: refit with several regularization strengths and compare test metrics.
alphas = [0.01, 0.03, 0.05, 0.07, 0.10]
for alpha in alphas:
    # A fresh model per alpha; this rebinds the notebook-level name `ridge`.
    ridge = linear_model.Ridge(alpha=alpha)
    y_pred = ridge.fit(x_train, y_train).predict(x_test)
    r2 = r2_score(y_test, y_pred)
    loss = mean_squared_error(y_test, y_pred)

    print('=== ridge, alpha= {:.2f} ==='.format(alpha))
    print('params: ', ridge.coef_)
    print('loss: ', loss)
    print('r2 score: ', r2)
    print('======\n')


=== ridge, alpha= 0.01 ===
params:  [ -0.1307   0.0494   0.0005   2.7021 -15.818    3.4145   0.001   -1.491
   0.3641  -0.0132  -0.951    0.0118  -0.5943]
loss:  20.878357369399158
r2 score:  0.7333334306388588

=== ridge, alpha= 0.03 ===
params:  [ -0.1307   0.0495  -0.0006   2.6958 -15.5469   3.4154   0.0008  -1.4869
   0.3636  -0.0132  -0.9483   0.0118  -0.5948]
loss:  20.8964976813263
r2 score:  0.7331017354598195

=== ridge, alpha= 0.05 ===
params:  [ -0.1306   0.0496  -0.0017   2.6896 -15.2849   3.4162   0.0006  -1.4829
   0.363   -0.0132  -0.9457   0.0118  -0.5953]
loss:  20.91462570067949
r2 score:  0.732870197286356

=== ridge, alpha= 0.07 ===
params:  [ -0.1305   0.0496  -0.0028   2.6836 -15.0317   3.4171   0.0005  -1.4791
   0.3625  -0.0133  -0.9432   0.0118  -0.5958]
loss:  20.93271425069434
r2 score:  0.7326391632307627

=== ridge, alpha= 0.10 ===
params:  [ -0.1303   0.0497  -0.0043   2.6748 -14.667    3.4182   0.0002  -1.4736
   0.3617  -0.0133  -0.9396   0.0118  -0.5965