# Python機械学習クックブック

In [26]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge, Lasso, RidgeCV
from sklearn.datasets import load_boston
from sklearn.preprocessing import PolynomialFeatures, StandardScaler

## 直線によるフィッティング

In [10]:
import warnings

# Silence only FutureWarning -- the category scikit-learn uses for its
# deprecation notice on load_boston -- instead of muting every warning, so
# unrelated problems raised by this or later cells stay visible.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this notebook needs scikit-learn < 1.2 to run as written.
warnings.simplefilter("ignore", category=FutureWarning)

# Load the data set and keep only its first two feature columns.
boston = load_boston()
features = boston.data[:, 0:2]
target = boston.target

# Create the linear regression estimator.
regression = LinearRegression()

# Fit the estimator on the two selected features.
model = regression.fit(features, target)

# Show the fitted intercept.
print(model.intercept_)

# Show the fitted weight of each feature.
print(model.coef_)

# Compare the prediction for the first observation with its true target.
# Both are multiplied by 1000 for display (presumably because the target is
# expressed in thousands of dollars -- confirm against the data set docs).
print("予測値:", model.predict(features)[0] * 1000)
print("正解:", target[0] * 1000)

22.485628113468223
[-0.35207832  0.11610909]
予測値: 24573.366631705547
正解: 24000.0


## 交互作用の影響の取り扱い

In [14]:
# Reload the data set and keep the first two feature columns.
boston = load_boston()
features, target = boston.data[:, :2], boston.target

# Expand the features with their interaction (cross-product) term; no bias
# column is added.
interaction = PolynomialFeatures(
    interaction_only=True, include_bias=False, degree=3
)
features_interaction = interaction.fit_transform(features)

# Fit a linear regression on the expanded feature matrix.
regression = LinearRegression()
model = regression.fit(X=features_interaction, y=target)

# Show the raw feature values of the first observation.
print(features[0])

# Compute the interaction term by hand as the element-wise product of the
# two columns, and show it for the first observation.
interaction_term = features[:, 0] * features[:, 1]
print(interaction_term[0])

[6.32e-03 1.80e+01]
0.11376


## 非線形な関係の学習

In [17]:
# Reload the data set and keep only the first feature column (as a 2-D slice).
boston = load_boston()
features, target = boston.data[:, :1], boston.target

# Generate polynomial features up to the third power (x, x^2, x^3), without
# a bias column.
polynomial = PolynomialFeatures(include_bias=False, degree=3)
features_polynomial = polynomial.fit_transform(features)

# Fit a linear regression on the polynomial features.
regression = LinearRegression()
model = regression.fit(X=features_polynomial, y=target)

# Show the original feature value of the first observation ...
print(features[0])

# ... followed by the value together with its square and cube.
print(features_polynomial[0])

[0.00632]
[6.32000000e-03 3.99424000e-05 2.52435968e-07]


## 正則化によるバリアンスの低減

In [28]:
# Reload the data set, this time using every feature column.
boston = load_boston()
features, target = boston.data, boston.target

# Standardize the features before applying a penalized model.
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)

# Two ridge estimators: one with a fixed alpha, and one that chooses among
# three candidate alpha values by cross-validation.
regression = Ridge(alpha=0.5)
regr_cv = RidgeCV(alphas=[0.1, 1.0, 10.0])

# Fit both estimators on the standardized features.
model = regression.fit(X=features_standardized, y=target)
model_cv = regr_cv.fit(X=features_standardized, y=target)

# Show the coefficients of the cross-validated model.
print(model_cv.coef_)

# Show the alpha value that cross-validation selected.
print(model_cv.alpha_)

[-0.91987132  1.06646104  0.11738487  0.68512693 -2.02901013  2.68275376
  0.01315848 -3.07733968  2.59153764 -2.0105579  -2.05238455  0.84884839
 -3.73066646]
1.0


## Lasso回帰による特徴量削減

In [30]:
# Reload the data set, using every feature column.
boston = load_boston()
features, target = boston.data, boston.target

# Standardize the features before applying a penalized model.
scaler = StandardScaler()
features_standardized = scaler.fit_transform(features)

# Two lasso estimators: a moderate penalty, and a much larger one that
# pushes the coefficients to zero so the features stop being used.
regression = Lasso(alpha=0.5)
regression_a10 = Lasso(alpha=10)

# Fit both estimators on the standardized features.
model = regression.fit(X=features_standardized, y=target)
model_a10 = regression_a10.fit(X=features_standardized, y=target)

# Show the coefficients for alpha=0.5, then for alpha=10.
print(model.coef_)
print(model_a10.coef_)


[-0.11526463  0.         -0.          0.39707879 -0.          2.97425861
 -0.         -0.17056942 -0.         -0.         -1.59844856  0.54313871
 -3.66614361]
[-0.  0. -0.  0. -0.  0. -0.  0. -0. -0. -0.  0. -0.]
