In [1]:
"""
Chapter_1.1. Generalized Linear Models
Reference:
http://scikit-learn.org/stable/modules/linear_model.html
"""
import warnings
# Silence warnings attributed to modules matching "scipy" whose message
# starts with "internal gelsd", so they do not clutter the cell outputs below.
warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd")

from sklearn import linear_model

In [2]:
# 1.1.1. Ordinary Least Squares
# Three samples whose two features are identical; the target equals either feature.
ols_samples = [[0, 0], [1, 1], [2, 2]]
ols_targets = [0, 1, 2]

reg = linear_model.LinearRegression()
# Printing the value returned by fit() shows the fitted estimator's repr.
print(reg.fit(ols_samples, ols_targets))
# -> LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

# One learned weight per feature.
print(reg.coef_)
# -> [ 0.5  0.5]

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
[ 0.5  0.5]


In [3]:
# 1.1.2. Ridge Regression
ridge_samples = [[0, 0], [0, 0], [1, 1]]
ridge_targets = [0, .1, 1]

reg = linear_model.Ridge(alpha=0.5)
print(reg.fit(ridge_samples, ridge_targets))
# -> Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None,
#      normalize=False, random_state=None, solver='auto', tol=0.001)

# Show the fitted weights first, then the intercept.
for fitted_value in (reg.coef_, reg.intercept_):
    print(fitted_value)
# -> [ 0.34545455  0.34545455]
# -> 0.136363636364

Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001)
[ 0.34545455  0.34545455]
0.136363636364


In [4]:
# 1.1.2.2. Setting the regularization parameter: generalized Cross-Validation
# Candidate regularization strengths handed to RidgeCV.
candidate_alphas = [0.1, 1.0, 10.0]
cv_samples = [[0, 0], [0, 0], [1, 1]]
cv_targets = [0, .1, 1]

reg = linear_model.RidgeCV(alphas=candidate_alphas)
print(reg.fit(cv_samples, cv_targets))
# -> RidgeCV(alphas=[0.1, 1.0, 10.0], cv=None, fit_intercept=True, gcv_mode=None,
#        normalize=False, scoring=None, store_cv_values=False)

# The alpha stored on the fitted estimator.
print(reg.alpha_)
# -> 0.1

RidgeCV(alphas=[0.1, 1.0, 10.0], cv=None, fit_intercept=True, gcv_mode=None,
    normalize=False, scoring=None, store_cv_values=False)
0.1


In [5]:
# 1.1.3. Lasso
lasso_samples, lasso_targets = [[0, 0], [1, 1]], [0, 1]

reg = linear_model.Lasso(alpha=0.1)
print(reg.fit(lasso_samples, lasso_targets))
# -> Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
#      normalize=False, positive=False, precompute=False, random_state=None,
#      selection='cyclic', tol=0.0001, warm_start=False)

# Predict for a single sample with both features set to 1.
lasso_query = [[1, 1]]
print(reg.predict(lasso_query))
# -> [ 0.8]

Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)
[ 0.8]


In [6]:
# 1.1.8. LARS Lasso
lars_samples, lars_targets = [[0, 0], [1, 1]], [0, 1]

reg = linear_model.LassoLars(alpha=0.1)
print(reg.fit(lars_samples, lars_targets))
# -> LassoLars(alpha=0.1, copy_X=True, eps=..., fit_intercept=True,
#        fit_path=True, max_iter=500, normalize=True, positive=False,
#        precompute='auto', verbose=False)

# Per-feature weights of the fitted model.
print(reg.coef_)
# -> [ 0.71715729  0.        ]

LassoLars(alpha=0.1, copy_X=True, eps=2.2204460492503131e-16,
     fit_intercept=True, fit_path=True, max_iter=500, normalize=True,
     positive=False, precompute='auto', verbose=False)
[ 0.71715729  0.        ]


In [7]:
# 1.1.10.1. Bayesian Ridge Regression
# Four samples with two identical features; the target equals the feature value.
X = [
    [0., 0.],
    [1., 1.],
    [2., 2.],
    [3., 3.],
]
Y = [0., 1., 2., 3.]

reg = linear_model.BayesianRidge()
print(reg.fit(X, Y))
# -> BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True,
#        fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
#        normalize=False, tol=0.001, verbose=False)

# Predict for a sample where only the first feature is non-zero.
bayes_query = [[1, 0.]]
print(reg.predict(bayes_query))
# -> [ 0.50000013]

print(reg.coef_)
# -> [ 0.49999993  0.49999993]

BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True,
       fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
       normalize=False, tol=0.001, verbose=False)
[ 0.50000013]
[ 0.49999993  0.49999993]


In [8]:
# 1.1.16. Polynomial regression: extending linear models with basis functions  Ver.1
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

# The integers 0..5 laid out as three rows of two features.
X = np.arange(6).reshape((3, 2))
print(X)
# -> [[0 1]
#     [2 3]
#     [4 5]]

# Expand the two input columns with a degree-2 PolynomialFeatures transform.
poly = PolynomialFeatures(degree=2)
X_expanded = poly.fit_transform(X)
print(X_expanded)
# -> [[  1.   0.   1.   0.   0.   1.]
#     [  1.   2.   3.   4.   6.   9.]
#     [  1.   4.   5.  16.  20.  25.]]

[[0 1]
 [2 3]
 [4 5]]
[[  1.   0.   1.   0.   0.   1.]
 [  1.   2.   3.   4.   6.   9.]
 [  1.   4.   5.  16.  20.  25.]]


In [9]:
# 1.1.16. Polynomial regression: extending linear models with basis functions  Ver.2
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

# Degree-3 feature expansion feeding a linear regression without an intercept term.
pipeline_steps = [
    ('poly', PolynomialFeatures(degree=3)),
    ('linear', LinearRegression(fit_intercept=False)),
]
model = Pipeline(pipeline_steps)

# Targets generated from an order-3 polynomial of x.
x = np.arange(5)
y = 3 - 2 * x + x ** 2 - x ** 3

# The pipeline is fitted on x reshaped into a single column.
x_column = x.reshape(-1, 1)
model = model.fit(x_column, y)

# Coefficients of the final 'linear' step.
linear_step = model.named_steps['linear']
print(linear_step.coef_)
# -> [ 3. -2.  1. -1.]

[ 3. -2.  1. -1.]


In [10]:
# 1.1.16. Polynomial regression: extending linear models with basis functions  Ver.3
import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.preprocessing import PolynomialFeatures

# All four combinations of two binary inputs; the label is their XOR.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.bitwise_xor(X[:, 0], X[:, 1])
print(y)
# -> [0 1 1 0]

# Replace X with its interaction-only feature expansion, cast to integers.
interaction = PolynomialFeatures(interaction_only=True)
X = interaction.fit_transform(X).astype(int)
print(X)
# -> [[1 0 0 0]
#     [1 0 1 0]
#     [1 1 0 0]
#     [1 1 1 1]]

# Train a perceptron on the expanded features, then predict and score on the same data.
clf = Perceptron(fit_intercept=False, max_iter=10, tol=None,
                 shuffle=False)
clf.fit(X, y)
print(clf.predict(X))
# -> [0 1 1 0]
print(clf.score(X, y))
# -> 1.0

[0 1 1 0]
[[1 0 0 0]
 [1 0 1 0]
 [1 1 0 0]
 [1 1 1 1]]
[0 1 1 0]
1.0
