In [2]:
from sklearn.datasets import load_boston
import numpy as np
# Load the Boston housing data: feature matrix X and regression target y.
boston = load_boston()
X, y = boston.data, boston.target

# Shuffle the sample indices with a fixed seed so the split is reproducible.
np.random.seed(0)
indices = np.random.permutation(y.size)

# The first 70% of the shuffled indices form the training set, the rest the test set.
train_size = int(0.7 * y.size)
train_idx, test_idx = indices[:train_size], indices[train_size:]

train_X, train_y = X[train_idx], y[train_idx]
test_X, test_y = X[test_idx], y[test_idx]

In [7]:
import matplotlib.pyplot as plt
import numpy as np
# Summarise the target: histogram counts over 20 bins, plus mean and standard
# deviation. `bins` receives the bin edges returned by np.histogram and is
# what the plotting cell passes to plt.hist. (A previous `bins = np.arange(...)`
# assignment was dropped because it was overwritten here before ever being used.)
hist, bins = np.histogram(y, 20)
print(y.mean())
print(y.std())
print(hist)

22.5328063241
9.18801154528
[ 9 12 18 37 40 42 83 71 72 12 23 18 16 14  7  1  5  5  2 19]


In [11]:
# Draw the target histogram on an explicit axes, using the precomputed bin edges.
fig, ax = plt.subplots()
ax.hist(y, bins)
ax.set_title("Boston house data target distribution")
plt.show()

## The Boston data has 13 features per sample.

In [14]:
from sklearn import linear_model
from sklearn.metrics import mean_squared_error

In [15]:
# Baseline: predict the training-set mean of the target for every test sample.
baseline_value = train_y.mean()
pred_y = np.full(test_y.shape, baseline_value)
baseline_mse = mean_squared_error(test_y, pred_y)
print("Baseline MSE: %.3f" % baseline_mse)

Baseline MSE: 99.330


In [19]:
# Fit a plain linear regression on the training split and evaluate it on the
# held-out test split (MSE plus the estimator's own score()).
reg = linear_model.LinearRegression()
reg.fit(train_X, train_y)
pred_y = reg.predict(test_X)
score = reg.score(test_X, test_y)
linear_mse = mean_squared_error(test_y, pred_y)
print("Linear regression, MSE: %.3f, score %.3f" % (linear_mse, score))

Linear regression, MSE: 24.541, score 0.752


In [20]:
# Ridge regression with alpha selected by RidgeCV from the candidate grid,
# then evaluated on the held-out test split.
reg_cv = linear_model.RidgeCV(alphas=[0.01, 0.1, 1.0, 10.0])
reg_cv.fit(train_X, train_y)
pred_y = reg_cv.predict(test_X)
ridge_mse = mean_squared_error(test_y, pred_y)
ridge_score = reg_cv.score(test_X, test_y)
print("Ridge with alpha = %.3f, MSE: %.3f, score %.3f"
      % (reg_cv.alpha_, ridge_mse, ridge_score))

Ridge with alpha = 0.010, MSE: 24.546, score 0.752


In [21]:
# Lasso regression with alpha selected by LassoCV from the candidate grid,
# then evaluated on the held-out test split.
lasso_cv = linear_model.LassoCV(alphas=[0.01, 0.1, 1.0, 10.0])
pred_y = lasso_cv.fit(train_X, train_y).predict(test_X)
# Report the alpha chosen by *this* Lasso model. The previous version printed
# reg_cv.alpha_, i.e. the Ridge model's alpha, which mislabels the result and
# makes this cell depend on the Ridge cell having been run.
print("Lasso with alpha = %.3f, MSE: %.3f, score: %.3f"
      % (lasso_cv.alpha_, mean_squared_error(test_y, pred_y), lasso_cv.score(test_X, test_y)))

Lasso with alpha = 0.010, MSE: 24.768, score: 0.750


In [22]:
# Bayesian ridge regression with default settings, evaluated on the held-out test split.
br = linear_model.BayesianRidge()
br.fit(train_X, train_y)
pred_y = br.predict(test_X)
br_mse = mean_squared_error(test_y, pred_y)
br_score = br.score(test_X, test_y)
print("BayesianRidge MSE: %.3f, score: %.3f"
      % (br_mse, br_score))

BayesianRidge MSE: 26.217, score: 0.735
