
# Linear Regression Example

These examples use the `diabetes` dataset.

In [1]:
import numpy as np
from sklearn import datasets, linear_model

In [2]:
# Load the diabetes dataset and carve out a shuffled 70/30 train/test split.
print("Data diabetes loading ...")
diabetes = datasets.load_diabetes()
diabetes_X, diabetes_Y = diabetes.data, diabetes.target

# The first 70% of the shuffled indices go to training, the rest to testing.
total_size = len(diabetes_Y)
train_size = int(0.7 * total_size)

# Fixed seed so the permutation (and therefore the split) is repeatable.
np.random.seed(0)
indices = np.random.permutation(total_size)

train_x, train_y = diabetes_X[indices[:train_size]], diabetes_Y[indices[:train_size]]
test_x, test_y = diabetes_X[indices[train_size:]], diabetes_Y[indices[train_size:]]
print("Done")

Data diabetes loading ...
Done


In [3]:
# Ordinary least squares: fit on the training split, evaluate on the test split.
regr = linear_model.LinearRegression().fit(train_x, train_y)
pred_y = regr.predict(test_x)

# Mean squared error of the test predictions, plus the estimator's own score().
mse = ((pred_y - test_y) ** 2).mean()
score = regr.score(test_x, test_y)

print(
    "Linear regression",
    "MSE %.2f" % mse,
    "Variance score: %.2f" % score,
    sep="\n",
)

Linear regression
MSE 2855.80
Variance score: 0.57


In [4]:
# Ridge regression with alpha=0.1: fit on train, evaluate on test.
ridge = linear_model.Ridge(alpha=0.1).fit(train_x, train_y)
pred_y = ridge.predict(test_x)

# Mean squared error of the test predictions, plus the estimator's own score().
mse = ((pred_y - test_y) ** 2).mean()
score = ridge.score(test_x, test_y)

print(
    "Linear regression with ridge(L2) regularization",
    "MSE %.2f" % mse,
    "Variance score: %.2f" % score,
    sep="\n",
)

Linear regression with ridge(L2) regularization
MSE 2916.04
Variance score: 0.56


In [5]:
# Lasso regression with alpha=0.1: fit on train, evaluate on test.
lasso = linear_model.Lasso(alpha=0.1).fit(train_x, train_y)
pred_y = lasso.predict(test_x)

# Mean squared error of the test predictions, plus the estimator's own score().
mse = ((pred_y - test_y) ** 2).mean()
score = lasso.score(test_x, test_y)

print(
    "Linear regression with Lasso(L1) regularization",
    "MSE %.2f" % mse,
    "Variance score: %.2f" % score,
    sep="\n",
)

Linear regression with Lasso(L1) regularization
MSE 2842.24
Variance score: 0.57


In [6]:
# Elastic net with alpha=0.1 and l1_ratio=0.9: fit on train, evaluate on test.
elasticNet = linear_model.ElasticNet(alpha=0.1, l1_ratio=0.9).fit(train_x, train_y)
pred_y = elasticNet.predict(test_x)

# Mean squared error of the test predictions, plus the estimator's own score().
mse = ((pred_y - test_y) ** 2).mean()
score = elasticNet.score(test_x, test_y)

print(
    "Linear regression with elastic net regularization",
    "MSE %.2f" % mse,
    "Variance score: %.2f" % score,
    sep="\n",
)

Linear regression with elastic net regularization
MSE 4696.62
Variance score: 0.29


## The advantages of LARS are:
1. It is numerically efficient in contexts where p >> n (i.e., when the number of dimensions is significantly greater than the number of points)
2. It is computationally just as fast as forward selection and has the same order of complexity as ordinary least squares.
3. It produces a full piecewise linear solution path, which is useful in cross-validation or similar attempts to tune the model.
4. If two variables are almost equally correlated with the response, then their coefficients should increase at approximately the same rate. The algorithm thus behaves as intuition would expect, and also is more stable.
5. It is easily modified to produce solutions for other estimators, like the Lasso.

## The disadvantages of the LARS method include:
1. Because LARS is based upon an iterative refitting of the residuals, it would appear to be especially sensitive to the effects of noise.

In [7]:
# Least Angle Regression with default settings: fit on train, evaluate on test.
lars = linear_model.Lars().fit(train_x, train_y)
pred_y = lars.predict(test_x)

# Mean squared error of the test predictions, plus the estimator's own score().
mse = ((pred_y - test_y) ** 2).mean()
score = lars.score(test_x, test_y)

print(
    "Least Angle Regression",
    "MSE %.2f" % mse,
    "Variance score: %.2f" % score,
    sep="\n",
)

Least Angle Regression
MSE 2855.80
Variance score: 0.57


In [8]:
# Lasso fitted via least angle regression (default settings): fit on train, evaluate on test.
lasso_lars = linear_model.LassoLars().fit(train_x, train_y)
pred_y = lasso_lars.predict(test_x)

# Mean squared error of the test predictions, plus the estimator's own score().
mse = ((pred_y - test_y) ** 2).mean()
score = lasso_lars.score(test_x, test_y)

print(
    "Lasso with least angle regression",
    "MSE %.2f" % mse,
    "Variance score: %.2f" % score,
    sep="\n",
)

Lasso with least angle regression
MSE 3863.93
Variance score: 0.42


In [9]:
# Orthogonal Matching Pursuit (OMP)
# TODO: placeholder cell — no OMP model is fitted or evaluated here yet.

## The advantages of Bayesian Regression are:
1. It adapts to the data at hand.
2. It can be used to include regularization parameters in the estimation procedure.

## The disadvantages of Bayesian regression include:
1. Inference of the model can be time consuming.

In [10]:
# Bayesian ridge regression with default settings: fit on train, evaluate on test.
model = linear_model.BayesianRidge().fit(train_x, train_y)
pred_y = model.predict(test_x)

# Mean squared error of the test predictions, plus the estimator's own score().
mse = ((pred_y - test_y) ** 2).mean()
score = model.score(test_x, test_y)

print(
    "Bayesian regression with ridge regularization",
    "MSE %.2f" % mse,
    "Variance score: %.2f" % score,
    sep="\n",
)

Bayesian regression with ridge regularization
MSE 2871.90
Variance score: 0.57
