In [1]:

%matplotlib inline
from bokeh.plotting import figure, show, output_notebook



# Linear Regression Example

This example uses only the first feature of the `diabetes` dataset, in
order to illustrate a two-dimensional plot of this regression technique. The
straight line can be seen in the plot, showing how linear regression attempts
to draw a straight line that will best minimize the residual sum of squares
between the observed responses in the dataset, and the responses predicted by
the linear approximation.

The coefficients, the residual sum of squares and the variance score are also
calculated.



In [2]:
# Fit an ordinary least-squares model on the diabetes dataset and report its
# coefficients, test-set mean squared error and test-set score.
#
# Code source: Jaques Grobler
# License: BSD 3 clause


import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model

# Load the diabetes dataset
diabetes = datasets.load_diabetes()


# Use the full feature matrix (every column of diabetes.data); the model below
# therefore learns one coefficient per feature.
diabetes_X = diabetes.data

# Split the data into training/testing sets: the last 20 samples are held out
diabetes_X_train = diabetes_X[:-20]
diabetes_X_test = diabetes_X[-20:]

# Split the targets into training/testing sets with the same 20-sample hold-out
diabetes_y_train = diabetes.target[:-20]
diabetes_y_test = diabetes.target[-20:]

# Create linear regression object
regr = linear_model.LinearRegression()

# Train the model using the training sets
regr.fit(diabetes_X_train, diabetes_y_train)

# Predict once on the held-out samples and reuse the result below
diabetes_y_pred = regr.predict(diabetes_X_test)

# The coefficients. A single pre-formatted string is passed to print so the
# output is the same whether print is a statement (Python 2) or a function
# (Python 3); passing two arguments would print a tuple repr on Python 2.
print('Coefficients: \n%s' % (regr.coef_,))
# The mean squared error on the test set
print("Mean squared error: %.2f"
      % np.mean((diabetes_y_pred - diabetes_y_test) ** 2))
# Score returned by the estimator on the test set: 1 is perfect prediction
print('Variance score: %.2f' % regr.score(diabetes_X_test, diabetes_y_test))

Automatically created module for IPython interactive environment
('Coefficients: \n', array([  3.03499549e-01,  -2.37639315e+02,   5.10530605e+02,
         3.27736980e+02,  -8.14131709e+02,   4.92814588e+02,
         1.02848452e+02,   1.84606489e+02,   7.43519617e+02,
         7.60951722e+01]))
Mean squared error: 2004.57
Variance score: 0.59


In [3]:
# Render Bokeh figures inline in the notebook.
output_notebook()

# diabetes_X_test is taken from the full feature matrix, so flattening it with
# .ravel() yields n_samples * n_features x-values that cannot be paired with
# the n_samples targets/predictions. Plot observed against predicted responses
# instead: both arrays hold exactly one value per test sample.
diabetes_y_pred = regr.predict(diabetes_X_test)

p = figure(title='Linear regression: observed vs. predicted (test set)',
           x_axis_label='Observed response',
           y_axis_label='Predicted response')
p.scatter(diabetes_y_test, diabetes_y_pred)

# Reference line y = x; a point lying on it was predicted exactly.
axis_lo = float(min(diabetes_y_test.min(), diabetes_y_pred.min()))
axis_hi = float(max(diabetes_y_test.max(), diabetes_y_pred.max()))
p.line([axis_lo, axis_hi], [axis_lo, axis_hi])
show(p)



In [4]:
# Sweep the Ridge regularisation strength over 11 evenly spaced values in
# [0, 1]; for each one, fit on the training split and report the score and
# mean squared error on the test split.
for a in np.linspace(0, 1, 11):
    ridge_model = linear_model.Ridge(alpha=a)
    ridge_model.fit(diabetes_X_train, diabetes_y_train)
    # Predict once per alpha and reuse the result for the MSE below.
    ridge_pred = ridge_model.predict(diabetes_X_test)
    # Single-argument print(...) calls behave identically as a Python 2
    # statement and as a Python 3 function, unlike the bare `print 'x'` form.
    print('alpha = %f' % a)
    print('score = %f' % ridge_model.score(diabetes_X_test, diabetes_y_test))
    print('MSE = %f' % np.mean((ridge_pred - diabetes_y_test) ** 2))
    

alpha = 0.000000
score = 0.585075
MSE = 2004.567603
alpha = 0.100000
score = 0.570590
MSE = 2074.548443
alpha = 0.200000
score = 0.553680
MSE = 2156.243186
alpha = 0.300000
score = 0.537260
MSE = 2235.571331
alpha = 0.400000
score = 0.521550
MSE = 2311.466811
alpha = 0.500000
score = 0.506610
MSE = 2383.646911
alpha = 0.600000
score = 0.492431
MSE = 2452.146394
alpha = 0.700000
score = 0.478979
MSE = 2517.137150
alpha = 0.800000
score = 0.466206
MSE = 2578.845203
alpha = 0.900000
score = 0.454063
MSE = 2637.510602
alpha = 1.000000
score = 0.442501
MSE = 2693.368042


In [5]:
ridgeCV = linear_model.RidgeCV()