## Imports

In [1]:
from sklearn import datasets, linear_model
import numpy as np

## Get dataset

In [6]:
# Load the diabetes dataset that ships with scikit-learn and pull out the
# feature array (`data`) and the values to predict (`target`).
diabetes = datasets.load_diabetes()
diabetes_X, diabetes_y = diabetes.data, diabetes.target

# Seed NumPy's global generator so the shuffled row order -- and therefore
# the train/test split -- is the same on every run.
np.random.seed(0)
indices = np.random.permutation(len(diabetes_y))

# Hold out the last 20 shuffled positions for testing; every position
# before them is used for training.
train_idx, test_idx = indices[:-20], indices[-20:]

diabetes_X_train, diabetes_y_train = diabetes_X[train_idx], diabetes_y[train_idx]
diabetes_X_test, diabetes_y_test = diabetes_X[test_idx], diabetes_y[test_idx]

# Linear Regression

## Model Training

In [3]:
# Fit an ordinary least-squares model on the training split. `fit` returns
# the estimator itself, so construction and fitting can be chained.
regr = linear_model.LinearRegression().fit(diabetes_X_train, diabetes_y_train)

# Display the fitted coefficients, one per input feature.
regr.coef_

array([  -3.78606011, -235.47377739,  533.3485139 ,  321.69200267,
       -827.89481203,  521.12370087,  115.57077061,  159.58767508,
        742.51130661,   55.80177895])

## Prediction and benchmarking

In [7]:
# Mean squared error: average of the squared differences between the
# model's predictions and the held-out test targets.
residuals = regr.predict(diabetes_X_test) - diabetes_y_test
mse = np.mean(residuals ** 2)
print("MSE: ", mse)

# Score is the coefficient of determination (R^2) on the test split:
# 1 is a perfect prediction, 0 is no better than always predicting the mean
# of the test targets, and it can be negative for a model worse than that.
score = regr.score(diabetes_X_test, diabetes_y_test)
print("Score: ", score)

('MSE: ', 3171.5237990479986)
('Score: ', 0.63775706901832518)


# Ridge Regression

## Model Training

In [10]:
# Fit a ridge (L2-regularised) linear model on the training split, using
# scikit-learn's default regularisation strength. `fit` returns the
# estimator itself, so construction and fitting can be chained.
# NOTE: this rebinds `regr`, replacing the model fitted in the earlier cell.
regr = linear_model.Ridge().fit(diabetes_X_train, diabetes_y_train)

# Display the fitted coefficients, one per input feature.
regr.coef_

array([  31.31725578,  -77.29426972,  305.40066449,  197.22411856,
          4.40038891,  -26.25411444, -145.55812267,  109.99426897,
        246.61932133,  107.23784543])

## Prediction and benchmarking

In [13]:
# Mean squared error: average of the squared differences between the
# model's predictions and the held-out test targets.
residuals = regr.predict(diabetes_X_test) - diabetes_y_test
mse = np.mean(residuals ** 2)
print("MSE: ", mse)

# Score is the coefficient of determination (R^2) on the test split:
# 1 is a perfect prediction, 0 is no better than always predicting the mean
# of the test targets, and it can be negative for a model worse than that.
score = regr.score(diabetes_X_test, diabetes_y_test)
print("Score: ", score)

('MSE: ', 4134.2045556140847)
('Score: ', 0.52780225834881911)
