In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
# `sklearn.datasets.samples_generator` was deprecated in scikit-learn 0.22 and
# removed in 0.24; importing from the public `sklearn.datasets` namespace works
# on both old and new releases.
from sklearn.datasets import make_regression
import pandas as pd

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Load the diabetes dataset

In [None]:
# Load scikit-learn's bundled diabetes dataset; the cells below read its
# `.data` (feature matrix) and `.target` (values to predict) attributes.
diabetes = datasets.load_diabetes()

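Because the diabetes dataset's feature columns are unlabeled in the raw array and somewhat confusing (read: complex), we want to simplify and only calculate the regression line for one feature.  A little `numpy` indexing below selects the column at index 2 while keeping the array two-dimensional (one column, one row per sample), so `diabetes_X_temp` gives us the single feature we need.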

In [None]:
# Insert a length-1 axis right after the sample axis, so the 2-D feature
# matrix becomes (samples, 1, features).
diabetes_X = np.expand_dims(diabetes.data, axis=1)

# Take index 2 along the last (feature) axis. The inserted axis survives,
# leaving a 2-D (samples, 1) column, which is the layout `fit` expects for X.
diabetes_X_temp = diabetes_X[..., 2]

We then split the data into two groups, `train` and `test`: the last 20 samples are held out for testing and everything before them is used for training. Of course we don't want to train and test against the same data! We know that's bad!

In [None]:
# Hold out the final 20 rows for evaluation; train on everything before them.
diabetes_X_train, diabetes_X_test = (
    diabetes_X_temp[:-20],
    diabetes_X_temp[-20:],
)

Here we access the `target` attribute on our dataset and split it at the same point as the features.  This gives us the matching y `train` and `test` data.

In [None]:
# Split the targets at the same boundary as the features (last 20 rows are
# the test set) so every X row stays aligned with its y value.
diabetes_y_train, diabetes_y_test = (
    diabetes.target[:-20],
    diabetes.target[-20:],
)

To create a linear regression model we just instantiate the `LinearRegression` class and `fit` it to our `X_train` and `y_train` data.

In [None]:
# Instantiate a linear regression estimator with its default settings
regr = linear_model.LinearRegression()

# Fit the estimator to the training split (features first, then targets)
regr.fit(X=diabetes_X_train, y=diabetes_y_train)

Let's print some statistics about our regression line.

In [None]:
# The fitted coefficient(s): one slope per input feature
print('Coefficients: \n', regr.coef_)

# Mean squared error on the held-out test set. This is the *mean* of the
# squared residuals (np.mean), not their sum, so it is labelled as MSE
# rather than "residual sum of squares".
mean_square_error = np.mean((regr.predict(diabetes_X_test) - diabetes_y_test) ** 2)
print("Mean squared error: {}".format(mean_square_error))

# LinearRegression.score returns the coefficient of determination (R^2):
# 1 is a perfect prediction, and it can be negative for a very poor fit.
print('Coefficient of determination (R^2): {}'.format(regr.score(diabetes_X_test, diabetes_y_test)))

If we wanted to take a look at our X test inputs and their respective predictions we could do this:

In [None]:
# Print the held-out feature values, then the model's prediction for each
# of those rows.
print("Inputed X: ", diabetes_X_test)
print("Predicted Y: ", regr.predict(diabetes_X_test))

And why not? Let's plot our data!

In [None]:
# Use the explicit Figure/Axes interface so labels, title and legend are
# attached to this specific plot.
fig, ax = plt.subplots()

# Black points: the actual held-out observations.
ax.scatter(diabetes_X_test, diabetes_y_test, color='black', label='Test data')

# Blue line: the fitted model's predictions for the same inputs.
ax.plot(diabetes_X_test, regr.predict(diabetes_X_test), color='blue',
        linewidth=3, label='Linear regression fit')

# Label everything so the figure can be understood on its own.
ax.set_xlabel('Input feature (column 2 of diabetes.data)')
ax.set_ylabel('Target (diabetes.target)')
ax.set_title('Linear regression on the held-out test set')
ax.legend()
plt.show()