In [36]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

In [25]:
# Fetch the diabetes dataset as a (features, target) pair rather than a Bunch object.
diabetes_X, diabetes_y = datasets.load_diabetes(
    return_X_y=True,
)

In [26]:
# Sanity-check the feature matrix dimensions: (n_samples, n_features).
diabetes_X.shape

(442, 10)

In [27]:
# Hold out 20% of the samples for testing; random_state is fixed so the
# split is reproducible across runs.
(
    diabetes_X_train,
    diabetes_X_test,
    diabetes_y_train,
    diabetes_y_test,
) = train_test_split(
    diabetes_X,
    diabetes_y,
    test_size=0.2,
    random_state=5,
)

In [28]:
# Create the linear regression estimator with its default settings
# (no constructor arguments are passed).
regr = linear_model.LinearRegression()

In [29]:
# Fit the estimator on the training split only; the test split stays unseen.
regr.fit(X=diabetes_X_train, y=diabetes_y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [41]:
# Predict on both splits so train and test errors can be compared afterwards.
diabetes_y_train_pred = regr.predict(X=diabetes_X_train)
diabetes_y_pred = regr.predict(X=diabetes_X_test)

In [35]:
# Fitted parameters: the coefficient vector and the intercept.
# NOTE(review): the "Intrecept" label is a typo for "Intercept"; it is left
# unchanged here so the printed output stays exactly the same.
print("Coefficients: \n", regr.coef_)
print("Intrecept: \n", regr.intercept_)

# Mean squared error on the training split and on the held-out test split.
print(
    "Mean squared error for train set: %.2f"
    % mean_squared_error(diabetes_y_train, diabetes_y_train_pred)
)
print(
    "Mean squared error for test set: %.2f"
    % mean_squared_error(diabetes_y_test, diabetes_y_pred)
)

# Coefficient of determination (R^2) on the test split; 1 is a perfect prediction.
print(
    "Coefficient of determination: %.2f"
    % r2_score(diabetes_y_test, diabetes_y_pred)
)

Coefficients: 
 [   2.72195846 -255.94592688  522.83461574  353.10273364 -827.58494078
  543.32591808  115.93459912  214.68877404  694.94194778   32.73088487]
Intrecept: 
 152.22183644503602
Mean squared error for train set: 2845.00
Mean squared error for test set: 2981.59
Coefficient of determination: 0.53


In [37]:
# Non-linear features: expand the inputs with all degree-2 polynomial terms.
# include_bias=False omits the constant column.
diabetes_X_ = PolynomialFeatures(
    degree=2,
    include_bias=False,
).fit_transform(diabetes_X)

In [45]:
# Show only the dimensions of the expanded feature matrix instead of dumping
# the whole array; the recorded output of this cell is the shape tuple.
diabetes_X_.shape

(442, 65)

In [39]:
# Split the polynomial feature matrix with the same test_size and random_state
# as the earlier split of the raw features.
# NOTE: this rebinds the same train/test variable names used by that earlier split.
(
    diabetes_X_train,
    diabetes_X_test,
    diabetes_y_train,
    diabetes_y_test,
) = train_test_split(
    diabetes_X_,
    diabetes_y,
    test_size=0.2,
    random_state=5,
)

In [40]:
# Refit the existing `regr` estimator on the polynomial training features.
# NOTE: this is the same object fitted earlier on the raw features, so its
# learned parameters are replaced by this call.
regr.fit(X=diabetes_X_train, y=diabetes_y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [46]:
# Predict on both splits of the polynomial features for the train/test comparison.
diabetes_y_train_pred = regr.predict(X=diabetes_X_train)
diabetes_y_pred = regr.predict(X=diabetes_X_test)

In [49]:
# Fitted parameters of the refit model: the coefficient vector and the intercept.
# NOTE(review): the "Intrecept" label is a typo for "Intercept"; it is left
# unchanged here so the printed output stays exactly the same.
print("Coefficients: \n", regr.coef_)
print("Intrecept: \n", regr.intercept_)

# Mean squared error on the training split and on the held-out test split.
print(
    "Mean squared error for train set: %.2f"
    % mean_squared_error(diabetes_y_train, diabetes_y_train_pred)
)
print(
    "Mean squared error for test set: %.2f"
    % mean_squared_error(diabetes_y_test, diabetes_y_pred)
)

# Coefficient of determination (R^2) on the test split; 1 is a perfect prediction.
print(
    "Coefficient of determination: %.2f"
    % r2_score(diabetes_y_test, diabetes_y_pred)
)

Coefficients: 
 [ 9.10505214e+01 -3.06205123e+02  4.67467078e+02  3.61582517e+02
 -5.84529582e+04  5.14043898e+04  2.14657451e+04 -1.40195088e+02
  1.98245032e+04  1.91613198e+01  1.23203317e+03  2.20866661e+03
 -8.70149982e+02  1.40586771e+03  1.28364692e+03 -7.01696393e+03
  5.24984110e+03  7.80356381e+03  8.91916834e+02  1.23933345e+03
 -1.84901429e+00  8.28798375e+02  1.78140473e+03  4.24712314e+03
 -3.19149460e+03 -2.79840398e+03 -5.62702982e+03 -3.71764404e+01
  1.84343895e+03  2.40735669e+02  4.49244847e+03 -5.58907360e+03
  4.89526278e+03  8.69628355e+02 -6.74079993e+02  1.50242757e+03
  1.45862960e+03 -6.13022041e+02  1.69684979e+04 -1.23256448e+04
 -5.07018127e+03 -5.55266896e+02 -6.34906034e+03 -3.59928324e+03
  2.29649995e+04 -2.43678707e+04 -1.19409302e+04 -1.80433446e+04
  1.65129489e+05 -3.85734734e+03  6.93446815e+03 -5.86376496e+02
  4.11726562e+03 -1.48232008e+05  2.59165327e+03  4.10474556e+03
  1.63750493e+04 -6.57527634e+04  5.67955240e+03  1.27796764e+04
 -7.56058