In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Dowload diabetes data
diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)

In [3]:
# Divide data between training and testing datasets
diabetes_X_train = diabetes_X[:-20, :]   #all except the last 2 data rows
diabetes_X_test = diabetes_X[-20:, :]   #last 20 data rows

In [4]:
# Divide target values (results/labels) between training and testing datasets
diabetes_y_train = diabetes_y[:-20]
diabetes_y_test = diabetes_y[-20:]
diabetes_X_train.shape

(422, 10)

In [5]:
# Create a linear regression object
regression_object = linear_model.LinearRegression()

In [6]:
# Train
regression_object.fit(diabetes_X_train, diabetes_y_train)

LinearRegression()

In [8]:
#Predict with test data
diabetes_y_pred = regression_object.predict(diabetes_X_test)

In [10]:
# Print the coefficients
print('Coefficients: \n', regression_object.coef_)

Coefficients: 
 [ 3.03499549e-01 -2.37639315e+02  5.10530605e+02  3.27736980e+02
 -8.14131709e+02  4.92814588e+02  1.02848452e+02  1.84606489e+02
  7.43519617e+02  7.60951722e+01]


In [11]:
#Print the intercept
print('Intercept: \n', regression_object.intercept_)

Intercept: 
 152.76430691633442


In [15]:
#mean squared error
print('Mean squared error: {:.2f}'.format(mean_squared_error(diabetes_y_test, diabetes_y_pred)))
# The coefficient determination: 1 = perfect prediction 
R2 = r2_score(diabetes_y_test, diabetes_y_pred)
print('Coefficient of determination: {:.2f}'.format(R2))
print(f'Coefficient of determination: {R2:.2f}')  #newer version

Mean squared error: 2004.57
Coefficient of determination: 0.59
Coefficient of determination: 0.59


In [13]:
#Adjusted R squared :
n = len(diabetes_X)  #total number of data
p = diabetes_X_train.shape[1]   #number of independent variables (x1, x2, ...)
Adj_r2 = 1-(1-R2)*(n-1)/(n-p-1)
print(Adj_r2)

0.5754482791198476
