In [511]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

In [512]:
# Load diabetes data set

diabetes = datasets.load_diabetes()

In [513]:
# take only one predictor from diabetes.data 
# 
diabetes_X = diabetes.data[:, [0, 2, 3, 4, 5, 8]]
diabetes_X.shape

(442, 6)

In [514]:
num_features = diabetes_X.shape[1]
num_features

6

In [515]:
diabetes.target.shape

(442,)

In [516]:
# Create training and test sets

idx = -60

train_X = diabetes_X[:idx]
test_X = diabetes_X[idx:]

# Split targets into training and test sets

train_Y = diabetes.target[:idx]
test_Y = diabetes.target[idx:]

In [517]:
# Create linear regression model using all the predictors 

regr = linear_model.LinearRegression()
regr.fit(train_X, train_Y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [518]:
# Obtain the predictions

pred_Y = regr.predict(test_X)

In [519]:
# Mean Squared Error

mean_squared_error(pred_Y, test_Y)

2236.7565002517217

In [520]:
# R2 score

r2_score(pred_Y, test_Y)

0.34674508313574559

In [521]:
# F-statistic = (TSS - RSS)/p / RSS/(n - p - 1) = R2_score / (1 - R2_score) * (n - p - 1)/p 

def f_statistic(pred_Y, test_Y, p):
    r2 = r2_score(pred_Y, test_Y)
    n = len(pred_Y)
    return r2 / (1 - r2) * (n-p-1)/p

In [522]:
# p = 10 

print(f_statistic(pred_Y, test_Y, p=num_features))

4.68869781453


In [523]:
import matplotlib.pyplot as plt

# Plot outputs
# plt.scatter(test_X, test_Y,  color='black')
# plt.plot(test_X, pred_Y, color='blue', linewidth=3)

# plt.show()