In [2]:
import numpy as np
from sklearn import datasets
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV

### Diabetes dataset
442 samples with 10 features each (real values between -0.2 and +0.2). Target values are integers between 25 and 346.

"Ten baseline variables, age, sex, body mass index, average blood pressure, and six blood serum measurements were obtained for each of n = 442 diabetes patients, as well as the response of interest, a quantitative measure of disease progression one year after baseline." 

In [6]:
# Load the diabetes data bundled with scikit-learn, then show the shape of the
# target vector followed by the shape of the feature matrix.
dataset = datasets.load_diabetes()
for part in (dataset.target, dataset.data):
    print(part.shape)

(442,)
(442, 10)


In [20]:
# Candidate values for the `alpha` hyperparameter that the grid search will try
# (kept in the originally listed order).
alphas = np.array([1.0, 0.1, 0.01, 0.5, 0.005, 1e-4, 0.0])

### GridSearch Cross Validation

Exhaustive search over specified parameter values for an estimator: every combination of the parameter values in the grid is evaluated with cross-validation. The grid can contain multiple hyperparameters; here, we have only one (`alpha`).

In [26]:
# Build a Ridge regressor and wrap it in a grid search that tries every value in
# `alphas`, using cv=10 for cross-validation; then fit on the full diabetes data.
model = Ridge()
grid = GridSearchCV(model, {"alpha": alphas}, cv=10)
grid.fit(dataset.data, dataset.target)
# Echo the fitted search object's configuration.
print(grid)

GridSearchCV(cv=10, error_score='raise',
       estimator=Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'alpha': array([  1.00000e+00,   1.00000e-01,   1.00000e-02,   5.00000e-01,
         5.00000e-03,   1.00000e-04,   0.00000e+00])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)


In [27]:
# Report the outcome of the grid search: first the best cross-validation score,
# then the `alpha` held by the best estimator.
for result in (grid.best_score_, grid.best_estimator_.alpha):
    print(result)

0.462973124828
0.1
