In [9]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.pipeline import Pipeline

import numpy as np
import pandas as pd

In [10]:
# Load the California housing data with a single fetch and unpack the feature
# matrix and target vector together, instead of loading the dataset once per array.
X, y = fetch_california_housing(return_X_y=True)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [11]:
# Baseline pipeline: standardise the features, then fit a ridge regressor.
pipe = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('regressor', Ridge()),
    ]
)

In [14]:
# Tune the ridge penalty of `pipe` over 20 log-spaced values between 1e-3 and 1e3
# with 5-fold cross-validation on the training split.
# (An untuned baseline can be obtained by calling pipe.fit / pipe.score on the
# same train and test splits.)
param_grid = dict(regressor__alpha=np.logspace(start=-3, stop=3, num=20))
grid_search = GridSearchCV(estimator=pipe, param_grid=param_grid, verbose=1, cv=5)
grid_search.fit(X_train, y_train);

Fitting 5 folds for each of 20 candidates, totalling 100 fits


In [15]:
# Best score recorded by the grid search fitted above (no `scoring` argument was
# passed, so this is whatever the estimator's default scorer reports).
grid_search.best_score_

0.6053953632414373

In [16]:
from tempfile import mkdtemp
from shutil import rmtree

In [17]:
# Create a temporary directory; later cells pass it as `memory=` to Pipeline
# and remove it again with rmtree.
cachedir = mkdtemp()


In [18]:
# Same scaler + ridge pipeline as before, but with `cachedir` supplied as the
# pipeline's `memory` location.
pipe_cache = Pipeline(
    steps=[('scaler', StandardScaler()), ('regressor', Ridge())],
    memory=cachedir,
)

In [19]:
# Fit the cache-backed pipeline on the training split; the trailing `;` keeps
# the cell from echoing the fitted estimator.
pipe_cache.fit(X_train, y_train);

In [20]:
# Alternative layout: the grid search is itself the last pipeline step, so the
# parameter is addressed as plain `alpha` (it targets Ridge directly).
# NOTE(review): in this layout the scaler is fitted on all of X_train before the
# inner cross-validation splits the data — confirm that is intended.
param_grid = dict(alpha=np.logspace(start=-3, stop=3, num=20))

grid_search = GridSearchCV(
    estimator=Ridge(),
    param_grid=param_grid,
    cv=5,
    n_jobs=2,
    verbose=1,
)

pipe2 = Pipeline(steps=[('scaler', StandardScaler()), ('grid_search', grid_search)])

In [21]:
# Fit the scaler followed by the embedded grid search on the training split.
pipe2.fit(X_train, y_train);

Fitting 5 folds for each of 20 candidates, totalling 100 fits


In [22]:
# Look up the embedded search by its step name and show the parameters it selected.
pipe2.named_steps['grid_search'].best_params_

{'alpha': 26.366508987303554}

In [23]:
from sklearn.decomposition import PCA
# Three-step pipeline (scale -> PCA -> ridge) that uses `cachedir` as its memory location.
pca = PCA()
pipe3 = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('pca', pca),
        ('regressor', Ridge()),
    ],
    memory=cachedir,
)

# Joint grid: 5 PCA sizes x 20 ridge penalties = 100 candidates.
param_grid = dict(
    pca__n_components=[2, 3, 4, 5, 6],
    regressor__alpha=np.logspace(start=-3, stop=3, num=20),
)

In [24]:
# Standalone fit of the three-step pipeline on the training split.
# NOTE(review): the grid search built from pipe3 presumably refits its own
# copies, so this fit may be optional — confirm.
pipe3.fit(X_train, y_train);

In [25]:
# Exhaustive 5-fold search over the joint PCA/ridge grid, using two parallel jobs.
grid_search = GridSearchCV(
    estimator=pipe3,
    param_grid=param_grid,
    cv=5,
    n_jobs=2,
    verbose=1,
)

In [26]:
# Run the grid search on the training split; `;` suppresses the estimator echo.
grid_search.fit(X_train, y_train);

Fitting 5 folds for each of 100 candidates, totalling 500 fits


In [27]:
# Parameter combination that scored best in the grid search fitted above.
grid_search.best_params_

{'pca__n_components': 6, 'regressor__alpha': 26.366508987303554}

In [None]:
# Remove the pipeline cache directory. ignore_errors=True makes the cell safe
# to re-run: a directory that is already gone no longer raises FileNotFoundError.
rmtree(cachedir, ignore_errors=True)

# RandomizedSearchCV

In [28]:
from sklearn.model_selection import RandomizedSearchCV

# Randomized search over PCA size (1 to 8 components) and the ridge penalty
# (20 log-spaced values between 1e-3 and 1e3).
param_grid = {'pca__n_components': range(1,9),'regressor__alpha': np.logspace(-3,3,20)}
pipe4 = Pipeline([('scaler', StandardScaler()), ('pca', pca), ('regressor', Ridge())], memory=cachedir)
pipe4.fit(X_train, y_train);

# random_state pins which parameter combinations get sampled, so the reported
# best parameters are the same on every run (same seed as the train/test split).
random_search = RandomizedSearchCV(pipe4, param_grid, verbose=1, cv=5, n_jobs=2, random_state=0)
random_search.fit(X_train, y_train);

# Clean up the cache directory; tolerate it being absent so the cell can be re-run.
rmtree(cachedir, ignore_errors=True)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


In [29]:
random_search.best_params_

{'regressor__alpha': 26.366508987303554, 'pca__n_components': 7}

In [30]:
%%timeit
# Time a complete re-fit of `grid_search` on the training split; the captured
# output shows 100 candidates x 5 folds per timed run.
grid_search.fit(X_train, y_train);

Fitting 5 folds for each of 100 candidates, totalling 500 fits
Fitting 5 folds for each of 100 candidates, totalling 500 fits
Fitting 5 folds for each of 100 candidates, totalling 500 fits
Fitting 5 folds for each of 100 candidates, totalling 500 fits
Fitting 5 folds for each of 100 candidates, totalling 500 fits
Fitting 5 folds for each of 100 candidates, totalling 500 fits
Fitting 5 folds for each of 100 candidates, totalling 500 fits
Fitting 5 folds for each of 100 candidates, totalling 500 fits
4.91 s ± 31.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [88]:
# Final cleanup of the pipeline cache directory. An earlier cell also removes
# it, so tolerate the directory being absent instead of raising FileNotFoundError.
rmtree(cachedir, ignore_errors=True)