In [1]:
import numpy as np
from sklearn import linear_model, decomposition, datasets
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [2]:
# Two-stage model: PCA for dimensionality reduction, followed by a
# logistic-regression classifier. Both estimators keep their default
# settings here; the grid search later overrides selected parameters.
pca = decomposition.PCA()
logistic = linear_model.LogisticRegression()

# The step names ('pca', 'logistic') are the prefixes used when addressing
# each step's parameters and when looking the steps up via named_steps.
pipe = Pipeline([
    ('pca', pca),
    ('logistic', logistic),
])

In [3]:
# Load the breast-cancer dataset bundled with scikit-learn and split it
# into the feature matrix X and the target vector y.
dataset = datasets.load_breast_cancer()
X, y = dataset.data, dataset.target

In [4]:
# (number of samples, number of features) of the feature matrix.
np.shape(X)

(569, 30)

In [5]:
# Standardise every feature with a StandardScaler.
sc = StandardScaler()

# The scaler is fit on the *entire* feature matrix X (there is no separate
# training split at this point) and then applied to that same matrix.
# NOTE(review): because the scaling statistics come from all rows, any
# cross-validation later run on X_std sees statistics that include its
# held-out rows; fitting the scaler inside the pipeline would avoid this.
X_std = sc.fit(X).transform(X)

In [6]:
# Hyper-parameter grid for the PCA -> logistic-regression pipeline.

# Candidate numbers of principal components: 1 up to the number of input
# features. The upper bound is derived from the data rather than
# hard-coded, so the grid stays valid if the feature matrix changes.
n_components = list(range(1, X_std.shape[1] + 1))

# 50 candidate values for the logistic-regression parameter C,
# logarithmically spaced between 1e-4 and 1e4.
C = np.logspace(-4, 4, 50)

# Parameters of pipeline steps are addressed with '__'-separated names:
# '<step name>__<parameter name>'.
parameters = dict(pca__n_components=n_components, logistic__C=C)

# Exhaustive search over every (n_components, C) combination, scored with
# GridSearchCV's default cross-validation and scoring settings.
clf = GridSearchCV(pipe, parameters)
clf.fit(X_std, y)

GridSearchCV(cv=None, error_score='raise',
       estimator=Pipeline(steps=[('pca', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('logistic', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False))]),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'logistic__C': array([  1.00000e-04,   1.45635e-04,   2.12095e-04,   3.08884e-04,
         4.49843e-04,   6.55129e-04,   9.54095e-04,   1.38950e-03,
         2.02359e-03,   2.94705e-03,   4.29193e-03,   6.25055e-03,
         9.10298e-03,   1.32571e-02,   1.93070e-02,   2.81177e-02,
     ... 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]},
       pre_dispatch='2*n_jobs', refit=True, return_train_s

In [7]:
# Number of principal components used by the best pipeline found by the
# grid search.
(
    clf.best_estimator_
    .named_steps['pca']
    .n_components
)

9

In [8]:
# Value of the logistic-regression parameter C used by the best pipeline
# found by the grid search.
(
    clf.best_estimator_
    .named_steps['logistic']
    .C
)

0.086851137375135209

In [9]:
# Nested cross-validation: the estimator being scored is the GridSearchCV
# object itself, so the whole hyper-parameter search is re-run on the
# training portion of each outer fold and scored on that fold's held-out
# portion. One score is returned per outer fold.
cross_val_score(estimator=clf, X=X_std, y=y)

array([ 0.97368421,  0.97368421,  0.96296296])