In [1]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
# Load the Boston housing data and hold out a test split.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs an older scikit-learn -- confirm the pinned version.
data = load_boston()
# A fixed random_state makes the split (and every score reported further down)
# reproducible across kernel restarts; without it each run draws a new split.
X_train, X_test, y_train, y_test = train_test_split(
    data['data'], data['target'], random_state=0
)

In [3]:
from sklearn.preprocessing import StandardScaler, RobustScaler, QuantileTransformer
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.decomposition import PCA
from sklearn.linear_model import Ridge

In [4]:
from sklearn.pipeline import Pipeline
# Three-stage chain: scale the features, reduce dimensionality, then regress.
# The step names double as the prefixes of the grid-search parameter keys.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('reduce_dim', PCA()),
    ('regressor', Ridge()),
])

In [5]:
# Fit the pipeline in place on the training split, then report its score on
# the held-out test split.
print('Testing score: ', pipe.fit(X_train, y_train).score(X_test, y_test))

Testing score:  0.6760371601274904


In [6]:
# Variance explained by each component of the fitted PCA step; the step is
# looked up by its name rather than by its position in the pipeline.
print(pipe.named_steps['reduce_dim'].explained_variance_)

[6.17944984 1.39610357 1.28870291 0.84671768 0.81912619 0.66420041
 0.54638908 0.41306193 0.26831896 0.20456008 0.18097859 0.16482782
 0.06195447]


In [7]:
import numpy as np
# Candidate sizes for the dimensionality-reduction step: 1 through 10 inclusive.
n_features_to_test = 1 + np.arange(10)

In [8]:
# Ridge regularisation strengths on a log2 scale: 2**-6, 2**-5, ..., 2**5.
alpha_to_test = np.power(2.0, np.arange(-6, 6))

In [9]:
# Search space keyed as <step name>__<parameter name>: the number of
# components for the 'reduce_dim' step and alpha for the 'regressor' step.
params = dict(
    reduce_dim__n_components=n_features_to_test,
    regressor__alpha=alpha_to_test,
)

In [10]:
from sklearn.model_selection import GridSearchCV
# Run the grid search on the training split, then report the score of the
# fitted search object on the held-out test split.
gridsearch = GridSearchCV(estimator=pipe, param_grid=params, verbose=1)
gridsearch.fit(X_train, y_train)
print('Final score is: ', gridsearch.score(X_test, y_test))

Fitting 5 folds for each of 120 candidates, totalling 600 fits
Final score is:  0.6671430170003794


In [13]:
# Show the estimator selected by the grid search (rendered as the cell output).
gridsearch.best_estimator_

Pipeline(steps=[('scaler', StandardScaler()),
                ('reduce_dim', PCA(n_components=6)),
                ('regressor', Ridge(alpha=0.5))])

In [14]:
# Candidate scaler instances to be swapped into the pipeline's 'scaler' step.
# NOTE(review): QuantileTransformer defaults to n_quantiles=1000 -- confirm the
# training folds have enough samples, otherwise expect a warning per fit.
scalers_to_test = [
    StandardScaler(),
    RobustScaler(),
    QuantileTransformer(),
]

In [17]:
# Same search space as before, with the whole 'scaler' step now also varied
# across the candidate scaler instances.
params = {
    'scaler': scalers_to_test,
    'reduce_dim__n_components': n_features_to_test,
    'regressor__alpha': alpha_to_test,
}

In [None]:
# Repeat the search with the current `params` on the training split and
# report the score on the held-out test split.
gridsearch = GridSearchCV(estimator=pipe, param_grid=params, verbose=1)
gridsearch.fit(X_train, y_train)
print('Final score is: ', gridsearch.score(X_test, y_test))

In [19]:
# Show the estimator selected by the latest grid search (rendered as the cell output).
gridsearch.best_estimator_

Pipeline(steps=[('scaler', RobustScaler()),
                ('reduce_dim', PCA(n_components=10)),
                ('regressor', Ridge(alpha=4.0))])

In [20]:
# A list of grids lets each reducer be searched with its own parameter name:
# PCA is tuned through n_components, SelectKBest through k.
params = [
    # Grid 1: PCA as the 'reduce_dim' step.
    {
        'scaler': scalers_to_test,
        'reduce_dim': [PCA()],
        'reduce_dim__n_components': n_features_to_test,
        'regressor__alpha': alpha_to_test,
    },
    # Grid 2: univariate selection (f_regression scores) as the 'reduce_dim' step.
    {
        'scaler': scalers_to_test,
        'reduce_dim': [SelectKBest(f_regression)],
        'reduce_dim__k': n_features_to_test,
        'regressor__alpha': alpha_to_test,
    },
]

In [None]:
# Search over the list of grids held in `params` on the training split and
# report the score on the held-out test split.
gridsearch = GridSearchCV(estimator=pipe, param_grid=params, verbose=1)
gridsearch.fit(X_train, y_train)
print('Final score is: ', gridsearch.score(X_test, y_test))

In [22]:
# Show the estimator selected across both grids (rendered as the cell output).
gridsearch.best_estimator_

Pipeline(steps=[('scaler', RobustScaler()),
                ('reduce_dim', PCA(n_components=10)),
                ('regressor', Ridge(alpha=4.0))])

In [23]:
# IMPORT PACKAGES
from sklearn.svm import SVC
from sklearn.datasets import load_iris

from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline, FeatureUnion

from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

In [24]:
# Load the iris data set and pull out the feature matrix and the target vector.
iris = load_iris()
X = iris.data
y = iris.target

In [25]:
# Two feature extractors that are combined in the next step:
# a PCA projection onto 2 components ...
pca = PCA(n_components=2)

# ... and a univariate selector that keeps 3 of the original features.
selection = SelectKBest(k=3)

In [26]:
# Build a single transformer from the PCA and univariate-selection extractors;
# the names "pca" and "univ_select" are used in the parameter-grid keys.
combined_features = FeatureUnion(
    transformer_list=[("pca", pca), ("univ_select", selection)]
)

In [31]:
# Support vector classifier with a linear kernel; used as the final "svm"
# step of the pipeline built from the combined features.
svm = SVC(kernel="linear")

In [None]:
# Chain the combined feature extractor with the classifier.
pipeline = Pipeline(steps=[("features", combined_features), ("svm", svm)])

# Search space; nested parameters are addressed as <step>__<sub-step>__<param>.
param_grid = dict(
    features__pca__n_components=[1, 2, 3],
    features__univ_select__k=[1, 2, 3],
    svm__C=[0.1, 1, 10],
)

# Grid search over every combination in param_grid, with verbose progress output.
grid_search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    verbose=10,
    refit=True,
)

# Fit on the full iris data; kept as the last expression of the cell.
grid_search.fit(X, y)

In [34]:
# Print the parameter combination selected by the grid search.
print(grid_search.best_params_)

{'features__pca__n_components': 2, 'features__univ_select__k': 3, 'svm__C': 1}
