# Keywords
- decomposition / manifold: PCA, NMF, TruncatedSVD, Isomap
- model selection: GridSearchCV
- classifier: SVC, LinearSVC

In [1]:
import inspect
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline

from sklearn import datasets

from sklearn.decomposition import PCA, NMF, TruncatedSVD
from sklearn.manifold import Isomap
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC, LinearSVC

from sklearn.pipeline import Pipeline

# Load the iris data set: feature matrix X and class labels y.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Two-step pipeline: dimensionality reduction followed by a classifier.
# The estimators given here are only starting values; the grid below
# substitutes both steps with each of its candidates.
pipe = Pipeline([
    ('reduce_dim', PCA()),
    ('classify', SVC()),
])

# Search jointly over the reducer, its number of components, the classifier
# and the classifier's regularisation parameter C.
param_grid = [
    {
        'reduce_dim': [PCA(), NMF(), Isomap(), TruncatedSVD()],
        'reduce_dim__n_components': [2, 3],
        'classify': [SVC(gamma='auto'), LinearSVC()],
        'classify__C': [1, 10, 100, 1000],
    },
]

# `iid` was removed from GridSearchCV in scikit-learn 0.24, so passing it
# unconditionally raises TypeError on current releases. Pass iid=False only
# where the parameter still exists, which keeps the behaviour of older
# releases unchanged. NOTE(review): releases without `iid` are documented to
# average the per-fold scores unweighted, i.e. the iid=False behaviour.
search_kwargs = {'cv': 3, 'n_jobs': -1, 'param_grid': param_grid}
if 'iid' in inspect.signature(GridSearchCV).parameters:
    search_kwargs['iid'] = False

grid = GridSearchCV(pipe, **search_kwargs)
grid.fit(X, y)
print("best params:\n", grid.best_params_)
print("best score:\n", grid.best_score_)

# detailed results
results_df = pd.DataFrame(grid.cv_results_)
# NOTE(review): recent pandas releases deprecated and later removed both the
# `encoding` argument of to_excel and the xlwt-based .xls writer -- confirm
# the target pandas version. Moving to .xlsx would change the output file
# name, so the call is intentionally left as it was.
results_df.to_excel(os.path.join(os.getcwd(), "3_6_results.xls"), encoding='utf-8', index=False)
# print(results_df)

best params:
 {'classify': SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False), 'classify__C': 10, 'reduce_dim': PCA(copy=True, iterated_power='auto', n_components=3, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False), 'reduce_dim__n_components': 3}
best score:
 0.9803921568627452
