In [1]:
# Notebook-wide setup: silence warnings and show floats with 5 digits.
import warnings

# Installed before the numeric imports so the filter is already active
# while numpy / pandas are being imported.
warnings.filterwarnings(action='ignore')

import numpy as np
import pandas as pd

# Same 5-digit precision for numpy array reprs and pandas displays.
np.set_printoptions(precision=5)
pd.options.display.precision = 5

In [2]:
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load iris and hold out 25% of the samples as the final test set; the
# remaining "trainval" part is what the cross-validated search is fitted on.
iris = load_iris()
X_trainval, X_test, y_trainval, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.25,
    random_state=0,
)

In [3]:
from sklearn.pipeline import Pipeline

# Two-step pipeline: standardise the features, then fit an SVM on them.
# The step names ('scaler', 'svm') are the prefixes used in the parameter grid.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svm', SVC()),
])

In [4]:
from sklearn.model_selection import StratifiedKFold, GridSearchCV

# Shuffled, stratified 5-fold splitter with a fixed seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2)

# '<step>__<param>' keys address parameters of the 'svm' pipeline step.
hyperparam_grid = {
    'svm__C': [0.001, 0.01, 0.1, 1, 10, 100],
    'svm__gamma': [0.001, 0.01, 0.1, 1, 10, 100],
}

# refit=True: after the search, the best configuration is refit on all of
# X_trainval so grid_search can be used directly for prediction.
grid_search = GridSearchCV(
    estimator=pipe,
    param_grid=hyperparam_grid,
    scoring='accuracy',
    refit=True,
    cv=kfold,
)
grid_search.fit(X_trainval, y_trainval)

# best_score_ is reported here as the validation score of the winning setting.
print("Best score on validation set: {:.5f}".format(grid_search.best_score_))
print("Best hyperparameters: {}".format(grid_search.best_params_))

Best score on validation set: 0.97312
Best hyperparameters: {'svm__C': 10, 'svm__gamma': 0.1}


In [5]:
# Evaluate the refit best pipeline on the held-out test set.
y_test_hat = grid_search.predict(X_test)
test_score = accuracy_score(y_true=y_test, y_pred=y_test_hat)
print("Test set score with best hyperparameters: {:.5f}".format(test_score))

Test set score with best hyperparameters: 0.97368


In [6]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV

# Load the breast-cancer data and split it with a fixed seed.
# test_size=0.25 is train_test_split's default, written out explicitly here.
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.25,
    random_state=0,
)

In [7]:
# Model selection over preprocessing choices AND classifier families.
# Both pipeline steps are placeholders: every grid entry below substitutes
# concrete objects for 'preprocessing' and 'classifier'.
pipe = Pipeline([('preprocessing', None), ('classifier', SVC())])

# One dict per classifier family, so each family is only combined with its
# own hyperparameters.
hyperparam_grid = [
    # SVM: scaled / min-max scaled / raw features, log-spaced gamma and C.
    {'classifier': [SVC()], 'preprocessing': [StandardScaler(), MinMaxScaler(), None],
     'classifier__gamma': [0.001, 0.01, 0.1, 1, 10, 100],
     'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100]},
    # MLP: random_state pins the random weight initialisation; without it the
    # cross-validation scores (and possibly the winner) change between runs.
    {'classifier': [MLPClassifier(solver='lbfgs', random_state=0)],
     'preprocessing': [StandardScaler(), MinMaxScaler(), None],
     'classifier__hidden_layer_sizes': [(10,), (20,), (50,), (100,)],
     'classifier__activation': ['tanh', 'relu']},
    # Random forest: no preprocessing; random_state fixes the bootstrap and
    # feature sampling for the same reproducibility reason.
    {'classifier': [RandomForestClassifier(n_estimators=100, random_state=0)],
     'preprocessing': [None], 'classifier__max_features': [1, 2, 3]}]

# Shuffled, stratified 5-fold splitter with a fixed seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

# refit=True: the best configuration is refit on all of X_train, so `grid`
# can score the test set directly.
grid = GridSearchCV(pipe, hyperparam_grid, scoring='accuracy', refit=True, cv=kfold)
grid.fit(X_train, y_train)

print("Best hyperparams:\n{}".format(grid.best_params_))
print("Best cross-validation score: {:.5f}".format(grid.best_score_))
print("Test-set score: {:.5f}".format(grid.score(X_test, y_test)))

Best hyperparams:
{'classifier': SVC(), 'classifier__C': 10, 'classifier__gamma': 0.01, 'preprocessing': StandardScaler()}
Best cross-validation score: 0.97882
Test-set score: 0.97902


In [8]:
# Display the pipeline that the search refit with the best hyperparameters
# (available because GridSearchCV was created with refit=True).
grid.best_estimator_

In [9]:
# Display the object selected for the 'preprocessing' step of the best pipeline.
grid.best_estimator_.named_steps['preprocessing']

In [10]:
# Display the object selected for the 'classifier' step of the best pipeline.
grid.best_estimator_.named_steps['classifier']

In [11]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold, GridSearchCV

# Reload iris and split it with a fixed seed for the multi-model search.
# test_size=0.25 is train_test_split's default, written out explicitly here.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.25,
    random_state=0,
)

In [12]:
# Model selection over preprocessing choices AND classifier families.
# Both pipeline steps are placeholders: every grid entry below substitutes
# concrete objects for 'preprocessing' and 'classifier'.
pipe = Pipeline([('preprocessing', None), ('classifier', SVC())])

# One dict per classifier family, so each family is only combined with its
# own hyperparameters.
hyperparam_grid = [
    # SVM: scaled / min-max scaled / raw features, log-spaced gamma and C.
    {'classifier': [SVC()], 'preprocessing': [StandardScaler(), MinMaxScaler(), None],
     'classifier__gamma': [0.001, 0.01, 0.1, 1, 10, 100],
     'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100]},
    # MLP: random_state pins the random weight initialisation; without it the
    # cross-validation scores (and possibly the winner) change between runs.
    {'classifier': [MLPClassifier(solver='lbfgs', random_state=0)],
     'preprocessing': [StandardScaler(), MinMaxScaler(), None],
     'classifier__hidden_layer_sizes': [(10,), (20,), (50,), (100,)],
     'classifier__activation': ['tanh', 'relu']},
    # Random forest: no preprocessing; random_state fixes the bootstrap and
    # feature sampling for the same reproducibility reason.
    {'classifier': [RandomForestClassifier(n_estimators=100, random_state=0)],
     'preprocessing': [None], 'classifier__max_features': [1, 2, 3]}]

# Shuffled, stratified 5-fold splitter with a fixed seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

# refit=True: the best configuration is refit on all of X_train, so `grid`
# can score the test set directly.
grid = GridSearchCV(pipe, hyperparam_grid, scoring='accuracy', refit=True, cv=kfold)
grid.fit(X_train, y_train)

print('Best estimator:\n{}'.format(grid.best_estimator_))
print('Best hyperparams:\n{}'.format(grid.best_params_))
print('Best cross-validation score: {:.5f}'.format(grid.best_score_))
print('Test-set score: {:.5f}'.format(grid.score(X_test, y_test)))

Best estimator:
Pipeline(steps=[('preprocessing', None),
                ('classifier', SVC(C=100, gamma=0.01))])
Best hyperparams:
{'classifier': SVC(), 'classifier__C': 100, 'classifier__gamma': 0.01, 'preprocessing': None}
Best cross-validation score: 0.98221
Test-set score: 0.97368


In [13]:
# Display the pipeline that the iris search refit with the best hyperparameters
# (available because GridSearchCV was created with refit=True).
grid.best_estimator_

In [14]:
# Positional access to the best pipeline: index 0 is its first step, which the
# Pipeline used for this search names 'preprocessing'. If that step is None
# (as in the recorded run's printed best estimator), the cell displays nothing.
grid.best_estimator_[0]

In [15]:
# Positional access to the best pipeline: index 1 is its second step, which the
# Pipeline used for this search names 'classifier'.
grid.best_estimator_[1]