# Grid Seach which Model to Use

In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=0)

param_grid = [
    {'classifier': [SVC()], 'preprocessing': [StandardScaler(), None],
    'classifier__gamma': [0.001, 0.01, 0.1, 1, 10, 100],
    'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100]
    },
    {'classifier': [RandomForestClassifier()], 'preprocessing': [None],
    'classifier__max_features': [1, 2, 3]
    }
]

pipeline = Pipeline([('preprocessing', StandardScaler()), ('classifier', SVC())])
grid = GridSearchCV(pipeline, param_grid, cv=5, n_jobs=-1, verbose=1)
grid.fit(X_train, y_train)

# print the results
print("Best params {}".format(grid.best_params_))
print("Best cross validation score {}".format(grid.best_score_))
print("Test data score {}".format(grid.score(X_train, y_train)))

Fitting 5 folds for each of 75 candidates, totalling 375 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
Best params {'classifier': SVC(C=10, gamma=0.1), 'classifier__C': 10, 'classifier__gamma': 0.1, 'preprocessing': None}
Best cross validation score 0.9731225296442687
Test data score 0.9910714285714286
[Parallel(n_jobs=-1)]: Done 375 out of 375 | elapsed:    2.5s finished
