<img src="../Pics/MLSb-T.png" width="160">
<br><br>
<center><u><H1>Hyperparameter Optimization: GridSearchCV</H1></u></center>

In [1]:
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

## Data:

In [2]:
iris = load_iris()
X=iris.data
y=iris.target

In [3]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X)
X_std = sc.transform(X)

## Models:

In [4]:
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [5]:
svm = SVC()
knn = KNN()
lr = LogisticRegression()
rf = RandomForestClassifier()

In [6]:
from sklearn.model_selection import GridSearchCV
clf_svm = GridSearchCV(svm, {'kernel':['linear', 'poly','rbf'], 'C':[1, 10, 100]})
clf_knn = GridSearchCV(knn, {'n_neighbors':[2, 5, 8, 10]})
clf_lr = GridSearchCV(lr, {'C':[1000, 100, 10, 1]})
clf_rf = GridSearchCV(rf, {'n_estimators':[50,100,500,1000]})

In [7]:
clf_svm.fit(X_std, y)
clf_knn.fit(X_std, y)
clf_lr.fit(X_std, y)
clf_rf.fit(X_std, y)

GridSearchCV(cv=None, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'n_estimators': [50, 100, 500, 1000]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

## Best Params:

In [8]:
print(clf_lr.best_params_)

{'C': 1000}


In [9]:
print(clf_knn.best_params_)

{'n_neighbors': 8}


In [10]:
print(clf_svm.best_params_)

{'kernel': 'rbf', 'C': 10}


In [11]:
print(clf_rf.best_params_)

{'n_estimators': 100}


In [12]:
means = clf_lr.cv_results_['mean_test_score']
stds = clf_lr.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf_lr.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))

0.960 (+/-0.055) for {'C': 1000}
0.960 (+/-0.055) for {'C': 100}
0.927 (+/-0.036) for {'C': 10}
0.900 (+/-0.033) for {'C': 1}


In [13]:
means = clf_knn.cv_results_['mean_test_score']
stds = clf_knn.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf_knn.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))

0.933 (+/-0.049) for {'n_neighbors': 2}
0.947 (+/-0.067) for {'n_neighbors': 5}
0.953 (+/-0.049) for {'n_neighbors': 8}
0.947 (+/-0.050) for {'n_neighbors': 10}


In [14]:
means = clf_svm.cv_results_['mean_test_score']
stds = clf_svm.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf_svm.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))

0.973 (+/-0.074) for {'kernel': 'linear', 'C': 1}
0.920 (+/-0.037) for {'kernel': 'poly', 'C': 1}
0.960 (+/-0.055) for {'kernel': 'rbf', 'C': 1}
0.973 (+/-0.074) for {'kernel': 'linear', 'C': 10}
0.953 (+/-0.049) for {'kernel': 'poly', 'C': 10}
0.987 (+/-0.037) for {'kernel': 'rbf', 'C': 10}
0.973 (+/-0.074) for {'kernel': 'linear', 'C': 100}
0.953 (+/-0.074) for {'kernel': 'poly', 'C': 100}
0.960 (+/-0.085) for {'kernel': 'rbf', 'C': 100}


In [15]:
means = clf_rf.cv_results_['mean_test_score']
stds = clf_rf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf_rf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))

0.953 (+/-0.049) for {'n_estimators': 50}
0.960 (+/-0.032) for {'n_estimators': 100}
0.960 (+/-0.055) for {'n_estimators': 500}
0.960 (+/-0.055) for {'n_estimators': 1000}


## Reference:
http://scikit-learn.org/stable/modules/grid_search.html