### Scikit-learn [parameter optimizer](http://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection) & [link](http://scikit-learn.org/stable/modules/grid_search.html#grid-search)
* [Grid search cross-validation](http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html#sklearn.model_selection.GridSearchCV)
* [Randomized search CV](http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html#sklearn.model_selection.RandomizedSearchCV)
    * distributions: scipy statistical functions - [scipy.stats](https://docs.scipy.org/doc/scipy-0.15.1/reference/stats.html)

In [1]:
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
# data
digits = load_digits()
X, y = digits.data, digits.target
# model
model = RandomForestClassifier(n_estimators=20)

#### Randomized search

In [2]:
# distribution
from scipy.stats import randint as sp_randint

In [3]:
from sklearn.model_selection import RandomizedSearchCV

In [4]:
# Randomized search
param_dist = {"max_depth": [3, None],
              "max_features": sp_randint(1, 11),
              "min_samples_split": sp_randint(2, 11),
              "criterion": ["gini", "entropy"]}
n_iter_search = 20
random_search = RandomizedSearchCV(model,
                                   param_distributions = param_dist,
                                   n_iter=n_iter_search)

In [5]:
random_search.fit(X, y)

RandomizedSearchCV(cv=None, error_score='raise',
          estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=20, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False),
          fit_params={}, iid=True, n_iter=20, n_jobs=1,
          param_distributions={'max_depth': [3, None], 'max_features': <scipy.stats._distn_infrastructure.rv_frozen object at 0x10e9e02b0>, 'min_samples_split': <scipy.stats._distn_infrastructure.rv_frozen object at 0x10e9e0400>, 'criterion': ['gini', 'entropy']},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring=None, verbose=0)

In [6]:
import numpy as np
result = random_search.cv_results_
idc = result['rank_test_score'] == 1
idc = np.flatnonzero(idc)[0]
print('Mean validation score:', result['mean_test_score'][idc])
print('Parameters', result['params'][idc])

Mean validation score: 0.923205342237
Parameters {'criterion': 'entropy', 'max_depth': None, 'max_features': 8, 'min_samples_split': 7}


#### Grid search

In [7]:
from sklearn.model_selection import GridSearchCV

In [8]:
# Grid search
param_grid = {"max_depth": [3, None],
              "max_features": [1, 3, 10],
              "min_samples_split": [2, 3, 10],
              "criterion": ["gini", "entropy"]}
grid_search = GridSearchCV(model,
                           param_grid = param_grid)

In [9]:
grid_search.fit(X, y)

GridSearchCV(cv=None, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=20, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'max_depth': [3, None], 'max_features': [1, 3, 10], 'min_samples_split': [2, 3, 10], 'criterion': ['gini', 'entropy']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [10]:
import numpy as np
result = grid_search.cv_results_
idc = result['rank_test_score'] == 1
idc = np.flatnonzero(idc)[0]
print('Mean validation score:', result['mean_test_score'][idc])
print('Parameters', result['params'][idc])

Mean validation score: 0.925431274346
Parameters {'criterion': 'entropy', 'max_depth': None, 'max_features': 3, 'min_samples_split': 3}
