In [None]:
%matplotlib inline

In [3]:
import numpy as np
from time import time
from scipy.stats import randint as sp_randint
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier

In [4]:
# get data
digits = load_digits()  # Source : http://archive.ics.uci.edu/ml/datasets/Pen-Based+Recognition+of+Handwritten+Digits
X, y = digits.data, digits.target

In [5]:
# build a classifier
clf = RandomForestClassifier(n_estimators=50)

In [6]:
# specify parameters and distributions to sample from
param_dist = {"max_depth": [3, None],
              "max_features": sp_randint(1, 11),
              "min_samples_split": sp_randint(2, 11),
              "min_samples_leaf": sp_randint(1, 11),
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

In [7]:
# run randomized search
samples = 10  # number of random samples 
randomCV = RandomizedSearchCV(clf, param_distributions=param_dist, n_iter=samples) #default cv = 3

In [8]:
randomCV.fit(X, y)

 
print(randomCV.best_params_)


{'bootstrap': True, 'criterion': 'entropy', 'max_depth': None, 'max_features': 6, 'min_samples_leaf': 1, 'min_samples_split': 3}


In [9]:
# use a full grid over all parameters
param_grid = {"max_depth": [3, None],
              "max_features": [1, 3, 10],
              "min_samples_split": [2, 3, 10],
              "min_samples_leaf": [1, 3, 10],
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

In [10]:
# run grid search
grid_search = GridSearchCV(clf, param_grid=param_grid)
start = time()
grid_search.fit(X, y)

GridSearchCV(estimator=RandomForestClassifier(n_estimators=50),
             param_grid={'bootstrap': [True, False],
                         'criterion': ['gini', 'entropy'],
                         'max_depth': [3, None], 'max_features': [1, 3, 10],
                         'min_samples_leaf': [1, 3, 10],
                         'min_samples_split': [2, 3, 10]})

In [11]:
grid_search.best_params_

{'bootstrap': False,
 'criterion': 'gini',
 'max_depth': None,
 'max_features': 3,
 'min_samples_leaf': 1,
 'min_samples_split': 2}

In [12]:
grid_search.cv_results_['mean_test_score']

array([0.81080161, 0.82470597, 0.80245435, 0.82248994, 0.81360724,
       0.81528319, 0.80527391, 0.81081399, 0.8041303 , 0.85923244,
       0.8325147 , 0.83808883, 0.84363355, 0.84697617, 0.81802847,
       0.8397493 , 0.8336026 , 0.82970443, 0.84140669, 0.81805788,
       0.81915042, 0.83418292, 0.81969669, 0.8135701 , 0.81915506,
       0.82806716, 0.81581708, 0.9232281 , 0.93211235, 0.92488703,
       0.91153668, 0.91486846, 0.91877283, 0.88203033, 0.88035902,
       0.86367069, 0.93546271, 0.93213401, 0.92545032, 0.91988084,
       0.92822656, 0.92655834, 0.90318013, 0.90374342, 0.9026416 ,
       0.93935469, 0.93657691, 0.92601362, 0.93212318, 0.92266171,
       0.91988703, 0.91042556, 0.90375735, 0.90597493, 0.81860724,
       0.81749304, 0.81191582, 0.80966574, 0.79801919, 0.82806407,
       0.81582173, 0.81359331, 0.81025379, 0.84865831, 0.83249923,
       0.84253172, 0.8291628 , 0.83863665, 0.82028474, 0.83584803,
       0.84587898, 0.848632  , 0.81415197, 0.84808728, 0.81861

In [None]:
grid_search.best_estimator_

RandomForestClassifier(bootstrap=False, class_weight=None, criterion='gini',
            max_depth=None, max_features=3, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=3,
            min_weight_fraction_leaf=0.0, n_estimators=50, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)