In [1]:
%matplotlib inline

In [2]:
import numpy as np
from time import time
from scipy.stats import randint as sp_randint
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier

In [3]:
# get data
digits = load_digits()  # Source : http://archive.ics.uci.edu/ml/datasets/Pen-Based+Recognition+of+Handwritten+Digits
X, y = digits.data, digits.target

In [4]:
# build a classifier
clf = RandomForestClassifier(n_estimators=50)

In [5]:
# specify parameters and distributions to sample from
param_dist = {"max_depth": [3, None],
              "max_features": sp_randint(1, 11),
              "min_samples_split": sp_randint(2, 11),
              "min_samples_leaf": sp_randint(1, 11),
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

In [6]:
# run randomized search
samples = 10  # number of random samples 
randomCV = RandomizedSearchCV(clf, param_distributions=param_dist, n_iter=samples) #default cv = 3

In [7]:
randomCV.fit(X, y)

 
print(randomCV.best_params_)


{'bootstrap': True, 'criterion': 'gini', 'max_depth': None, 'max_features': 9, 'min_samples_leaf': 4, 'min_samples_split': 5}


In [8]:
# use a full grid over all parameters
param_grid = {"max_depth": [3, None],
              "max_features": [1, 3, 10],
              "min_samples_split": [2, 3, 10],
              "min_samples_leaf": [1, 3, 10],
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"]}

In [9]:
# run grid search
grid_search = GridSearchCV(clf, param_grid=param_grid)
start = time()
grid_search.fit(X, y)

GridSearchCV(estimator=RandomForestClassifier(n_estimators=50),
             param_grid={'bootstrap': [True, False],
                         'criterion': ['gini', 'entropy'],
                         'max_depth': [3, None], 'max_features': [1, 3, 10],
                         'min_samples_leaf': [1, 3, 10],
                         'min_samples_split': [2, 3, 10]})

In [10]:
grid_search.best_params_

{'bootstrap': True,
 'criterion': 'entropy',
 'max_depth': None,
 'max_features': 3,
 'min_samples_leaf': 1,
 'min_samples_split': 3}

In [11]:
grid_search.cv_results_['mean_test_score']

array([0.81862117, 0.81524915, 0.81134633, 0.80411792, 0.82861343,
       0.81804085, 0.78242495, 0.79633086, 0.82025534, 0.8408666 ,
       0.84252863, 0.84085732, 0.83805478, 0.82417673, 0.84697617,
       0.83419375, 0.82805014, 0.83753482, 0.82361188, 0.82250387,
       0.81581089, 0.84142216, 0.82138347, 0.82304704, 0.82693129,
       0.82526617, 0.80913959, 0.92488858, 0.92322965, 0.91765862,
       0.9131987 , 0.91599195, 0.90651501, 0.87701486, 0.8792572 ,
       0.87702105, 0.93825132, 0.93323584, 0.92155215, 0.93157072,
       0.92933457, 0.92489477, 0.91653668, 0.89985299, 0.90709223,
       0.93435005, 0.92322656, 0.91933767, 0.92433612, 0.93267874,
       0.91877592, 0.90596565, 0.90042247, 0.91320334, 0.82029403,
       0.80800836, 0.81916435, 0.80412566, 0.81358248, 0.8069251 ,
       0.81247601, 0.81803931, 0.8119251 , 0.83751934, 0.83584803,
       0.84976478, 0.83027236, 0.84862736, 0.82973847, 0.84309037,
       0.83808109, 0.83028938, 0.82193748, 0.84030176, 0.81580

In [12]:
grid_search.best_estimator_

RandomForestClassifier(criterion='entropy', max_features=3, min_samples_split=3,
                       n_estimators=50)