In [2]:
# Imports and utilities
import numpy as np
from time import time
from operator import itemgetter
from sklearn import svm, grid_search, datasets
from sklearn.ensemble import RandomForestClassifier
#from spark_sklearn import GridSearchCV

# Utility function to report best scores
def report(grid_scores, n_top=3):
    """Print a ranked summary of the best-scoring grid-search candidates.

    Parameters
    ----------
    grid_scores : iterable
        Score records. Each record must support positional indexing (the
        element at index 1 is the sort key) and expose the attributes
        ``parameters``, ``mean_validation_score`` and
        ``cv_validation_scores``.
    n_top : int, default 3
        Number of leading records (after sorting in descending order) to
        print.

    Returns
    -------
    None
        The summary is written to standard output.
    """
    # Highest sort key first; ties keep their original relative order.
    ranked = sorted(grid_scores, key=itemgetter(1), reverse=True)
    for rank, candidate in enumerate(ranked[:n_top], start=1):
        # Spread of the per-fold scores for this candidate.
        fold_std = np.std(candidate.cv_validation_scores)
        print("Model with rank: {0}".format(rank))
        print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
            candidate.mean_validation_score, fold_std))
        print("Parameters: {0}".format(candidate.parameters))
        print("")

In [5]:
# Load the digits dataset and separate the feature matrix from the labels.
digits = datasets.load_digits()
X, y = digits.data, digits.target

# Fixed seed so that repeated runs of the search produce the same forests
# (and therefore the same scores and ranking).
SEED = 0

# Hyper-parameter grid for the random forest.
# 2 * 3 * 3 * 2 * 2 * 4 = 288 candidate settings.
param_grid = {"max_depth": [3, None],
              "max_features": [1, 3, 10],
             # NOTE(review): left disabled by the original author; reason not recorded.
             # "min_samples_split": [1, 3, 10],
              "min_samples_leaf": [1, 3, 10],
              "bootstrap": [True, False],
              "criterion": ["gini", "entropy"],
              "n_estimators": [10, 20, 40, 80]}

# Base estimator; the grid search varies the hyper-parameters listed in
# param_grid on top of it.
clf = RandomForestClassifier(random_state=SEED)

In [6]:
# Grid search over every combination in param_grid, timed end to end.
gs = grid_search.GridSearchCV(clf, param_grid=param_grid)

start = time()
gs.fit(X, y)
elapsed = time() - start  # wall-clock seconds spent inside fit()

n_candidates = len(gs.grid_scores_)
summary = "GridSearchCV took {:.2f} seconds for {:d} candidate settings."
print(summary.format(elapsed, n_candidates))

# Print the top-ranked candidates.
report(gs.grid_scores_)

GridSearchCV took 179.58 seconds for 288 candidate settings.
Model with rank: 1
Mean validation score: 0.952 (std: 0.004)
Parameters: {'bootstrap': False, 'min_samples_leaf': 1, 'n_estimators': 80, 'criterion': 'gini', 'max_features': 3, 'max_depth': None}

Model with rank: 2
Mean validation score: 0.947 (std: 0.003)
Parameters: {'bootstrap': False, 'min_samples_leaf': 1, 'n_estimators': 80, 'criterion': 'entropy', 'max_features': 3, 'max_depth': None}

Model with rank: 3
Mean validation score: 0.943 (std: 0.009)
Parameters: {'bootstrap': False, 'min_samples_leaf': 1, 'n_estimators': 40, 'criterion': 'gini', 'max_features': 3, 'max_depth': None}

