In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23. Prefer the standalone joblib package and only fall back to the vendored
# copy on old installations where standalone joblib is missing.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

In [2]:
# Load the Iris dataset and hold out 30% of the rows as a test split.
data = load_iris()
x, y = data.data, data.target
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=54321,  # fixed seed so the split is the same on every run
)

In [3]:
# Exhaustive grid search over random-forest hyper-parameters on the training split.
forest = RandomForestClassifier()
param_grid = {
    # Tuned hyper-parameters: 7 * 10 * 10 = 700 candidate combinations.
    'n_estimators'      : [5, 10, 20, 30, 50, 100, 300],
    # Single-valued entries are not searched; they just fix the setting for every
    # candidate (and therefore also show up in best_params_).
    'random_state'      : [0],
    'n_jobs'            : [1],
    'min_samples_split' : [3, 5, 10, 15, 20, 25, 30, 40, 50, 100],
    'max_depth'         : [3, 5, 10, 15, 20, 25, 30, 40, 50, 100]
}
# Pin the number of folds explicitly. The recorded run used cv=None, which meant
# 3-fold CV in the scikit-learn release of that era; since scikit-learn 0.22 the
# default is 5-fold, which would silently change best_score_ / best_params_.
forestGrid = GridSearchCV(forest, param_grid, cv=3)
# fit() returns the fitted search object, so fgFit is the fitted GridSearchCV.
fgFit = forestGrid.fit(x_train, y_train)
fgFit

GridSearchCV(cv=None, error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'min_samples_split': [3, 5, 10, 15, 20, 25, 30, 40, 50, 100], 'random_state': [0], 'n_estimators': [5, 10, 20, 30, 50, 100, 300], 'n_jobs': [1], 'max_depth': [3, 5, 10, 15, 20, 25, 30, 40, 50, 100]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [4]:
# Estimator refit with the best parameter combination found by the grid search
# (available because the search ran with refit=True).
fgFit.best_estimator_

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=3, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=15, min_weight_fraction_leaf=0.0,
            n_estimators=50, n_jobs=1, oob_score=False, random_state=0,
            verbose=0, warm_start=False)

In [5]:
# Best mean cross-validated score over all candidates. The search was created
# with scoring=None, so this is the classifier's default score (accuracy).
fgFit.best_score_

0.97142857142857142

In [6]:
# Parameter combination that achieved best_score_. It contains the single-valued
# grid entries (n_jobs, random_state) as well as the tuned hyper-parameters.
fgFit.best_params_

{'max_depth': 3,
 'min_samples_split': 15,
 'n_estimators': 50,
 'n_jobs': 1,
 'random_state': 0}

In [7]:
# Copy the winning grid-search parameters onto `forest` and train it.
# NOTE: the fit uses the complete x / y arrays, so the rows held out as
# x_test / y_test are part of this model's training data.
forest.set_params(**fgFit.best_params_).fit(x, y)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=3, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=15, min_weight_fraction_leaf=0.0,
            n_estimators=50, n_jobs=1, oob_score=False, random_state=0,
            verbose=0, warm_start=False)

In [8]:
# Persist the trained forest to disk as a compressed joblib pickle.
joblib.dump(value=forest, filename='./rfcParam.pkl', compress=True)

['./rfcParam.pkl']

In [9]:
# Restore the model from the file written by the save cell; `forest` is rebound
# to the deserialized estimator.
# NOTE: joblib.load unpickles its input, so only use it on files you trust.
forest = joblib.load(filename='./rfcParam.pkl')

In [10]:
# Classify one sample. predict() expects a 2-D (n_samples, n_features) array,
# so the four feature values are wrapped as a single row of shape (1, 4).
t = np.array([[5.1, 3.5, 1.4, 0.2]])
forest.predict(t)

array([0])