In [None]:
from sklearn.datasets import load_boston
import numpy as np
import matplotlib.pyplot as plt
from time import time
from scipy.stats import randint as sp_randint



In [None]:
# Utility function to report best scores
def report(results, n_top=3):
    """Print the best-ranked candidates of a hyper-parameter search.

    Parameters
    ----------
    results : mapping
        Search results with index-aligned entries under the keys
        'rank_test_score', 'mean_test_score', 'std_test_score' and
        'params' (the layout of the ``cv_results_`` attribute that the
        notebook passes in). Values may be numpy arrays or plain sequences.
    n_top : int, default 3
        Highest rank to report; ranks 1..n_top are printed in order.

    Notes
    -----
    Every candidate holding a given rank is printed, so tied ranks yield
    several entries for that rank, and a rank nobody holds prints nothing.
    """
    # Coerce once to an array: comparing a plain list with an int yields a
    # single False instead of an elementwise mask, which would silently
    # suppress all output.
    ranks = np.asarray(results['rank_test_score'])
    for i in range(1, n_top + 1):
        for candidate in np.flatnonzero(ranks == i):
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                  results['mean_test_score'][candidate],
                  results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")


In [None]:
# Load the Boston housing data and split it into the feature matrix X and
# the regression target y, then show the dataset's own description.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn release -- confirm the
# environment or plan a replacement dataset.
boston = load_boston()
X, y = boston.data, boston.target
print(boston.DESCR)


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score

# Baseline: one fixed random forest configuration, scored with 5-fold
# cross-validation on the full data set.
rf = RandomForestRegressor(
    n_estimators=100,
    max_features=10,
    min_samples_split=2,
    random_state=10,  # fixed seed so the forest is reproducible
)
score = cross_val_score(rf, X, y, cv=5)

# Summarise the per-fold scores by their mean and standard deviation.
print(
    'score: mean={:.2e} ; std={:.2e}'.format(score.mean(), score.std())
)
    

In [None]:
from sklearn.model_selection import GridSearchCV

# Exhaustive search: every combination of the values below is evaluated
# with 5-fold cross-validation, using `rf` as the estimator template.
param_grid = dict(
    n_estimators=[100, 400, 700, 1000],
    max_features=[1, 8, 13],
    min_samples_split=[2, 5, 10],
)
clf = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=5,
    verbose=1,
    n_jobs=4,
)

clf.fit(X, y)
print('GridSearch finished')

In [None]:
    # One line per evaluated parameter combination: mean test score, twice
    # the standard deviation across folds, and the parameters themselves.
    # `means` is kept as a notebook-level name because the plotting cell
    # reads it.
    cv_results = clf.cv_results_
    means = cv_results['mean_test_score']
    stds = cv_results['std_test_score']
    for mean, std, params in zip(means, stds, cv_results['params']):
        spread = std * 2
        print("%0.3f (+/-%0.03f) for %r" % (mean, spread, params))


In [None]:
# Bar chart of the mean test score of each grid-search candidate, in the
# order the candidates appear in cv_results_.
candidate_positions = range(len(means))
plt.bar(candidate_positions, means)
# Fixed y-window of 0.3 to 0.7; bars outside it are clipped.
plt.ylim((.3, .7))
plt.show()

In [None]:
# Show the three best-ranked candidates of the search currently held in `clf`.
report(clf.cv_results_, n_top=3)

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Randomized search: 15 parameter settings are sampled and each is evaluated
# with 5-fold cross-validation, using `rf` as the estimator template.
#
# scipy.stats.randint(low, high) draws integers from the half-open interval
# [low, high), so each upper bound is one past the largest value that should
# be tried. The bounds below cover the same ranges as the grid search
# (max_features up to 13, min_samples_split up to 10); the previous bounds
# of 13 and 10 could never sample those top values.
param_dist = {
    'n_estimators' : [100,400,700,1000],
    'max_features' : sp_randint(1,14),       # 1..13 inclusive
    'min_samples_split' : sp_randint(2,11)   # 2..10 inclusive
}
clf = RandomizedSearchCV(rf, param_dist, cv=5, n_iter=15, 
                         verbose=1,n_jobs=4, random_state=1)

clf.fit(X,y)
print('Random finished')


In [None]:
# Show the three best-ranked candidates of the search currently held in `clf`.
report(clf.cv_results_, n_top=3)