# Keywords
- model selection: train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
- regressor: KNeighborsRegressor, Ridge
- ensemble: GradientBoostingRegressor, RandomForestRegressor

In [1]:
import numpy as np
%matplotlib inline

from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import Ridge
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor


# Load the Boston house-prices data as a feature matrix X and target vector y.
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell needs scikit-learn < 1.2.
boston = datasets.load_boston()
X, y = boston.data, boston.target

# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=7)

# k-nearest-neighbours regressor: tune n_neighbors over 3..19 with 10-fold CV.
# n_iter equals the number of candidates (17), so every value is evaluated;
# random_state only pins the order in which they are tried.
# The former `iid=False` argument is dropped: it was deprecated in
# scikit-learn 0.22 and removed in 0.24 (passing it there raises TypeError).
# Since 0.22 the default already averages fold scores without weighting.
param_dist = {'n_neighbors': list(range(3, 20, 1))}
rs = RandomizedSearchCV(KNeighborsRegressor(), param_dist, cv=10, n_iter=17, random_state=7)
rs.fit(X_train, y_train)
print("RandomizedSearchCV best_params: %s, best_score: %.4f" % (rs.best_params_, rs.best_score_))

RandomizedSearchCV best_params: {'n_neighbors': 4}, best_score: 0.4646


In [2]:
# Ridge regression baseline with default hyperparameters: mean of the
# 10-fold cross-validated scores on the training split (the estimator's
# default scorer, which is R^2 for scikit-learn regressors).
np.mean(cross_val_score(estimator=Ridge(), X=X_train, y=y_train, cv=10))

0.7443218509488488

In [3]:
# Gradient boosting regressor with trees up to depth 7: mean of the 10-fold
# cross-validated scores on the training split. random_state is pinned
# (same seed as the train/test split) so the reported score is reproducible
# on Restart & Run All.
cross_val_score(GradientBoostingRegressor(max_depth=7, random_state=7), X_train, y_train, cv=10).mean()

0.8297748978795465

In [4]:
# Gradient boosting regressor (not a random forest: `learning_rate` is a
# boosting parameter): tune max_depth with many small-step boosting rounds
# (n_estimators=4000, learning_rate=0.01) using 5-fold CV on all CPU cores.
# n_iter equals the number of candidates (5), so every depth is evaluated.
# Both the estimator and the search are seeded for reproducible results.
# The former `iid=False` argument is dropped: it was deprecated in
# scikit-learn 0.22 and removed in 0.24 (passing it there raises TypeError).
# NOTE: this cell rebinds `param_dist` and `rs`.
param_dist = {'n_estimators': [4000], 'learning_rate': [0.01], 'max_depth': [1, 2, 3, 5, 7]}
rs = RandomizedSearchCV(GradientBoostingRegressor(random_state=7), param_dist, cv=5, n_iter=5, n_jobs=-1, random_state=7)
rs.fit(X_train, y_train)
print("RandomizedSearchCV best_params: %s, best_score: %.4f" % (rs.best_params_, rs.best_score_))

RandomizedSearchCV best_params: {'n_estimators': 4000, 'max_depth': 3, 'learning_rate': 0.01}, best_score: 0.8884
