# 作業
請使用不同的資料集，並使用 hyper-parameter search 的方式，看能不能找出最佳的超參數組合

In [74]:
from sklearn import datasets, metrics
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor

In [103]:
def predict(dataset, **kargs):
    X_train, X_test, Y_train, Y_test = train_test_split(dataset.data, dataset.target, test_size=0.3, random_state=89)
    print(kargs)
    model = GradientBoostingRegressor(**kargs)
    model.fit(X_train, Y_train)

    Y_pred = model.predict(X_test)
    return metrics.mean_squared_error(Y_test, Y_pred)

In [104]:
def search_best(n_estimators, max_depth):
    param_grid = dict(n_estimators=n_estimators, max_depth=max_depth)

    grid_search = GridSearchCV(model, param_grid, scoring="neg_mean_squared_error", n_jobs=-1, verbose=1)

    grid_result = grid_search.fit(X_train, Y_train)
    return (grid_result.best_score_, grid_result.best_params_)

In [105]:
def test(dataset, **kargs):
    print("MSE: %f" % predict(dataset))

    best_score, best_params = search_best(**kargs)
    print("Best Accuracy: %f using %s" % (best_score, best_params))

    print("MSE: %f" % predict(datasets.load_iris(), n_estimators=best_params['n_estimators'], max_depth=best_params['max_depth']))

In [106]:
test(
    datasets.load_iris(),
    n_estimators=[1, 50, 100, 200, 300, 1000],
    max_depth=[1, 3, 5, 11]
)

{}
MSE: 0.023936
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:    0.9s finished


Best Accuracy: -0.030879 using {'max_depth': 11, 'n_estimators': 1000}
{'n_estimators': 1000, 'max_depth': 11}
MSE: 0.031594


In [107]:
test(
    datasets.load_boston(),
    n_estimators=[1, 50, 100, 200, 300, 1000],
    max_depth=[1, 3, 5, 11]
)

{}
MSE: 15.818635
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:    1.1s finished


Best Accuracy: -0.030916 using {'max_depth': 5, 'n_estimators': 1000}
{'n_estimators': 1000, 'max_depth': 5}
MSE: 0.028174


In [108]:
test(
    datasets.load_diabetes(),
    n_estimators=[1, 50, 100, 200, 300, 1000],
    max_depth=[1, 3, 5, 11]
)

{}
MSE: 3783.240713
Fitting 3 folds for each of 24 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Best Accuracy: -0.031159 using {'max_depth': 5, 'n_estimators': 50}
{'n_estimators': 50, 'max_depth': 5}
MSE: 0.033006


[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:    0.9s finished
