In [8]:
import numpy as np
import pandas as pd
import time
import warnings

warnings.filterwarnings('ignore')

# import utils
import utils

In [16]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

In [10]:
X_train, X_test, y_train, y_test = utils.get_human_dataset()

In [12]:
# GBM 수행 시간 측정을 위한, 시작 시간 설정
start_time = time.time()

# n_estimators's default is 100
gb_clf = GradientBoostingClassifier(random_state=0)
gb_clf.fit(X_train, y_train)
gb_pred = gb_clf.predict(X_test)
gb_accuracy = accuracy_score(y_test, gb_pred)

In [13]:
print('GBM 정확도: {0:.4f}'.format(gb_accuracy))
print('GBM 수행 시간: {0:.1f}'.format(time.time() - start_time))

GBM 정확도: 0.9382
GBM 수행 시간: 526.1


In [17]:
params = {
    'n_estimators': [100, 500],
    'learning_rate': [0.05, 0.1]
}

In [18]:
grid_cv = GridSearchCV(gb_clf, param_grid=params, cv=2, verbose=1)

In [19]:
grid_cv.fit(X_train, y_train)

Fitting 2 folds for each of 4 candidates, totalling 8 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


KeyboardInterrupt: 

In [None]:
print('최적 하이퍼 파라미터:\n', grid_cv.best_params_)
print('최고 예측 정확도: {0:.4f}'.format(grid_cv.best_score_))

In [None]:
# GridSearchCV를 이용하여 최적으로 학습된 estimator로 predict 수행. 
gb_pred = grid_cv.best_estimator_.predict(X_test)
gb_accuracy = accuracy_score(y_test, gb_pred)
print('GBM 정확도: {0:.4f}'.format(gb_accuracy))