# GradientBoostingClassifier
* https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html?highlight=gradient#sklearn.ensemble.GradientBoostingClassifier
* `class sklearn.ensemble.GradientBoostingClassifier(*, loss='log_loss', learning_rate=0.1, n_estimators=100, subsample=1.0, criterion='friedman_mse', min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_depth=3, min_impurity_decrease=0.0, init=None, random_state=None, max_features=None, verbose=0, max_leaf_nodes=None, warm_start=False, validation_fraction=0.1, n_iter_no_change=None, tol=0.0001, ccp_alpha=0.0)`

In [1]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import time
import warnings
warnings.filterwarnings('ignore')
import pandas as pd

# Load the iris dataset via sklearn.datasets.load_iris; later cells read its
# `.data`, `.target` and `.feature_names` attributes.
iris = load_iris()

In [2]:
# Build a feature table from the iris data and attach the class ids as `labels`
# in a single expression, so the frame is never seen in a half-built state.
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(labels=iris.target)
# Last expression of the cell: number of rows per class label.
iris_df['labels'].value_counts()

0    50
1    50
2    50
Name: labels, dtype: int64

In [3]:
# Hold out 20% of the samples as a test split; the fixed random_state keeps
# the split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=110,
)

In [4]:
# GBM time check: fit a baseline GradientBoostingClassifier on the training
# split, score it on the test split, and report how long that took.
# Note: this does not follow the textbook's dataset; it uses iris, so the
# run time is short.
# time.perf_counter() is used instead of time.time() because it is a
# monotonic, high-resolution clock intended for measuring intervals; the
# wall clock can jump if the system time is adjusted mid-run.
start_time = time.perf_counter()

# Fit, predict, and score
gb_clf = GradientBoostingClassifier(random_state=0)
gb_clf.fit(X_train, y_train)
gb_pred = gb_clf.predict(X_test)
gb_accuracy = accuracy_score(y_test, gb_pred)

print("GBM 정확도: {0: .4f}".format(gb_accuracy))
print("GBM 수행시간: {0: .1f} 초".format(time.perf_counter() - start_time))

# Same accuracy again, formatted with an f-string instead of str.format.
print(f'f출력 GBM 정확도:{gb_accuracy:.4f}')

GBM 정확도:  0.9667
GBM 수행시간:  0.2 초
f출력 GBM 정확도:0.9667


In [5]:
from sklearn.model_selection import GridSearchCV

# Candidate grid: 2 values for each of 2 hyperparameters (4 combinations).
params = dict(
    n_estimators=[100, 500],
    learning_rate=[0.05, 0.1],
)

# Grid search over `params` with cv=2, starting from the `gb_clf` estimator.
grid_cv = GridSearchCV(estimator=gb_clf, param_grid=params, cv=2, verbose=1)
grid_cv.fit(X_train, y_train)

print('Best Hyper Parameter: \n', grid_cv.best_params_)
print('최고 예측 정확도: {0: .4f}'.format(grid_cv.best_score_))

# Evaluate the estimator selected by the search on the held-out test split.
best_gb = grid_cv.best_estimator_
gb_pred = best_gb.predict(X_test)
gb_accuracy = accuracy_score(y_test, gb_pred)
print('GBM 정확도: {0: .4f}'.format(gb_accuracy))

Fitting 2 folds for each of 4 candidates, totalling 8 fits
Best Hyper Parameter: 
 {'learning_rate': 0.05, 'n_estimators': 100}
최고 예측 정확도:  0.9500
GBM 정확도:  0.9667


# XGBoost
* https://xgboost.readthedocs.io/en/latest/index.html
* https://xgboost.readthedocs.io/en/latest/python/index.html

In [12]:
import xgboost as xgb
from xgboost import XGBClassifier

# The estimator is named `xgb_clf` (mirroring `gb_clf`) rather than `xgb`:
# binding it to `xgb` would overwrite the module alias imported above, making
# the xgboost module unreachable and changing what `xgb` means on a re-run.
xgb_clf = XGBClassifier()

xgb_clf.fit(X_train, y_train)

xgb_pred = xgb_clf.predict(X_test)

# Keep the training-set score in a variable instead of computing and
# discarding it, so it can be compared against the test accuracy below.
xgb_train_acc = xgb_clf.score(X_train, y_train)

# accuracy_score takes (y_true, y_pred); pass the ground truth first, as the
# earlier cells do.
acc = accuracy_score(y_test, xgb_pred)
print(acc)

0.9666666666666667
