In [None]:
# Setup: imports and dataset preparation
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
# xgboost
import xgboost as xgb
from xgboost import plot_importance
# Silence library warnings so the cell output stays readable.
# NOTE(review): this also hides deprecation warnings emitted by the ML
# libraries used below; consider narrowing the filter.
warnings.filterwarnings('ignore')

# Load the breast-cancer dataset bundled with scikit-learn.
dataset = load_breast_cancer()
features = dataset.data
labels = dataset.target

# One row per sample, one column per feature. The feature names label the
# COLUMNS; passing them as `index` (row labels) raises a shape-mismatch error
# because there is one name per feature, not one per sample.
cancer_df = pd.DataFrame(data=features, columns=dataset.feature_names)
cancer_df['target'] = labels


# Every column except the last is a feature; the last column is the target.
X_features = cancer_df.iloc[:, :-1]
y_label = cancer_df.iloc[:, -1]

In [None]:
#################################################
# XGBoost python native
#################################################

# 1.
X_train, X_test, y_train, y_test = train_test_split(X_features, y_label, 
                                                    test_size=.2, random_state=156)
# 2. 검증용 데이터 셋 (eval_set)
X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train, 
                                            test_size=.1, random_state=156)

# 3. 학습 (&검증)과 테스트 데이터셋을 DMatrix로 변환
dtr = xgb.DMatrix(data=X_tr, label=y_tr)
dval = xgb.DMatrix(data=X_val, label=y_val)
dtest = xgb.DMatrix(data=X_test, label=y_test)

# 4. 하이퍼 파라미터 설정
params = {
    'max_depth':3,
    'eta':0.05, # learning_rate in sklearn
    'objective':'binary:logistic',
    'eval_metric':'logloss' # loss를 log화 시켜줌
}
num_rounds = 400 # n_estimators in sklearn

# 5. 모델학습
eval_list = [(dtr,'train'), (dval, 'eval')]
xgb_model = xgb.train(params=params, dtrain=dtr,
                      num_boost_rounds=num_rounds, # n_estimators in sklearn
                      early_stopping_rounds=50-, # 더 이상 반복하지 말고 멈춰라
                      evals=eval_list # 검증 데이터 셋
                      )

# 6. 결괏값
# 1일 경우의 '확률'만 반환
pred_probs = xgb_model.predict(dtest)
# 1과 0의 결괏값으로 반환
preds = [1 if x > 0.5 else 0 for x in pred_probs]

get_clf_eval(y_test, preds, pred_probs)

# 7. plot_importance
import matplotlib.pyplot as plot
%matplotlib inline

# feature_importance는 f1 score을 기반으로 정해짐
fig, ax = plt.subplots(figsize=(10,12))
plot_importance(xgb_model, ax=ax)

In [None]:
#################################################
# XGBoost sklearn
#################################################

from xgboost import XGBClassifier

# Same splits as the native-API cell: 20% test, then 10% of the remainder as
# the validation set used for early stopping.
X_train, X_test, y_train, y_test = train_test_split(X_features, y_label,
                                                    test_size=.2, random_state=156)
X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train,
                                            test_size=.1, random_state=156)
evals = [(X_tr, y_tr), (X_val, y_val)]
xgb_wrapper = XGBClassifier(n_estimators=400, learning_rate=.05, max_depth=3)
# Early stopping takes effect during training, so it is passed to fit().
# NOTE(review): newer xgboost releases (2.0+) no longer accept
# early_stopping_rounds / eval_metric in fit() and expect them in the
# constructor instead - confirm against the installed version.
xgb_wrapper.fit(X_tr, y_tr, early_stopping_rounds=50,
                eval_metric='logloss', eval_set=evals, verbose=True)
ws50_preds = xgb_wrapper.predict(X_test)
# Column 1 of predict_proba is the probability of the positive class.
ws50_pred_proba = xgb_wrapper.predict_proba(X_test)[:, 1]

# plot_importance
# A bare `subplots` is undefined; the function lives in matplotlib.pyplot.
fig, ax = plt.subplots(figsize=(10, 12))
# Call through the xgb module so this cell keeps working if it is re-run after
# a later cell rebinds the bare name `plot_importance`.
xgb.plot_importance(xgb_wrapper, ax=ax)

In [None]:
#################################################
# lightGBM sklearn
#################################################

# 사실상 classifier만 바뀐 수준

from lightgbm import LGBMClassifier

X_train, X_test, y_train, y_test = train_test_split(X_features, y_label, 
                                                    test_size=.2, random_state=156)
X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train,
                                            test_size=.1, random_state=156)
evals=[(X_tr, y_tr), (X_val, y_val)]
lgbm_wrapper = LGBMClassifier(n_estimators=400, leanring_rate=.05)
# 조기중단 가능
lgbm_wrapper.fit(X_tr, y_tr, early_stopping_rounds=50,
                 eval_metric='logloss', eval_set=evals, verbose=True)
preds = lgbm_wrapper.predict(X_test)
pred_proba = lgbm_wrapper.predict_proba(X_test)[:,1]

# get_clf_eval
get_clf_eval(y_test, preds, pred_proba)

# plot_importance
from lightgbm import plot_importance
import matplotlib.pyplot as plt
%matplotlib inline

fig, ax = plt.figure(figsize=(10,12))
plot_importance(lgbm_wrapper, ax=ax)
plt.show()