In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

dataset = load_breast_cancer()
X_features = dataset.data
y_label = dataset.target

X_train, X_test, y_train, y_test = train_test_split(X_features, y_label, test_size=0.2, random_state=156)

In [4]:
from lightgbm import LGBMClassifier
from sklearn.metrics import roc_auc_score

lgbm_clf = LGBMClassifier(n_estimators=500)

evals = [(X_test, y_test)]
lgbm_clf.fit(X_train, y_train, early_stopping_rounds=100, eval_metric="auc", eval_set=evals, verbose=True)

lgbm_roc_score = roc_auc_score(y_test, lgbm_clf.predict_proba(X_test)[:, 1], average='macro')
print('ROC AUC: {0:.4f}'.format(lgbm_roc_score))

[1]	valid_0's auc: 0.957353	valid_0's binary_logloss: 0.565079
Training until validation scores don't improve for 100 rounds
[2]	valid_0's auc: 0.987539	valid_0's binary_logloss: 0.507451
[3]	valid_0's auc: 0.988592	valid_0's binary_logloss: 0.458489
[4]	valid_0's auc: 0.988768	valid_0's binary_logloss: 0.417481
[5]	valid_0's auc: 0.986311	valid_0's binary_logloss: 0.385507
[6]	valid_0's auc: 0.986837	valid_0's binary_logloss: 0.355846
[7]	valid_0's auc: 0.98789	valid_0's binary_logloss: 0.330897
[8]	valid_0's auc: 0.988417	valid_0's binary_logloss: 0.306923
[9]	valid_0's auc: 0.990172	valid_0's binary_logloss: 0.28776
[10]	valid_0's auc: 0.98947	valid_0's binary_logloss: 0.26917
[11]	valid_0's auc: 0.991576	valid_0's binary_logloss: 0.250954
[12]	valid_0's auc: 0.991576	valid_0's binary_logloss: 0.23847
[13]	valid_0's auc: 0.990172	valid_0's binary_logloss: 0.225865
[14]	valid_0's auc: 0.989821	valid_0's binary_logloss: 0.215076
[15]	valid_0's auc: 0.98947	valid_0's binary_logloss: 0.

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

LGBM_clf = LGBMClassifier(n_estimators=200)

params = {
    'num_leaves' : [32, 64],
    'max_depth' : [128, 160],
    'min_child_sample' : [60, 100],
    'subsample' : [0.8, 1]
}

# 수행속도를 높이기 위해 CV를 지정하지 않음
grid_cv = GridSearchCV(lgbm_clf, param_grid=params)
grid_cv.fit(X_train, y_train, early_stopping_rounds=30, eval_metric="auc",
                              eval_set=[(X_train, y_train), (X_test, y_test)])

print('GridSearchCV 최적 파라미터:', grid_cv.best_params_)                              
lgbm_roc_score = roc_auc_score(y_test, grid_cv.predict_proba(X_test)[:, 1], average='macro')
print('ROC AUC: {0:.4f}'.format(lgbm_roc_score))

lid_1's binary_logloss: 0.209345
[16]	valid_0's auc: 0.99002	valid_0's binary_logloss: 0.178195	valid_1's auc: 0.990874	valid_1's binary_logloss: 0.196751
[17]	valid_0's auc: 0.990224	valid_0's binary_logloss: 0.167256	valid_1's auc: 0.991225	valid_1's binary_logloss: 0.188026
[18]	valid_0's auc: 0.990367	valid_0's binary_logloss: 0.15695	valid_1's auc: 0.991576	valid_1's binary_logloss: 0.178073
[19]	valid_0's auc: 0.990347	valid_0's binary_logloss: 0.150434	valid_1's auc: 0.991225	valid_1's binary_logloss: 0.173493
[20]	valid_0's auc: 0.99049	valid_0's binary_logloss: 0.142951	valid_1's auc: 0.991225	valid_1's binary_logloss: 0.168827
[21]	valid_0's auc: 0.990602	valid_0's binary_logloss: 0.135195	valid_1's auc: 0.990523	valid_1's binary_logloss: 0.162358
[22]	valid_0's auc: 0.990745	valid_0's binary_logloss: 0.128626	valid_1's auc: 0.990523	valid_1's binary_logloss: 0.154842
[23]	valid_0's auc: 0.990847	valid_0's binary_logloss: 0.124395	valid_1's auc: 0.990523	valid_1's binary_logl

In [7]:
lgbm_clf = LGBMClassifier(n_estimators=1000, num_leaves=32, subsample=0.8, min_child_samples=60, max_depth=128)

evals = [(X_test, y_test)]
lgbm_clf.fit(X_train, y_train, early_stopping_rounds=100, eval_metric="auc", eval_set=evals, verbose=True)

lgbm_roc_score = roc_auc_score(y_test, lgbm_clf.predict_proba(X_test)[:, 1], average='macro')
print('ROC AUC: {0:.4f}'.format(lgbm_roc_score))

[1]	valid_0's auc: 0.969112	valid_0's binary_logloss: 0.569874
Training until validation scores don't improve for 100 rounds
[2]	valid_0's auc: 0.982801	valid_0's binary_logloss: 0.514245
[3]	valid_0's auc: 0.987364	valid_0's binary_logloss: 0.468235
[4]	valid_0's auc: 0.988066	valid_0's binary_logloss: 0.427097
[5]	valid_0's auc: 0.988066	valid_0's binary_logloss: 0.391164
[6]	valid_0's auc: 0.988417	valid_0's binary_logloss: 0.361162
[7]	valid_0's auc: 0.989821	valid_0's binary_logloss: 0.337014
[8]	valid_0's auc: 0.988768	valid_0's binary_logloss: 0.314257
[9]	valid_0's auc: 0.988768	valid_0's binary_logloss: 0.292985
[10]	valid_0's auc: 0.98947	valid_0's binary_logloss: 0.275811
[11]	valid_0's auc: 0.990523	valid_0's binary_logloss: 0.260942
[12]	valid_0's auc: 0.990172	valid_0's binary_logloss: 0.246998
[13]	valid_0's auc: 0.990172	valid_0's binary_logloss: 0.233185
[14]	valid_0's auc: 0.989821	valid_0's binary_logloss: 0.221633
[15]	valid_0's auc: 0.98947	valid_0's binary_logloss