In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, metrics
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
import xgboost as xgb


In [None]:

# Load scikit-learn's bundled breast-cancer dataset.
br_cancer = datasets.load_breast_cancer()
# Supervised learning needs ground-truth labels alongside the features,
# so pull the feature matrix and the target vector out together.
X, y = br_cancer.data, br_cancer.target

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=20,
)


In [None]:
# Wrap each split in XGBoost's DMatrix container, attaching its labels.
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

In [None]:
# Booster hyper-parameters handed to xgb.train.
# Early stopping is deliberately absent from this dict: it is not a booster
# parameter (XGBoost reported the former 'early_stoppings' key as
# "not used"); it is requested through the `early_stopping_rounds` argument
# of xgb.train instead.
params = {
    'max_depth': 3,                   # maximum depth of each tree
    'eta': 0.1,                       # learning rate
    'objective': 'binary:logistic',   # objective: binary classification
    'eval_metric': 'logloss',         # metric reported for the evaluation sets
}

In [None]:
# Evaluation sets whose metric is logged every boosting round. When early
# stopping is active XGBoost monitors the LAST entry, i.e. 'eval' here.
wlist = [(dtrain, 'train'), (dtest, 'eval')]
# Train for at most 1000 rounds, halting once the 'eval' metric has failed to
# improve for 100 consecutive rounds. Early stopping must be passed here as a
# keyword argument of xgb.train; it is not read from the params dict.
# NOTE(review): 'eval' is the held-out test split, so it also steers when
# training stops and the later test metrics are somewhat optimistic --
# consider a separate validation split.
xgb_model = xgb.train(params=params,
                      dtrain=dtrain,
                      num_boost_round=1000,
                      evals=wlist,
                      early_stopping_rounds=100)

Parameters: { "early_stoppings" } are not used.

[0]	train-logloss:0.61025	eval-logloss:0.61722
[1]	train-logloss:0.54146	eval-logloss:0.55122
[2]	train-logloss:0.48398	eval-logloss:0.49380
[3]	train-logloss:0.43395	eval-logloss:0.44717
[4]	train-logloss:0.39192	eval-logloss:0.40641
[5]	train-logloss:0.35488	eval-logloss:0.37130
[6]	train-logloss:0.32233	eval-logloss:0.33748
[7]	train-logloss:0.29499	eval-logloss:0.31130
[8]	train-logloss:0.26992	eval-logloss:0.28644
[9]	train-logloss:0.24699	eval-logloss:0.26511
[10]	train-logloss:0.22688	eval-logloss:0.24776
[11]	train-logloss:0.20893	eval-logloss:0.23093
[12]	train-logloss:0.19298	eval-logloss:0.21768
[13]	train-logloss:0.17841	eval-logloss:0.20492
[14]	train-logloss:0.16547	eval-logloss:0.19246
[15]	train-logloss:0.15374	eval-logloss:0.18244
[16]	train-logloss:0.14262	eval-logloss:0.17328
[17]	train-logloss:0.13350	eval-logloss:0.16782
[18]	train-logloss:0.12492	eval-logloss:0.16083
[19]	train-logloss:0.11635	eval-logloss:0.15224
[

In [None]:
# With the 'binary:logistic' objective, predict() yields one probability of
# the positive class (label 1) per test row.
y_pred_proba = xgb_model.predict(dtest)
# scikit-learn's breast-cancer dataset encodes 0 = malignant, 1 = benign.
# A sample is labelled benign (1) only when its predicted probability exceeds
# the cut-off; everything else is labelled malignant (0).
# NOTE(review): 0.9 looks like a deliberately conservative cut-off -- confirm.
BENIGN_THRESHOLD = 0.9
y_pred = [1 if proba > BENIGN_THRESHOLD else 0 for proba in y_pred_proba]
print(y_pred)

[1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1]


In [None]:
# Score the thresholded predictions against the held-out labels: overall
# accuracy, then the confusion matrix, then the per-class report.
for metric in (accuracy_score, confusion_matrix, classification_report):
    print(metric(y_test, y_pred))

0.9736842105263158
[[48  0]
 [ 3 63]]
              precision    recall  f1-score   support

           0       0.94      1.00      0.97        48
           1       1.00      0.95      0.98        66

    accuracy                           0.97       114
   macro avg       0.97      0.98      0.97       114
weighted avg       0.98      0.97      0.97       114

