In [None]:
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix

In [None]:
# Load the breast-cancer dataset bundled with scikit-learn.
cancer_data = load_breast_cancer()

# Show only the available fields; echoing the whole object would dump the
# raw arrays and the full description text into the cell output.
cancer_data.keys()

In [None]:
# Wrap the feature matrix in a DataFrame labelled with the feature names.
df = pd.DataFrame(cancer_data.data, columns=cancer_data.feature_names)

# A cell only renders its final expression, so a bare `df.head()` placed
# before `df.shape` was silently discarded. Display it explicitly and keep
# the shape as the cell's output value.
display(df.head())
df.shape


In [None]:
# Attach the class labels as a "target" column next to the features.
df = df.assign(target=cancer_data.target)
df.head()

In [None]:
# Count how many rows carry each distinct label value.
target_counts = df["target"].value_counts()
target_counts

In [None]:
# Separate the feature columns from the label column, then hold out 20% of
# the rows as a test set. The split is stratified on the label and seeded
# so it is repeatable.
features = df.drop(columns="target")
labels = df["target"]

X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=3333,
    stratify=labels,
)

In [None]:
# Print the (rows, columns) of the train and test feature sets, one per line.
print(X_train.shape, X_test.shape, sep="\n")

In [None]:
# Baseline: a single seeded decision tree (`fit` returns the fitted
# estimator, so construction and training can be chained).
model_tree = DecisionTreeClassifier(random_state=3333).fit(X_train, y_train)

# Score the tree on the held-out rows; the accuracy is the cell's output.
pred = model_tree.predict(X_test)
accuracy_score(y_test, pred)

In [None]:
import os

# Report how many CPUs the OS exposes to this kernel.
available_cpus = os.cpu_count()
print(available_cpus)

# 1. 배깅

In [15]:
# Bagging ensemble: 1000 seeded trees, trained with n_jobs=-1.
model_rf = RandomForestClassifier(n_estimators=1000, n_jobs=-1, random_state=3333)
model_rf.fit(X_train, y_train)

pred = model_rf.predict(X_test)

# Print accuracy, confusion matrix and per-class report, separated by a
# 60-character divider line.
divider = "*" * 60
report_sections = (
    accuracy_score(y_test, pred),
    divider,
    confusion_matrix(y_test, pred),
    divider,
    classification_report(y_test, pred),
)
for section in report_sections:
    print(section)

0.9736842105263158
************************************************************
[[42  0]
 [ 3 69]]
************************************************************
              precision    recall  f1-score   support

           0       0.93      1.00      0.97        42
           1       1.00      0.96      0.98        72

    accuracy                           0.97       114
   macro avg       0.97      0.98      0.97       114
weighted avg       0.98      0.97      0.97       114



# 2. 부스팅

In [None]:
import xgboost as xgb

# Re-load the dataset and re-split it as plain NumPy arrays. The split
# arguments (test_size, random_state, stratify) mirror the earlier
# DataFrame-based split.
cancer_data = load_breast_cancer()

X_train, X_test, y_train, y_test = train_test_split(
    cancer_data.data,
    cancer_data.target,
    test_size=0.2,
    random_state=3333,
    stratify=cancer_data.target,
)

# Boosting ensemble. `random_state` is pinned to the same seed as the
# decision tree and random forest, so every model in the notebook is
# explicitly seeded rather than relying on the library default.
xgb_model = xgb.XGBClassifier(
    n_estimators=1000,
    learning_rate=0.05,
    max_depth=3,
    eval_metric="logloss",
    random_state=3333,
)
xgb_model.fit(X_train, y_train)

In [16]:
# Evaluate the boosted model on the held-out rows.
pred = xgb_model.predict(X_test)

# Print accuracy, confusion matrix and per-class report, separated by a
# 60-character divider line.
divider = "*" * 60
report_sections = (
    accuracy_score(y_test, pred),
    divider,
    confusion_matrix(y_test, pred),
    divider,
    classification_report(y_test, pred),
)
for section in report_sections:
    print(section)

0.9736842105263158
************************************************************
[[41  1]
 [ 2 70]]
************************************************************
              precision    recall  f1-score   support

           0       0.95      0.98      0.96        42
           1       0.99      0.97      0.98        72

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114

