In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd

In [2]:
breast = load_breast_cancer()
dir(breast)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [3]:
breast_data = breast.data
breast_label = breast.target
print(breast_data.shape)
print(breast_label.shape)

(569, 30)
(569,)


In [4]:
print(breast.target_names)

['malignant' 'benign']


In [5]:
digits_df = pd.DataFrame(data=breast_data, columns=breast.feature_names)
digits_df.describe()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
count,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,...,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0,569.0
mean,14.127292,19.289649,91.969033,654.889104,0.09636,0.104341,0.088799,0.048919,0.181162,0.062798,...,16.26919,25.677223,107.261213,880.583128,0.132369,0.254265,0.272188,0.114606,0.290076,0.083946
std,3.524049,4.301036,24.298981,351.914129,0.014064,0.052813,0.07972,0.038803,0.027414,0.00706,...,4.833242,6.146258,33.602542,569.356993,0.022832,0.157336,0.208624,0.065732,0.061867,0.018061
min,6.981,9.71,43.79,143.5,0.05263,0.01938,0.0,0.0,0.106,0.04996,...,7.93,12.02,50.41,185.2,0.07117,0.02729,0.0,0.0,0.1565,0.05504
25%,11.7,16.17,75.17,420.3,0.08637,0.06492,0.02956,0.02031,0.1619,0.0577,...,13.01,21.08,84.11,515.3,0.1166,0.1472,0.1145,0.06493,0.2504,0.07146
50%,13.37,18.84,86.24,551.1,0.09587,0.09263,0.06154,0.0335,0.1792,0.06154,...,14.97,25.41,97.66,686.5,0.1313,0.2119,0.2267,0.09993,0.2822,0.08004
75%,15.78,21.8,104.1,782.7,0.1053,0.1304,0.1307,0.074,0.1957,0.06612,...,18.79,29.72,125.4,1084.0,0.146,0.3391,0.3829,0.1614,0.3179,0.09208
max,28.11,39.28,188.5,2501.0,0.1634,0.3454,0.4268,0.2012,0.304,0.09744,...,36.04,49.54,251.2,4254.0,0.2226,1.058,1.252,0.291,0.6638,0.2075


In [7]:
train_input, test_input, train_target, test_target = train_test_split(breast.data, breast.target, test_size=0.15)
print("train_input 갯수 :", len(train_input), "test_input 갯수 :", len(test_input))

train_input 갯수 : 483 test_input 갯수 : 86


In [8]:
# 의사 결정 나무
from sklearn.tree import DecisionTreeClassifier

decision_tree = DecisionTreeClassifier()
decision_tree.fit(train_input, train_target)
test_pred = decision_tree.predict(test_input)

print(classification_report(test_target, test_pred))

              precision    recall  f1-score   support

           0       0.74      0.95      0.83        21
           1       0.98      0.89      0.94        65

    accuracy                           0.91        86
   macro avg       0.86      0.92      0.88        86
weighted avg       0.92      0.91      0.91        86



In [9]:
# RandomForest
from sklearn.ensemble import RandomForestClassifier

random_forest = RandomForestClassifier()
random_forest.fit(train_input, train_target)
test_pred2 = random_forest.predict(test_input)

print(classification_report(test_target, test_pred2))

              precision    recall  f1-score   support

           0       0.91      0.95      0.93        21
           1       0.98      0.97      0.98        65

    accuracy                           0.97        86
   macro avg       0.95      0.96      0.95        86
weighted avg       0.97      0.97      0.97        86



In [10]:
# svm
from sklearn import svm

svm_model = svm.SVC()
svm_model.fit(train_input, train_target)
test_pred3 = svm_model.predict(test_input)

print(classification_report(test_target, test_pred3))

              precision    recall  f1-score   support

           0       0.95      0.90      0.93        21
           1       0.97      0.98      0.98        65

    accuracy                           0.97        86
   macro avg       0.96      0.94      0.95        86
weighted avg       0.96      0.97      0.96        86



In [11]:
# SGDClassfier
from sklearn.linear_model import SGDClassifier

sgd_model = SGDClassifier()
sgd_model.fit(train_input, train_target)
test_pred4 = sgd_model.predict(test_input)

print(classification_report(test_target, test_pred4))

              precision    recall  f1-score   support

           0       0.66      1.00      0.79        21
           1       1.00      0.83      0.91        65

    accuracy                           0.87        86
   macro avg       0.83      0.92      0.85        86
weighted avg       0.92      0.87      0.88        86



In [12]:
# Logistic Regression
from sklearn.linear_model import LogisticRegression

logistic_model = LogisticRegression(max_iter=3000) # 반복 횟수 증가 및 제한
logistic_model.fit(train_input, train_target)
test_pred5 = logistic_model.predict(test_input)

print(classification_report(test_target, test_pred5))

              precision    recall  f1-score   support

           0       0.91      1.00      0.95        21
           1       1.00      0.97      0.98        65

    accuracy                           0.98        86
   macro avg       0.96      0.98      0.97        86
weighted avg       0.98      0.98      0.98        86

