In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
#wine data 불러오기
from sklearn.datasets import load_wine
wine = load_wine()
X = wine.data
y = wine.target

In [3]:
#6가지 모델 정의
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

models = [
    ('Logistic Regression', LogisticRegression(max_iter=10000)),
    ('K-Nearest Neighbors', KNeighborsClassifier()),
    ('Support Vector Machine', SVC()),
    ('Decision Tree', DecisionTreeClassifier()),
    ('Random Forest', RandomForestClassifier()),
    ('Gradient Boosting', GradientBoostingClassifier())
]

In [4]:
# 평가 지표 설정
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, make_scorer

scoring = {
    'accuracy': make_scorer(accuracy_score),
    'precision': make_scorer(precision_score, average='macro'),
    'recall': make_scorer(recall_score, average='macro'),
    'f1': make_scorer(f1_score, average='macro')
}

## LogisticRegression

In [5]:
model = LogisticRegression(max_iter=10000)
cv_results = cross_validate(model, X, y, cv=5, scoring=scoring)
print("Logistic Regression")
print(f"Accuracy: {np.mean(cv_results['test_accuracy']):.2f}")
print(f"Precision: {np.mean(cv_results['test_precision']):.2f}")
print(f"Recall: {np.mean(cv_results['test_recall']):.2f}")
print(f"F1-score: {np.mean(cv_results['test_f1']):.2f}")

Logistic Regression
Accuracy: 0.96
Precision: 0.96
Recall: 0.97
F1-score: 0.96


## KNN

In [11]:
model = KNeighborsClassifier()
cv_results = cross_validate(model, X, y, cv=5, scoring=scoring)
print("K-Nearest Neighbors")
print(f"Accuracy: {np.mean(cv_results['test_accuracy']):.2f}")
print(f"Precision: {np.mean(cv_results['test_precision']):.2f}")
print(f"Recall: {np.mean(cv_results['test_recall']):.2f}")
print(f"F1-score: {np.mean(cv_results['test_f1']):.2f}")

K-Nearest Neighbors
Accuracy: 0.69
Precision: 0.68
Recall: 0.68
F1-score: 0.67


## SVC

In [7]:
model = SVC()
cv_results = cross_validate(model, X, y, cv=5, scoring=scoring)
print("Support Vector Machine")
print(f"Accuracy: {np.mean(cv_results['test_accuracy']):.2f}")
print(f"Precision: {np.mean(cv_results['test_precision']):.2f}")
print(f"Recall: {np.mean(cv_results['test_recall']):.2f}")
print(f"F1-score: {np.mean(cv_results['test_f1']):.2f}")

Support Vector Machine
Accuracy: 0.66
Precision: 0.60
Recall: 0.63
F1-score: 0.60


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Decision Tree

In [8]:
model = DecisionTreeClassifier()
cv_results = cross_validate(model, X, y, cv=5, scoring=scoring)
print("Decision Tree")
print(f"Accuracy: {np.mean(cv_results['test_accuracy']):.2f}")
print(f"Precision: {np.mean(cv_results['test_precision']):.2f}")
print(f"Recall: {np.mean(cv_results['test_recall']):.2f}")
print(f"F1-score: {np.mean(cv_results['test_f1']):.2f}")

Decision Tree
Accuracy: 0.88
Precision: 0.88
Recall: 0.88
F1-score: 0.87


## Random Forest

In [9]:
model = RandomForestClassifier()
cv_results = cross_validate(model, X, y, cv=5, scoring=scoring)
print("Random Forest")
print(f"2Accuracy: {np.mean(cv_results['test_accuracy']):.2f}")
print(f"2Precision: {np.mean(cv_results['test_precision']):.2f}")
print(f"2Recall: {np.mean(cv_results['test_recall']):.2f}")
print(f"2F1-score: {np.mean(cv_results['test_f1']):.2f}")

Random Forest
2Accuracy: 0.97
2Precision: 0.97
2Recall: 0.98
2F1-score: 0.97


## Gradient Boosting

In [10]:
model = GradientBoostingClassifier()
cv_results = cross_validate(model, X, y, cv=5, scoring=scoring)
print("Gradient Boosting")
print(f"Accuracy: {np.mean(cv_results['test_accuracy']):.2f}")
print(f"Precision: {np.mean(cv_results['test_precision']):.2f}")
print(f"Recall: {np.mean(cv_results['test_recall']):.2f}")
print(f"F1-score: {np.mean(cv_results['test_f1']):.2f}")

Gradient Boosting
Accuracy: 0.94
Precision: 0.95
Recall: 0.94
F1-score: 0.94


## 최고 성능은 Random Forest로 여러 번 수행

In [16]:
model = RandomForestClassifier()

for i in range(6,21):
    cv_results = cross_validate(model, X, y, cv=i, scoring=scoring)
    print("Random Forest")
    print(f"Accuracy: {np.mean(cv_results['test_accuracy']):.2f}")
    print(f"Precision: {np.mean(cv_results['test_precision']):.2f}")
    print(f"Recall: {np.mean(cv_results['test_recall']):.2f}")
    print(f"F1-score: {np.mean(cv_results['test_f1']):.2f}")
    print()

Random Forest
Accuracy: 0.99
Precision: 0.99
Recall: 0.99
F1-score: 0.99

Random Forest
Accuracy: 0.97
Precision: 0.97
Recall: 0.98
F1-score: 0.97

Random Forest
Accuracy: 0.98
Precision: 0.98
Recall: 0.98
F1-score: 0.98

Random Forest
Accuracy: 0.98
Precision: 0.98
Recall: 0.98
F1-score: 0.98

Random Forest
Accuracy: 0.98
Precision: 0.98
Recall: 0.98
F1-score: 0.98

Random Forest
Accuracy: 0.98
Precision: 0.99
Recall: 0.98
F1-score: 0.98

Random Forest
Accuracy: 0.99
Precision: 0.99
Recall: 0.99
F1-score: 0.99

Random Forest
Accuracy: 0.98
Precision: 0.99
Recall: 0.98
F1-score: 0.98

Random Forest
Accuracy: 0.98
Precision: 0.99
Recall: 0.99
F1-score: 0.98

Random Forest
Accuracy: 0.99
Precision: 0.99
Recall: 0.99
F1-score: 0.99

Random Forest
Accuracy: 0.97
Precision: 0.97
Recall: 0.97
F1-score: 0.97

Random Forest
Accuracy: 0.97
Precision: 0.97
Recall: 0.97
F1-score: 0.97

Random Forest
Accuracy: 0.98
Precision: 0.98
Recall: 0.98
F1-score: 0.98

Random Forest
Accuracy: 0.97
Precision