In [1]:
# 필요한 모듈 import
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# 다양한 모델 사용을 위한 import
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier, LogisticRegression

# 데이터 준비
digits = load_digits()

# 데이터 이해하기
# Feature Data 지정하기
X = digits.data

# Label Data 지정하기
y = digits.target

# Target Names 출력해보기
print("Target Names:", digits.target_names)

# 데이터 Describe 해보기 (어떤 특성, 어떤 형태)
print("Data shape:", X.shape)
print("Target shape:", y.shape)
print(digits.DESCR)  # 데이터셋 설명 출력

# train, test 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 다양한 모델 학습
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC(random_state=42),
    "SGD Classifier": SGDClassifier(random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42)
}

for model_name, model in models.items():
    # 모델 학습
    model.fit(X_train, y_train)
    # 예측
    y_pred = model.predict(X_test)
    # 모델 평가 - classification_report를 사용
    print(f"=== {model_name} ===")
    print(classification_report(y_test, y_pred))
    print("\n")

# 해석
# 위 결과로 각 모델별 정확도, 정밀도(precision), 재현율(recall), F1-score를 비교할 수 있었다.
# digits 데이터는 0~9까지 골고루 분포되어 있는 편이라 accuracy만 봐도 큰 무리는 없지만,
# 여러 모델을 비교할 때는 F1-score가 고른 모델이 더 좋다고 볼 수 있을 때가 많다고 한다.
#
# 결과적으로 어떤 모델이 가장 좋은 성능을 보이는지는 출력된 classification_report의 accuracy나,
# precision, recall, f1-score를 보고 판단할 수 있는것 같다.


Target Names: [0 1 2 3 4 5 6 7 8 9]
Data shape: (1797, 64)
Target shape: (1797,)
.. _digits_dataset:

Optical recognition of handwritten digits dataset
--------------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 1797
    :Number of Attributes: 64
    :Attribute Information: 8x8 image of integer pixels in the range 0..16.
    :Missing Attribute Values: None
    :Creator: E. Alpaydin (alpaydin '@' boun.edu.tr)
    :Date: July; 1998

This is a copy of the test set of the UCI ML hand-written digits datasets
https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits

The data set contains images of hand-written digits: 10 classes where
each class refers to a digit.

Preprocessing programs made available by NIST were used to extract
normalized bitmaps of handwritten digits from a preprinted form. From a
total of 43 people, 30 contributed to the training set and different 13
to the test set. 32x32 bitmaps are divided into

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
