In [1]:
from tensorflow.keras.datasets import fashion_mnist
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# 데이터 로드
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# 데이터를 1D 벡터로 변환
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

# 데이터 정규화
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# 훈련 데이터를 훈련/검증으로 분리
x_train_split, x_val_split, y_train_split, y_val_split = train_test_split(
    x_train_scaled, y_train, test_size=0.2, random_state=42
)

## 1.SVM

In [2]:
# PCA로 차원 축소 (옵션: 속도 향상을 위해 사용)
from sklearn.decomposition import PCA
pca = PCA(n_components=50)  # 50차원으로 축소
x_train_pca = pca.fit_transform(x_train_scaled)
x_test_pca = pca.transform(x_test_scaled)

# 2. SVM 모델 학습
from sklearn.svm import SVC
svm_model = SVC(kernel='rbf', C=1, gamma='scale')  # RBF 커널 사용
svm_model.fit(x_train_pca, y_train)

# 3. 모델 평가
y_pred = svm_model.predict(x_test_pca)

# 정확도 및 분류 리포트 출력
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.8669

Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.84      0.83      1000
           1       0.99      0.96      0.97      1000
           2       0.78      0.77      0.78      1000
           3       0.85      0.89      0.87      1000
           4       0.78      0.80      0.79      1000
           5       0.96      0.93      0.94      1000
           6       0.68      0.64      0.66      1000
           7       0.91      0.94      0.92      1000
           8       0.96      0.97      0.97      1000
           9       0.93      0.94      0.94      1000

    accuracy                           0.87     10000
   macro avg       0.87      0.87      0.87     10000
weighted avg       0.87      0.87      0.87     10000



## 2. Decision Tree

In [3]:
from sklearn.tree import DecisionTreeClassifier

# 모델 학습
decision_tree = DecisionTreeClassifier(random_state=42)
decision_tree.fit(x_train_split, y_train_split)

# 예측 및 평가
y_pred_dt = decision_tree.predict(x_test_scaled)
print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred_dt))
print("Classification Report:\n", classification_report(y_test, y_pred_dt))

Decision Tree Accuracy: 0.7901
Classification Report:
               precision    recall  f1-score   support

           0       0.73      0.75      0.74      1000
           1       0.94      0.95      0.95      1000
           2       0.67      0.67      0.67      1000
           3       0.81      0.78      0.79      1000
           4       0.65      0.65      0.65      1000
           5       0.90      0.89      0.90      1000
           6       0.53      0.53      0.53      1000
           7       0.85      0.89      0.87      1000
           8       0.92      0.90      0.91      1000
           9       0.90      0.89      0.90      1000

    accuracy                           0.79     10000
   macro avg       0.79      0.79      0.79     10000
weighted avg       0.79      0.79      0.79     10000



## 3. Random Forest

In [4]:
from sklearn.ensemble import RandomForestClassifier

# 모델 학습
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
random_forest.fit(x_train_split, y_train_split)

# 예측 및 평가
y_pred_rf = random_forest.predict(x_test_scaled)
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Classification Report:\n", classification_report(y_test, y_pred_rf))

Random Forest Accuracy: 0.8735
Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.85      0.83      1000
           1       0.99      0.96      0.98      1000
           2       0.77      0.80      0.78      1000
           3       0.86      0.90      0.88      1000
           4       0.77      0.82      0.79      1000
           5       0.98      0.96      0.97      1000
           6       0.71      0.58      0.64      1000
           7       0.93      0.95      0.94      1000
           8       0.95      0.97      0.96      1000
           9       0.95      0.95      0.95      1000

    accuracy                           0.87     10000
   macro avg       0.87      0.87      0.87     10000
weighted avg       0.87      0.87      0.87     10000



## 4. XGBoost

In [5]:
from xgboost import XGBClassifier

# 모델 학습
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
xgb_model.fit(x_train_split, y_train_split, eval_set=[(x_val_split, y_val_split)], verbose=False)

# 예측 및 평가
y_pred_xgb = xgb_model.predict(x_test_scaled)
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred_xgb))
print("Classification Report:\n", classification_report(y_test, y_pred_xgb))

Parameters: { "use_label_encoder" } are not used.



XGBoost Accuracy: 0.8926
Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.86      0.85      1000
           1       0.99      0.97      0.98      1000
           2       0.79      0.82      0.81      1000
           3       0.90      0.90      0.90      1000
           4       0.81      0.83      0.82      1000
           5       0.99      0.97      0.98      1000
           6       0.72      0.66      0.69      1000
           7       0.95      0.97      0.96      1000
           8       0.97      0.97      0.97      1000
           9       0.96      0.97      0.96      1000

    accuracy                           0.89     10000
   macro avg       0.89      0.89      0.89     10000
weighted avg       0.89      0.89      0.89     10000



## 5. LightGBM

In [6]:
from lightgbm import LGBMClassifier
from lightgbm import log_evaluation

# 모델 학습
lgb_model = LGBMClassifier(random_state=42)
lgb_model.fit(
    x_train_split, y_train_split,
    eval_set=[(x_val_split, y_val_split)],  # 검증 세트를 지정
    eval_metric='multi_logloss',           # 다중 클래스 로스 사용
    callbacks=[log_evaluation(period=1)]   # 학습 로그 출력 주기를 설정
)

# 예측 및 평가
y_pred_lgb = lgb_model.predict(x_test_scaled)
print("LightGBM Accuracy:", accuracy_score(y_test, y_pred_lgb))
print("Classification Report:\n", classification_report(y_test, y_pred_lgb))

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.179128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 174176
[LightGBM] [Info] Number of data points in the train set: 48000, number of used features: 783
[LightGBM] [Info] Start training from score -2.303002
[LightGBM] [Info] Start training from score -2.306551
[LightGBM] [Info] Start training from score -2.303627
[LightGBM] [Info] Start training from score -2.299257
[LightGBM] [Info] Start training from score -2.303002
[LightGBM] [Info] Start training from score -2.304879
[LightGBM] [Info] Start training from score -2.306342
[LightGBM] [Info] Start training from score -2.294080
[LightGBM] [Info] Start training from score -2.301960
[LightGBM] [Info] Start training from score -2.303210
[1]	valid_0's multi_logloss: 1.73052
[2]	valid_0's multi_logloss: 1.44448
[3]	valid_0's multi_logloss: 1.24646
[4]	valid_0's multi_logloss: 1.09876
[5]	valid_0's multi_