In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, accuracy_score, f1_score, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
from joblib import load
from tensorflow.keras.models import load_model

# Where the saved models live and where evaluation reports are written
model_dir = "../results/models"
output_dir = "../results/reports"
os.makedirs(output_dir, exist_ok=True)

# Load the processed CSV; 'Label' is the target column, every other column is a feature
df = pd.read_csv("../data/processed/data_cleaned.csv")
y = df['Label']
X = df.drop(columns=['Label'])

# Hold out 20% of the rows for evaluation (fixed seed, not stratified).
# NOTE(review): the deep-learning section later re-splits with stratify=y;
# confirm which split each set of saved models was actually trained against.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Evaluation helper
def evaluate_model(model_name, model, model_type, X_test, y_test):
    """Score a fitted model on held-out data.

    Args:
        model_name: Identifier of the model. Two names get special handling:
            "autoencoder_classifier" (binary) is expected to return two
            outputs from ``predict``, the second being thresholded at 0.5;
            "cnn_classifier" (multiclass) gets a trailing axis added to its
            input and its output arg-maxed over the last axis.
        model: Object exposing ``predict``.
        model_type: Either "binary" or "multiclass".
        X_test: Feature matrix passed to ``model.predict``.
        y_test: Ground-truth labels. For "binary", every non-zero label is
            mapped to 1 and zero stays 0.

    Returns:
        Tuple ``(acc, f1, mse, r2, report)`` where ``report`` is the text
        produced by ``classification_report``. F1 uses binary averaging for
        "binary" and weighted averaging for "multiclass".

    Raises:
        ValueError: If ``model_type`` is neither "binary" nor "multiclass".
    """
    # Fail early with a clear message; previously an unknown type fell through
    # both branches and crashed on the return statement.
    if model_type not in ("binary", "multiclass"):
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected 'binary' or 'multiclass'"
        )

    if model_type == "binary":
        y_true = (y_test != 0).astype(int)  # collapse all non-zero labels into class 1
        f1_average = "binary"
        if model_name == "autoencoder_classifier":
            # The model yields two outputs; the first is discarded here.
            _, y_pred_prob = model.predict(X_test)
            # Flatten so the prediction is 1-D like y_true.
            # NOTE(review): assumes a single probability column per sample - confirm.
            y_pred = (np.asarray(y_pred_prob) > 0.5).astype(int).ravel()
        else:
            y_pred = model.predict(X_test)
    else:
        y_true = y_test
        f1_average = "weighted"
        if model_name == "cnn_classifier":
            X_test_input = np.expand_dims(X_test, axis=-1)  # add the trailing axis the CNN input expects
            y_pred = np.argmax(model.predict(X_test_input), axis=-1)
        else:
            y_pred = model.predict(X_test)

    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average=f1_average)
    # MSE / R2 are computed on the class labels themselves, as before.
    mse = mean_squared_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    report = classification_report(y_true, y_pred)
    return acc, f1, mse, r2, report

# Model inventory
def _keras_loader(filename):
    """Return a zero-argument callable that loads a Keras model file from model_dir."""
    def _load():
        return load_model(os.path.join(model_dir, filename))
    return _load


def _joblib_loader(filename):
    """Return a zero-argument callable that loads a joblib/pickle file from model_dir."""
    def _load():
        return load(os.path.join(model_dir, filename))
    return _load


# Grid used to locate the saved classic models: scaler class x PCA setting x model key
scalers = [None, MinMaxScaler, StandardScaler]
pca_components = [None, 0.99, 10]
classic_models = ['rf', 'xgb', 'catboost', 'lightgbm']

# Deep / autoencoder-based models: display name, task type, and a lazy loader
# (nothing is read from disk until "load_func" is called)
deep_models = [
    {
        "name": "autoencoder_classifier",
        "type": "binary",
        "load_func": _keras_loader("autoencoder_classifier.keras"),
    },
    {
        "name": "random_forest",
        "type": "binary",
        "load_func": _joblib_loader("autoencoder_random_forest.pkl"),
    },
    {
        "name": "udbb",
        "type": "multiclass",
        "load_func": _joblib_loader("autoencoder_udbb.pkl"),
    },
    {
        "name": "cnn_classifier",
        "type": "multiclass",
        "load_func": _keras_loader("autoencoder_cnn.keras"),
    },
]

# Accumulates one score row per evaluated model
results = []

# Evaluate every saved classic model in the scaler x PCA x model grid.
# NOTE(review): the raw, unscaled X_test is passed to each loaded object, so the
# saved files are presumably full pipelines that include their own scaler/PCA
# steps - verify against the training script.
for scaler in scalers:
    # The "()" suffix is part of the saved file names, so it must be kept.
    scaler_name = scaler.__name__ + "()" if scaler else "None"
    for pca in pca_components:
        pca_name = str(pca)
        for model_name in classic_models:
            # Shared stem for both the model file and its report file
            file_stem = f"{model_name}_scaler_{scaler_name}_pca_{pca_name}"
            model_path = os.path.join(model_dir, f"{file_stem}.joblib")

            # Missing combinations are reported and skipped rather than treated as errors
            if not os.path.exists(model_path):
                print(f"Model not found: {model_path}")
                continue

            # Load the model and score it with the shared helper so the metric
            # definitions stay identical to the deep-model evaluation
            print(f"Loading model: {model_path}")
            model = load(model_path)
            acc, f1, mse, r2, class_report = evaluate_model(
                model_name, model, "multiclass", X_test, y_test
            )

            # Record the scores
            results.append({
                'Model': model_name,
                'Type': "multiclass",
                'Scaler': scaler_name,
                'PCA': pca_name,
                'Accuracy': acc,
                'F1 Score': f1,
                'MSE': mse,
                'R2': r2
            })

            # Save the classification report
            report_path = os.path.join(output_dir, f"{file_stem}_report.txt")
            with open(report_path, "w") as f:
                f.write(class_report)

# Re-split the data, this time stratified on the label (same seed and test size)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Deep-learning model evaluation: standardise the features with a scaler
# fitted on the training split only, then apply it to the test split.
# NOTE(review): the scaler is re-fitted here rather than loaded from disk;
# confirm this reproduces the scaling used when the deep models were trained.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

encoder = load_model(os.path.join(model_dir, "encoder.keras"))

# Run the scaled test set through the saved encoder to get its encoded
# (dimension-reduced, per the original comment) representation
X_test_encoded = encoder.predict(X_test_scaled)

# Score each deep / autoencoder-based model and record its results
for model_info in deep_models:
    model_name, model_type, load_func = (
        model_info[key] for key in ("name", "type", "load_func")
    )

    print(f"Evaluating {model_name}...")

    model = load_func()

    # The autoencoder classifier is fed the scaled features directly;
    # every other model is fed the encoder output.
    eval_features = (
        X_test_scaled if model_name == "autoencoder_classifier" else X_test_encoded
    )
    acc, f1, mse, r2, report = evaluate_model(
        model_name, model, model_type, eval_features, y_test
    )

    score_row = {
        'Model': model_name,
        'Type': model_type,
        'Scaler': "StandardScaler()",
        'PCA': "None",
        'Accuracy': acc,
        'F1 Score': f1,
        'MSE': mse,
        'R2': r2
    }
    results.append(score_row)

    # Save the classification report
    report_path = os.path.join(output_dir, f"{model_name}_report.txt")
    with open(report_path, "w") as report_file:
        report_file.write(report)

# Collect every score row into a DataFrame and write it out as CSV
scores_path = os.path.join(output_dir, "models_scores.csv")
results_df = pd.DataFrame(results)
results_df.to_csv(scores_path, index=False)
print("모델 평가 완료. 결과 저장:", scores_path)


Loading model: ../results/models\rf_scaler_None_pca_None.joblib
Loading model: ../results/models\xgb_scaler_None_pca_None.joblib



    E.g. tree_method = "hist", device = "cuda"

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\catboost_scaler_None_pca_None.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\lightgbm_scaler_None_pca_None.joblib
Loading model: ../results/models\rf_scaler_None_pca_0.99.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\xgb_scaler_None_pca_0.99.joblib



    E.g. tree_method = "hist", device = "cuda"

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\catboost_scaler_None_pca_0.99.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\lightgbm_scaler_None_pca_0.99.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\rf_scaler_None_pca_10.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\xgb_scaler_None_pca_10.joblib



    E.g. tree_method = "hist", device = "cuda"



Loading model: ../results/models\catboost_scaler_None_pca_10.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\lightgbm_scaler_None_pca_10.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\rf_scaler_MinMaxScaler()_pca_None.joblib
Loading model: ../results/models\xgb_scaler_MinMaxScaler()_pca_None.joblib



    E.g. tree_method = "hist", device = "cuda"

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\catboost_scaler_MinMaxScaler()_pca_None.joblib
Loading model: ../results/models\lightgbm_scaler_MinMaxScaler()_pca_None.joblib
Loading model: ../results/models\rf_scaler_MinMaxScaler()_pca_0.99.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\xgb_scaler_MinMaxScaler()_pca_0.99.joblib



    E.g. tree_method = "hist", device = "cuda"



Loading model: ../results/models\catboost_scaler_MinMaxScaler()_pca_0.99.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\lightgbm_scaler_MinMaxScaler()_pca_0.99.joblib
Loading model: ../results/models\rf_scaler_MinMaxScaler()_pca_10.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\xgb_scaler_MinMaxScaler()_pca_10.joblib



    E.g. tree_method = "hist", device = "cuda"



Loading model: ../results/models\catboost_scaler_MinMaxScaler()_pca_10.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\lightgbm_scaler_MinMaxScaler()_pca_10.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\rf_scaler_StandardScaler()_pca_None.joblib
Loading model: ../results/models\xgb_scaler_StandardScaler()_pca_None.joblib



    E.g. tree_method = "hist", device = "cuda"

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\catboost_scaler_StandardScaler()_pca_None.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\lightgbm_scaler_StandardScaler()_pca_None.joblib
Loading model: ../results/models\rf_scaler_StandardScaler()_pca_0.99.joblib
Loading model: ../results/models\xgb_scaler_StandardScaler()_pca_0.99.joblib



    E.g. tree_method = "hist", device = "cuda"



Loading model: ../results/models\catboost_scaler_StandardScaler()_pca_0.99.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\lightgbm_scaler_StandardScaler()_pca_0.99.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\rf_scaler_StandardScaler()_pca_10.joblib
Loading model: ../results/models\xgb_scaler_StandardScaler()_pca_10.joblib



    E.g. tree_method = "hist", device = "cuda"



Loading model: ../results/models\catboost_scaler_StandardScaler()_pca_10.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Loading model: ../results/models\lightgbm_scaler_StandardScaler()_pca_10.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m17692/17692[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 663us/step
Evaluating autoencoder_classifier...
[1m17692/17692[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 813us/step
Evaluating random_forest...
Evaluating udbb...
Evaluating cnn_classifier...
[1m17692/17692[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1ms/step


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


모델 평가 완료. 결과 저장: ../results/reports\models_scores.csv


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
