<a href="https://colab.research.google.com/github/sasa10th/research/blob/main/research.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SQEH-Net(경량화)

## 코드

In [None]:
import random
from typing import Tuple

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

import snntorch as snn
from snntorch import surrogate

# =============================================================================
# Utility – 재현성 설정
# =============================================================================
def set_seed(seed: int = 42):
    """Seed every random number generator used in this cell for reproducibility.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch's
    CPU generator; when CUDA is available, all CUDA devices are seeded too.

    Args:
        seed: Seed value applied to every generator.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

# =============================================================================
# 1. Sparse-Binary Linear Layer 정의 (STE + Dynamic Sparsity 지원)
# =============================================================================
class SparseBinaryLinear(nn.Module):
    """
    Bias-free linear layer with binary weights and a sparse connectivity mask.

    - Training optimises a real-valued latent weight (`w_real`).
    - The forward pass uses the sign of `w_real` (zero maps to +1) multiplied
      by a boolean mask, so every connection is -1, 0 (masked out) or +1.
    - Gradients reach `w_real` through a straight-through estimator (STE).

    Args:
        in_features: Size of each input sample.
        out_features: Size of each output sample.
        density: Fraction of the `in_features * out_features` connections that
            start out active (the count is truncated to an integer).
    """

    def __init__(self, in_features: int, out_features: int, density: float = 0.2):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.density = density

        # Learnable latent weights, initialised with small random values.
        self.w_real = nn.Parameter(0.01 * torch.randn(out_features, in_features))

        # Random connectivity mask with `density` of the entries switched on.
        num_active = int(in_features * out_features * density)
        mask = torch.zeros(out_features, in_features, dtype=torch.bool)
        idx = torch.randperm(mask.numel())[:num_active]
        mask.view(-1)[idx] = True
        # Registered as a buffer so it follows `.to(device)` and is saved with
        # the module state without being a trainable parameter.
        self.register_buffer("mask", mask)

    @staticmethod
    def _binarise(w: torch.Tensor):
        """Return sign(w) in {-1, +1} with an identity (STE) gradient w.r.t. `w`."""
        # `w - w.detach()` is zero in value but carries d/dw = 1 for backprop.
        return (w >= 0).float() * 2 - 1 + w - w.detach()

    def forward(self, x: torch.Tensor):
        """Apply the masked, binarised weight matrix to `x` (no bias term)."""
        w_bin = self._binarise(self.w_real) * self.mask
        return nn.functional.linear(x, w_bin)

    def prune_and_grow(self, grow_fraction: float = 0.05):
        """
        Dynamic sparsity step that keeps the number of active connections fixed.

        - Prune: deactivate the `grow_fraction` share of *active* connections
          with the smallest `|w_real|`.
        - Grow: activate the same number of connections chosen uniformly at
          random among positions that were inactive before this call, and
          re-initialise their latent weights with small random values.

        The number of swapped connections is capped by the number of inactive
        positions; when it is zero the call is a no-op.

        Args:
            grow_fraction: Fraction of the currently active connections to swap.
        """
        with torch.no_grad():
            flat_mask = self.mask.view(-1)
            flat_w = self.w_real.view(-1)

            active_idx = flat_mask.nonzero(as_tuple=True)[0]
            # as_tuple=True always yields a 1-D index tensor, even when only a
            # single position is inactive.
            inactive_idx = (~flat_mask).nonzero(as_tuple=True)[0]

            k_swap = min(int(active_idx.numel() * grow_fraction), inactive_idx.numel())
            if k_swap <= 0:
                return

            # Rank magnitudes over active connections only; masked-out entries
            # must not compete for the "smallest" slots.
            _, order = torch.topk(flat_w[active_idx].abs(), k_swap, largest=False)
            prune_idx = active_idx[order]

            pick = torch.randperm(inactive_idx.numel(), device=inactive_idx.device)[:k_swap]
            grow_idx = inactive_idx[pick]

            flat_mask[prune_idx] = False
            flat_mask[grow_idx] = True
            flat_w[grow_idx] = 0.01 * torch.randn(
                k_swap, device=flat_w.device, dtype=flat_w.dtype
            )

# =============================================================================
# 2. SQEH 네트워크 정의
# =============================================================================
def build_sqeh_net(n_in: int, hidden: int = 64, density: float = 0.2, beta: float = 0.95):
    """Assemble the sparse-binary + LIF network as an ``nn.ModuleDict``.

    The returned dict holds three entries, in this order:
    ``"fc1"`` (SparseBinaryLinear, n_in -> hidden), ``"lif1"`` (snn.Leaky with a
    fast-sigmoid surrogate gradient) and ``"fc2"`` (SparseBinaryLinear,
    hidden -> 1).

    Args:
        n_in: Number of input features.
        hidden: Width of the hidden layer.
        density: Connection density passed to both sparse layers.
        beta: ``beta`` argument passed to ``snn.Leaky``.
    """
    net = nn.ModuleDict()
    net["fc1"] = SparseBinaryLinear(n_in, hidden, density)
    net["lif1"] = snn.Leaky(beta=beta, spike_grad=surrogate.fast_sigmoid())
    net["fc2"] = SparseBinaryLinear(hidden, 1, density)
    return net

def forward_seq(net: nn.ModuleDict, spike_seq: torch.Tensor):
    """
    Run the network over a spike sequence and return the last-step prediction.

    The neuron state starts from ``None`` on every call, so no state is carried
    between calls. Only the final time step is read out, so the output layer
    ``fc2`` is evaluated once, after the loop.

    Args:
        net: Module dict with the entries ``"fc1"``, ``"lif1"`` and ``"fc2"``.
        spike_seq: Tensor indexed by time step along dimension 0.

    Returns:
        The ``fc2`` output for the final time step with dimension 1 squeezed
        (one value per sample when ``fc2`` has a single output unit).

    Raises:
        ValueError: If ``spike_seq`` has no time steps.
    """
    n_steps = spike_seq.size(0)
    if n_steps == 0:
        raise ValueError("spike_seq must contain at least one time step")

    fc1, lif1, fc2 = net["fc1"], net["lif1"], net["fc2"]
    mem = None
    spk = None
    for t in range(n_steps):
        spk, mem = lif1(fc1(spike_seq[t]), mem)
    return fc2(spk).squeeze(1)  # shape: (B,)

# =============================================================================
# 3. 학습 도우미 클래스
# =============================================================================
def latency_encode(x_batch: torch.Tensor, n_steps: int):
    """
    Latency-encode a 2-D batch into a spike train with a leading time axis.

    Each entry fires exactly once, at step ``round((n_steps - 1) * (1 - x))``,
    so larger inputs spike earlier. Entries whose computed step falls outside
    ``[0, n_steps - 1]`` never spike.

    Args:
        x_batch: Tensor of shape (batch, features).
        n_steps: Number of time steps to generate.

    Returns:
        Tensor of shape (n_steps, batch, features) holding 0.0 / 1.0 values on
        the same device as ``x_batch``.
    """
    n_rows, n_cols = x_batch.shape
    fire_step = torch.round((1.0 - x_batch) * (n_steps - 1)).long()
    step_axis = torch.arange(n_steps, device=x_batch.device).view(n_steps, 1, 1)
    # Broadcast comparison marks, for every step, the entries that fire there.
    fires = fire_step.unsqueeze(0) == step_axis
    train = torch.zeros(n_steps, n_rows, n_cols, device=x_batch.device)
    train[fires] = 1.0
    return train

class SqehTrainer:
    """
    Trainer for the SQEH network.

    Each batch goes through (1) a gradient step on the MSE loss, (2) a random,
    reward-scaled sign flip of active latent weights, and (3) every
    `prune_every` batches, a prune-and-grow step on both sparse layers.

    Args:
        net: Module dict with the entries "fc1", "lif1" and "fc2".
        lr: Learning rate for the Adam optimiser.
        time_steps: Number of latency-encoding time steps.
        device: Device the network and batches are moved to.
    """

    def __init__(self, net, lr=1e-3, time_steps=10, device="cpu"):
        self.net = net.to(device)
        self.time_steps = time_steps
        self.device = device
        self.opt = torch.optim.Adam([p for p in net.parameters() if p.requires_grad], lr=lr)
        self.criterion = nn.MSELoss(reduction="none")  # per-sample losses

    def train_epoch(self, loader, prune_every=10, grow_frac=0.05, flip_scale=1.0):
        """
        Train for one pass over `loader` and return the mean per-sample loss.

        Args:
            loader: Iterable of (x, y) batches exposing a `.dataset` with a length.
            prune_every: Run prune-and-grow every this many batches.
            grow_frac: Fraction of active connections swapped per prune-and-grow.
            flip_scale: Multiplier applied to the reward-derived flip
                probability. 1.0 keeps the unscaled probability; 0.0 disables
                sign flipping.

        Returns:
            Sum of batch losses weighted by batch size, divided by
            `len(loader.dataset)`.
        """
        self.net.train()
        total_loss = 0.0
        sparse_layers = [self.net["fc1"], self.net["fc2"]]

        for i, (x, y) in enumerate(loader, 1):
            x, y = x.to(self.device), y.to(self.device)

            # --- forward and backward pass ---
            spk_seq = latency_encode(x, self.time_steps)
            y_pred = forward_seq(self.net, spk_seq)
            loss = self.criterion(y_pred, y).mean()

            self.opt.zero_grad()
            loss.backward()
            self.opt.step()

            # --- reward-based sign flip ---
            with torch.no_grad():
                # The encoding is deterministic, so the spike train from the
                # training pass is reused for the post-update evaluation.
                y_after = forward_seq(self.net, spk_seq)
                per_sample_error = (y_after - y).pow(2)
                # Reward = how much better than the batch average a sample is.
                reward = per_sample_error.mean() - per_sample_error

                # NOTE(review): `reward` is zero-mean by construction, so this
                # mean of sigmoids sits close to 0.5 whenever the scaled
                # rewards are small - about half of the active weights would
                # then flip on every batch. Confirm this rate is intended;
                # `flip_scale` lets callers lower it.
                flip_prob = (reward * 0.1).sigmoid().mean().item() * flip_scale

                for layer in sparse_layers:
                    flip_mask = (torch.rand_like(layer.w_real) < flip_prob) & layer.mask
                    layer.w_real[flip_mask] *= -1

            total_loss += loss.item() * x.size(0)

            # --- dynamic sparsity (prune + grow) ---
            if i % prune_every == 0:
                for layer in sparse_layers:
                    layer.prune_and_grow(grow_frac)

        return total_loss / len(loader.dataset)

    @torch.no_grad()
    def evaluate(self, loader):
        """Return (MSE, RMSE, R^2) of the network's predictions over `loader`."""
        self.net.eval()
        y_true, y_pred = [], []
        for x, y in loader:
            x, y = x.to(self.device), y.to(self.device)
            spk_seq = latency_encode(x, self.time_steps)
            pred = forward_seq(self.net, spk_seq)
            y_true.append(y.cpu())
            y_pred.append(pred.cpu())
        y_true = torch.cat(y_true).numpy()
        y_pred = torch.cat(y_pred).numpy()
        mse = mean_squared_error(y_true, y_pred)
        return mse, np.sqrt(mse), r2_score(y_true, y_pred)

# =============================================================================
# 4. Tabular 데이터셋 래퍼
# =============================================================================
class TabularDataset(torch.utils.data.Dataset):
    """Dataset over a feature array and a target array, stored as float32 tensors.

    Args:
        X: NumPy array of features, indexed by sample along the first axis.
        y: NumPy array of targets, indexed by sample along the first axis.
    """

    def __init__(self, X, y):
        features = torch.from_numpy(X)
        targets = torch.from_numpy(y)
        self.X = features.to(torch.float32)
        self.y = targets.to(torch.float32)

    def __len__(self):
        """Number of samples (length of the feature tensor's first axis)."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at ``idx``."""
        sample = self.X[idx]
        label = self.y[idx]
        return sample, label

# =============================================================================
# 5. Main 실행부
# =============================================================================
if __name__ == "__main__":
    DATA_PATH = "cleaned_data.xlsx"  # path to the input spreadsheet
    TEST_SPLIT = 0.25
    BATCH_SIZE = 32
    T_STEPS = 16
    DENSITY = 0.2  # only 20% of all possible connections are active

    set_seed(7)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # --- Load data: first column is the target, the remaining ones are features ---
    df = pd.read_excel(DATA_PATH)
    y = df.iloc[:, 0].to_numpy(dtype=np.float32)
    X = df.iloc[:, 1:].to_numpy(dtype=np.float32)

    # --- Split first, then fit the scalers on the training portion only ---
    # Fitting on the full data set would leak test-set statistics into training.
    X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
        X, y, test_size=TEST_SPLIT, random_state=42
    )
    x_scaler = MinMaxScaler()
    y_scaler = StandardScaler()
    X_train = x_scaler.fit_transform(X_train_raw)
    y_train = y_scaler.fit_transform(y_train_raw.reshape(-1, 1)).flatten()
    # latency_encode only emits a spike for values inside [0, 1]; test values
    # outside the training range are clipped so they still produce one.
    X_test = np.clip(x_scaler.transform(X_test_raw), 0.0, 1.0)
    y_test = y_scaler.transform(y_test_raw.reshape(-1, 1)).flatten()

    train_loader = torch.utils.data.DataLoader(TabularDataset(X_train, y_train), batch_size=BATCH_SIZE, shuffle=True)
    test_loader = torch.utils.data.DataLoader(TabularDataset(X_test, y_test), batch_size=BATCH_SIZE)

    # --- Build the network and its trainer ---
    net = build_sqeh_net(n_in=X.shape[1], hidden=64, density=DENSITY).to(device)
    trainer = SqehTrainer(net, lr=1e-3, time_steps=T_STEPS, device=device)

    # --- Training loop ---
    n_epochs = 100
    best_r2 = -np.inf
    for epoch in range(1, n_epochs + 1):
        train_loss = trainer.train_epoch(train_loader)
        mse, rmse, r2 = trainer.evaluate(test_loader)
        best_r2 = max(best_r2, r2)
        print(f"Epoch {epoch:03d} | train MSE {train_loss:.4f} | test R² {r2:.4f} | best {best_r2:.4f}")

    def _export_sign(layer):
        """Return int8 +1/-1 signs using the forward-pass rule (zero maps to +1)."""
        w = layer.w_real.detach()
        return ((w >= 0).to(torch.int8) * 2 - 1).cpu()

    # --- Save the final model (sign-quantised weights plus masks and scaler stats) ---
    torch.save({
        "mask_fc1": net["fc1"].mask.cpu(),
        "w_fc1_sign": _export_sign(net["fc1"]),
        "mask_fc2": net["fc2"].mask.cpu(),
        "w_fc2_sign": _export_sign(net["fc2"]),
        "x_scaler_min": x_scaler.min_,
        "x_scaler_scale": x_scaler.scale_,
        "y_scaler_mean": y_scaler.mean_,
        "y_scaler_scale": y_scaler.scale_,
    }, "sqeh_sparse_bin.pt")

    print("\n학습 완료. 희소-양자화된 가중치 저장됨 → sqeh_sparse_bin.pt")


Epoch 001 | train MSE 1.0956 | test R² -0.0666 | best -0.0666
Epoch 002 | train MSE 1.1287 | test R² -0.0052 | best -0.0052
Epoch 003 | train MSE 1.1145 | test R² -0.0321 | best -0.0052
Epoch 004 | train MSE 1.0729 | test R² -0.0515 | best -0.0052
Epoch 005 | train MSE 1.0930 | test R² -0.0346 | best -0.0052
Epoch 006 | train MSE 1.0912 | test R² -0.0644 | best -0.0052
Epoch 007 | train MSE 1.0496 | test R² -0.0589 | best -0.0052
Epoch 008 | train MSE 1.0986 | test R² -0.0082 | best -0.0052


## 라이브러리

In [None]:
!pip install snntorch

Collecting snntorch
  Downloading snntorch-0.9.4-py2.py3-none-any.whl.metadata (15 kB)
Downloading snntorch-0.9.4-py2.py3-none-any.whl (125 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 125.6/125.6 kB 3.5 MB/s eta 0:00:00
Installing collected packages: snntorch
Successfully installed snntorch-0.9.4


## GPU 확인

In [None]:
import torch, platform, os

# Report whether PyTorch can see a CUDA device and, if so, which one.
cuda_ok = torch.cuda.is_available()
print("CUDA available? ➜", cuda_ok)
if cuda_ok:
    device_name = torch.cuda.get_device_name(0)
    print("Device name:", device_name)
    print("PyTorch CUDA version:", torch.version.cuda)


CUDA available? ➜ True
Device name: NVIDIA A100-SXM4-40GB
PyTorch CUDA version: 12.4


# 데이터 전처리 및 모델 개발(GRN 모방, 경량화X)

In [None]:
"""
ImmunoGRN Studio
- 인공 면역체계 기반 이상치 분석 및 GRN 기반 앙상블 모델 학습/평가 파이프라인
- GUI는 Tkinter를 사용하여 두 기능(이상치 분석 / GRN 앙상블 모델)을 탭으로 제공
"""

import os
import random
import io
import datetime
import threading

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Scikit-learn 관련
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# TensorFlow 및 Keras 관련
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model  # type: ignore
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU, Input, Layer, Add  # type: ignore
from tensorflow.keras.optimizers import Adam  # type: ignore
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard  # type: ignore

# SHAP
import shap

# Keras Tuner 설치 확인
try:
    import keras_tuner
except ImportError:
    raise ImportError("keras-tuner를 설치해야 합니다.\n예: pip install keras-tuner")

# Tkinter (GUI)
import tkinter as tk
from tkinter import ttk, filedialog, messagebox

##############################################
# 1. 인공 면역체계 기반 이상치 분석 함수들
##############################################


def initialize_detectors(data, num_detectors=200):
    """
    Draw the initial detectors uniformly at random inside the data's bounding box.

    Each feature is sampled between that feature's minimum and maximum over
    ``data``, using NumPy's global random generator.

    Args:
        data: 2-D array of shape (samples, features).
        num_detectors: Number of detectors to generate.

    Returns:
        Array of shape (num_detectors, features).
    """
    lower = data.min(axis=0)
    upper = data.max(axis=0)
    n_features = data.shape[1]
    return np.random.uniform(lower, upper, (num_detectors, n_features))


def compute_affinity(detector, data_point):
    """Return the Euclidean distance between ``detector`` and ``data_point``."""
    offset = detector - data_point
    return np.linalg.norm(offset)


def negative_selection_classification(data, detectors, threshold):
    """
    Flag as outliers the data points that no detector lies close to.

    A point counts as normal when at least one detector is strictly closer
    than ``threshold`` (Euclidean distance); every other point is an outlier.

    Args:
        data: Array of shape (samples, features).
        detectors: Array of shape (detectors, features).
        threshold: Distance below which a detector covers a point.

    Returns:
        List of row indices of ``data`` classified as outliers, ascending.
    """
    # Pairwise differences, shape (samples, detectors, features).
    offsets = data[:, np.newaxis, :] - detectors[np.newaxis, :, :]
    pairwise = np.linalg.norm(offsets, axis=2)
    covered = (pairwise < threshold).any(axis=1)
    return np.flatnonzero(np.logical_not(covered)).tolist()


def _count_activations(detectors, data, threshold):
    """Return, per detector, how many data points lie strictly within ``threshold``."""
    distances = np.linalg.norm(
        data[np.newaxis, :, :] - detectors[:, np.newaxis, :], axis=2
    )
    return np.sum(distances < threshold, axis=1)


def clonal_selection(
    detectors, data, threshold, iterations=10, clone_factor=5, mutation_rate=0.1
):
    """
    Refine detectors with a clonal-selection loop.

    Each iteration:
    1. scores every detector by its activation, i.e. the number of data points
       closer than ``threshold``;
    2. selects the top half (at least one) by activation;
    3. clones each selected detector ``clone_factor`` times, adding Gaussian
       noise with standard deviation ``mutation_rate``;
    4. keeps the ``detectors.shape[0]`` highest-activation rows among the
       current detectors and the clones.

    ``clone_factor=0`` is supported: no clones are produced and the detectors
    are only re-ordered by activation.

    Args:
        detectors: Array of shape (detectors, features).
        data: Array of shape (samples, features).
        threshold: Distance below which a detector is activated by a point.
        iterations: Number of selection rounds.
        clone_factor: Clones generated per selected detector.
        mutation_rate: Standard deviation of the mutation noise.

    Returns:
        Array with the same number of rows as the input ``detectors``.
    """
    desired_num = detectors.shape[0]
    for _ in range(iterations):
        activations = _count_activations(detectors, data, threshold)
        num_to_select = max(1, int(0.5 * len(detectors)))
        selected_detectors = detectors[np.argsort(activations)[-num_to_select:]]

        # One vectorised draw replaces the per-clone Python loop; rows stay
        # grouped by parent, in the same order the loop produced them.
        parents = np.repeat(selected_detectors, clone_factor, axis=0)
        clones = parents + np.random.normal(0, mutation_rate, parents.shape)

        # Activations of the existing detectors are already known, so only the
        # clones need to be scored.
        clone_activations = _count_activations(clones, data, threshold)
        detectors = np.vstack((detectors, clones))
        pooled_activations = np.concatenate((activations, clone_activations))
        detectors = detectors[np.argsort(pooled_activations)[-desired_num:]]
    return detectors


def remove_outliers(data, outlier_indices):
    """Drop the rows listed in ``outlier_indices`` from ``data``.

    Returns:
        Tuple ``(cleaned_data, outlier_indices)``; the indices are passed back
        unchanged.
    """
    keep = np.full(len(data), True)
    keep[outlier_indices] = False
    cleaned = data[keep]
    return cleaned, outlier_indices


##############################################
# 2. GRN 기반 앙상블 모델 관련 함수 및 클래스
##############################################


def set_seed(seed):
    """Seed NumPy, Python's ``random``, ``PYTHONHASHSEED`` and TensorFlow for reproducibility."""
    for seeder in (np.random.seed, random.seed):
        seeder(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    tf.random.set_seed(seed)


def set_korean_font(font_path="C:/Windows/Fonts/malgun.ttf"):
    """Configure matplotlib to render Korean text with the font at ``font_path``.

    If the font name cannot be resolved, a message is printed and the
    ``sans-serif`` family is used instead. ``axes.unicode_minus`` is switched
    off in either case.
    """
    from matplotlib import font_manager, rc

    family = "sans-serif"
    try:
        family = font_manager.FontProperties(fname=font_path).get_name()
    except Exception as e:
        print(f"폰트 설정 오류: {e}\n기본 폰트로 설정합니다.")
    rc("font", family=family)
    plt.rcParams["axes.unicode_minus"] = False


def load_and_preprocess_data(file_path):
    """
    Read an Excel sheet and split it into features, target and feature names.

    The first data row (after the header pandas consumes) supplies the feature
    names and is then discarded - presumably the sheet carries a second header
    row; verify against the input file. Rows containing missing values are
    dropped. Column 0 is the target and the remaining columns are features.

    Returns:
        Tuple ``(X, y, feature_names)`` with ``X`` and ``y`` as NumPy arrays
        and ``feature_names`` as a list.
    """
    sheet = pd.read_excel(file_path)
    feature_names = sheet.iloc[0, 1:].tolist()
    rows = sheet.iloc[1:].reset_index(drop=True).dropna()
    y = rows.iloc[:, 0].to_numpy()
    X = rows.iloc[:, 1:].to_numpy()
    return X, y, feature_names


class GRNLayer(Layer):
    """
    GRN (Gene Regulatory Network) layer.

    Adds two terms: a learned linear interaction between input features
    (``inputs @ interaction_matrix``) and the column-wise mean of a
    non-trainable memory buffer that tracks recent batch means.

    Args:
        input_dim: Number of input features; also the output width.
        memory_size: Number of rows kept in the memory buffer.
        interaction_strength: Standard deviation of the normal initialiser
            used for the interaction matrix.
        memory_decay: Weight given to the existing memory when it is blended
            with the current batch mean during training.
    """

    def __init__(
        self,
        input_dim,
        memory_size=100,
        interaction_strength=0.1,
        memory_decay=0.9,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.input_dim = input_dim
        self.memory_size = memory_size
        self.interaction_strength = interaction_strength
        self.memory_decay = memory_decay

    def build(self, input_shape):
        """Create the trainable interaction matrix and the zero-filled memory buffer."""
        self.interaction_matrix = self.add_weight(
            shape=(self.input_dim, self.input_dim),
            initializer=tf.keras.initializers.RandomNormal(
                mean=0.0, stddev=self.interaction_strength
            ),
            trainable=True,
            name="interaction_matrix",
        )
        self.memory = self.add_weight(
            shape=(self.memory_size, self.input_dim),
            initializer=tf.keras.initializers.Zeros(),
            trainable=False,
            name="memory",
        )
        super().build(input_shape)

    def call(self, inputs, training=None):
        """Return ``inputs @ interaction_matrix + mean(memory)``; update memory when training."""
        interaction_effect = tf.matmul(inputs, self.interaction_matrix)
        memory_effect = tf.reduce_mean(self.memory, axis=0)
        combined_effect = interaction_effect + memory_effect
        if training:
            # Drop the oldest row, append the batch mean, then pull the whole
            # buffer towards the batch mean with weight (1 - memory_decay).
            current_memory = tf.reduce_mean(inputs, axis=0, keepdims=True)
            updated_memory = tf.concat([self.memory[1:], current_memory], axis=0)
            updated_memory = (
                self.memory_decay * updated_memory
                + (1 - self.memory_decay) * current_memory
            )
            self.memory.assign(updated_memory)
        return combined_effect

    def get_config(self):
        """Return the constructor arguments so models containing this layer can be saved."""
        config = super().get_config()
        config.update(
            {
                "input_dim": self.input_dim,
                "memory_size": self.memory_size,
                "interaction_strength": self.interaction_strength,
                "memory_decay": self.memory_decay,
            }
        )
        return config


def _split_and_scale(X, y, seed):
    """Split 75/25 with ``seed`` and standardise features using training statistics."""
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=seed
    )
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_test_scaled, y_train, y_test, scaler


def train_baseline_model(X, y, learning_rate=0.0005, batch_size=32):
    """
    Train the baseline network and return the split for the best random state.

    For every candidate random state (currently only 42) the data is split and
    scaled, a fixed dense network is trained with early stopping, learning-rate
    reduction and checkpointing, and the test RMSE is recorded. The split is
    then rebuilt with the best-scoring random state and returned.

    Args:
        X: Feature array.
        y: Target array.
        learning_rate: Adam learning rate for the baseline network.
        batch_size: Mini-batch size used for fitting.

    Returns:
        Tuple ``(X_train_scaled, X_test_scaled, y_train, y_test, scaler,
        best_state)``.
    """
    best_rmse = float("inf")
    best_state = None

    for random_state in range(42, 43):
        set_seed(random_state)
        X_train_scaled, X_test_scaled, y_train, y_test, _ = _split_and_scale(
            X, y, random_state
        )

        # Three Dense -> LeakyReLU -> BatchNorm stages; the first two also
        # apply dropout. A single linear unit produces the prediction.
        stack = [Input(shape=(X_train_scaled.shape[1],))]
        for width, with_dropout in ((512, True), (128, True), (64, False)):
            stack.append(Dense(width, kernel_initializer="he_normal"))
            stack.append(LeakyReLU(alpha=0.2))
            stack.append(BatchNormalization())
            if with_dropout:
                stack.append(Dropout(0.2))
        stack.append(Dense(1))
        model = Sequential(stack)
        model.compile(
            optimizer=Adam(learning_rate=learning_rate), loss="mse", metrics=["mse"]
        )

        callbacks = [
            EarlyStopping(monitor="val_loss", patience=50, restore_best_weights=True),
            ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=15, min_lr=1e-6),
            ModelCheckpoint(
                "baseline_best_model.h5",
                monitor="val_loss",
                save_best_only=True,
                verbose=0,
            ),
        ]

        # The held-out split doubles as the validation set during fitting.
        model.fit(
            X_train_scaled,
            y_train,
            validation_data=(X_test_scaled, y_test),
            epochs=1000,
            batch_size=batch_size,
            verbose=0,
            callbacks=callbacks,
        )

        y_pred_test = model.predict(X_test_scaled).flatten()
        rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
        if rmse_test < best_rmse:
            best_rmse, best_state = rmse_test, random_state

    print(f"[베이스라인] Best Random State: {best_state}, Best RMSE: {best_rmse:.4f}")
    set_seed(best_state)
    X_train_scaled, X_test_scaled, y_train, y_test, scaler = _split_and_scale(
        X, y, best_state
    )
    return X_train_scaled, X_test_scaled, y_train, y_test, scaler, best_state


def tuner_model_builder(hp):
    """
    Hypermodel builder for Keras Tuner.

    Tunes the number of hidden layers (2-4), the width and dropout rate of
    each hidden layer, and the Adam learning rate. The input width is read
    from the module-level ``X_train_scaled_global``, which must be defined
    before the tuner calls this function (it is not set inside this function).

    Args:
        hp: Hyperparameter container supplied by Keras Tuner.

    Returns:
        A compiled Keras ``Model`` with MSE loss and an MSE metric.
    """
    inputs = Input(shape=(X_train_scaled_global.shape[1],))
    x = inputs
    n_layers = hp.Int("n_layers", min_value=2, max_value=4, step=1)
    for i in range(n_layers):
        # One Dense -> LeakyReLU -> BatchNorm -> Dropout stage per hidden layer,
        # each with its own tunable width and dropout rate.
        units = hp.Choice(f"units_{i}", values=[64, 128, 256, 512], default=128)
        x = Dense(units, kernel_initializer="he_normal")(x)
        x = LeakyReLU(alpha=0.2)(x)
        x = BatchNormalization()(x)
        dropout_rate = hp.Float(
            f"dropout_rate_{i}", min_value=0.0, max_value=0.5, step=0.1
        )
        x = Dropout(dropout_rate)(x)
    outputs = Dense(1)(x)
    model = Model(inputs, outputs)
    # Learning rate is sampled on a log scale between 1e-5 and 5e-3.
    tuned_lr = hp.Float(
        "learning_rate", min_value=1e-5, max_value=5e-3, sampling="log", default=1e-4
    )
    model.compile(optimizer=Adam(learning_rate=tuned_lr), loss="mse", metrics=["mse"])
    return model


def build_model_from_best_hps(
    best_hps, input_shape, use_grn=False, grn_layer_instance=None
):
    """
    Build and compile a model from tuned hyperparameters.

    When ``use_grn`` is true and a layer instance is supplied, the GRN layer's
    output is added to the raw inputs (residual connection) before the dense
    stack; otherwise the inputs feed the dense stack directly.

    Args:
        best_hps: Object whose ``get`` returns ``n_layers``, ``units_{i}``,
            ``dropout_rate_{i}`` and ``learning_rate``.
        input_shape: Shape tuple passed to ``Input``.
        use_grn: Whether to insert the GRN layer.
        grn_layer_instance: Layer instance applied to the inputs when
            ``use_grn`` is true.

    Returns:
        A compiled Keras ``Model`` with MSE loss and an MSE metric.
    """

    def _hidden_stage(tensor, width, drop):
        """Apply Dense -> LeakyReLU -> BatchNorm -> Dropout to ``tensor``."""
        tensor = Dense(width, kernel_initializer="he_normal")(tensor)
        tensor = LeakyReLU(alpha=0.2)(tensor)
        tensor = BatchNormalization()(tensor)
        return Dropout(drop)(tensor)

    inputs = Input(shape=input_shape)
    x = inputs
    if use_grn and grn_layer_instance is not None:
        grn_output = grn_layer_instance(inputs)
        x = Add()([inputs, grn_output])

    depth = best_hps.get("n_layers")
    for i in range(depth):
        width = best_hps.get(f"units_{i}")
        drop = best_hps.get(f"dropout_rate_{i}")
        x = _hidden_stage(x, width, drop)

    model = Model(inputs, Dense(1)(x))
    model.compile(
        optimizer=Adam(learning_rate=best_hps.get("learning_rate")),
        loss="mse",
        metrics=["mse"],
    )
    return model


def train_ensemble_models(
    X_train_scaled,
    y_train,
    X_test_scaled,
    y_test,
    best_hps,
    num_models=3,
    batch_size=32,
    memory_size=100,
    interaction_strength=0.1,
):
    """
    Train the ensemble members one after another.

    The first model includes a GRNLayer; every later model is built without
    it. Each model gets its own checkpoint file under ``ensemble_checkpoints``
    and its own TensorBoard log directory under a timestamped ``logs`` folder.
    The test split is used as validation data during fitting.

    Returns:
        List of the trained Keras models, in training order.
    """
    n_features = X_train_scaled.shape[1]
    grn_layer_instance = GRNLayer(
        input_dim=n_features,
        memory_size=memory_size,
        interaction_strength=interaction_strength,
    )

    checkpoint_dir = "ensemble_checkpoints"
    os.makedirs(checkpoint_dir, exist_ok=True)
    # A single timestamp is shared by every model trained in this call.
    run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = os.path.join("logs", run_stamp)

    models = []
    for i in range(num_models):
        with_grn = i == 0
        if with_grn:
            print(f"\n[학습 시작] 모델 {i+1}/{num_models} (GRNLayer 적용)")
        else:
            print(f"\n[학습 시작] 모델 {i+1}/{num_models} (GRNLayer 미적용)")
        model = build_model_from_best_hps(
            best_hps,
            input_shape=(n_features,),
            use_grn=with_grn,
            grn_layer_instance=grn_layer_instance if with_grn else None,
        )

        callbacks = [
            EarlyStopping(monitor="val_loss", patience=50, restore_best_weights=True),
            ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=15, min_lr=1e-6),
            ModelCheckpoint(
                os.path.join(checkpoint_dir, f"model_{i+1}.h5"),
                monitor="val_loss",
                save_best_only=True,
                verbose=0,
            ),
            TensorBoard(log_dir=os.path.join(log_dir, f"model_{i+1}")),
        ]
        model.fit(
            X_train_scaled,
            y_train,
            validation_data=(X_test_scaled, y_test),
            epochs=1000,
            batch_size=batch_size,
            verbose=1,
            callbacks=callbacks,
        )
        models.append(model)
    return models


def evaluate_ensemble(models, X_test_scaled, y_test, use_weighted=False):
    """
    Average the member predictions and report R^2, MSE and RMSE on the test set.

    Args:
        models: Objects exposing ``predict``.
        X_test_scaled: Test features passed to every model.
        y_test: True targets.
        use_weighted: When true, combine with ``np.average`` using equal
            weights (``1 / len(models)`` each); otherwise use a plain mean.

    Returns:
        Tuple ``(ensemble_predictions, mse_test, rmse_test, r2_test)``.
    """
    stacked = np.array([member.predict(X_test_scaled).flatten() for member in models])
    if not use_weighted:
        ensemble_predictions = stacked.mean(axis=0)
    else:
        n_members = len(models)
        uniform = np.ones(n_members) / n_members
        ensemble_predictions = np.average(stacked, axis=0, weights=uniform)

    mse_test = mean_squared_error(y_test, ensemble_predictions)
    rmse_test = np.sqrt(mse_test)
    r2_test = r2_score(y_test, ensemble_predictions)
    print("\n[최종 앙상블 결과]")
    print(f"Test R^2: {r2_test:.4f}, MSE: {mse_test:.4f}, RMSE: {rmse_test:.4f}")
    return ensemble_predictions, mse_test, rmse_test, r2_test


def run_shap_analysis(model, X_train_scaled, X_test_scaled, feature_names):
    """
    Run a SHAP analysis of ``model`` and show a summary plot and a force plot.

    Up to 100 training rows are sampled without replacement as the background
    set; when fewer than 100 rows exist, all of them are used. The force plot
    is drawn for the first test sample.

    Args:
        model: Model handed to ``shap.Explainer``.
        X_train_scaled: Training features from which the background is drawn.
        X_test_scaled: Test features to explain.
        feature_names: Names used to label features in the plots.
    """
    print("\n[SHAP 분석] 첫 번째 모델 기준")
    n_train = X_train_scaled.shape[0]
    # Sampling without replacement cannot draw more rows than exist.
    n_background = min(100, n_train)
    background = X_train_scaled[
        np.random.choice(n_train, n_background, replace=False)
    ]
    explainer = shap.Explainer(model, background)
    shap_values = explainer(X_test_scaled)
    shap.summary_plot(shap_values, X_test_scaled, feature_names=feature_names)
    shap.initjs()
    sample_features = pd.Series(X_test_scaled[0, :], index=feature_names)
    shap.force_plot(
        shap_values[0].base_values,
        shap_values[0].values,
        sample_features,
        matplotlib=True,
    )
    plt.show()


def create_tf_dataset(X, y, batch_size=32, shuffle=True):
    """Build a batched, prefetching ``tf.data.Dataset`` from ``(X, y)``.

    When ``shuffle`` is true the samples are shuffled with a buffer covering
    the whole data set before batching.
    """
    pairs = tf.data.Dataset.from_tensor_slices((X, y))
    source = pairs.shuffle(buffer_size=len(X)) if shuffle else pairs
    return source.batch(batch_size).prefetch(tf.data.AUTOTUNE)


##############################################
# 3. GUI 통합: 이상치 분석 및 GRN 앙상블 모델 탭 제공
##############################################


class OutlierRemovalGUI(ttk.Frame):
    """GUI tab for outlier analysis.

    Lets the user pick an Excel file, set the detector parameters, run the
    clonal-selection / negative-selection pipeline and save the cleaned data
    to a new Excel file.
    """

    def __init__(self, container):
        super().__init__(container, padding=15)
        self.file_path = ""
        # File selection widgets (row 0).
        ttk.Label(self, text="Excel 파일 선택:", font=("맑은 고딕", 10)).grid(
            row=0, column=0, padx=5, pady=5, sticky="w"
        )
        self.file_entry = ttk.Entry(self, width=50)
        self.file_entry.grid(row=0, column=1, padx=5, pady=5, sticky="ew")
        ttk.Button(self, text="찾아보기", command=self.browse_file).grid(
            row=0, column=2, padx=5, pady=5
        )

        # Parameter entry widgets: (label text, default value), one per row
        # starting at row 1. The label text is also the key in
        # `self.param_entries`, which `process_file` reads back.
        params = [
            ("임계치", "1.5"),
            ("초기 검출기 수", "200"),
            ("클론 선택 반복 횟수", "10"),
            ("Clone Factor", "5"),
            ("Mutation Rate", "0.1"),
        ]
        self.param_entries = {}
        for i, (label_text, default) in enumerate(params, start=1):
            ttk.Label(self, text=label_text + ":", font=("맑은 고딕", 10)).grid(
                row=i, column=0, padx=5, pady=5, sticky="w"
            )
            entry = ttk.Entry(self)
            entry.insert(0, default)
            entry.grid(row=i, column=1, padx=5, pady=5, sticky="ew")
            self.param_entries[label_text] = entry

        # Run button (row 6) and log output area (row 7); the row numbers
        # assume the five parameter rows above.
        ttk.Button(
            self, text="이상치 분석 및 처리", command=self.run_outlier_removal
        ).grid(row=6, column=0, columnspan=3, pady=10)
        self.log_text = tk.Text(self, height=10, width=70, font=("맑은 고딕", 9))
        self.log_text.grid(row=7, column=0, columnspan=3, padx=5, pady=5, sticky="nsew")

        # Let the entry column and the log row absorb extra space on resize.
        self.columnconfigure(1, weight=1)
        self.rowconfigure(7, weight=1)

    def browse_file(self):
        """Open a file dialog and store the chosen Excel path, if any."""
        file_path = filedialog.askopenfilename(
            filetypes=[("Excel 파일", "*.xlsx *.xls")]
        )
        if file_path:
            self.file_path = file_path
            self.file_entry.delete(0, tk.END)
            self.file_entry.insert(0, file_path)

    def log(self, message):
        """Append `message` to the log area and scroll to the end."""
        self.log_text.insert(tk.END, message + "\n")
        self.log_text.see(tk.END)
        self.update_idletasks()

    def run_outlier_removal(self):
        """Validate that a file is selected, then start `process_file` on a new thread."""
        if not self.file_path:
            messagebox.showerror("오류", "Excel 파일을 선택하세요.")
            return
        # NOTE(review): process_file touches Tk widgets and dialogs from this
        # worker thread; Tkinter is generally expected to be driven from the
        # main thread - confirm this is safe on the target platform.
        threading.Thread(target=self.process_file).start()

    def process_file(self):
        """Load the file, run the outlier pipeline and offer to save the result.

        Read and parameter-parsing errors are reported in a message box and
        abort the run; errors raised by the pipeline itself are not caught here.
        """
        try:
            self.log("Excel 파일 읽는 중...")
            df = pd.read_excel(self.file_path)
            data = df.values.astype(float)
        except Exception as e:
            messagebox.showerror("오류", f"Excel 파일 읽기 오류: {e}")
            return

        # Parse the user-supplied parameters from the entry widgets.
        try:
            threshold = float(self.param_entries["임계치"].get())
            detectors_num = int(self.param_entries["초기 검출기 수"].get())
            iterations = int(self.param_entries["클론 선택 반복 횟수"].get())
            clone_factor = int(self.param_entries["Clone Factor"].get())
            mutation_rate = float(self.param_entries["Mutation Rate"].get())
        except Exception as e:
            messagebox.showerror("오류", f"매개변수 변환 오류: {e}")
            return

        # Pipeline: random detectors -> clonal selection -> negative selection.
        self.log("초기 검출기 생성 중...")
        detectors = initialize_detectors(data, num_detectors=detectors_num)
        self.log("클론 선택 진행 중...")
        detectors = clonal_selection(
            detectors, data, threshold, iterations, clone_factor, mutation_rate
        )
        self.log("이상치 분류 중...")
        outlier_indices = negative_selection_classification(data, detectors, threshold)
        cleaned_data, _ = remove_outliers(data, outlier_indices)
        cleaned_df = pd.DataFrame(cleaned_data, columns=df.columns)

        # Ask where to save the cleaned data; an empty path means "cancelled".
        save_path = filedialog.asksaveasfilename(
            defaultextension=".xlsx",
            filetypes=[("Excel 파일", "*.xlsx")],
            title="저장할 파일 선택",
        )
        if save_path:
            try:
                cleaned_df.to_excel(save_path, index=False)
                self.log(f"파일 저장 완료: {save_path}")
                messagebox.showinfo(
                    "완료", f"파일이 성공적으로 저장되었습니다:\n{save_path}"
                )
            except Exception as e:
                messagebox.showerror("오류", f"파일 저장 오류: {e}")
        else:
            self.log("파일 저장이 취소되었습니다.")


class GRNEnsembleGUI(ttk.Frame):
    """Notebook tab that trains and evaluates the GRN-based ensemble model.

    The tab collects an Excel file path, a Korean font path and the training
    hyperparameters, then runs the whole pipeline (baseline training, Keras
    Tuner search, ensemble training, evaluation, SHAP analysis, model export)
    on a background thread while progress is written to a log text box.
    """

    # Font shared by every caption label on this tab.
    _LABEL_FONT = ("맑은 고딕", 10)

    def __init__(self, container):
        """Build all widgets of the tab inside ``container``."""
        super().__init__(container, padding=15)
        # Last path picked through the file dialog.
        self.file_path = ""
        # Handle of the running pipeline thread; used to refuse overlapping runs.
        self._pipeline_thread = None

        # File selection row.
        self.file_entry = self._add_labeled_entry(0, "Excel 파일 선택:", width=50)
        ttk.Button(self, text="찾아보기", command=self.browse_file).grid(
            row=0, column=2, padx=5, pady=5
        )

        # Korean font path.
        self.font_entry = self._add_labeled_entry(
            1, "한글 폰트 경로:", "C:/Windows/Fonts/malgun.ttf", width=50
        )

        # Baseline training parameters.
        self.lr_entry = self._add_labeled_entry(
            2, "Learning Rate (Baseline):", "0.0005"
        )
        self.bs_entry = self._add_labeled_entry(3, "Batch Size:", "32")

        # Tuner parameter.
        self.max_epochs_entry = self._add_labeled_entry(4, "Tuner max_epochs:", "230")

        # Ensemble model parameters.
        self.num_models_entry = self._add_labeled_entry(5, "앙상블 모델 수:", "3")
        self.memory_size_entry = self._add_labeled_entry(6, "Memory Size:", "100")
        self.int_strength_entry = self._add_labeled_entry(
            7, "Interaction Strength:", "0.1"
        )

        # Run button and log output area.
        ttk.Button(
            self, text="전체 파이프라인 실행", command=self.start_pipeline_thread
        ).grid(row=8, column=0, columnspan=3, pady=10)
        self.log_text = tk.Text(self, height=15, width=70, font=("맑은 고딕", 9))
        self.log_text.grid(row=9, column=0, columnspan=3, padx=5, pady=5, sticky="nsew")

        # Let the entry column and the log row absorb extra space on resize.
        self.columnconfigure(1, weight=1)
        self.rowconfigure(9, weight=1)

    def _add_labeled_entry(self, row, label, default="", **entry_options):
        """Create a caption label (column 0) and an entry (column 1) on ``row``.

        ``default`` is pre-filled into the entry when non-empty and
        ``entry_options`` are forwarded to ``ttk.Entry``. Returns the entry.
        """
        ttk.Label(self, text=label, font=self._LABEL_FONT).grid(
            row=row, column=0, padx=5, pady=5, sticky="w"
        )
        entry = ttk.Entry(self, **entry_options)
        if default:
            entry.insert(0, default)
        entry.grid(row=row, column=1, padx=5, pady=5, sticky="ew")
        return entry

    def browse_file(self):
        """Open a file dialog and store the chosen Excel path.

        A cancelled dialog (empty result) leaves the current selection untouched.
        """
        file_path = filedialog.askopenfilename(
            filetypes=[("Excel 파일", "*.xlsx *.xls")]
        )
        if file_path:
            self.file_path = file_path
            self.file_entry.delete(0, tk.END)
            self.file_entry.insert(0, file_path)

    def log(self, message):
        """Append ``message`` plus a newline to the log box and scroll to the end."""
        self.log_text.insert(tk.END, message + "\n")
        self.log_text.see(tk.END)
        self.update_idletasks()

    def start_pipeline_thread(self):
        """Start ``run_pipeline`` on a background thread.

        A second click while a run is still alive is ignored: overlapping runs
        would share the tuner directory (created with ``overwrite=True``), the
        ``X_train_scaled_global`` module global and the ``final_model_*.h5``
        output files.
        """
        if self._pipeline_thread is not None and self._pipeline_thread.is_alive():
            self.log("파이프라인이 이미 실행 중입니다.")
            return
        self._pipeline_thread = threading.Thread(target=self.run_pipeline)
        self._pipeline_thread.start()

    def run_pipeline(self):
        """Execute the full training and evaluation pipeline.

        Any exception is caught, written to the log box and shown in an error
        dialog, so nothing propagates out of the worker thread.
        """
        try:
            # The path entry is editable, so a typed or pasted path must be
            # honoured; fall back to the last browsed path if the entry is empty.
            file_path = self.file_entry.get().strip() or self.file_path
            if not file_path:
                messagebox.showerror("오류", "Excel 파일을 선택하세요.")
                return
            font_path = self.font_entry.get().strip()
            self.log("한글 폰트 설정 중...")
            set_korean_font(font_path=font_path)
            self.log("엑셀 파일 로드 중...")
            X, y, feature_names = load_and_preprocess_data(file_path)
            self.log("데이터 로드 완료.")
            lr = float(self.lr_entry.get().strip())
            bs = int(self.bs_entry.get().strip())
            self.log("베이스라인 모델 학습 시작...")
            X_train_scaled, X_test_scaled, y_train, y_test, scaler, best_state = (
                train_baseline_model(X, y, learning_rate=lr, batch_size=bs)
            )
            self.log(f"[베이스라인] Best Random State: {best_state}")
            # Published as a module global; presumably read by
            # tuner_model_builder -- TODO confirm against its definition.
            global X_train_scaled_global
            X_train_scaled_global = X_train_scaled
            self.log("Keras Tuner 하이퍼파라미터 탐색 시작...")
            strategy = tf.distribute.MirroredStrategy()
            self.log(f"사용 가능한 디바이스 수: {strategy.num_replicas_in_sync}")
            max_epochs = int(self.max_epochs_entry.get().strip())
            with strategy.scope():
                tuner = keras_tuner.Hyperband(
                    hypermodel=tuner_model_builder,
                    objective="val_loss",
                    max_epochs=max_epochs,
                    factor=3,
                    directory="my_dir",
                    project_name="grn_tuning",
                    overwrite=True,
                )
            stop_early = EarlyStopping(
                monitor="val_loss", patience=5, restore_best_weights=True
            )
            # NOTE(review): Hyperband normally assigns the per-trial epoch count
            # itself, so ``epochs=70`` may have no effect -- confirm.
            tuner.search(
                X_train_scaled,
                y_train,
                epochs=70,
                validation_split=0.2,
                callbacks=[stop_early],
                verbose=1,
            )
            best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
            self.log("튜너 결과: " + str(best_hps.values))
            # NOTE(review): these two datasets are not referenced again in this
            # method -- confirm whether train_ensemble_models should consume
            # them or whether the calls can be dropped.
            train_dataset = create_tf_dataset(
                X_train_scaled, y_train, batch_size=bs, shuffle=True
            )
            test_dataset = create_tf_dataset(
                X_test_scaled, y_test, batch_size=bs, shuffle=False
            )
            num_models = int(self.num_models_entry.get().strip())
            mem_size = int(self.memory_size_entry.get().strip())
            int_strength = float(self.int_strength_entry.get().strip())
            self.log("앙상블 모델 학습 시작...")
            models = train_ensemble_models(
                X_train_scaled,
                y_train,
                X_test_scaled,
                y_test,
                best_hps,
                num_models=num_models,
                batch_size=bs,
                memory_size=mem_size,
                interaction_strength=int_strength,
            )
            self.log("앙상블 모델 평가 중...")
            # Only the metrics are reported; the raw predictions are not used here.
            _predictions, mse_test, rmse_test, r2_test = evaluate_ensemble(
                models, X_test_scaled, y_test, use_weighted=False
            )
            self.log(
                f"[최종 앙상블 결과] Test R^2: {r2_test:.4f}, MSE: {mse_test:.4f}, RMSE: {rmse_test:.4f}"
            )
            self.log("SHAP 분석 시작...")
            # SHAP is run on the first ensemble member only.
            run_shap_analysis(models[0], X_train_scaled, X_test_scaled, feature_names)
            for i, model in enumerate(models):
                model.save(f"final_model_{i+1}.h5")
            self.log("모델 저장 완료.")
            self.log("전체 파이프라인 실행 완료.")
        except Exception as e:
            self.log("오류 발생: " + str(e))
            messagebox.showerror("오류", str(e))


##############################################
# 4. 메인: Tkinter Notebook을 이용한 GUI 실행
##############################################
def main():
    """Create the application window, style it, add both tabs and run the event loop."""
    window = tk.Tk()
    window.title("ImmunoGRN Studio")
    window.geometry("525x600")
    window.columnconfigure(0, weight=1)
    window.rowconfigure(0, weight=1)

    # Theme: "clam" base, flat light entries, one default font for every widget.
    base_font = ("맑은 고딕", 10)
    entry_look = {
        "font": base_font,
        "foreground": "black",
        "fieldbackground": "#F7F9FC",
        "background": "#F7F9FC",
        "borderwidth": 1,
        "relief": "flat",
    }
    theme = ttk.Style(window)
    theme.theme_use("clam")
    theme.configure("TEntry", **entry_look)
    theme.configure(".", font=base_font)

    tabs = ttk.Notebook(window)
    tabs.pack(expand=True, fill="both")

    # Each page is built with the notebook as parent and added in display order.
    pages = (
        (OutlierRemovalGUI, "이상치 분석"),
        (GRNEnsembleGUI, "GRN 앙상블 모델"),
    )
    for page_class, caption in pages:
        tabs.add(page_class(tabs), text=caption)

    window.mainloop()


# Launch the GUI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
