## FashionMNIST 데이터

In [1]:
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
import pandas as pd


# 데이터 로드 함수
def load_fashion_mnist_from_csv(train_path, test_path):
    """Read the train and test CSV files and return scaled features with labels.

    Each CSV must contain a ``label`` column; every other column is treated
    as a pixel value.

    Args:
        train_path: Path of the training CSV.
        test_path: Path of the test CSV.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` where the ``x`` arrays are
        float32 pixel values divided by 255 and the ``y`` arrays hold the
        contents of the ``label`` column.
    """
    def _read_split(csv_path):
        # Separate the label column from the pixel columns of one split.
        frame = pd.read_csv(csv_path)
        labels = frame['label'].values
        pixels = frame.drop(columns=['label']).values
        # Cast first so the division is carried out in float32.
        return pixels.astype(np.float32) / 255.0, labels

    return _read_split(train_path), _read_split(test_path)

# Load the FashionMNIST train/test splits from their CSV files
train_path = "data/fashion-mnist_train.csv"
test_path = "data/fashion-mnist_test.csv"
(x_train, y_train), (x_test, y_test) = load_fashion_mnist_from_csv(train_path, test_path)

# Print the shapes of the loaded arrays as a sanity check
print(f"x_train shape: {x_train.shape}, y_train shape: {y_train.shape}")
print(f"x_test shape: {x_test.shape}, y_test shape: {y_test.shape}")

x_train shape: (60000, 784), y_train shape: (60000,)
x_test shape: (10000, 784), y_test shape: (10000,)


## 기존모델

In [None]:
def Regularized_loss(model, n, y_pred, y_true, p=4, lam=0.01):
    """Cross-entropy-style loss plus a penalty on pairwise weight-row distances.

    Args:
        model: Object exposing a 2-D ``weight`` tensor (one row per class).
        n: Divisor applied to the pairwise penalty (the caller passes the
            number of training samples).
        y_pred: Raw scores; softmax is taken over ``dim=1``.
        y_true: Target weights with the same shape as ``y_pred`` (one-hot in
            the caller).
        p: Power applied to each pairwise Euclidean distance.
        lam: Weight of the pairwise penalty in the total loss.

    Returns:
        Scalar tensor ``classification_loss + lam * RG_loss``.
    """
    weight_rows = model.weight
    # Broadcasting yields the difference between every ordered pair of rows.
    row_gaps = weight_rows.unsqueeze(1) - weight_rows.unsqueeze(0)
    row_distances = torch.norm(row_gaps, p=2, dim=2)
    RG_loss = 1/n * row_distances.pow(p).sum()

    # The mean runs over every element of y_true * log_softmax, not per sample.
    log_probs = F.log_softmax(y_pred, dim=1)
    classification_loss = -torch.mean(y_true * log_probs)

    return classification_loss + lam * RG_loss

# R_MLR 함수
def R_MLR(x_train, y_train, x_test, y_test, para):
    """Train a single linear softmax layer with the regularized loss.

    Runs full-batch Adam updates (one per epoch) on a ``torch.nn.Linear``
    model and prints the test accuracy every 5 epochs and after the final
    epoch, so the reported final accuracy always reflects the trained model.

    Args:
        x_train, x_test: 2-D feature arrays (samples x features).
        y_train, y_test: Integer class labels; training labels are one-hot
            encoded with 10 classes.
        para: Dict with the keys ``If_scale`` (standardize features with
            ``StandardScaler``), ``lr``, ``weight_decay``, ``num_epoch``,
            ``p`` and ``lam``.

    Returns:
        Test accuracy measured after the last epoch. When ``num_epoch`` is 0
        this is the accuracy of the untrained model.
    """
    # Optional feature standardization, fitted on the training split only.
    if para["If_scale"]:
        scaler = StandardScaler()
        x_train = scaler.fit_transform(x_train)
        x_test = scaler.transform(x_test)

    # Convert inputs to tensors; the loss expects one-hot training targets.
    x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
    y_train_tensor = F.one_hot(torch.tensor(y_train, dtype=torch.long), num_classes=10).float()
    x_test_tensor = torch.tensor(x_test, dtype=torch.float32)
    y_test_labels = torch.tensor(y_test, dtype=torch.long)

    # Model and optimizer.
    n, d = x_train_tensor.shape
    num_class = y_train_tensor.shape[1]
    model = torch.nn.Linear(d, num_class)
    optimizer = torch.optim.Adam(model.parameters(), lr=para["lr"], weight_decay=para["weight_decay"])

    def evaluate():
        # Fraction of test samples whose arg-max score matches the label.
        with torch.no_grad():
            predicted = torch.argmax(model(x_test_tensor), dim=1)
        return (predicted == y_test_labels).sum().item() / len(x_test_tensor)

    num_epoch = para["num_epoch"]
    test_acc = None

    for epoch in range(num_epoch):
        # One full-batch training step.
        y_pred = model(x_train_tensor)
        loss = Regularized_loss(model, n, y_pred, y_train_tensor, para["p"], para["lam"])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Evaluate every 5 epochs, and always after the last epoch so the
        # final figure is never stale or undefined.
        if (epoch + 1) % 5 == 0 or epoch + 1 == num_epoch:
            test_acc = evaluate()
            print(f"Epoch [{epoch+1}/{para['num_epoch']}], Loss: {loss.item():.4f}, Test Accuracy: {test_acc:.4f}")

    if test_acc is None:
        # No epoch ran (num_epoch == 0): report the untrained model.
        test_acc = evaluate()

    print(f"Final Test Accuracy: {test_acc:.4f}")
    return test_acc

# FashionMNIST CSV paths
train_path = "data/fashion-mnist_train.csv"
test_path = "data/fashion-mnist_test.csv"
(x_train, y_train), (x_test, y_test) = load_fashion_mnist_from_csv(train_path, test_path)

# Hyperparameters for the baseline linear model
fashion_params = {
    "num_epoch": 100,  # number of training epochs
    "lr": 0.001,
    "weight_decay": 1e-4,
    "lam": 0.01,
    "p": 4,
    "If_scale": True
}

# Run training and report the final test accuracy
final_accuracy = R_MLR(x_train, y_train, x_test, y_test, fashion_params)
print(f"FashionMNIST Final Test Accuracy: {final_accuracy:.4f}")

Epoch 450/500, Test Accuracy: 0.8565
Epoch 455/500, Test Accuracy: 0.8564
Epoch 460/500, Test Accuracy: 0.8564
Epoch 465/500, Test Accuracy: 0.8564
Epoch 470/500, Test Accuracy: 0.8562
Epoch 475/500, Test Accuracy: 0.8561
Epoch 480/500, Test Accuracy: 0.8561
Epoch 485/500, Test Accuracy: 0.8560
Epoch 490/500, Test Accuracy: 0.8560
Epoch 495/500, Test Accuracy: 0.8561
Epoch 500/500, Test Accuracy: 0.8563
Final Test Accuracy: 0.8563
FashionMNIST Final Test Accuracy: 0.8563


## 개선 모델

In [2]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
import numpy as np
from torch.utils.data import DataLoader, random_split
# from torchvision import datasets, transforms

# 손실 함수 정의 
def Regularized_loss(model, n, y_pred, y_true, p=4, lam_margin=0.01, lam_l1=0.001, lam_l2=0.001):
    """Classification loss plus margin, L1 and L2 penalties on the last layer.

    Args:
        model: Object whose ``network[-1].weight`` is the 2-D weight of the
            output layer.
        n: Divisor applied to the pairwise margin penalty.
        y_pred: Raw scores; softmax is taken over ``dim=1``.
        y_true: Target weights with the same shape as ``y_pred``.
        p: Power applied to each pairwise Euclidean row distance.
        lam_margin: Weight of the pairwise margin penalty.
        lam_l1: Weight of the L1 norm of the output-layer weight.
        lam_l2: Weight of the (unsquared) L2 norm of the output-layer weight.

    Returns:
        Scalar tensor with the total loss.
    """
    head_weight = model.network[-1].weight

    # Mean over every element of y_true * log_softmax.
    log_probs = torch.log_softmax(y_pred, dim=1)
    classification_loss = -torch.mean(y_true * log_probs)

    # Euclidean distance between every ordered pair of weight rows.
    row_gaps = head_weight.unsqueeze(1) - head_weight.unsqueeze(0)
    RG_loss_margin = 1/n * torch.norm(row_gaps, p=2, dim=2).pow(p).sum()

    l1_penalty = lam_l1 * torch.norm(head_weight, p=1)
    l2_penalty = lam_l2 * torch.norm(head_weight, p=2)
    RG_loss = lam_margin * RG_loss_margin + (l1_penalty + l2_penalty)

    return classification_loss + RG_loss

# MLP 모델 정의
class MLP(torch.nn.Module):
    """Two hidden blocks (Linear -> BatchNorm1d -> ReLU -> Dropout) and a linear head.

    The layers live in ``self.network`` (a ``torch.nn.Sequential``), whose
    last element is the output ``Linear`` layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout_rate=0.3):
        super().__init__()
        layers = []
        # Both hidden blocks share the same layout; only the input width differs.
        for in_features in (input_dim, hidden_dim):
            layers += [
                torch.nn.Linear(in_features, hidden_dim),
                torch.nn.BatchNorm1d(hidden_dim),
                torch.nn.ReLU(),
                torch.nn.Dropout(dropout_rate),
            ]
        layers.append(torch.nn.Linear(hidden_dim, output_dim))
        self.network = torch.nn.Sequential(*layers)

    def forward(self, x):
        """Return the output-layer scores for ``x``."""
        return self.network(x)

# 가중치 초기화 함수
def initialize_weights(m):
    """Apply Xavier-uniform initialization to the weight of a ``Linear`` module.

    Modules of any other type are left untouched; biases are not modified.
    Intended to be passed to ``Module.apply``.
    """
    if not isinstance(m, torch.nn.Linear):
        return
    torch.nn.init.xavier_uniform_(m.weight)

# Early Stopping 클래스 정의
class EarlyStoppingAccuracy:
    """Track the best accuracy seen and flag when it stops improving.

    An observation counts as an improvement only if it exceeds the current
    best by more than ``min_delta``. After ``patience`` non-improving
    observations since the last improvement, ``early_stop`` becomes True.
    """

    def __init__(self, patience=10, min_delta=0.0):
        self.patience, self.min_delta = patience, min_delta
        self.best_acc = None  # no observation recorded yet
        self.counter = 0  # non-improving observations since the last improvement
        self.early_stop = False

    def __call__(self, current_acc):
        """Record one accuracy observation and update the stop flag."""
        # The first observation just seeds the baseline.
        if self.best_acc is None:
            self.best_acc = current_acc
            return

        improved = current_acc - self.best_acc > self.min_delta
        if improved:
            self.best_acc = current_acc
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

In [3]:
def R_MLR_with_FashionMNIST(x_train, y_train, x_test, y_test, para):
    """Train the MLP with the regularized loss and return the best test accuracy.

    Args:
        x_train, x_test: Pixel arrays reshaped to ``(-1, 784)``. Either raw
            0-255 values or values already scaled to [0, 1] are accepted.
        y_train, y_test: Integer class labels (10 classes).
        para: Dict with the keys ``batch_size``, ``lr``, ``weight_decay``,
            ``patience``, ``min_delta``, ``num_epoch``, ``p``, ``lam_margin``,
            ``lam_l1`` and ``lam_l2``; ``hidden_dim`` (default 512) and
            ``dropout_rate`` (default 0.3) are optional.

    Returns:
        The best per-epoch test accuracy tracked by the early-stopping
        helper, or ``None`` if no epoch ran.

    Note:
        Early stopping and the returned figure both use the test split, so
        the result is an optimistic estimate.
    """
    # Scale pixels into [0, 1] exactly once. The CSV loader in this notebook
    # already divides by 255, and dividing again would shrink inputs to
    # [0, 1/255]; only rescale when the data still looks like raw 0-255 values.
    x_train = np.asarray(x_train, dtype=np.float32)
    x_test = np.asarray(x_test, dtype=np.float32)
    if x_train.size and x_train.max() > 1.0:
        x_train = x_train / 255.0
        x_test = x_test / 255.0
    x_train = x_train.reshape(-1, 28 * 28)  # flatten
    x_test = x_test.reshape(-1, 28 * 28)

    # Labels as long tensors.
    y_train = torch.tensor(y_train, dtype=torch.long)
    y_test = torch.tensor(y_test, dtype=torch.long)

    # Datasets and loaders; only the training loader shuffles.
    train_dataset = TensorDataset(torch.tensor(x_train), y_train)
    test_dataset = TensorDataset(torch.tensor(x_test), y_test)
    train_loader = DataLoader(train_dataset, batch_size=para["batch_size"], shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=para["batch_size"], shuffle=False)

    # Model.
    input_dim = 28 * 28
    hidden_dim = para.get("hidden_dim", 512)
    dropout_rate = para.get("dropout_rate", 0.3)
    model = MLP(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=10, dropout_rate=dropout_rate)
    model.apply(initialize_weights)

    # Optimizer and learning-rate schedule (halved every 50 epochs).
    optimizer = torch.optim.Adam(model.parameters(), lr=para["lr"], weight_decay=para["weight_decay"])
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

    early_stopping = EarlyStoppingAccuracy(patience=para["patience"], min_delta=para["min_delta"])
    n_train = len(x_train)

    for epoch in range(para["num_epoch"]):
        # Mini-batch training pass.
        model.train()
        for X_batch, y_batch in train_loader:
            y_pred = model(X_batch)
            y_onehot = F.one_hot(y_batch, num_classes=10).float()
            loss = Regularized_loss(model, n_train, y_pred, y_onehot, para["p"], para["lam_margin"], para["lam_l1"], para["lam_l2"])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        scheduler.step()

        # Evaluation pass over the test loader.
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                y_pred = model(X_batch)
                correct += (torch.argmax(y_pred, dim=1) == y_batch).sum().item()
                total += y_batch.size(0)

        test_acc = correct / total
        print(f"Epoch {epoch + 1}/{para['num_epoch']}, Test Accuracy: {test_acc:.4f}")

        early_stopping(test_acc)
        if early_stopping.early_stop:
            print("Early stopping triggered.")
            break

    return early_stopping.best_acc

In [5]:
# Default hyperparameters for R_MLR_with_FashionMNIST; random_search_quick
# uses this dict as the base configuration for each candidate.
fashion_params = {
    "num_epoch": 500,
    "lr": 0.001,
    "weight_decay": 1e-4,
    "lam_margin": 0.01,
    "lam_l1": 0.001,
    "lam_l2": 0.001,
    "p": 4,
    "patience": 50,
    "min_delta": 1e-4,
    "batch_size": 64,
    "hidden_dim": 512,
    "dropout_rate": 0.3
}

# 학습 실행
# final_accuracy = R_MLR_with_FashionMNIST(x_train, y_train, x_test, y_test, fashion_params)
# print(f"FashionMNIST Final Test Accuracy: {final_accuracy:.4f}")

#  random Search 후 탐색 범위를 좁힌 뒤, Grid Search 로 세밀하게 탐색
# 논문에서는 데이터 특성 별로 하이퍼 파라미터 튜닝을 하였음 
import random
from joblib import Parallel, delayed

# 데이터 샘플링
def sample_data(x, y, sample_size=10000):
    """Return the first ``sample_size`` entries of ``x`` and ``y``.

    This is a plain prefix slice; nothing is shuffled or drawn at random.
    """
    head = slice(None, sample_size)
    return x[head], y[head]

# 평가 함수
def evaluate_params(params, x_train_sample, y_train_sample, x_test_sample, y_test_sample, short_train_params):
    """Train and evaluate one hyperparameter candidate.

    ``params`` is layered on top of ``short_train_params`` (candidate values
    win on key clashes) and the merged dict is passed to
    ``R_MLR_with_FashionMNIST``.

    Returns:
        ``(accuracy, params)`` where ``params`` is the candidate as given.
    """
    run_config = dict(short_train_params)
    run_config.update(params)
    accuracy = R_MLR_with_FashionMNIST(
        x_train_sample, y_train_sample, x_test_sample, y_test_sample, run_config
    )
    return accuracy, params

# 랜덤 서치 함수 (병렬화, 조기 종료 포함)
def random_search_quick(param_space, n_iter, x_train, y_train, x_test, y_test, sample_size=10000, patience=5, num_epoch=10, n_jobs=-1):
    """Random hyperparameter search on a data prefix with real early stopping.

    Candidates are drawn up front (with replacement) and evaluated in
    parallel batches of one candidate per worker. Results are scanned in
    draw order; once ``patience`` consecutive candidates fail to beat the
    best accuracy, the scan stops and no further batches are submitted.
    For the same per-candidate accuracies this returns the same result as
    scanning a full pre-computed result list, without training the
    remaining candidates.

    Args:
        param_space: Dict mapping a parameter name to a sequence of choices.
        n_iter: Number of candidates to draw.
        x_train, y_train, x_test, y_test: Full data; only the first
            ``sample_size`` entries of each split are used.
        sample_size: Prefix length passed to ``sample_data``.
        patience: Consecutive non-improving candidates tolerated.
        num_epoch: Epoch budget per candidate (overrides ``fashion_params``).
        n_jobs: Worker count passed to ``joblib.Parallel``.

    Returns:
        ``(best_acc, best_params)``; ``(0, None)`` if no candidate scored
        above 0.

    Note:
        The base configuration is read from the module-level
        ``fashion_params`` dict.
    """
    from joblib import effective_n_jobs

    x_train_sample, y_train_sample = sample_data(x_train, y_train, sample_size)
    x_test_sample, y_test_sample = sample_data(x_test, y_test, sample_size)

    short_train_params = {**fashion_params, "num_epoch": num_epoch}
    best_acc = 0
    best_params = None
    no_improvement_counter = 0

    param_list = [
        {k: random.choice(v) for k, v in param_space.items()} for _ in range(n_iter)
    ]

    # One candidate per worker in each batch keeps every worker busy while
    # still letting the patience check cut off later batches.
    batch_size = max(1, effective_n_jobs(n_jobs))

    with Parallel(n_jobs=n_jobs) as parallel:
        for start in range(0, len(param_list), batch_size):
            results = parallel(
                delayed(evaluate_params)(params, x_train_sample, y_train_sample, x_test_sample, y_test_sample, short_train_params)
                for params in param_list[start:start + batch_size]
            )

            for accuracy, params in results:
                if accuracy > best_acc:
                    best_acc = accuracy
                    best_params = params
                    no_improvement_counter = 0  # reset on improvement
                else:
                    no_improvement_counter += 1

                # Stop scanning, and therefore stop launching new batches.
                if no_improvement_counter >= patience:
                    return best_acc, best_params

    return best_acc, best_params

# Hyperparameter search space: each key maps to its candidate values
param_space = {
    "lr": [0.001, 0.0005, 0.0001],
    "batch_size": [64, 128],
    "hidden_dim": [512, 1024],
    "dropout_rate": [0.2, 0.3, 0.4]
}

# Run the random search on a 5000-sample prefix with 10 epochs per candidate
best_acc, best_params = random_search_quick(
    param_space, n_iter=20, x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test,
    sample_size=5000, patience=5, num_epoch=10, n_jobs=4
)

print(f"Best Accuracy: {best_acc:.4f} with Parameters: {best_params}")

Best Accuracy: 0.8446 with Parameters: {'lr': 0.0005, 'batch_size': 128, 'hidden_dim': 512, 'dropout_rate': 0.4}


In [6]:
# Use the hyperparameters that gave the best accuracy in the random search
fashion_params = {
    "num_epoch": 500,
    "lr": 0.0005,
    "weight_decay": 1e-4,
    "lam_margin": 0.01,
    "lam_l1": 0.001,
    "lam_l2": 0.001,
    "p": 4,
    "patience": 50,
    "min_delta": 1e-4,
    "batch_size": 128,
    "hidden_dim": 512,
    "dropout_rate": 0.4
}

# Run training on the full data and report the best test accuracy
final_accuracy = R_MLR_with_FashionMNIST(x_train, y_train, x_test, y_test, fashion_params)
print(f"FashionMNIST Final Test Accuracy: {final_accuracy:.4f}")

Epoch 1/500, Test Accuracy: 0.8517
Epoch 2/500, Test Accuracy: 0.8629
Epoch 3/500, Test Accuracy: 0.8725
Epoch 4/500, Test Accuracy: 0.8674
Epoch 5/500, Test Accuracy: 0.8790
Epoch 6/500, Test Accuracy: 0.8745
Epoch 7/500, Test Accuracy: 0.8716
Epoch 8/500, Test Accuracy: 0.8749
Epoch 9/500, Test Accuracy: 0.8737
Epoch 10/500, Test Accuracy: 0.8829
Epoch 11/500, Test Accuracy: 0.8887
Epoch 12/500, Test Accuracy: 0.8784
Epoch 13/500, Test Accuracy: 0.8766
Epoch 14/500, Test Accuracy: 0.8859
Epoch 15/500, Test Accuracy: 0.8838
Epoch 16/500, Test Accuracy: 0.8836
Epoch 17/500, Test Accuracy: 0.8876
Epoch 18/500, Test Accuracy: 0.8792
Epoch 19/500, Test Accuracy: 0.8877
Epoch 20/500, Test Accuracy: 0.8859
Epoch 21/500, Test Accuracy: 0.8892
Epoch 22/500, Test Accuracy: 0.8920
Epoch 23/500, Test Accuracy: 0.8822
Epoch 24/500, Test Accuracy: 0.8914
Epoch 25/500, Test Accuracy: 0.8911
Epoch 26/500, Test Accuracy: 0.8846
Epoch 27/500, Test Accuracy: 0.8896
Epoch 28/500, Test Accuracy: 0.8838
E

In [11]:
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F
import numpy as np

# 손실 함수 정의
def Regularized_loss(model, n, y_pred, y_true, p=4, lam_margin=0.01, lam_l1=0.001, lam_l2=0.001):
    """Total loss: classification term plus penalties on the output layer.

    Args:
        model: Object whose ``network[-1].weight`` is the 2-D output-layer
            weight.
        n: Divisor for the pairwise margin penalty.
        y_pred: Raw scores; softmax is taken over ``dim=1``.
        y_true: Target weights shaped like ``y_pred``.
        p: Power applied to each pairwise Euclidean row distance.
        lam_margin: Weight of the pairwise margin penalty.
        lam_l1: Weight of the L1 norm of the output-layer weight.
        lam_l2: Weight of the (unsquared) L2 norm of the output-layer weight.

    Returns:
        Scalar tensor with the total loss.
    """
    out_weight = model.network[-1].weight

    # Norm-based penalties on the output-layer weight.
    weight_l1 = torch.norm(out_weight, p=1)
    weight_l2 = torch.norm(out_weight, p=2)
    RG_loss_regularization = lam_l1 * weight_l1 + lam_l2 * weight_l2

    # Pairwise Euclidean distances between weight rows, raised to the p-th power.
    pair_diff = out_weight.unsqueeze(1) - out_weight.unsqueeze(0)
    pair_dist = torch.norm(pair_diff, p=2, dim=2)
    RG_loss_margin = 1/n * pair_dist.pow(p).sum()

    # Mean over every element of y_true * log_softmax.
    classification_loss = -torch.mean(y_true * torch.log_softmax(y_pred, dim=1))

    RG_loss = lam_margin * RG_loss_margin + RG_loss_regularization
    return classification_loss + RG_loss

# MLP 모델 정의
class MLP(torch.nn.Module):
    """Two hidden blocks (Linear -> BatchNorm1d -> LeakyReLU -> Dropout) and a linear head.

    The layers live in ``self.network`` (a ``torch.nn.Sequential``), whose
    last element is the output ``Linear`` layer.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout_rate=0.3):
        super().__init__()
        layers = []
        # Both hidden blocks share the same layout; only the input width differs.
        for in_features in (input_dim, hidden_dim):
            layers += [
                torch.nn.Linear(in_features, hidden_dim),
                torch.nn.BatchNorm1d(hidden_dim),
                torch.nn.LeakyReLU(negative_slope=0.01),
                torch.nn.Dropout(dropout_rate),
            ]
        layers.append(torch.nn.Linear(hidden_dim, output_dim))
        self.network = torch.nn.Sequential(*layers)

    def forward(self, x):
        """Return the output-layer scores for ``x``."""
        return self.network(x)

# 가중치 초기화 함수
def initialize_weights(m):
    """Apply Kaiming-uniform (He) initialization to the weight of a ``Linear`` module.

    Uses ``nonlinearity='leaky_relu'``. Modules of any other type are left
    untouched; biases are not modified. Intended for ``Module.apply``.
    """
    if not isinstance(m, torch.nn.Linear):
        return
    torch.nn.init.kaiming_uniform_(m.weight, nonlinearity='leaky_relu')

# Early Stopping 클래스 정의
class EarlyStoppingAccuracy:
    """Accuracy-based early-stopping tracker.

    Call the instance once per evaluation. ``best_acc`` holds the best
    accuracy seen so far, ``counter`` counts non-improving calls since the
    last improvement, and ``early_stop`` becomes True once ``counter``
    reaches ``patience``. An improvement must exceed ``best_acc`` by more
    than ``min_delta``.
    """

    def __init__(self, patience=10, min_delta=0.0):
        self.patience = patience
        self.min_delta = min_delta
        self.best_acc = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, current_acc):
        """Update the tracker with the accuracy of the latest evaluation."""
        first_call = self.best_acc is None
        if first_call or current_acc - self.best_acc > self.min_delta:
            self.best_acc = current_acc
            # The very first call only seeds the baseline and leaves the
            # counter as it is; later improvements reset it.
            if not first_call:
                self.counter = 0
            return

        self.counter += 1
        self.early_stop = self.early_stop or self.counter >= self.patience

#  LeakyReLU를 사용한 학습 함수
def R_MLR_with_FashionMNIST(x_train, y_train, x_test, y_test, para):
    """Train the LeakyReLU MLP with the regularized loss; return the best test accuracy.

    Args:
        x_train, x_test: Pixel arrays reshaped to ``(-1, 784)``. Either raw
            0-255 values or values already scaled to [0, 1] are accepted.
        y_train, y_test: Integer class labels (10 classes).
        para: Dict with the keys ``batch_size``, ``lr``, ``weight_decay``,
            ``patience``, ``min_delta``, ``num_epoch``, ``p``, ``lam_margin``,
            ``lam_l1`` and ``lam_l2``; ``hidden_dim`` (default 512) and
            ``dropout_rate`` (default 0.3) are optional.

    Returns:
        The best per-epoch test accuracy tracked by the early-stopping
        helper, or ``None`` if no epoch ran.

    Note:
        Early stopping and the returned figure both use the test split, so
        the result is an optimistic estimate.
    """
    # Scale pixels into [0, 1] exactly once. The CSV loader in this notebook
    # already divides by 255, and dividing again would shrink inputs to
    # [0, 1/255]; only rescale when the data still looks like raw 0-255 values.
    x_train = np.asarray(x_train, dtype=np.float32)
    x_test = np.asarray(x_test, dtype=np.float32)
    if x_train.size and x_train.max() > 1.0:
        x_train = x_train / 255.0
        x_test = x_test / 255.0
    x_train = x_train.reshape(-1, 28 * 28)
    x_test = x_test.reshape(-1, 28 * 28)

    # Labels as long tensors.
    y_train = torch.tensor(y_train, dtype=torch.long)
    y_test = torch.tensor(y_test, dtype=torch.long)

    # Datasets and loaders; only the training loader shuffles.
    train_dataset = TensorDataset(torch.tensor(x_train), y_train)
    test_dataset = TensorDataset(torch.tensor(x_test), y_test)
    train_loader = DataLoader(train_dataset, batch_size=para["batch_size"], shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=para["batch_size"], shuffle=False)

    # Model.
    input_dim = 28 * 28
    hidden_dim = para.get("hidden_dim", 512)
    dropout_rate = para.get("dropout_rate", 0.3)
    model = MLP(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=10, dropout_rate=dropout_rate)
    model.apply(initialize_weights)

    # Optimizer and learning-rate schedule (halved every 50 epochs).
    optimizer = torch.optim.Adam(model.parameters(), lr=para["lr"], weight_decay=para["weight_decay"])
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

    early_stopping = EarlyStoppingAccuracy(patience=para["patience"], min_delta=para["min_delta"])
    n_train = len(x_train)

    for epoch in range(para["num_epoch"]):
        # Mini-batch training pass.
        model.train()
        for X_batch, y_batch in train_loader:
            y_pred = model(X_batch)
            y_onehot = F.one_hot(y_batch, num_classes=10).float()
            loss = Regularized_loss(model, n_train, y_pred, y_onehot, para["p"], para["lam_margin"], para["lam_l1"], para["lam_l2"])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        scheduler.step()

        # Evaluation pass over the test loader.
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                y_pred = model(X_batch)
                correct += (torch.argmax(y_pred, dim=1) == y_batch).sum().item()
                total += y_batch.size(0)

        test_acc = correct / total
        print(f"Epoch {epoch + 1}/{para['num_epoch']}, Test Accuracy: {test_acc:.4f}")

        early_stopping(test_acc)
        if early_stopping.early_stop:
            print("Early stopping triggered.")
            break

    return early_stopping.best_acc

# Use the hyperparameters from the best-accuracy search result.
# NOTE(review): dropout_rate is 0.3 here, while the search output above
# reported 0.4 - confirm which value is intended.
fashion_params = {
    "num_epoch": 500,
    "lr": 0.0005,
    "weight_decay": 1e-4,
    "lam_margin": 0.01,
    "lam_l1": 0.001,
    "lam_l2": 0.001,
    "p": 4,
    "patience": 50,
    "min_delta": 1e-4,
    "batch_size": 128,
    "hidden_dim": 512,
    "dropout_rate": 0.3
}

# Run training on the full data and report the best test accuracy
final_accuracy = R_MLR_with_FashionMNIST(x_train, y_train, x_test, y_test, fashion_params)
print(f"FashionMNIST Final Test Accuracy: {final_accuracy:.4f}")

Epoch 1/500, Test Accuracy: 0.8554
Epoch 2/500, Test Accuracy: 0.8653
Epoch 3/500, Test Accuracy: 0.8720
Epoch 4/500, Test Accuracy: 0.8630
Epoch 5/500, Test Accuracy: 0.8770
Epoch 6/500, Test Accuracy: 0.8776
Epoch 7/500, Test Accuracy: 0.8669
Epoch 8/500, Test Accuracy: 0.8797
Epoch 9/500, Test Accuracy: 0.8859
Epoch 10/500, Test Accuracy: 0.8806
Epoch 11/500, Test Accuracy: 0.8859
Epoch 12/500, Test Accuracy: 0.8893
Epoch 13/500, Test Accuracy: 0.8874
Epoch 14/500, Test Accuracy: 0.8903
Epoch 15/500, Test Accuracy: 0.8911
Epoch 16/500, Test Accuracy: 0.8853
Epoch 17/500, Test Accuracy: 0.8901
Epoch 18/500, Test Accuracy: 0.8881
Epoch 19/500, Test Accuracy: 0.8772
Epoch 20/500, Test Accuracy: 0.8872
Epoch 21/500, Test Accuracy: 0.8899
Epoch 22/500, Test Accuracy: 0.8880
Epoch 23/500, Test Accuracy: 0.8778
Epoch 24/500, Test Accuracy: 0.8564
Epoch 25/500, Test Accuracy: 0.8908
Epoch 26/500, Test Accuracy: 0.8927
Epoch 27/500, Test Accuracy: 0.8873
Epoch 28/500, Test Accuracy: 0.8910
E