* 손실 함수에 마지막 선형 계층의 클래스별 가중치 벡터 간 차이(쌍별 거리)를 정규화 항으로 추가하여, 논문의 핵심 아이디어인 최소 마진 최대화를 그대로 반영
### <개선 사항>
1. L1/L2 정규화 추가: 가중치를 제어하여 과적합 방지와 희소성 확보
2. MLP 구조 활용: 복잡한 데이터 패턴을 학습할 수 있도록 은닉층 추가
3. Early Stopping 도입: 학습을 조기에 종료하여 불필요한 계산을 방지하고 과적합 방지
4. 학습률 스케줄러 도입: 학습 초반에는 큰 학습률로 빠르게 학습하고, 후반에는 학습률을 단계적으로 줄여 미세 조정
### <실험 결과>
- Cornell: 0.8675(+ 0.0025)
- ISOLET: 0.9679(+ 0.0229)
- USPS: 0.9839(+ 0.0279)
- ORL: 0.9875(+ 0.0125)
- Dermatology: 1.0000(+ 0.0120)
- Glass: 0.8372(+ 0.0932)
- Vehicle: 0.9118(+ 0.1118)
- HHAR: 0.9947(+ 0.0137)

In [1]:
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

import torch
import torch.nn.functional as F
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
import scipy.io as scio

# Loss function: soft-label cross-entropy plus margin and L1/L2 penalties
def Regularized_loss(model, n, y_pred, y_true, p=4, lam_margin=0.01, lam_l1=0.001, lam_l2=0.001):
    """Return the regularized training loss as a scalar tensor.

    The loss is the sum of three parts:

    * a cross-entropy-style data term, ``-mean(y_true * log_softmax(y_pred))``,
      where the mean runs over every element of the product;
    * a margin term: the 2-norm distance between every ordered pair of rows
      of the last layer's weight matrix, raised to the power ``p``, summed,
      and divided by ``n``;
    * L1 and (non-squared) L2 norms of that same weight matrix.

    Args:
        model: Module exposing ``model.network``; the ``weight`` of its last
            entry is the matrix being regularized.
        n: Divisor applied to the summed pairwise margin term.
        y_pred: Raw scores; ``log_softmax`` is taken along dim 1.
        y_true: Targets multiplied element-wise with the log-probabilities.
        p: Exponent applied to each pairwise row distance.
        lam_margin: Weight of the margin term.
        lam_l1: Weight of the L1 norm of the last-layer weights.
        lam_l2: Weight of the L2 norm of the last-layer weights.

    Returns:
        Scalar tensor: data term + weighted margin term + weighted norms.
    """
    log_probs = torch.log_softmax(y_pred, dim=1)
    data_term = -(y_true * log_probs).mean()

    head_weight = model.network[-1].weight
    # Broadcasting rows against each other yields every ordered row pair;
    # the diagonal (a row minus itself) contributes zero.
    row_diffs = head_weight[:, None, :] - head_weight[None, :, :]
    pair_dist = torch.norm(row_diffs, p=2, dim=2)
    margin_term = 1 / n * pair_dist.pow(p).sum()

    norm_penalty = lam_l1 * head_weight.norm(p=1) + \
                   lam_l2 * head_weight.norm(p=2)
    regularizer = lam_margin * margin_term + norm_penalty

    return data_term + regularizer

# MLP model definition
class MLP(torch.nn.Module):
    """Multi-layer perceptron with two hidden blocks and a linear output layer.

    Each hidden block is ``Linear -> BatchNorm1d -> ReLU -> Dropout``. The
    whole stack is stored as ``self.network`` (a ``torch.nn.Sequential``),
    whose last entry is the output ``Linear`` layer.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of both hidden blocks.
        output_dim: Number of output scores.
        dropout_rate: Dropout probability used in both hidden blocks.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout_rate=0.3):
        super().__init__()
        nn = torch.nn

        layers = []
        in_features = input_dim
        # Two identical hidden blocks; only the first one sees input_dim.
        for _ in range(2):
            layers += [
                nn.Linear(in_features, hidden_dim),
                nn.BatchNorm1d(hidden_dim),
                nn.ReLU(),
                nn.Dropout(dropout_rate),
            ]
            in_features = hidden_dim
        layers.append(nn.Linear(hidden_dim, output_dim))

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the raw output scores."""
        return self.network(x)

# Weight initialization function
def initialize_weights(m):
    """Apply Xavier-uniform initialization to the weight of a ``Linear`` module.

    Intended for use with ``Module.apply``. Modules that are not
    ``torch.nn.Linear`` are left untouched, and the bias is not modified.
    """
    if not isinstance(m, torch.nn.Linear):
        return
    torch.nn.init.xavier_uniform_(m.weight)

# Early stopping helper class
class EarlyStoppingAccuracy:
    """Track the best accuracy seen and flag when it has stopped improving.

    Call the instance once per evaluation with the current accuracy. A value
    counts as an improvement only when it exceeds the best value so far by
    more than ``min_delta``. After ``patience`` consecutive non-improving
    calls, ``early_stop`` becomes ``True`` and is never reset.

    Args:
        patience: Number of consecutive non-improving calls tolerated.
        min_delta: Minimum gain over ``best_acc`` that counts as improvement.
    """

    def __init__(self, patience=10, min_delta=0.0):
        self.patience = patience
        self.min_delta = min_delta
        self.best_acc = None      # best accuracy observed so far
        self.counter = 0          # consecutive calls without improvement
        self.early_stop = False   # set once patience is exhausted

    def __call__(self, current_acc):
        """Record ``current_acc`` and update the best value, counter and flag."""
        # The first observation only establishes the baseline.
        if self.best_acc is None:
            self.best_acc = current_acc
            return

        gain = current_acc - self.best_acc
        if gain > self.min_delta:
            self.best_acc = current_acc
            self.counter = 0
            return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

# R_MLR training/evaluation routine
def _evaluate_accuracy(model, loader):
    """Return the fraction of samples in ``loader`` whose argmax prediction is correct.

    ``loader`` must yield ``(features, integer_class_labels)`` batches. The
    model is switched to eval mode and gradients are disabled.
    """
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for X_batch, y_batch in loader:
            predicted = torch.argmax(model(X_batch), dim=1)
            correct += (predicted == y_batch).sum().item()
            total += X_batch.size(0)
    return correct / total


def R_MLR_with_accuracy_early_stopping(para):
    """Train the MLP on one dataset and return the best test accuracy seen.

    Loads ``./dataset/<para["data"]>.mat`` (arrays ``X`` and ``Y``), splits it
    into train/test, optionally standardizes the features, trains an ``MLP``
    with ``Regularized_loss``, Adam and a ``StepLR`` schedule, and evaluates on
    the test split after every epoch.

    Args:
        para: Configuration dict. Required keys: ``"data"``, ``"If_scale"``,
            ``"test_size"``, ``"state"``, ``"lr"``, ``"weight_decay"``,
            ``"num_epoch"``, ``"p"``, ``"lam_margin"``, ``"lam_l1"``,
            ``"lam_l2"``. Optional keys: ``"hidden_dim"`` (default 256) and
            ``"dropout_rate"`` (default 0.3).

    Returns:
        The best per-epoch test accuracy recorded by the early-stopping
        tracker, or ``None`` when ``para["num_epoch"]`` is 0.

    Notes:
        * NOTE(review): early stopping monitors the *test* split and the
          returned value is the maximum test accuracy over epochs, so it is an
          optimistic estimate. A separate validation split would be needed
          for an unbiased number.
        * The margin term of the loss is divided by ``n``, the total sample
          count (train + test), which is kept as-is so existing
          ``lam_margin`` settings retain their scale.
    """
    path = f'./dataset/{para["data"]}.mat'
    data = scio.loadmat(path)
    X = data['X']
    y = data['Y'].squeeze()

    n, d = X.shape[0], X.shape[1]

    # Map whatever label values the file uses onto 0..C-1. For labels 1..C
    # this equals ``y - 1``, but it also handles 0-based or non-contiguous
    # labels and cannot wrap around on unsigned dtypes.
    classes, y = np.unique(y, return_inverse=True)
    y = y.reshape(-1)
    num_class = len(classes)

    # Split first so that any preprocessing below only ever sees training data.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=para["test_size"], random_state=para["state"]
    )

    # Standardize with statistics from the training split only; fitting the
    # scaler on the full dataset would leak test information into training.
    if para["If_scale"]:
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

    # Convert to PyTorch tensors
    X_train = torch.tensor(X_train, dtype=torch.float32)
    X_test = torch.tensor(X_test, dtype=torch.float32)
    y_train = torch.tensor(y_train, dtype=torch.long)
    y_test = torch.tensor(y_test, dtype=torch.long)

    # The loss multiplies targets with log-probabilities, so training targets
    # are one-hot; evaluation only needs the integer class ids.
    train_dataset = TensorDataset(X_train, torch.nn.functional.one_hot(y_train, num_classes=num_class).float())
    test_dataset = TensorDataset(X_test, y_test)

    batch_size = 32
    # BatchNorm1d raises in train mode on a batch with a single sample, so
    # drop the trailing batch only when it would contain exactly one sample.
    drop_single_tail = len(train_dataset) % batch_size == 1
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=drop_single_tail)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Build and initialize the MLP
    hidden_dim = para.get("hidden_dim", 256)
    dropout_rate = para.get("dropout_rate", 0.3)
    model = MLP(input_dim=d, hidden_dim=hidden_dim, output_dim=num_class, dropout_rate=dropout_rate)
    model.apply(initialize_weights)

    # Optimizer and scheduler: the learning rate is halved every 50 epochs
    optimizer = torch.optim.Adam(model.parameters(), lr=para["lr"], weight_decay=para["weight_decay"])
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

    # Early stopping on accuracy
    early_stopping = EarlyStoppingAccuracy(patience=100, min_delta=1e-4)

    for epoch in range(para["num_epoch"]):
        model.train()
        for X_batch, y_batch in train_loader:
            y_pred = model(X_batch)
            loss = Regularized_loss(model, n, y_pred, y_batch, para["p"], para["lam_margin"], para["lam_l1"], para["lam_l2"])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        scheduler.step()

        # Per-epoch evaluation feeds the early-stopping tracker
        test_acc = _evaluate_accuracy(model, test_loader)
        early_stopping(test_acc)
        if early_stopping.early_stop:
            break

    return early_stopping.best_acc

# Per-dataset hyperparameter configurations.
# Values shared by most datasets live in one place; each entry below lists
# only what differs. The key order here fixes the key order of every
# resulting dict ("data" is a placeholder that every entry overrides).
_COMMON_PARAMS = {
    "data": None,
    "num_epoch": 500,
    "lr": 0.001,
    "weight_decay": 1e-4,
    "lam_margin": 0.01,
    "lam_l1": 0.001,
    "lam_l2": 0.001,
    "p": 4,
    "state": 42,
    "If_scale": True,
    "test_size": 0.2,
    "hidden_dim": 512,
    "dropout_rate": 0.4,
}

parameter_sets = [
    {**_COMMON_PARAMS, "data": "Vehicle"},
    {**_COMMON_PARAMS, "data": "ISOLET"},
    {
        **_COMMON_PARAMS,
        "data": "ORL",
        "lr": 0.0005,
        "lam_margin": 0.1,
        "hidden_dim": 1024,
        "dropout_rate": 0.2,
    },
    {**_COMMON_PARAMS, "data": "Dermatology"},
    {**_COMMON_PARAMS, "data": "Glass"},
    {
        **_COMMON_PARAMS,
        "data": "Cornell",
        "lr": 0.005,
        "lam_margin": 0.02,
        "lam_l1": 0.0005,
        "lam_l2": 0.0005,
        "hidden_dim": 1024,
        "dropout_rate": 0.3,
    },
    {**_COMMON_PARAMS, "data": "HHAR"},
    {**_COMMON_PARAMS, "data": "USPS"},
]

# One {"dataset", "final_accuracy"} record is collected per configuration.
results = []

# Run the model once per parameter set, echoing the configuration first.
for params in parameter_sets:
    print(
        f"Running model for dataset: {params['data']} with parameters:",
        params,
        sep="\n",
    )
    final_accuracy = R_MLR_with_accuracy_early_stopping(params)
    results.append(dict(dataset=params["data"], final_accuracy=final_accuracy))

# Print the final summary, one line per dataset.
print("\nFinal Results:")
for result in results:
    print(f"Dataset: {result['dataset']}, Final Test Accuracy: {result['final_accuracy']:.4f}")

Running model for dataset: Vehicle with parameters:
{'data': 'Vehicle', 'num_epoch': 500, 'lr': 0.001, 'weight_decay': 0.0001, 'lam_margin': 0.01, 'lam_l1': 0.001, 'lam_l2': 0.001, 'p': 4, 'state': 42, 'If_scale': True, 'test_size': 0.2, 'hidden_dim': 512, 'dropout_rate': 0.4}
Running model for dataset: ISOLET with parameters:
{'data': 'ISOLET', 'num_epoch': 500, 'lr': 0.001, 'weight_decay': 0.0001, 'lam_margin': 0.01, 'lam_l1': 0.001, 'lam_l2': 0.001, 'p': 4, 'state': 42, 'If_scale': True, 'test_size': 0.2, 'hidden_dim': 512, 'dropout_rate': 0.4}
Running model for dataset: ORL with parameters:
{'data': 'ORL', 'num_epoch': 500, 'lr': 0.0005, 'weight_decay': 0.0001, 'lam_margin': 0.1, 'lam_l1': 0.001, 'lam_l2': 0.001, 'p': 4, 'state': 42, 'If_scale': True, 'test_size': 0.2, 'hidden_dim': 1024, 'dropout_rate': 0.2}
Running model for dataset: Dermatology with parameters:
{'data': 'Dermatology', 'num_epoch': 500, 'lr': 0.001, 'weight_decay': 0.0001, 'lam_margin': 0.01, 'lam_l1': 0.001, 'la