In [54]:

import warnings
warnings.filterwarnings("ignore", category=UserWarning)
import pandas as pd 
import torch
import torch.nn.functional as F
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import scipy.io as scio

# Regularized_loss와 R_MLR 함수 정의
def Regularized_loss(model, n, y_pred, y_true, p=4, lam=0.01):
    classification_loss = -torch.mean(y_true * torch.log_softmax(y_pred, dim=1))
    RG_loss = 1/n * torch.norm(model.weight.unsqueeze(1) - model.weight.unsqueeze(0), p=2, dim=2).pow(p).sum()
    loss = classification_loss + lam * RG_loss
    return loss

def R_MLR(para):
    path = f'./dataset/{para["data"]}.mat'
    X = scio.loadmat(path)['X']
    y = scio.loadmat(path)['Y'].squeeze()
    print(X.shape, y.shape)

    n, d = X.shape[0], X.shape[1]
    num_class = len(np.unique(y))

    if para["If_scale"]:
        scaler = StandardScaler()
        X = scaler.fit_transform(X)

    y = y - 1

    X_tensor = torch.tensor(X, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.long)

    X_train, X_test, y_train, y_test = \
        train_test_split(X_tensor, y_tensor, test_size=para["test_size"], random_state=para["state"])

    y_train = torch.nn.functional.one_hot(torch.tensor(y_train))
    y_test = torch.nn.functional.one_hot(torch.tensor(y_test))

    # 모델 및 옵티마이저 정의
    model = torch.nn.Linear(d, num_class)
    optimizer = torch.optim.Adam(model.parameters(), lr=para["lr"], weight_decay=para["weight_decay"])

    # 손실 값과 정확도 저장용 리스트 초기화
    loss_list = []
    test_acc_list = []

    for epoch in range(para["num_epoch"]):
        y_pred = model(X_train)
        loss = Regularized_loss(model, n, y_pred, y_train, para["p"], para["lam"])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (epoch + 1) % 5 == 0:
            # print(f"Epoch [{epoch+1}/{para['num_epoch']}], Loss: {loss.item():.4f}")

            with torch.no_grad():
                y_pred = model(X_test)
                correct = (torch.argmax(y_pred, dim=1) == torch.argmax(y_test, dim=1)).sum().item()
                test_acc = correct / len(X_test)
                # print(f"Test Accuracy: {test_acc:.4f}")

            loss_list.append(loss.item())
            test_acc_list.append(test_acc)

    print(f"Final Test Accuracy: {test_acc:.4f}")
    return test_acc_list[-1]
    
# 데이터 별 파라미터 설정 함수
def set_parameters(data_name, para):
    para["data"] = data_name
    if data_name == 'HHAR':
        para.update({"lr": 0.02, "lam": 0.0005, "p": 4, "If_scale": True})
    elif data_name == 'Cornell':
        para.update({"lr": 0.1, "lam": 0.005, "p": 1, "If_scale": False})
    elif data_name == 'USPS':
        para.update({"lr": 0.01, "lam": 0.001, "p": 4, "If_scale": True})
    elif data_name == 'ISOLET':
        para.update({"lr": 0.001, "lam": 0.001, "p": 4, "If_scale": False})
    elif data_name == 'ORL':
        para.update({"lr": 0.01, "lam": 0.01, "p": 6, "If_scale": True})
    elif data_name == 'Dermatology':
        para.update({"lr": 0.01, "lam": 0.1, "p": 6, "If_scale": False})
    elif data_name == 'Vehicle':
        para.update({"lr": 0.05, "lam": 0.0001, "p": 4, "If_scale": True})
    elif data_name == 'Glass':
        para.update({"lr": 0.01, "lam": 0.0001, "p": 4, "If_scale": True})
    return para
default_para = {
    "data": "Vehicle",
    "num_epoch": 500,
    "lr": 0.05,
    "weight_decay": 0.0,
    "lam": 0.0001,
    "p": 4,
    "state": 42,
    "If_scale": True,
    "test_size": 0.2
}

In [55]:
data_list = ['Cornell', 'ISOLET','HHAR', 'USPS',  'ORL', 'Dermatology', 'Vehicle', 'Glass']

# 결과 저장 리스트
results = []

# 데이터별로 파라미터 업데이트 및 결과 저장
for data in data_list:
    para = default_para.copy()
    para = set_parameters(data, para)
    result = R_MLR(para)
    results.append({"data": data, "result": result})

    

(827, 4134) (827,)
Final Test Accuracy: 0.8675
(1560, 617) (1560,)
Final Test Accuracy: 0.9487
(10299, 561) (10299,)
Final Test Accuracy: 0.9801
(9298, 256) (9298,)
Final Test Accuracy: 0.9462
(400, 1024) (400,)
Final Test Accuracy: 0.9750
(366, 34) (366,)
Final Test Accuracy: 0.9865
(846, 18) (846,)
Final Test Accuracy: 0.8000
(214, 9) (214,)
Final Test Accuracy: 0.7442


In [None]:
print(pd.DataFrame(results))

          data    result
0         HHAR  0.980583
1      Cornell  0.867470
2         USPS  0.947312
3       ISOLET  0.945513
4          ORL  0.975000
5  Dermatology  0.986486
6      Vehicle  0.800000
7        Glass  0.744186


## L1 Loss

In [None]:

def Regularized_loss(model, n, y_pred, y_true, p=4, lam=0.01,l1_ratio=0.5):
    classification_loss = -torch.mean(y_true * torch.log_softmax(y_pred, dim=1))
    L2_loss = 1/n * torch.norm(model.weight.unsqueeze(1) - model.weight.unsqueeze(0), p=2, dim=2).pow(p).sum()
    L1_loss = torch.norm(model.weight, p=1)
    loss = classification_loss + lam * ((1 - l1_ratio) * L2_loss + l1_ratio * L1_loss)
    return loss

def R_MLR(para):
    path = f'./dataset/{para["data"]}.mat'
    X = scio.loadmat(path)['X']
    y = scio.loadmat(path)['Y'].squeeze()
    print(X.shape, y.shape)

    n, d = X.shape[0], X.shape[1]
    num_class = len(np.unique(y))

    if para["If_scale"]:
        scaler = StandardScaler()
        X = scaler.fit_transform(X)

    y = y - 1

    X_tensor = torch.tensor(X, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.long)

    X_train, X_test, y_train, y_test = \
        train_test_split(X_tensor, y_tensor, test_size=para["test_size"], random_state=para["state"])

    y_train = torch.nn.functional.one_hot(torch.tensor(y_train))
    y_test = torch.nn.functional.one_hot(torch.tensor(y_test))

    # 모델 및 옵티마이저 정의
    model = torch.nn.Linear(d, num_class)
    optimizer = torch.optim.Adam(model.parameters(), lr=para["lr"], weight_decay=para["weight_decay"])

    # 손실 값과 정확도 저장용 리스트 초기화
    loss_list = []
    test_acc_list = []

    for epoch in range(para["num_epoch"]):
        y_pred = model(X_train)
        loss = Regularized_loss(model, n, y_pred, y_train, para["p"], para["lam"])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (epoch + 1) % 5 == 0:
            # print(f"Epoch [{epoch+1}/{para['num_epoch']}], Loss: {loss.item():.4f}")

            with torch.no_grad():
                y_pred = model(X_test)
                correct = (torch.argmax(y_pred, dim=1) == torch.argmax(y_test, dim=1)).sum().item()
                test_acc = correct / len(X_test)
                # print(f"Test Accuracy: {test_acc:.4f}")

            loss_list.append(loss.item())
            test_acc_list.append(test_acc)

    print(f"Final Test Accuracy: {test_acc:.4f}")
    return test_acc_list[-1]

In [32]:
data_list = ['Cornell', 'ISOLET','HHAR', 'USPS',  'ORL', 'Dermatology', 'Vehicle', 'Glass']

# 결과 저장 리스트
results = []

# 데이터별로 파라미터 업데이트 및 결과 저장
for data in data_list:
    para = default_para.copy()
    para = set_parameters(data, para)
    result = R_MLR(para)
    results.append({"data": data, "result": result})

print(pd.DataFrame(results))

(827, 4134) (827,)
Final Test Accuracy: 0.8072
(1560, 617) (1560,)
Final Test Accuracy: 0.8141
(10299, 561) (10299,)
Final Test Accuracy: 0.9757
(9298, 256) (9298,)
Final Test Accuracy: 0.9258
(400, 1024) (400,)
Final Test Accuracy: 0.0375
(366, 34) (366,)
Final Test Accuracy: 0.6757
(846, 18) (846,)
Final Test Accuracy: 0.8059
(214, 9) (214,)
Final Test Accuracy: 0.7209
          data    result
0      Cornell  0.807229
1       ISOLET  0.814103
2         HHAR  0.975728
3         USPS  0.925806
4          ORL  0.037500
5  Dermatology  0.675676
6      Vehicle  0.805882
7        Glass  0.720930


## MLP

In [50]:

def Regularized_loss(model, n, y_pred, y_true, p=4, lam=0.01,l1_ratio=0.5):
    classification_loss = -torch.mean(y_true * torch.log_softmax(y_pred, dim=1))
    RG_loss = 1/n * torch.norm(model.weight.unsqueeze(1) - model.weight.unsqueeze(0), p=2, dim=2).pow(p).sum()
    loss = classification_loss + lam * RG_loss
    return loss


# 다층 퍼셉트론(MLP) 모델 정의
class MLPModel(torch.nn.Module):
    def __init__(self, input_size, num_classes):
        super(MLPModel, self).__init__()
        self.fc1 = torch.nn.Linear(input_size, 128)
        self.fc2 = torch.nn.Linear(128, 64)
        self.fc3 = torch.nn.Linear(64, num_classes)
        self.relu = torch.nn.ReLU()
        self.dropout = torch.nn.Dropout(0.5)  # 과적합 방지용

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# 데이터 전처리 함수
def preprocess_data(X, scale_type='standard'):
    if scale_type == 'standard':
        scaler = StandardScaler()
    elif scale_type == 'minmax':
        scaler = MinMaxScaler()
    X = scaler.fit_transform(X)
    return X

# R_MLR 함수 수정
def R_MLR(para):
    path = f'./dataset/{para["data"]}.mat'
    X = scio.loadmat(path)['X']
    y = scio.loadmat(path)['Y'].squeeze()
    print(X.shape, y.shape)

    n, d = X.shape[0], X.shape[1]
    num_class = len(np.unique(y))

    if para["If_scale"]:
        scaler = StandardScaler()
        X = scaler.fit_transform(X)

    y = y - 1

    X_tensor = torch.tensor(X, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.long)

    X_train, X_test, y_train, y_test = \
        train_test_split(X_tensor, y_tensor, test_size=para["test_size"], random_state=para["state"])

    y_train = torch.nn.functional.one_hot(torch.tensor(y_train))
    y_test = torch.nn.functional.one_hot(torch.tensor(y_test))

    # 모델 및 옵티마이저 정의
    model = torch.nn.Linear(d, num_class)
    optimizer = torch.optim.Adam(model.parameters(), lr=para["lr"], weight_decay=para["weight_decay"])

    # 조기 종료 설정
    best_loss = float('inf')
    patience = 10
    early_stopping_counter = 0

    # Save the loss function values on the training set and the accuracy on the test set
    loss_list = []
    test_acc_list = []

    for epoch in range(para["num_epoch"]):
        model.train()
        y_pred = model(X_train)
        loss = Regularized_loss(model, n, y_pred, y_train, para["p"], para["lam"])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if loss.item() < best_loss:
            best_loss = loss.item()
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1

        if early_stopping_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

        if (epoch + 1) % 5 == 0:
            # print(f"Epoch [{epoch+1}/{para.num_epoch}], Loss: {loss.item():.4f}")

            model.eval()
            with torch.no_grad():
                y_pred = model(X_test)
                correct = (torch.argmax(y_pred, dim=1) == torch.argmax(y_test, dim=1)).sum().item()
                test_acc = correct / len(X_test)
                # print(f"Test Accuracy: {test_acc:.4f}")

            loss_list.append(loss.item())
            test_acc_list.append(test_acc)

    print(f"Total Test Accuracy : {test_acc:.4f}")
    return test_acc_list[-1]

    # epochs = np.arange(1, len(loss_list) + 1) * 5
    # conver_plot(epochs, test_acc_list, loss_list)

In [51]:
data_list = ['Cornell', 'ISOLET','HHAR', 'USPS',  'ORL', 'Dermatology', 'Vehicle', 'Glass']

# 결과 저장 리스트
results = []

# 데이터별로 파라미터 업데이트 및 결과 저장
for data in data_list:
    para = default_para.copy()
    para = set_parameters(data, para)
    result = R_MLR(para)
    results.append({"data": data, "result": result})

print(pd.DataFrame(results))

(827, 4134) (827,)
Total Test Accuracy : 0.8735
(1560, 617) (1560,)
Total Test Accuracy : 0.9455
(10299, 561) (10299,)
Total Test Accuracy : 0.9806
(9298, 256) (9298,)
Total Test Accuracy : 0.9473
(400, 1024) (400,)
Total Test Accuracy : 0.9750
(366, 34) (366,)
Early stopping at epoch 490
Total Test Accuracy : 0.9865
(846, 18) (846,)
Total Test Accuracy : 0.8000
(214, 9) (214,)
Total Test Accuracy : 0.7442
          data    result
0      Cornell  0.873494
1       ISOLET  0.945513
2         HHAR  0.980583
3         USPS  0.947312
4          ORL  0.975000
5  Dermatology  0.986486
6      Vehicle  0.800000
7        Glass  0.744186


## RBF Kernel

In [58]:
from sklearn.metrics.pairwise import rbf_kernel
import torch

# RBF 커널 변환 함수
def apply_rbf_kernel(X, gamma=0.1):
    """
    RBF 커널을 적용한 데이터 변환
    Args:
        X: 입력 데이터 (numpy 배열 또는 텐서)
        gamma: RBF 커널 파라미터 (1 / (2 * sigma^2))
    Returns:
        RBF 커널로 변환된 데이터
    """
    X_rbf = rbf_kernel(X, gamma=gamma)
    return X_rbf

# R_MLR 함수 수정: RBF 커널 적용
def R_MLR_rbf(para):
    # 데이터 로드
    path = f'./dataset/{para["data"]}.mat'
    X = scio.loadmat(path)['X']
    y = scio.loadmat(path)['Y'].squeeze()
    print(X.shape, y.shape)

    n, d = X.shape[0], X.shape[1]
    num_class = len(np.unique(y))

    # 데이터 스케일링
    if para["If_scale"]:
        X = preprocess_data(X, scale_type=para["scale_type"])

    # 레이블 정규화
    y = y - 1

    # RBF 커널 적용
    X = apply_rbf_kernel(X, gamma=para.get("gamma", 0.01))  # gamma 값은 하이퍼파라미터

    # 데이터를 PyTorch 텐서로 변환
    X_tensor = torch.tensor(X, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.long)

    # 데이터 분할
    X_train, X_test, y_train, y_test = train_test_split(
        X_tensor, y_tensor, test_size=para["test_size"], random_state=para["state"]
    )

    # 모델 정의
    model = MLPModel(X.shape[1], num_class)  # RBF 적용 후 특성 차원에 맞게 수정
    optimizer = torch.optim.Adam(model.parameters(), lr=para["lr"], weight_decay=para["weight_decay"])
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)

    # 조기 종료 초기화
    best_loss = float('inf')
    patience = 10
    early_stopping_counter = 0

    # 손실 및 정확도 저장 리스트
    loss_list = []
    test_acc_list = []

    for epoch in range(para["num_epoch"]):
        # 학습 모드
        model.train()
        y_pred = model(X_train)
        loss = Regularized_loss(model, n, y_pred, torch.nn.functional.one_hot(y_train, num_classes=num_class), para["p"], para["lam"])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # 조기 종료 조건 체크
        if loss.item() < best_loss:
            best_loss = loss.item()
            early_stopping_counter = 0
        else:
            early_stopping_counter += 1

        if early_stopping_counter >= patience:
            # print(f"Early stopping at epoch {epoch + 1}")
            break

        if (epoch + 1) % 5 == 0:
            # 검증 모드
            model.eval()
            with torch.no_grad():
                y_pred = model(X_test)
                correct = (torch.argmax(y_pred, dim=1) == y_test).sum().item()
                test_acc = correct / len(X_test)
                # print(f"Epoch [{epoch + 1}/{para['num_epoch']}], Loss: {loss.item():.4f}, Test Accuracy: {test_acc:.4f}")

            loss_list.append(loss.item())
            test_acc_list.append(test_acc)

    # 최종 결과 출력 및 반환
    print(f"Total Test Accuracy for {para['data']} with RBF Kernel: {test_acc:.4f}")
    return test_acc

In [59]:
data_list = ['Cornell', 'ISOLET','HHAR', 'USPS',  'ORL', 'Dermatology', 'Vehicle', 'Glass']

# 결과 저장 리스트
results = []

# 데이터별로 파라미터 업데이트 및 결과 저장
for data in data_list:
    para = default_para.copy()
    para = set_parameters(data, para)
    result = R_MLR(para)
    results.append({"data": data, "result": result})

print(pd.DataFrame(results))

(827, 4134) (827,)
Final Test Accuracy: 0.8675
(1560, 617) (1560,)
Final Test Accuracy: 0.9487
(10299, 561) (10299,)
Final Test Accuracy: 0.9806
(9298, 256) (9298,)
Final Test Accuracy: 0.9468
(400, 1024) (400,)
Final Test Accuracy: 0.9750
(366, 34) (366,)
Final Test Accuracy: 0.9865
(846, 18) (846,)
Final Test Accuracy: 0.7941
(214, 9) (214,)
Final Test Accuracy: 0.7209
          data    result
0      Cornell  0.867470
1       ISOLET  0.948718
2         HHAR  0.980583
3         USPS  0.946774
4          ORL  0.975000
5  Dermatology  0.986486
6      Vehicle  0.794118
7        Glass  0.720930


## others