In [1]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc, precision_recall_curve, roc_auc_score, average_precision_score
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#GPU 사용 가능 여부 확인
USE_CUDA = torch.cuda.is_available()
print('GPU 사용 가능 여부:', USE_CUDA)

GPU 사용 가능 여부: True


In [3]:
#사용할 디바이스 설정 (GPU 또는 CPU)
device = torch.device('cuda:0' if USE_CUDA else 'cpu')
print('학습을 진행하는 기기:', device)

학습을 진행하는 기기: cuda:0


In [18]:
# CSV 파일 로드
train_data = pd.read_csv('f_train_55.csv')

# 'Class' 열 숫자 인코딩
label_encoder = LabelEncoder()
train_data['Class'] = label_encoder.fit_transform(train_data['Class'])
class_mapping = {index: label for index, label in enumerate(label_encoder.classes_)}
print(class_mapping)

# 테스트 데이터 로드
test_data = pd.read_csv('f_test_149.csv')

# 'Class' 열 숫자 인코딩 (훈련 데이터에서 학습한 label_encoder 사용)
test_data['Class'] = label_encoder.transform(test_data['Class'])

{0: 'de', 1: 'f', 2: 'nor'}


In [19]:
#onehot 파일 폴더 경로
onehot_folder = 'onehot7'
onehot_files = os.listdir(onehot_folder)

# 원핫인코딩 매트릭스를 저장할 딕셔너리
onehot_matrices = {}

# 최대 행 수 찾기
max_rows = 0

# 원핫인코딩 매트릭스 파일들을 로드 (헤더 없음)
for file in onehot_files:
    variant_name = file.split('.csv')[0].replace('_', '.')  # CYP2D6_1.001.csv -> CYP2D6.1.001
    file_path = os.path.join(onehot_folder, file)
    try:
        matrix = pd.read_csv(file_path, header=None).values
        onehot_matrices[variant_name] = matrix
        max_rows = max(max_rows, matrix.shape[0])
        print(f"Loaded {variant_name}: shape {matrix.shape}")
    except Exception as e:
        print(f"Error loading {file}: {str(e)}")

print(f"최대 행 수: {max_rows}")
print(f"로드된 매트릭스 개수: {len(onehot_matrices)}")

Loaded CYP2D6.106.002: shape (4312, 7)
Loaded CYP2D6.35.004: shape (4312, 7)
Loaded CYP2D6.2.004: shape (4312, 7)
Loaded CYP2D6.4.012: shape (4312, 7)
Loaded CYP2D6.36.002: shape (4312, 7)
Loaded CYP2D6.10.003: shape (4312, 7)
Loaded CYP2D6.1.050: shape (4312, 7)
Loaded CYP2D6.105.001: shape (4312, 7)
Loaded CYP2D6.159.001: shape (4312, 7)
Loaded CYP2D6.134.001: shape (4312, 7)
Loaded CYP2D6.155.001: shape (4312, 7)
Loaded CYP2D6.4.015: shape (4312, 7)
Loaded CYP2D6.11.001: shape (4312, 7)
Loaded CYP2D6.71.001: shape (4312, 7)
Loaded CYP2D6.4.003: shape (4312, 7)
Loaded CYP2D6.47.001: shape (4312, 7)
Loaded CYP2D6.4.010: shape (4312, 7)
Loaded CYP2D6.4.024: shape (4312, 7)
Loaded CYP2D6.2.015: shape (4312, 7)
Loaded CYP2D6.164.001: shape (4312, 7)
Loaded CYP2D6.85.001: shape (4312, 7)
Loaded CYP2D6.3.002: shape (4312, 7)
Loaded CYP2D6.1.035: shape (4312, 7)
Loaded CYP2D6.3.003: shape (4312, 7)
Loaded CYP2D6.97.001: shape (4312, 7)
Loaded CYP2D6.1.013: shape (4312, 7)
Loaded CYP2D6.4.00

Loaded CYP2D6.2.012: shape (4312, 7)
Loaded CYP2D6.4.011: shape (4312, 7)
Loaded CYP2D6.135.001: shape (4312, 7)
Loaded CYP2D6.2.028: shape (4312, 7)
Loaded CYP2D6.84.002: shape (4312, 7)
Loaded CYP2D6.55.001: shape (4312, 7)
Loaded CYP2D6.1.003: shape (4312, 7)
Loaded CYP2D6.1.029: shape (4312, 7)
Loaded CYP2D6.2.008: shape (4312, 7)
Loaded CYP2D6.146.002: shape (4312, 7)
Loaded CYP2D6.109.001: shape (4312, 7)
Loaded CYP2D6.171.001: shape (4313, 7)
Loaded CYP2D6.2.001: shape (4312, 7)
Loaded CYP2D6.15.002: shape (4313, 7)
Loaded CYP2D6.71.003: shape (4312, 7)
Loaded CYP2D6.41.005: shape (4312, 7)
Loaded CYP2D6.35.001: shape (4312, 7)
Loaded CYP2D6.167.001: shape (4312, 7)
Loaded CYP2D6.101.001: shape (4312, 7)
Loaded CYP2D6.168.001: shape (4312, 7)
Loaded CYP2D6.120.001: shape (4312, 7)
Loaded CYP2D6.41.004: shape (4312, 7)
Loaded CYP2D6.1.017: shape (4312, 7)
Loaded CYP2D6.56.003: shape (4312, 7)
Loaded CYP2D6.42.001: shape (4314, 7)
Loaded CYP2D6.4.028: shape (4312, 7)
Loaded CYP2D6

In [20]:
# 매트릭스 패딩 함수_원래 input으로 들어갈 X가 numpy 배열이었는데 이를 pytorch 텐서로 전환
def pad_matrix(matrix, target_rows=4330, target_cols=7):
    current_rows, current_cols = matrix.shape
    padded = torch.zeros((target_rows, target_cols))
    padded[:current_rows, :current_cols] = torch.tensor(matrix)
    return padded

# 패딩된 매트릭스로 업데이트
for key in onehot_matrices:
    onehot_matrices[key] = pad_matrix(onehot_matrices[key])


# 'Variant' 컬럼의 값을 기반으로 원핫인코딩 매트릭스 매핑
def map_variant_to_onehot(variant):
    variants = variant.split('; ')
    matrices = []
    for v in variants:
        if v in onehot_matrices:
            matrices.append(onehot_matrices[v])
        else:
            print(f"Warning: Matrix not found for variant {v}")
            matrices.append(torch.zeros((4330, 7)))
    
    # 변이가 2개 미만인 경우 0으로 채우기
    while len(matrices) < 2:
        matrices.append(torch.zeros((4330, 7)))
    
    # 첫 두 개의 매트릭스를 행 방향으로 이어붙임
    combined_matrix = torch.vstack(matrices[:2])
    
    return combined_matrix

# 데이터셋마다 매핑된 매트릭스를 생성하는 함수
def map_variants_to_tensor(data):
    mapped_matrices = []
    for variant in data['Variant']:
        mapped_matrix = map_variant_to_onehot(variant)
        mapped_matrices.append(mapped_matrix)
    
    # 매핑된 매트릭스를 PyTorch 텐서로 변환
    X = torch.stack(mapped_matrices)
    y = torch.tensor(data['Class'].values)
    return X, y

# train_data에 대해 매핑된 매트릭스를 텐서로 변환
train_X, train_y = map_variants_to_tensor(train_data)
print("Train X shape:", train_X.shape)
print("Train y shape:", train_y.shape)

# test_data에 대해 매핑된 매트릭스를 텐서로 변환
test_X, test_y = map_variants_to_tensor(test_data)
print("Test X shape:", test_X.shape)
print("Test y shape:", test_y.shape)

Train X shape: torch.Size([2816, 8660, 7])
Train y shape: torch.Size([2816])
Test X shape: torch.Size([36, 8660, 7])
Test y shape: torch.Size([36])


In [21]:
#데이터셋 준비
class CustomDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y
        
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):
        X = self.X[idx]
        y = self.y[idx]
        return torch.tensor(X, dtype=torch.float32).unsqueeze(0), torch.tensor(y, dtype=torch.long)

#학습 데이터 로드    
dataset = CustomDataset(train_X, train_y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)


# train 데이터를 75% 훈련, 25% 검증 데이터로 나누기
train_size = int(0.75 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# 테스트 데이터셋 생성
test_dataset = CustomDataset(test_X, test_y)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [8]:
# 학습 함수
def train_epoch(model, loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
    
    epoch_loss = running_loss / len(loader)
    epoch_acc = correct / total
    return epoch_loss, epoch_acc

# 평가 함수
def evaluate(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    all_labels = []
    all_predictions = []
    all_probs = []
    
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())
            all_probs.extend(outputs.softmax(dim=1).cpu().numpy())  # Probability for ROC-AUC
    
    all_labels = np.array(all_labels)
    all_predictions = np.array(all_predictions)
    all_probs = np.array(all_probs)
    
    epoch_loss = running_loss / len(loader)
    epoch_acc = (all_labels == all_predictions).mean()
    
    return epoch_loss, epoch_acc, all_labels, all_probs


In [9]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=8, kernel_size=(20, 7), stride=(1, 1))
        self.pool1 = nn.MaxPool2d(kernel_size=(2, 1))  # 추가된 풀링 층
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=(20, 1), stride=(1, 1))
        self.pool2 = nn.MaxPool2d(kernel_size=(2, 1))  # 추가된 풀링 층
        
        # 풀링 층 추가로 인한 출력 크기 변경을 고려한 새로운 계산
        self.fc1 = nn.Linear(16 * ((((8660 - 20) // 2) - 20) // 2), 128)
        self.fc2 = nn.Linear(128, 3)  # 출력 층

    def forward(self, x):
        x = self.conv1(x)
        x = nn.ReLU()(x)
        x = self.pool1(x)  # 풀링 적용
        x = self.conv2(x)
        x = nn.ReLU()(x)
        x = self.pool2(x)  # 풀링 적용
        x = x.view(x.size(0), -1)  # 평탄화
        x = self.fc1(x)
        x = nn.ReLU()(x)
        x = self.fc2(x)
        return x

### AUPR 추가

In [10]:
# Stratified 10-fold 교차 검증을 위해 데이터를 k개의 폴드로 나눔
n_splits = 10
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# 결과 저장용 리스트 초기화
fold_train_losses, fold_val_losses = [], []
fold_train_accs, fold_val_accs = [], []
fold_val_aucs, fold_val_precisions, fold_val_recalls, fold_val_f1s = [], [], [], []
fold_val_auprs = []  # AUPR 값을 저장할 리스트를 추가로 초기화

# 전체 데이터로 모델 학습 및 성능 평가
for fold, (train_idx, val_idx) in enumerate(skf.split(train_X, train_y)):
    print(f"\nFold {fold+1}/{n_splits}")
    
    # Train과 validation 데이터를 폴드별로 나눔
    X_train_fold, X_val_fold = train_X[train_idx], train_X[val_idx]
    y_train_fold, y_val_fold = train_y[train_idx], train_y[val_idx]
    
    # 커스텀 데이터셋 생성
    train_dataset_fold = CustomDataset(X_train_fold, y_train_fold)
    val_dataset_fold = CustomDataset(X_val_fold, y_val_fold)
    
    # DataLoader 생성
    train_loader_fold = DataLoader(train_dataset_fold, batch_size=32, shuffle=True)
    val_loader_fold = DataLoader(val_dataset_fold, batch_size=32, shuffle=False)
    
    # 모델 초기화 (매 fold마다 모델을 새로 초기화)
    model = SimpleCNN().to(device)   # 예시로 CNN 모델을 사용했습니다. 본인의 모델로 변경 가능
    
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = torch.nn.CrossEntropyLoss().to(device) 
    
    # Early stopping을 위한 변수 초기화
    best_val_loss = float('inf')
    patience = 3
    patience_counter = 0

    # 각 fold에서 epoch마다 학습
    num_epochs = 30
    for epoch in range(num_epochs):
        # 1 epoch 학습
        train_loss, train_acc = train_epoch(model, train_loader_fold, criterion, optimizer, device)
        
        # Validation 평가
        val_loss, val_acc, val_labels, val_probs = evaluate(model, val_loader_fold, criterion, device)
        
        # 성능 저장
        fold_train_losses.append(train_loss)
        fold_val_losses.append(val_loss)
        fold_train_accs.append(train_acc)
        fold_val_accs.append(val_acc)

        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        print("-" * 40)
        
        # Early Stopping 체크
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            # 모델 저장
            torch.save(model.state_dict(), f'CNN_best_model_fold_{fold+1}.pth')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping 적용.")
                break

    # Fold별 Validation 성능 평가
    model.load_state_dict(torch.load(f'CNN_best_model_fold_{fold+1}.pth'))
    val_loss, val_acc, val_labels, val_probs = evaluate(model, val_loader_fold, criterion, device)

    # AUC-ROC 계산 및 성능 기록
    val_auc = roc_auc_score(val_labels, val_probs, multi_class='ovr')
    fold_val_aucs.append(val_auc)

    val_precision = precision_score(val_labels, val_probs.argmax(axis=1), average='macro')
    val_recall = recall_score(val_labels, val_probs.argmax(axis=1), average='macro')
    val_f1 = f1_score(val_labels, val_probs.argmax(axis=1), average='macro')

    fold_val_precisions.append(val_precision)
    fold_val_recalls.append(val_recall)
    fold_val_f1s.append(val_f1)

    # AUPR 계산 (multi-class one-vs-rest 방식)
    val_aupr = []
    for i in range(val_probs.shape[1]):
        aupr_class = average_precision_score((val_labels == i).astype(int), val_probs[:, i])
        val_aupr.append(aupr_class)

    # 각 클래스에 대한 AUPR 기록
    fold_val_auprs.append(np.mean(val_aupr))  # 평균 AUPR 계산
    print(f"Fold {fold+1} Validation AUC: {val_auc:.4f}, AUPR: {np.mean(val_aupr):.4f}")
    print(f"Fold {fold+1} Precision: {val_precision:.4f}, Recall: {val_recall:.4f}, F1 score: {val_f1:.4f}")

# 교차 검증 성능 평균 계산 (AUPR 추가)
avg_val_auc = sum(fold_val_aucs) / n_splits
avg_val_aupr = sum(fold_val_auprs) / n_splits
avg_val_precision = sum(fold_val_precisions) / n_splits
avg_val_recall = sum(fold_val_recalls) / n_splits
avg_val_f1 = sum(fold_val_f1s) / n_splits

print(f"\nAverage Validation AUC: {avg_val_auc:.4f}, AUPR: {avg_val_aupr:.4f}")
print(f"Average Validation Precision: {avg_val_precision:.4f}, Recall: {avg_val_recall:.4f}, F1 score: {avg_val_f1:.4f}")


Fold 1/10


  del sys.path[0]


Epoch 1/30
Train Loss: 1.0418, Train Acc: 0.4242
Val Loss: 0.9305, Val Acc: 0.6596
----------------------------------------
Epoch 2/30
Train Loss: 0.7491, Train Acc: 0.6492
Val Loss: 0.5323, Val Acc: 0.7624
----------------------------------------
Epoch 3/30
Train Loss: 0.4869, Train Acc: 0.7912
Val Loss: 0.6436, Val Acc: 0.6879
----------------------------------------
Epoch 4/30
Train Loss: 0.4128, Train Acc: 0.8181
Val Loss: 0.3432, Val Acc: 0.8440
----------------------------------------
Epoch 5/30
Train Loss: 0.3608, Train Acc: 0.8390
Val Loss: 0.3577, Val Acc: 0.8369
----------------------------------------
Epoch 6/30
Train Loss: 0.3272, Train Acc: 0.8579
Val Loss: 0.3184, Val Acc: 0.8617
----------------------------------------
Epoch 7/30
Train Loss: 0.3100, Train Acc: 0.8579
Val Loss: 0.3134, Val Acc: 0.8582
----------------------------------------
Epoch 8/30
Train Loss: 0.2990, Train Acc: 0.8607
Val Loss: 0.3267, Val Acc: 0.8652
----------------------------------------
Epoch 9/

  del sys.path[0]


Epoch 1/30
Train Loss: 1.1133, Train Acc: 0.4088
Val Loss: 1.0386, Val Acc: 0.5284
----------------------------------------
Epoch 2/30
Train Loss: 0.9559, Train Acc: 0.5134
Val Loss: 0.7815, Val Acc: 0.6454
----------------------------------------
Epoch 3/30
Train Loss: 0.6222, Train Acc: 0.7372
Val Loss: 0.5348, Val Acc: 0.7589
----------------------------------------
Epoch 4/30
Train Loss: 0.4511, Train Acc: 0.8031
Val Loss: 0.4335, Val Acc: 0.8227
----------------------------------------
Epoch 5/30
Train Loss: 0.3749, Train Acc: 0.8327
Val Loss: 0.4269, Val Acc: 0.7837
----------------------------------------
Epoch 6/30
Train Loss: 0.3553, Train Acc: 0.8374
Val Loss: 0.3936, Val Acc: 0.8227
----------------------------------------
Epoch 7/30
Train Loss: 0.3290, Train Acc: 0.8512
Val Loss: 0.3537, Val Acc: 0.8475
----------------------------------------
Epoch 8/30
Train Loss: 0.3212, Train Acc: 0.8528
Val Loss: 0.3468, Val Acc: 0.8404
----------------------------------------
Epoch 9/

  del sys.path[0]


Epoch 1/30
Train Loss: 1.0605, Train Acc: 0.4250
Val Loss: 1.0504, Val Acc: 0.4929
----------------------------------------
Epoch 2/30
Train Loss: 0.8672, Train Acc: 0.5900
Val Loss: 0.7198, Val Acc: 0.6950
----------------------------------------
Epoch 3/30
Train Loss: 0.5864, Train Acc: 0.7380
Val Loss: 0.5560, Val Acc: 0.6950
----------------------------------------
Epoch 4/30
Train Loss: 0.4341, Train Acc: 0.8094
Val Loss: 0.3836, Val Acc: 0.8404
----------------------------------------
Epoch 5/30
Train Loss: 0.3663, Train Acc: 0.8418
Val Loss: 0.3254, Val Acc: 0.8475
----------------------------------------
Epoch 6/30
Train Loss: 0.3273, Train Acc: 0.8567
Val Loss: 0.3182, Val Acc: 0.8440
----------------------------------------
Epoch 7/30
Train Loss: 0.3235, Train Acc: 0.8552
Val Loss: 0.2952, Val Acc: 0.8617
----------------------------------------
Epoch 8/30
Train Loss: 0.3095, Train Acc: 0.8575
Val Loss: 0.2999, Val Acc: 0.8759
----------------------------------------
Epoch 9/

  del sys.path[0]


Epoch 1/30
Train Loss: 1.0920, Train Acc: 0.3856
Val Loss: 0.9986, Val Acc: 0.4291
----------------------------------------
Epoch 2/30
Train Loss: 0.9443, Train Acc: 0.5391
Val Loss: 0.7608, Val Acc: 0.7376
----------------------------------------
Epoch 3/30
Train Loss: 0.6582, Train Acc: 0.7131
Val Loss: 0.5025, Val Acc: 0.7908
----------------------------------------
Epoch 4/30
Train Loss: 0.4664, Train Acc: 0.7999
Val Loss: 0.3547, Val Acc: 0.8582
----------------------------------------
Epoch 5/30
Train Loss: 0.3585, Train Acc: 0.8350
Val Loss: 0.3451, Val Acc: 0.8475
----------------------------------------
Epoch 6/30
Train Loss: 0.3328, Train Acc: 0.8457
Val Loss: 0.3158, Val Acc: 0.8617
----------------------------------------
Epoch 7/30
Train Loss: 0.3027, Train Acc: 0.8658
Val Loss: 0.3059, Val Acc: 0.8688
----------------------------------------
Epoch 8/30
Train Loss: 0.2900, Train Acc: 0.8694
Val Loss: 0.3374, Val Acc: 0.8262
----------------------------------------
Epoch 9/

  del sys.path[0]


Epoch 1/30
Train Loss: 1.0698, Train Acc: 0.3911
Val Loss: 1.0408, Val Acc: 0.3830
----------------------------------------
Epoch 2/30
Train Loss: 0.9194, Train Acc: 0.5438
Val Loss: 0.6765, Val Acc: 0.7270
----------------------------------------
Epoch 3/30
Train Loss: 0.5802, Train Acc: 0.7439
Val Loss: 0.4335, Val Acc: 0.8369
----------------------------------------
Epoch 4/30
Train Loss: 0.4128, Train Acc: 0.8129
Val Loss: 0.4365, Val Acc: 0.8617
----------------------------------------
Epoch 5/30
Train Loss: 0.3698, Train Acc: 0.8425
Val Loss: 0.3141, Val Acc: 0.8794
----------------------------------------
Epoch 6/30
Train Loss: 0.3222, Train Acc: 0.8575
Val Loss: 0.3207, Val Acc: 0.8688
----------------------------------------
Epoch 7/30
Train Loss: 0.3227, Train Acc: 0.8556
Val Loss: 0.3171, Val Acc: 0.8546
----------------------------------------
Epoch 8/30
Train Loss: 0.3117, Train Acc: 0.8619
Val Loss: 0.2918, Val Acc: 0.9007
----------------------------------------
Epoch 9/

  del sys.path[0]


Epoch 1/30
Train Loss: 1.0596, Train Acc: 0.4029
Val Loss: 1.0260, Val Acc: 0.4255
----------------------------------------
Epoch 2/30
Train Loss: 0.9947, Train Acc: 0.4858
Val Loss: 0.8672, Val Acc: 0.5390
----------------------------------------
Epoch 3/30
Train Loss: 0.6980, Train Acc: 0.6835
Val Loss: 0.4734, Val Acc: 0.8121
----------------------------------------
Epoch 4/30
Train Loss: 0.4546, Train Acc: 0.7995
Val Loss: 0.3630, Val Acc: 0.8546
----------------------------------------
Epoch 5/30
Train Loss: 0.3740, Train Acc: 0.8327
Val Loss: 0.3309, Val Acc: 0.8262
----------------------------------------
Epoch 6/30
Train Loss: 0.3496, Train Acc: 0.8414
Val Loss: 0.3120, Val Acc: 0.8404
----------------------------------------
Epoch 7/30
Train Loss: 0.3259, Train Acc: 0.8508
Val Loss: 0.3154, Val Acc: 0.8617
----------------------------------------
Epoch 8/30
Train Loss: 0.3131, Train Acc: 0.8567
Val Loss: 0.2942, Val Acc: 0.8759
----------------------------------------
Epoch 9/

  del sys.path[0]


Epoch 1/30
Train Loss: 1.0766, Train Acc: 0.3783
Val Loss: 1.0080, Val Acc: 0.4164
----------------------------------------
Epoch 2/30
Train Loss: 0.9249, Train Acc: 0.5400
Val Loss: 0.7097, Val Acc: 0.7260
----------------------------------------
Epoch 3/30
Train Loss: 0.6099, Train Acc: 0.7199
Val Loss: 0.4919, Val Acc: 0.7829
----------------------------------------
Epoch 4/30
Train Loss: 0.4396, Train Acc: 0.8075
Val Loss: 0.4245, Val Acc: 0.8256
----------------------------------------
Epoch 5/30
Train Loss: 0.3540, Train Acc: 0.8469
Val Loss: 0.3688, Val Acc: 0.8363
----------------------------------------
Epoch 6/30
Train Loss: 0.3233, Train Acc: 0.8576
Val Loss: 0.3705, Val Acc: 0.8434
----------------------------------------
Epoch 7/30
Train Loss: 0.3180, Train Acc: 0.8568
Val Loss: 0.3602, Val Acc: 0.8363
----------------------------------------
Epoch 8/30
Train Loss: 0.3034, Train Acc: 0.8611
Val Loss: 0.3740, Val Acc: 0.8434
----------------------------------------
Epoch 9/

  del sys.path[0]


Epoch 1/30
Train Loss: 1.0993, Train Acc: 0.4118
Val Loss: 1.0002, Val Acc: 0.4591
----------------------------------------
Epoch 2/30
Train Loss: 0.8480, Train Acc: 0.5929
Val Loss: 0.7097, Val Acc: 0.6690
----------------------------------------
Epoch 3/30
Train Loss: 0.5516, Train Acc: 0.7487
Val Loss: 0.4829, Val Acc: 0.7865
----------------------------------------
Epoch 4/30
Train Loss: 0.4127, Train Acc: 0.8233
Val Loss: 0.4084, Val Acc: 0.8327
----------------------------------------
Epoch 5/30
Train Loss: 0.3695, Train Acc: 0.8292
Val Loss: 0.3612, Val Acc: 0.8434
----------------------------------------
Epoch 6/30
Train Loss: 0.3343, Train Acc: 0.8536
Val Loss: 0.3457, Val Acc: 0.8505
----------------------------------------
Epoch 7/30
Train Loss: 0.3082, Train Acc: 0.8647
Val Loss: 0.3508, Val Acc: 0.8683
----------------------------------------
Epoch 8/30
Train Loss: 0.3050, Train Acc: 0.8556
Val Loss: 0.3201, Val Acc: 0.8399
----------------------------------------
Epoch 9/

  del sys.path[0]


Epoch 1/30
Train Loss: 1.0708, Train Acc: 0.4233
Val Loss: 1.0279, Val Acc: 0.4093
----------------------------------------
Epoch 2/30
Train Loss: 0.9267, Train Acc: 0.5487
Val Loss: 0.7603, Val Acc: 0.6655
----------------------------------------
Epoch 3/30
Train Loss: 0.6325, Train Acc: 0.7156
Val Loss: 0.5770, Val Acc: 0.7367
----------------------------------------
Epoch 4/30
Train Loss: 0.4608, Train Acc: 0.7905
Val Loss: 0.4740, Val Acc: 0.7936
----------------------------------------
Epoch 5/30
Train Loss: 0.3762, Train Acc: 0.8406
Val Loss: 0.4404, Val Acc: 0.7829
----------------------------------------
Epoch 6/30
Train Loss: 0.3407, Train Acc: 0.8568
Val Loss: 0.4047, Val Acc: 0.8043
----------------------------------------
Epoch 7/30
Train Loss: 0.3274, Train Acc: 0.8584
Val Loss: 0.4125, Val Acc: 0.7972
----------------------------------------
Epoch 8/30
Train Loss: 0.3010, Train Acc: 0.8722
Val Loss: 0.4035, Val Acc: 0.8185
----------------------------------------
Epoch 9/

  del sys.path[0]


Epoch 1/30
Train Loss: 1.0828, Train Acc: 0.4095
Val Loss: 1.0405, Val Acc: 0.3915
----------------------------------------
Epoch 2/30
Train Loss: 0.9248, Train Acc: 0.5365
Val Loss: 0.7969, Val Acc: 0.6228
----------------------------------------
Epoch 3/30
Train Loss: 0.5875, Train Acc: 0.7337
Val Loss: 0.5402, Val Acc: 0.7580
----------------------------------------
Epoch 4/30
Train Loss: 0.4079, Train Acc: 0.8170
Val Loss: 0.4641, Val Acc: 0.7900
----------------------------------------
Epoch 5/30
Train Loss: 0.3626, Train Acc: 0.8367
Val Loss: 0.4149, Val Acc: 0.8114
----------------------------------------
Epoch 6/30
Train Loss: 0.3282, Train Acc: 0.8469
Val Loss: 0.3728, Val Acc: 0.8292
----------------------------------------
Epoch 7/30
Train Loss: 0.3005, Train Acc: 0.8651
Val Loss: 0.4103, Val Acc: 0.8078
----------------------------------------
Epoch 8/30
Train Loss: 0.2954, Train Acc: 0.8698
Val Loss: 0.3628, Val Acc: 0.8434
----------------------------------------
Epoch 9/

In [22]:

# 성능 저장을 위한 리스트 초기화
fold_test_losses = []
fold_accuracies = []
fold_aucs = []
fold_auprs = []
fold_precisions = []
fold_recalls = []
fold_f1_scores = []

# 클래스별 성능 저장용 딕셔너리 초기화
class_metrics = {}

for fold in range(10):
    print(f"Evaluating fold {fold + 1}...")
    
    # 모델 로드 및 평가 모드로 전환
    model.load_state_dict(torch.load(f'CNN_best_model_fold_{fold+1}.pth'))
    model.eval()
    
    # 테스트 세트 평가
    test_loss, test_acc, test_labels, test_probs = evaluate(model, test_loader, criterion, device)
    
    # num_classes 동적으로 설정
    num_classes = test_probs.shape[1]

    # 클래스별 메트릭 저장용 딕셔너리 초기화
    if fold == 0:
        class_metrics = {
            i: {"accuracy": [], "auc": [], "aupr": [], "precision": [], "recall": [], "f1": []}
            for i in range(num_classes)
        }

    # 전체 성능 계산
    fold_accuracies.append(accuracy_score(test_labels, test_probs.argmax(axis=1)))
    fold_aucs.append(roc_auc_score(test_labels, test_probs, multi_class='ovr'))

    # One-vs-Rest 방식으로 AUPR 계산
    auprs = [
        average_precision_score((test_labels == i).astype(int), test_probs[:, i])
        for i in range(num_classes)
    ]
    fold_auprs.append(np.mean(auprs))

    # Precision, Recall, F1 Score 계산
    fold_precisions.append(precision_score(test_labels, test_probs.argmax(axis=1), average='macro'))
    fold_recalls.append(recall_score(test_labels, test_probs.argmax(axis=1), average='macro'))
    fold_f1_scores.append(f1_score(test_labels, test_probs.argmax(axis=1), average='macro'))

    # 클래스별 메트릭 계산
    for i in range(num_classes):
        class_pred = (test_probs.argmax(axis=1) == i).astype(int)
        class_true = (test_labels == i).astype(int)

        class_metrics[i]["accuracy"].append(accuracy_score(class_true, class_pred))
        class_metrics[i]["auc"].append(roc_auc_score(class_true, test_probs[:, i]))
        class_metrics[i]["aupr"].append(auprs[i])
        class_metrics[i]["precision"].append(precision_score(class_true, class_pred))
        class_metrics[i]["recall"].append(recall_score(class_true, class_pred))
        class_metrics[i]["f1"].append(f1_score(class_true, class_pred))

# Fold별 평균 및 표준편차 계산 함수
def mean_and_std(values):
    return np.mean(values), np.std(values)

# Fold별 전체 성능 출력
print(f"Overall Metrics Across 10 Folds:")
print(f"Accuracy: {mean_and_std(fold_accuracies)[0]:.4f} ± {mean_and_std(fold_accuracies)[1]:.4f}")
print(f"AUC: {mean_and_std(fold_aucs)[0]:.4f} ± {mean_and_std(fold_aucs)[1]:.4f}")
print(f"AUPR: {mean_and_std(fold_auprs)[0]:.4f} ± {mean_and_std(fold_auprs)[1]:.4f}")
print(f"Precision: {mean_and_std(fold_precisions)[0]:.4f} ± {mean_and_std(fold_precisions)[1]:.4f}")
print(f"Recall: {mean_and_std(fold_recalls)[0]:.4f} ± {mean_and_std(fold_recalls)[1]:.4f}")
print(f"F1 Score: {mean_and_std(fold_f1_scores)[0]:.4f} ± {mean_and_std(fold_f1_scores)[1]:.4f}")

# 클래스별 성능 출력
for i in range(num_classes):
    print(f"\nClass {i} Metrics Across 10 Folds:")
    print(f"  Accuracy: {mean_and_std(class_metrics[i]['accuracy'])[0]:.4f} ± {mean_and_std(class_metrics[i]['accuracy'])[1]:.4f}")
    print(f"  AUC: {mean_and_std(class_metrics[i]['auc'])[0]:.4f} ± {mean_and_std(class_metrics[i]['auc'])[1]:.4f}")
    print(f"  AUPR: {mean_and_std(class_metrics[i]['aupr'])[0]:.4f} ± {mean_and_std(class_metrics[i]['aupr'])[1]:.4f}")
    print(f"  Precision: {mean_and_std(class_metrics[i]['precision'])[0]:.4f} ± {mean_and_std(class_metrics[i]['precision'])[1]:.4f}")
    print(f"  Recall: {mean_and_std(class_metrics[i]['recall'])[0]:.4f} ± {mean_and_std(class_metrics[i]['recall'])[1]:.4f}")
    print(f"  F1 Score: {mean_and_std(class_metrics[i]['f1'])[0]:.4f} ± {mean_and_std(class_metrics[i]['f1'])[1]:.4f}")


Evaluating fold 1...
Evaluating fold 2...
Evaluating fold 3...
Evaluating fold 4...
Evaluating fold 5...
Evaluating fold 6...
Evaluating fold 7...


  del sys.path[0]
  del sys.path[0]
  del sys.path[0]
  del sys.path[0]
  del sys.path[0]
  del sys.path[0]
  del sys.path[0]


Evaluating fold 8...
Evaluating fold 9...
Evaluating fold 10...
Overall Metrics Across 10 Folds:
Accuracy: 0.8278 ± 0.0408
AUC: 0.9520 ± 0.0100
AUPR: 0.9046 ± 0.0100
Precision: 0.7520 ± 0.0485
Recall: 0.8784 ± 0.0288
F1 Score: 0.7817 ± 0.0503

Class 0 Metrics Across 10 Folds:
  Accuracy: 0.8556 ± 0.0408
  AUC: 0.9365 ± 0.0217
  AUPR: 0.9345 ± 0.0203
  Precision: 0.8358 ± 0.0334
  Recall: 0.8647 ± 0.0791
  F1 Score: 0.8482 ± 0.0473

Class 1 Metrics Across 10 Folds:
  Accuracy: 0.9389 ± 0.0242
  AUC: 0.9853 ± 0.0000
  AUPR: 0.8333 ± 0.0000
  Precision: 0.4967 ± 0.1016
  Recall: 1.0000 ± 0.0000
  F1 Score: 0.6576 ± 0.0898

Class 2 Metrics Across 10 Folds:
  Accuracy: 0.8611 ± 0.0248
  AUC: 0.9341 ± 0.0116
  AUPR: 0.9461 ± 0.0109
  Precision: 0.9235 ± 0.0341
  Recall: 0.7706 ± 0.0412
  F1 Score: 0.8394 ± 0.0304


  del sys.path[0]
  del sys.path[0]
  del sys.path[0]
