# Import

In [1]:
import os
import random

import pandas as pd
import numpy as np

from PIL import Image
from tqdm import tqdm 

from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import Dataset, DataLoader, Subset
import torchvision.models as models
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch import nn, optim

from sklearn.metrics import log_loss


if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print("Using device:", device)

Using device: mps


# Hyperparameter Setting

In [2]:
CFG = {
    'IMG_SIZE': 384,
    'BATCH_SIZE': 16,
    'EPOCHS': 12,
    'LEARNING_RATE': 1e-4,
    'SEED' : 42,
    'N_SPLITS': 5
}

# Fixed RandomSeed

In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Seed 고정

# CustomDataset

In [None]:
class CustomImageDataset(Dataset):
    def __init__(self, root_dir, transform=None, is_test=False):
        self.root_dir = root_dir
        self.transform = transform
        self.is_test = is_test
        self.samples = []

        if is_test:
            # 테스트셋: 라벨 없이 이미지 경로만 저장
            for fname in sorted(os.listdir(root_dir)):
                if fname.lower().endswith(('.jpg')):
                    img_path = os.path.join(root_dir, fname)
                    self.samples.append((img_path,))
        else:
            # 학습셋: 클래스별 폴더 구조에서 라벨 추출
            self.classes = sorted(os.listdir(root_dir))
            self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)}

            for cls_name in self.classes:
                cls_folder = os.path.join(root_dir, cls_name)

                if not os.path.isdir(cls_folder):
                    continue
                
                for fname in os.listdir(cls_folder):
                    if fname.lower().endswith(('.jpg')):
                        img_path = os.path.join(cls_folder, fname)
                        label = self.class_to_idx[cls_name]
                        self.samples.append((img_path, label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        if self.is_test:
            img_path = self.samples[idx][0]
            image = Image.open(img_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
            return image
        else:
            img_path, label = self.samples[idx]
            image = Image.open(img_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
            return image, label


# Data Load

In [5]:
train_root = './train'
test_root = './test'

In [6]:
train_transform = transforms.Compose([
    transforms.Resize((CFG['IMG_SIZE'], CFG['IMG_SIZE'])),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((CFG['IMG_SIZE'], CFG['IMG_SIZE'])),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [7]:
from torchvision import transforms

train_transform = transforms.Compose([
    transforms.Resize((CFG['IMG_SIZE'] + 32, CFG['IMG_SIZE'] + 32)),  # 약간 크게 리사이즈 후
    transforms.RandomResizedCrop(CFG['IMG_SIZE'], scale=(0.8, 1.0)),  # 랜덤 크롭
    transforms.RandomHorizontalFlip(),                                # 좌우 뒤집기
    transforms.RandomRotation(10),                                    # ±10도 회전
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # 색상 변형
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

val_transform = transforms.Compose([
    transforms.Resize((CFG['IMG_SIZE'], CFG['IMG_SIZE'])),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

In [8]:
# 전체 데이터셋 로드
full_dataset = CustomImageDataset(train_root, transform=None)
print(f"총 이미지 수: {len(full_dataset)}")

targets = [label for _, label in full_dataset.samples]
class_names = full_dataset.classes

# Stratified Split
train_idx, val_idx = train_test_split(
    range(len(targets)), test_size=0.2, stratify=targets, random_state=42
)

# Subset + transform 각각 적용
train_dataset = Subset(CustomImageDataset(train_root, transform=train_transform), train_idx)
val_dataset = Subset(CustomImageDataset(train_root, transform=val_transform), val_idx)
print(f'train 이미지 수: {len(train_dataset)}, valid 이미지 수: {len(val_dataset)}')


# DataLoader 정의
train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)

총 이미지 수: 33137
train 이미지 수: 26509, valid 이미지 수: 6628


# ResNet18

In [31]:
class BaseModel(nn.Module):
    def __init__(self, num_classes):
        super(BaseModel, self).__init__()
        self.backbone = models.resnet18(pretrained=True)  # ResNet18 모델 불러오기
        self.feature_dim = self.backbone.fc.in_features 
        self.backbone.fc = nn.Identity()  # feature extractor로만 사용
        self.head = nn.Linear(self.feature_dim, num_classes)  # 분류기

    def forward(self, x):
        x = self.backbone(x)       
        x = self.head(x) 
        return x

# EfficientNet

In [32]:
import torch.nn as nn
import timm


class BaseModel(nn.Module):
    def __init__(self, num_classes):
        super(BaseModel, self).__init__()
        
        # EfficientNet-b3 백본
        self.backbone = timm.create_model('efficientnet_b3', pretrained=True)
        
        # 기존 classifier 제거
        self.feature_dim = self.backbone.classifier.in_features
        self.backbone.classifier = nn.Identity()
        
        # 새 분류기
        self.head = nn.Linear(self.feature_dim, num_classes)

    def forward(self, x):
        x = self.backbone(x)
        x = self.head(x)
        return x


# EfficientNetV2-M

In [9]:
import torch.nn as nn
import timm
 

class BaseModel(nn.Module):
    def __init__(self, num_classes):
        super(BaseModel, self).__init__()
        
        # EfficientNet-b3 백본
        self.backbone = timm.create_model('efficientnetv2_rw_m', pretrained=True)
        
        # 기존 classifier 제거
        self.feature_dim = self.backbone.classifier.in_features
        self.backbone.classifier = nn.Identity()
        
        # 새 분류기
        self.head = nn.Linear(self.feature_dim, num_classes)

    def forward(self, x):
        x = self.backbone(x)
        x = self.head(x)
        return x


  from .autonotebook import tqdm as notebook_tqdm


In [10]:
def mixup_data(x, y, alpha=0.2):
    '''Returns mixed inputs, pairs of targets, and lambda'''
    lam = np.random.beta(alpha, alpha)
    batch_size = x.size()[0]
    index = torch.randperm(batch_size).to(x.device)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

# Train/ Validation

In [None]:
model = BaseModel(num_classes=len(class_names)).to(device)
best_logloss = float('inf')
best_acc = 0.0
best_ce_loss = float('inf')

# 손실 함수
criterion = nn.CrossEntropyLoss()

# 옵티마이저
optimizer = optim.Adam(model.parameters(), lr=CFG['LEARNING_RATE'])
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=CFG['EPOCHS'], eta_min=1e-6
)

# 학습 및 검증 루프
for epoch in range(CFG['EPOCHS']):
    # Train
    model.train()
    train_loss = 0.0
    for images, labels in tqdm(train_loader, desc=f"[Epoch {epoch+1}/{CFG['EPOCHS']}] Training"):
        images, labels = images.to(device), labels.to(device)

        inputs, targets_a, targets_b, lam = mixup_data(images, labels, alpha=0.2)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    avg_train_loss = train_loss / len(train_loader)

    # Validation
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    all_probs = []
    all_labels = []

    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc=f"[Epoch {epoch+1}/{CFG['EPOCHS']}] Validation"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            # Accuracy
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

            # LogLoss
            probs = F.softmax(outputs, dim=1)
            all_probs.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    avg_val_loss = val_loss / len(val_loader)
    val_accuracy = 100 * correct / total
    val_logloss = log_loss(all_labels, all_probs, labels=list(range(len(class_names))))

    scheduler.step()
    print(f"📉 Learning Rate after epoch {epoch+1}: {scheduler.get_last_lr()[0]:.8f}")

    # 결과 출력
    print(f"Train Loss : {avg_train_loss:.4f} || Valid Loss : {avg_val_loss:.4f} | Valid Accuracy : {val_accuracy:.4f}%")

    # Best model 저장
    if val_logloss < best_logloss:
        best_logloss = val_logloss
        torch.save(model.state_dict(), f'best_logloss.pth')
        print(f"📦 Best model saved at epoch {epoch+1} (logloss: {val_logloss:.4f})")

    if val_accuracy > best_acc:
        best_acc = val_accuracy
        torch.save(model.state_dict(), "best_acc.pth")
        print(f"📦 Best model saved at epoch {epoch+1} (val_acc: {val_accuracy:.4f})")

    if avg_val_loss < best_ce_loss:
        best_ce_loss = avg_val_loss
        torch.save(model.state_dict(), "best_loss.pth")
        print(f"📦 Best model saved at epoch {epoch+1} (val_loss: {avg_val_loss:.4f})")

[Epoch 1/15] Training: 100%|██████████| 1657/1657 [28:34<00:00,  1.03s/it]
[Epoch 1/15] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.88it/s]


📉 Learning Rate after epoch 1: 0.00009892
Train Loss : 3.1939 || Valid Loss : 0.6222 | Valid Accuracy : 85.9234%
📦 Best model saved at epoch 1 (logloss: 0.6222)
📦 Best model saved at epoch 1 (val_acc: 85.9234)
📦 Best model saved at epoch 1 (val_loss: 0.6222)


[Epoch 2/15] Training:  43%|████▎     | 711/1657 [12:24<17:28,  1.11s/it]

In [11]:
from sklearn.model_selection import StratifiedKFold


# === K-Fold 설정 ===
skf = StratifiedKFold(n_splits=CFG['N_SPLITS'], shuffle=True, random_state=42)
targets = [label for _, label in full_dataset.samples]
class_names = full_dataset.classes

for fold, (train_idx, val_idx) in enumerate(skf.split(np.zeros(len(targets)), targets)):
    print(f"\n📂 Fold {fold+1}/{CFG['N_SPLITS']}")

    train_dataset = Subset(CustomImageDataset(train_root, transform=train_transform), train_idx)
    val_dataset = Subset(CustomImageDataset(train_root, transform=val_transform), val_idx)
    train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)

    # === 모델, 옵티마이저, 스케줄러, 손실 함수 설정 ===
    model = BaseModel(num_classes=len(class_names)).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=CFG['LEARNING_RATE'])
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CFG['EPOCHS'], eta_min=1e-6)
    criterion = nn.CrossEntropyLoss()

    best_logloss = float('inf')
    best_acc = 0.0
    best_ce_loss = float('inf')

    for epoch in range(CFG['EPOCHS']):
        model.train()
        train_loss = 0.0
        for images, labels in tqdm(train_loader, desc=f"[Fold {fold+1}][Epoch {epoch+1}/{CFG['EPOCHS']}] Training"):
            images, labels = images.to(device), labels.to(device)
            inputs, targets_a, targets_b, lam = mixup_data(images, labels, alpha=0.2)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        avg_train_loss = train_loss / len(train_loader)

        # === Validation ===
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        all_probs = []
        all_labels = []

        with torch.no_grad():
            for images, labels in tqdm(val_loader, desc=f"[Fold {fold+1}][Epoch {epoch+1}/{CFG['EPOCHS']}] Validation"):
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)

                probs = F.softmax(outputs, dim=1)
                all_probs.extend(probs.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        avg_val_loss = val_loss / len(val_loader)
        val_accuracy = 100 * correct / total
        val_logloss = log_loss(all_labels, all_probs, labels=list(range(len(class_names))))
        scheduler.step()

        print(f"Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_accuracy:.2f}% | LogLoss: {val_logloss:.4f}")

        # === 모델 저장 ===
        save_dir = f"model_fold{fold+1}"
        os.makedirs(save_dir, exist_ok=True)

        if val_logloss < best_logloss:
            best_logloss = val_logloss
            torch.save(model.state_dict(), f"{save_dir}/best_logloss.pth")

        if val_accuracy > best_acc:
            best_acc = val_accuracy
            torch.save(model.state_dict(), f"{save_dir}/best_acc.pth")

        if avg_val_loss < best_ce_loss:
            best_ce_loss = avg_val_loss
            torch.save(model.state_dict(), f"{save_dir}/best_loss.pth")



📂 Fold 1/5


[Fold 1][Epoch 1/12] Training: 100%|██████████| 1657/1657 [28:43<00:00,  1.04s/it]
[Fold 1][Epoch 1/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.91it/s]


Train Loss: 3.1081 | Val Loss: 0.6150 | Val Acc: 85.92% | LogLoss: 0.6151


[Fold 1][Epoch 2/12] Training: 100%|██████████| 1657/1657 [28:25<00:00,  1.03s/it]
[Fold 1][Epoch 2/12] Validation: 100%|██████████| 415/415 [01:48<00:00,  3.83it/s]


Train Loss: 1.0496 | Val Loss: 0.2973 | Val Acc: 91.55% | LogLoss: 0.2976


[Fold 1][Epoch 3/12] Training: 100%|██████████| 1657/1657 [28:23<00:00,  1.03s/it]
[Fold 1][Epoch 3/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.91it/s]


Train Loss: 0.8294 | Val Loss: 0.1871 | Val Acc: 94.07% | LogLoss: 0.1874


[Fold 1][Epoch 4/12] Training: 100%|██████████| 1657/1657 [27:51<00:00,  1.01s/it]
[Fold 1][Epoch 4/12] Validation: 100%|██████████| 415/415 [01:45<00:00,  3.92it/s]


Train Loss: 0.7091 | Val Loss: 0.1604 | Val Acc: 94.86% | LogLoss: 0.1607


[Fold 1][Epoch 5/12] Training: 100%|██████████| 1657/1657 [27:50<00:00,  1.01s/it]
[Fold 1][Epoch 5/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.91it/s]


Train Loss: 0.6577 | Val Loss: 0.1362 | Val Acc: 95.67% | LogLoss: 0.1365


[Fold 1][Epoch 6/12] Training: 100%|██████████| 1657/1657 [27:49<00:00,  1.01s/it]
[Fold 1][Epoch 6/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.91it/s]


Train Loss: 0.5926 | Val Loss: 0.1255 | Val Acc: 96.23% | LogLoss: 0.1257


[Fold 1][Epoch 7/12] Training: 100%|██████████| 1657/1657 [27:49<00:00,  1.01s/it]
[Fold 1][Epoch 7/12] Validation: 100%|██████████| 415/415 [01:45<00:00,  3.92it/s]


Train Loss: 0.5360 | Val Loss: 0.1010 | Val Acc: 97.01% | LogLoss: 0.1012


[Fold 1][Epoch 8/12] Training: 100%|██████████| 1657/1657 [27:52<00:00,  1.01s/it]
[Fold 1][Epoch 8/12] Validation: 100%|██████████| 415/415 [01:45<00:00,  3.93it/s]


Train Loss: 0.5124 | Val Loss: 0.0977 | Val Acc: 96.98% | LogLoss: 0.0979


[Fold 1][Epoch 9/12] Training: 100%|██████████| 1657/1657 [27:50<00:00,  1.01s/it]
[Fold 1][Epoch 9/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.90it/s]


Train Loss: 0.4684 | Val Loss: 0.1020 | Val Acc: 96.97% | LogLoss: 0.1021


[Fold 1][Epoch 10/12] Training: 100%|██████████| 1657/1657 [27:49<00:00,  1.01s/it]
[Fold 1][Epoch 10/12] Validation: 100%|██████████| 415/415 [01:45<00:00,  3.93it/s]


Train Loss: 0.4408 | Val Loss: 0.0938 | Val Acc: 97.30% | LogLoss: 0.0940


[Fold 1][Epoch 11/12] Training: 100%|██████████| 1657/1657 [27:50<00:00,  1.01s/it]
[Fold 1][Epoch 11/12] Validation: 100%|██████████| 415/415 [01:45<00:00,  3.92it/s]


Train Loss: 0.4468 | Val Loss: 0.0926 | Val Acc: 97.31% | LogLoss: 0.0928


[Fold 1][Epoch 12/12] Training: 100%|██████████| 1657/1657 [27:49<00:00,  1.01s/it]
[Fold 1][Epoch 12/12] Validation: 100%|██████████| 415/415 [01:45<00:00,  3.93it/s]


Train Loss: 0.4134 | Val Loss: 0.0892 | Val Acc: 97.56% | LogLoss: 0.0894

📂 Fold 2/5


[Fold 2][Epoch 1/12] Training: 100%|██████████| 1657/1657 [30:52<00:00,  1.12s/it]
[Fold 2][Epoch 1/12] Validation: 100%|██████████| 415/415 [01:47<00:00,  3.86it/s]


Train Loss: 3.1314 | Val Loss: 0.5619 | Val Acc: 85.82% | LogLoss: 0.5627


[Fold 2][Epoch 2/12] Training: 100%|██████████| 1657/1657 [30:22<00:00,  1.10s/it]
[Fold 2][Epoch 2/12] Validation: 100%|██████████| 415/415 [01:47<00:00,  3.86it/s]


Train Loss: 1.0476 | Val Loss: 0.2914 | Val Acc: 91.57% | LogLoss: 0.2919


[Fold 2][Epoch 3/12] Training: 100%|██████████| 1657/1657 [30:23<00:00,  1.10s/it]
[Fold 2][Epoch 3/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.91it/s]


Train Loss: 0.8590 | Val Loss: 0.2338 | Val Acc: 93.75% | LogLoss: 0.2340


[Fold 2][Epoch 4/12] Training: 100%|██████████| 1657/1657 [30:28<00:00,  1.10s/it]
[Fold 2][Epoch 4/12] Validation: 100%|██████████| 415/415 [01:45<00:00,  3.94it/s]


Train Loss: 0.7227 | Val Loss: 0.1671 | Val Acc: 95.04% | LogLoss: 0.1673


[Fold 2][Epoch 5/12] Training: 100%|██████████| 1657/1657 [30:25<00:00,  1.10s/it]
[Fold 2][Epoch 5/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.91it/s]


Train Loss: 0.6210 | Val Loss: 0.1657 | Val Acc: 95.25% | LogLoss: 0.1660


[Fold 2][Epoch 6/12] Training: 100%|██████████| 1657/1657 [30:23<00:00,  1.10s/it]
[Fold 2][Epoch 6/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.90it/s]


Train Loss: 0.5800 | Val Loss: 0.1371 | Val Acc: 95.94% | LogLoss: 0.1373


[Fold 2][Epoch 7/12] Training: 100%|██████████| 1657/1657 [30:26<00:00,  1.10s/it]
[Fold 2][Epoch 7/12] Validation: 100%|██████████| 415/415 [01:45<00:00,  3.93it/s]


Train Loss: 0.5251 | Val Loss: 0.1444 | Val Acc: 96.06% | LogLoss: 0.1446


[Fold 2][Epoch 8/12] Training: 100%|██████████| 1657/1657 [30:25<00:00,  1.10s/it]
[Fold 2][Epoch 8/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.90it/s]


Train Loss: 0.5063 | Val Loss: 0.1232 | Val Acc: 96.56% | LogLoss: 0.1234


[Fold 2][Epoch 9/12] Training: 100%|██████████| 1657/1657 [30:40<00:00,  1.11s/it]
[Fold 2][Epoch 9/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.88it/s]


Train Loss: 0.4905 | Val Loss: 0.1181 | Val Acc: 96.97% | LogLoss: 0.1182


[Fold 2][Epoch 10/12] Training: 100%|██████████| 1657/1657 [32:29<00:00,  1.18s/it]
[Fold 2][Epoch 10/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.91it/s]


Train Loss: 0.4453 | Val Loss: 0.1072 | Val Acc: 97.09% | LogLoss: 0.1073


[Fold 2][Epoch 11/12] Training: 100%|██████████| 1657/1657 [30:41<00:00,  1.11s/it]
[Fold 2][Epoch 11/12] Validation: 100%|██████████| 415/415 [01:45<00:00,  3.93it/s]


Train Loss: 0.4649 | Val Loss: 0.1069 | Val Acc: 97.22% | LogLoss: 0.1071


[Fold 2][Epoch 12/12] Training: 100%|██████████| 1657/1657 [30:29<00:00,  1.10s/it]
[Fold 2][Epoch 12/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.90it/s]


Train Loss: 0.4271 | Val Loss: 0.1025 | Val Acc: 97.28% | LogLoss: 0.1027

📂 Fold 3/5


[Fold 3][Epoch 1/12] Training: 100%|██████████| 1657/1657 [36:07<00:00,  1.31s/it]
[Fold 3][Epoch 1/12] Validation: 100%|██████████| 415/415 [01:47<00:00,  3.88it/s]


Train Loss: 3.0946 | Val Loss: 0.5967 | Val Acc: 84.52% | LogLoss: 0.5977


[Fold 3][Epoch 2/12] Training: 100%|██████████| 1657/1657 [35:51<00:00,  1.30s/it]
[Fold 3][Epoch 2/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.91it/s]


Train Loss: 1.0704 | Val Loss: 0.2753 | Val Acc: 92.20% | LogLoss: 0.2757


[Fold 3][Epoch 3/12] Training: 100%|██████████| 1657/1657 [36:01<00:00,  1.30s/it]
[Fold 3][Epoch 3/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.91it/s]


Train Loss: 0.8343 | Val Loss: 0.1870 | Val Acc: 94.42% | LogLoss: 0.1873


[Fold 3][Epoch 4/12] Training: 100%|██████████| 1657/1657 [35:39<00:00,  1.29s/it]
[Fold 3][Epoch 4/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.90it/s]


Train Loss: 0.6890 | Val Loss: 0.1734 | Val Acc: 95.16% | LogLoss: 0.1737


[Fold 3][Epoch 5/12] Training: 100%|██████████| 1657/1657 [37:23<00:00,  1.35s/it] 
[Fold 3][Epoch 5/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.90it/s]


Train Loss: 0.6420 | Val Loss: 0.1516 | Val Acc: 95.76% | LogLoss: 0.1519


[Fold 3][Epoch 6/12] Training: 100%|██████████| 1657/1657 [35:54<00:00,  1.30s/it]
[Fold 3][Epoch 6/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.90it/s]


Train Loss: 0.6082 | Val Loss: 0.1474 | Val Acc: 96.02% | LogLoss: 0.1477


[Fold 3][Epoch 7/12] Training: 100%|██████████| 1657/1657 [36:53<00:00,  1.34s/it] 
[Fold 3][Epoch 7/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.90it/s]


Train Loss: 0.5310 | Val Loss: 0.1226 | Val Acc: 96.68% | LogLoss: 0.1228


[Fold 3][Epoch 8/12] Training: 100%|██████████| 1657/1657 [36:47<00:00,  1.33s/it] 
[Fold 3][Epoch 8/12] Validation: 100%|██████████| 415/415 [01:45<00:00,  3.93it/s]


Train Loss: 0.4970 | Val Loss: 0.1095 | Val Acc: 97.19% | LogLoss: 0.1097


[Fold 3][Epoch 9/12] Training: 100%|██████████| 1657/1657 [35:47<00:00,  1.30s/it]
[Fold 3][Epoch 9/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.91it/s]


Train Loss: 0.4544 | Val Loss: 0.1073 | Val Acc: 97.27% | LogLoss: 0.1075


[Fold 3][Epoch 10/12] Training: 100%|██████████| 1657/1657 [35:33<00:00,  1.29s/it]
[Fold 3][Epoch 10/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.90it/s]


Train Loss: 0.4452 | Val Loss: 0.1069 | Val Acc: 97.45% | LogLoss: 0.1071


[Fold 3][Epoch 11/12] Training: 100%|██████████| 1657/1657 [37:08<00:00,  1.34s/it]
[Fold 3][Epoch 11/12] Validation: 100%|██████████| 415/415 [01:53<00:00,  3.66it/s]


Train Loss: 0.4402 | Val Loss: 0.0984 | Val Acc: 97.66% | LogLoss: 0.0986


[Fold 3][Epoch 12/12] Training: 100%|██████████| 1657/1657 [38:19<00:00,  1.39s/it] 
[Fold 3][Epoch 12/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.90it/s]


Train Loss: 0.4283 | Val Loss: 0.0996 | Val Acc: 97.66% | LogLoss: 0.0998

📂 Fold 4/5


[Fold 4][Epoch 1/12] Training: 100%|██████████| 1657/1657 [36:48<00:00,  1.33s/it] 
[Fold 4][Epoch 1/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.88it/s]


Train Loss: 3.0139 | Val Loss: 0.5064 | Val Acc: 86.04% | LogLoss: 0.5072


[Fold 4][Epoch 2/12] Training: 100%|██████████| 1657/1657 [33:39<00:00,  1.22s/it]
[Fold 4][Epoch 2/12] Validation: 100%|██████████| 415/415 [01:48<00:00,  3.81it/s]


Train Loss: 1.0532 | Val Loss: 0.2843 | Val Acc: 92.47% | LogLoss: 0.2847


[Fold 4][Epoch 3/12] Training: 100%|██████████| 1657/1657 [33:45<00:00,  1.22s/it]
[Fold 4][Epoch 3/12] Validation: 100%|██████████| 415/415 [01:48<00:00,  3.82it/s]


Train Loss: 0.8267 | Val Loss: 0.1924 | Val Acc: 94.05% | LogLoss: 0.1928


[Fold 4][Epoch 4/12] Training: 100%|██████████| 1657/1657 [34:06<00:00,  1.23s/it] 
[Fold 4][Epoch 4/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.91it/s]


Train Loss: 0.7201 | Val Loss: 0.1930 | Val Acc: 94.54% | LogLoss: 0.1934


[Fold 4][Epoch 5/12] Training: 100%|██████████| 1657/1657 [33:31<00:00,  1.21s/it]
[Fold 4][Epoch 5/12] Validation: 100%|██████████| 415/415 [01:47<00:00,  3.87it/s]


Train Loss: 0.6600 | Val Loss: 0.1558 | Val Acc: 95.19% | LogLoss: 0.1560


[Fold 4][Epoch 6/12] Training: 100%|██████████| 1657/1657 [36:02<00:00,  1.31s/it]
[Fold 4][Epoch 6/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.91it/s]


Train Loss: 0.5780 | Val Loss: 0.1365 | Val Acc: 95.90% | LogLoss: 0.1367


[Fold 4][Epoch 7/12] Training: 100%|██████████| 1657/1657 [33:39<00:00,  1.22s/it]
[Fold 4][Epoch 7/12] Validation: 100%|██████████| 415/415 [01:45<00:00,  3.92it/s]


Train Loss: 0.5428 | Val Loss: 0.1295 | Val Acc: 96.39% | LogLoss: 0.1298


[Fold 4][Epoch 8/12] Training: 100%|██████████| 1657/1657 [35:06<00:00,  1.27s/it]
[Fold 4][Epoch 8/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.91it/s]


Train Loss: 0.4962 | Val Loss: 0.1282 | Val Acc: 96.59% | LogLoss: 0.1284


[Fold 4][Epoch 9/12] Training: 100%|██████████| 1657/1657 [33:41<00:00,  1.22s/it]
[Fold 4][Epoch 9/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.91it/s]


Train Loss: 0.4727 | Val Loss: 0.1174 | Val Acc: 96.80% | LogLoss: 0.1176


[Fold 4][Epoch 10/12] Training: 100%|██████████| 1657/1657 [33:54<00:00,  1.23s/it]
[Fold 4][Epoch 10/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.88it/s]


Train Loss: 0.4387 | Val Loss: 0.1086 | Val Acc: 97.07% | LogLoss: 0.1088


[Fold 4][Epoch 11/12] Training: 100%|██████████| 1657/1657 [34:06<00:00,  1.24s/it]
[Fold 4][Epoch 11/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.91it/s]


Train Loss: 0.4266 | Val Loss: 0.1113 | Val Acc: 96.98% | LogLoss: 0.1115


[Fold 4][Epoch 12/12] Training: 100%|██████████| 1657/1657 [33:43<00:00,  1.22s/it] 
[Fold 4][Epoch 12/12] Validation: 100%|██████████| 415/415 [01:46<00:00,  3.91it/s]


Train Loss: 0.4222 | Val Loss: 0.1079 | Val Acc: 96.97% | LogLoss: 0.1081

📂 Fold 5/5


[Fold 5][Epoch 1/12] Training: 100%|██████████| 1657/1657 [36:27<00:00,  1.32s/it]
[Fold 5][Epoch 1/12] Validation: 100%|██████████| 415/415 [01:47<00:00,  3.87it/s]


ValueError: Input contains NaN.

# Inference

In [12]:
test_dataset = CustomImageDataset(test_root, transform=val_transform, is_test=True)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)

In [12]:
# 저장된 모델 로드
model = BaseModel(num_classes=len(class_names))
model.load_state_dict(torch.load('best_logloss.pth', map_location=device))
model.to(device)

# 추론
model.eval()
results = []

with torch.no_grad():
    for images in test_loader:
        images = images.to(device)
        outputs = model(images)
        probs = F.softmax(outputs, dim=1)

        # 각 배치의 확률을 리스트로 변환
        for prob in probs.cpu():  # prob: (num_classes,)
            result = {
                class_names[i]: prob[i].item()
                for i in range(len(class_names))
            }
            results.append(result)
            
pred = pd.DataFrame(results)

In [13]:
from torch.nn import functional as F

all_fold_probs = []

for fold in range(1, 5):  # Fold 1~4
    model = BaseModel(num_classes=len(class_names))
    model.load_state_dict(torch.load(f"model_fold{fold}/best_logloss.pth", map_location=device))
    model.to(device)
    model.eval()

    fold_probs = []

    with torch.no_grad():
        for images in test_loader:
            images = images.to(device)
            outputs = model(images)
            probs = F.softmax(outputs, dim=1)
            fold_probs.append(probs.cpu().numpy())

    fold_probs = np.concatenate(fold_probs, axis=0)  # (N, C)
    all_fold_probs.append(fold_probs)

# 평균 앙상블
ensemble_probs = np.mean(np.stack(all_fold_probs), axis=0)

# 결과 저장
pred_df = pd.DataFrame(ensemble_probs, columns=class_names)


# Submission

In [13]:
submission = pd.read_csv('./sample_submission.csv', encoding='utf-8-sig')

# 'ID' 컬럼을 제외한 클래스 컬럼 정렬
class_columns = submission.columns[1:]
pred = pred[class_columns]

submission[class_columns] = pred.values
submission.to_csv('best_logloss_submission.csv', index=False, encoding='utf-8-sig')

In [14]:
submission = pd.read_csv('./sample_submission.csv', encoding='utf-8-sig')

class_columns = submission.columns[1:]  # ID 제외한 클래스 열들
pred_df = pred_df[class_columns]

submission[class_columns] = pred_df.values
submission.to_csv('best_logloss_submission.csv', index=False, encoding='utf-8-sig')
print("best_logloss_submission.csv 저장 완료")

best_logloss_submission.csv 저장 완료


In [None]:
tta_transform = transforms.Compose([
    transforms.Resize((CFG['IMG_SIZE'] + 32, CFG['IMG_SIZE'] + 32)),  # 크게 리사이즈 후
    transforms.FiveCrop(CFG['IMG_SIZE']),  # (top-left, top-right, bottom-left, bottom-right, center)
    transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
    transforms.Lambda(lambda crops: torch.stack([
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])(crop) for crop in crops
    ]))
])

In [16]:
class TTADataset(Dataset):
    def __init__(self, image_paths, transform):
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image = Image.open(self.image_paths[idx]).convert('RGB')
        crops = self.transform(image)  # (5, 3, H, W)
        return crops


In [18]:
test_image_dir = './test'
test_image_paths = sorted([os.path.join(test_image_dir, fname) for fname in os.listdir(test_image_dir)])

test_dataset = TTADataset(test_image_paths, tta_transform)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)

In [19]:
all_fold_probs = []

for fold in range(1, 5):  # model_fold1 ~ model_fold4
    print(f"📂 Inference with Fold {fold} model...")

    # === 모델 로드 ===
    model = BaseModel(num_classes=len(class_names))
    model.load_state_dict(torch.load(f"model_fold{fold}/best_logloss.pth", map_location=device))
    model.to(device)
    model.eval()

    fold_probs = []

    with torch.no_grad():
        for batch_crops in tqdm(test_loader):
            B, N, C, H, W = batch_crops.shape  # (B, 5, C, H, W)
            batch_crops = batch_crops.view(-1, C, H, W).to(device)  # (B×5, C, H, W)

            outputs = model(batch_crops)
            probs = F.softmax(outputs, dim=1).view(B, N, -1)  # (B, 5, num_classes)
            avg_probs = probs.mean(dim=1)  # (B, num_classes)

            fold_probs.append(avg_probs.cpu().numpy())

    fold_probs = np.concatenate(fold_probs, axis=0)  # (전체 test 이미지 수, num_classes)
    all_fold_probs.append(fold_probs)

# ✅ 최종 앙상블 (soft voting)
final_probs = np.mean(np.stack(all_fold_probs), axis=0)
pred_df = pd.DataFrame(final_probs, columns=class_names)

# ✅ sample_submission 기반 저장
submission = pd.read_csv('./sample_submission.csv', encoding='utf-8-sig')
class_columns = submission.columns[1:]
submission[class_columns] = pred_df[class_columns].values
submission.to_csv('tta_4fold_submission.csv', index=False, encoding='utf-8-sig')

print("✅ 제출 파일 저장 완료: tta_4fold_submission.csv")

📂 Inference with Fold 1 model...


100%|██████████| 517/517 [16:16<00:00,  1.89s/it]


📂 Inference with Fold 2 model...


100%|██████████| 517/517 [14:34<00:00,  1.69s/it]


📂 Inference with Fold 3 model...


100%|██████████| 517/517 [13:20<00:00,  1.55s/it]


📂 Inference with Fold 4 model...


100%|██████████| 517/517 [13:07<00:00,  1.52s/it]


✅ 제출 파일 저장 완료: tta_4fold_submission.csv
