# Import

In [1]:
import os
import random

import pandas as pd
import numpy as np

from PIL import Image
from tqdm import tqdm 

from sklearn.model_selection import train_test_split

import torch
from torch.utils.data import Dataset, DataLoader, Subset
import torchvision.models as models
import torchvision.transforms as transforms
import torch.nn.functional as F
from torch import nn, optim

from sklearn.metrics import log_loss


if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

print("Using device:", device)

Using device: mps


# Hyperparameter Setting

In [2]:
CFG = {
    'IMG_SIZE': 384,
    'BATCH_SIZE': 16,
    'EPOCHS': 10,
    'LEARNING_RATE': 1e-4,
    'SEED' : 42
}

# Fixed RandomSeed

In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Seed Í≥†Ï†ï

# CustomDataset

In [4]:
class CustomImageDataset(Dataset):
    def __init__(self, root_dir, transform=None, is_test=False):
        self.root_dir = root_dir
        self.transform = transform
        self.is_test = is_test
        self.samples = []

        if is_test:
            # ÌÖåÏä§Ìä∏ÏÖã: ÎùºÎ≤® ÏóÜÏù¥ Ïù¥ÎØ∏ÏßÄ Í≤ΩÎ°úÎßå Ï†ÄÏû•
            for fname in sorted(os.listdir(root_dir)):
                if fname.lower().endswith(('.jpg')):
                    img_path = os.path.join(root_dir, fname)
                    self.samples.append((img_path,))
        else:
            # ÌïôÏäµÏÖã: ÌÅ¥ÎûòÏä§Î≥Ñ Ìè¥Îçî Íµ¨Ï°∞ÏóêÏÑú ÎùºÎ≤® Ï∂îÏ∂ú
            self.classes = sorted(os.listdir(root_dir))
            self.class_to_idx = {cls_name: i for i, cls_name in enumerate(self.classes)}

            for cls_name in self.classes:
                cls_folder = os.path.join(root_dir, cls_name)

                if not os.path.isdir(cls_folder):
                    continue
                
                for fname in os.listdir(cls_folder):
                    if fname.lower().endswith(('.jpg')):
                        img_path = os.path.join(cls_folder, fname)
                        label = self.class_to_idx[cls_name]
                        self.samples.append((img_path, label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        if self.is_test:
            img_path = self.samples[idx][0]
            image = Image.open(img_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
            return image
        else:
            img_path, label = self.samples[idx]
            image = Image.open(img_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
            return image, label


# Data Load

In [5]:
train_root = './train'
test_root = './test'

In [6]:
train_transform = transforms.Compose([
    transforms.Resize((CFG['IMG_SIZE'], CFG['IMG_SIZE'])),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

val_transform = transforms.Compose([
    transforms.Resize((CFG['IMG_SIZE'], CFG['IMG_SIZE'])),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [6]:
from torchvision import transforms

train_transform = transforms.Compose([
    transforms.Resize((CFG['IMG_SIZE'] + 32, CFG['IMG_SIZE'] + 32)),  # ÏïΩÍ∞Ñ ÌÅ¨Í≤å Î¶¨ÏÇ¨Ïù¥Ï¶à ÌõÑ
    transforms.RandomResizedCrop(CFG['IMG_SIZE'], scale=(0.8, 1.0)),  # ÎûúÎç§ ÌÅ¨Î°≠
    transforms.RandomHorizontalFlip(),                                # Ï¢åÏö∞ Îí§ÏßëÍ∏∞
    transforms.RandomRotation(10),                                    # ¬±10ÎèÑ ÌöåÏ†Ñ
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # ÏÉâÏÉÅ Î≥ÄÌòï
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

val_transform = transforms.Compose([
    transforms.Resize((CFG['IMG_SIZE'], CFG['IMG_SIZE'])),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

In [7]:
# Ï†ÑÏ≤¥ Îç∞Ïù¥ÌÑ∞ÏÖã Î°úÎìú
full_dataset = CustomImageDataset(train_root, transform=None)
print(f"Ï¥ù Ïù¥ÎØ∏ÏßÄ Ïàò: {len(full_dataset)}")

targets = [label for _, label in full_dataset.samples]
class_names = full_dataset.classes

# Stratified Split
train_idx, val_idx = train_test_split(
    range(len(targets)), test_size=0.2, stratify=targets, random_state=42
)

# Subset + transform Í∞ÅÍ∞Å Ï†ÅÏö©
train_dataset = Subset(CustomImageDataset(train_root, transform=train_transform), train_idx)
val_dataset = Subset(CustomImageDataset(train_root, transform=val_transform), val_idx)
print(f'train Ïù¥ÎØ∏ÏßÄ Ïàò: {len(train_dataset)}, valid Ïù¥ÎØ∏ÏßÄ Ïàò: {len(val_dataset)}')


# DataLoader Ï†ïÏùò
train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)

Ï¥ù Ïù¥ÎØ∏ÏßÄ Ïàò: 33137
train Ïù¥ÎØ∏ÏßÄ Ïàò: 26509, valid Ïù¥ÎØ∏ÏßÄ Ïàò: 6628


# ResNet18

In [31]:
class BaseModel(nn.Module):
    def __init__(self, num_classes):
        super(BaseModel, self).__init__()
        self.backbone = models.resnet18(pretrained=True)  # ResNet18 Î™®Îç∏ Î∂àÎü¨Ïò§Í∏∞
        self.feature_dim = self.backbone.fc.in_features 
        self.backbone.fc = nn.Identity()  # feature extractorÎ°úÎßå ÏÇ¨Ïö©
        self.head = nn.Linear(self.feature_dim, num_classes)  # Î∂ÑÎ•òÍ∏∞

    def forward(self, x):
        x = self.backbone(x)       
        x = self.head(x) 
        return x

# EfficientNet

In [32]:
import torch.nn as nn
import timm


class BaseModel(nn.Module):
    def __init__(self, num_classes):
        super(BaseModel, self).__init__()
        
        # EfficientNet-b3 Î∞±Î≥∏
        self.backbone = timm.create_model('efficientnet_b3', pretrained=True)
        
        # Í∏∞Ï°¥ classifier Ï†úÍ±∞
        self.feature_dim = self.backbone.classifier.in_features
        self.backbone.classifier = nn.Identity()
        
        # ÏÉà Î∂ÑÎ•òÍ∏∞
        self.head = nn.Linear(self.feature_dim, num_classes)

    def forward(self, x):
        x = self.backbone(x)
        x = self.head(x)
        return x


# EfficientNetV2-M

In [8]:
import torch.nn as nn
import timm
 

class BaseModel(nn.Module):
    def __init__(self, num_classes):
        super(BaseModel, self).__init__()
        
        # EfficientNet-b3 Î∞±Î≥∏
        self.backbone = timm.create_model('efficientnetv2_rw_m', pretrained=True)
        
        # Í∏∞Ï°¥ classifier Ï†úÍ±∞
        self.feature_dim = self.backbone.classifier.in_features
        self.backbone.classifier = nn.Identity()
        
        # ÏÉà Î∂ÑÎ•òÍ∏∞
        self.head = nn.Linear(self.feature_dim, num_classes)

    def forward(self, x):
        x = self.backbone(x)
        x = self.head(x)
        return x


  from .autonotebook import tqdm as notebook_tqdm


# Train/ Validation

In [9]:
model = BaseModel(num_classes=len(class_names)).to(device)
best_logloss = float('inf')
best_acc = 0.0
best_ce_loss = float('inf')

# ÏÜêÏã§ Ìï®Ïàò
criterion = nn.CrossEntropyLoss()

# ÏòµÌã∞ÎßàÏù¥Ï†Ä
optimizer = optim.Adam(model.parameters(), lr=CFG['LEARNING_RATE'])
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=CFG['EPOCHS'], eta_min=1e-6
)

# ÌïôÏäµ Î∞è Í≤ÄÏ¶ù Î£®ÌîÑ
for epoch in range(CFG['EPOCHS']):
    # Train
    model.train()
    train_loss = 0.0
    for images, labels in tqdm(train_loader, desc=f"[Epoch {epoch+1}/{CFG['EPOCHS']}] Training"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)  # logits
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    avg_train_loss = train_loss / len(train_loader)

    # Validation
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    all_probs = []
    all_labels = []

    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc=f"[Epoch {epoch+1}/{CFG['EPOCHS']}] Validation"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            # Accuracy
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

            # LogLoss
            probs = F.softmax(outputs, dim=1)
            all_probs.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    avg_val_loss = val_loss / len(val_loader)
    val_accuracy = 100 * correct / total
    val_logloss = log_loss(all_labels, all_probs, labels=list(range(len(class_names))))

    scheduler.step()
    print(f"üìâ Learning Rate after epoch {epoch+1}: {scheduler.get_last_lr()[0]:.8f}")

    # Í≤∞Í≥º Ï∂úÎ†•
    print(f"Train Loss : {avg_train_loss:.4f} || Valid Loss : {avg_val_loss:.4f} | Valid Accuracy : {val_accuracy:.4f}%")

    # Best model Ï†ÄÏû•
    if val_logloss < best_logloss:
        best_logloss = val_logloss
        torch.save(model.state_dict(), f'best_logloss.pth')
        print(f"üì¶ Best model saved at epoch {epoch+1} (logloss: {val_logloss:.4f})")

    if val_accuracy > best_acc:
        best_acc = val_accuracy
        torch.save(model.state_dict(), "best_acc.pth")
        print(f"üì¶ Best model saved at epoch {epoch+1} (val_acc: {val_accuracy:.4f})")

    if avg_val_loss < best_ce_loss:
        best_ce_loss = avg_val_loss
        torch.save(model.state_dict(), "best_loss.pth")
        print(f"üì¶ Best model saved at epoch {epoch+1} (val_loss: {avg_val_loss:.4f})")

[Epoch 1/10] Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1657/1657 [28:52<00:00,  1.05s/it]
[Epoch 1/10] Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [01:46<00:00,  3.89it/s]


üìâ Learning Rate after epoch 1: 0.00009758
Train Loss : 2.5324 || Valid Loss : 0.4517 | Valid Accuracy : 86.8286%
üì¶ Best model saved at epoch 1 (logloss: 0.4515)
üì¶ Best model saved at epoch 1 (val_acc: 86.8286)
üì¶ Best model saved at epoch 1 (val_loss: 0.4517)


[Epoch 2/10] Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1657/1657 [28:30<00:00,  1.03s/it]
[Epoch 2/10] Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [01:47<00:00,  3.88it/s]


üìâ Learning Rate after epoch 2: 0.00009055
Train Loss : 0.3081 || Valid Loss : 0.2337 | Valid Accuracy : 91.8980%
üì¶ Best model saved at epoch 2 (logloss: 0.2340)
üì¶ Best model saved at epoch 2 (val_acc: 91.8980)
üì¶ Best model saved at epoch 2 (val_loss: 0.2337)


[Epoch 3/10] Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1657/1657 [27:50<00:00,  1.01s/it]
[Epoch 3/10] Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [01:45<00:00,  3.93it/s]


üìâ Learning Rate after epoch 3: 0.00007960
Train Loss : 0.1629 || Valid Loss : 0.1747 | Valid Accuracy : 94.1913%
üì¶ Best model saved at epoch 3 (logloss: 0.1748)
üì¶ Best model saved at epoch 3 (val_acc: 94.1913)
üì¶ Best model saved at epoch 3 (val_loss: 0.1747)


[Epoch 4/10] Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1657/1657 [29:30<00:00,  1.07s/it]
[Epoch 4/10] Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [01:45<00:00,  3.92it/s]


üìâ Learning Rate after epoch 4: 0.00006580
Train Loss : 0.1071 || Valid Loss : 0.1541 | Valid Accuracy : 95.5492%
üì¶ Best model saved at epoch 4 (logloss: 0.1542)
üì¶ Best model saved at epoch 4 (val_acc: 95.5492)
üì¶ Best model saved at epoch 4 (val_loss: 0.1541)


[Epoch 5/10] Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1657/1657 [27:47<00:00,  1.01s/it]
[Epoch 5/10] Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [01:45<00:00,  3.94it/s]


üìâ Learning Rate after epoch 5: 0.00005050
Train Loss : 0.0761 || Valid Loss : 0.1345 | Valid Accuracy : 96.1979%
üì¶ Best model saved at epoch 5 (logloss: 0.1347)
üì¶ Best model saved at epoch 5 (val_acc: 96.1979)
üì¶ Best model saved at epoch 5 (val_loss: 0.1345)


[Epoch 6/10] Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1657/1657 [27:41<00:00,  1.00s/it]
[Epoch 6/10] Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [01:46<00:00,  3.90it/s]


üìâ Learning Rate after epoch 6: 0.00003520
Train Loss : 0.0539 || Valid Loss : 0.1124 | Valid Accuracy : 96.5902%
üì¶ Best model saved at epoch 6 (logloss: 0.1126)
üì¶ Best model saved at epoch 6 (val_acc: 96.5902)
üì¶ Best model saved at epoch 6 (val_loss: 0.1124)


[Epoch 7/10] Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1657/1657 [27:41<00:00,  1.00s/it]
[Epoch 7/10] Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [01:45<00:00,  3.94it/s]


üìâ Learning Rate after epoch 7: 0.00002140
Train Loss : 0.0404 || Valid Loss : 0.1043 | Valid Accuracy : 97.0881%
üì¶ Best model saved at epoch 7 (logloss: 0.1044)
üì¶ Best model saved at epoch 7 (val_acc: 97.0881)
üì¶ Best model saved at epoch 7 (val_loss: 0.1043)


[Epoch 8/10] Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1657/1657 [27:41<00:00,  1.00s/it]
[Epoch 8/10] Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [01:45<00:00,  3.93it/s]


üìâ Learning Rate after epoch 8: 0.00001045
Train Loss : 0.0273 || Valid Loss : 0.1048 | Valid Accuracy : 96.8920%


[Epoch 9/10] Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1657/1657 [28:25<00:00,  1.03s/it]
[Epoch 9/10] Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [01:45<00:00,  3.94it/s]


üìâ Learning Rate after epoch 9: 0.00000342
Train Loss : 0.0182 || Valid Loss : 0.0989 | Valid Accuracy : 97.1334%
üì¶ Best model saved at epoch 9 (logloss: 0.0991)
üì¶ Best model saved at epoch 9 (val_acc: 97.1334)
üì¶ Best model saved at epoch 9 (val_loss: 0.0989)


[Epoch 10/10] Training: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1657/1657 [28:53<00:00,  1.05s/it]
[Epoch 10/10] Validation: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 415/415 [01:45<00:00,  3.93it/s]


üìâ Learning Rate after epoch 10: 0.00000100
Train Loss : 0.0134 || Valid Loss : 0.0972 | Valid Accuracy : 97.1937%
üì¶ Best model saved at epoch 10 (logloss: 0.0973)
üì¶ Best model saved at epoch 10 (val_acc: 97.1937)
üì¶ Best model saved at epoch 10 (val_loss: 0.0972)


# Inference

In [10]:
test_dataset = CustomImageDataset(test_root, transform=val_transform, is_test=True)
test_loader = DataLoader(test_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False)

In [11]:
# Ï†ÄÏû•Îêú Î™®Îç∏ Î°úÎìú
model = BaseModel(num_classes=len(class_names))
model.load_state_dict(torch.load('best_logloss.pth', map_location=device))
model.to(device)

# Ï∂îÎ°†
model.eval()
results = []

with torch.no_grad():
    for images in test_loader:
        images = images.to(device)
        outputs = model(images)
        probs = F.softmax(outputs, dim=1)

        # Í∞Å Î∞∞ÏπòÏùò ÌôïÎ•†ÏùÑ Î¶¨Ïä§Ìä∏Î°ú Î≥ÄÌôò
        for prob in probs.cpu():  # prob: (num_classes,)
            result = {
                class_names[i]: prob[i].item()
                for i in range(len(class_names))
            }
            results.append(result)
            
pred = pd.DataFrame(results)

# Submission

In [12]:
submission = pd.read_csv('./sample_submission.csv', encoding='utf-8-sig')

# 'ID' Ïª¨ÎüºÏùÑ Ï†úÏô∏Ìïú ÌÅ¥ÎûòÏä§ Ïª¨Îüº Ï†ïÎ†¨
class_columns = submission.columns[1:]
pred = pred[class_columns]

submission[class_columns] = pred.values
submission.to_csv('best_logloss_submission.csv', index=False, encoding='utf-8-sig')