In [27]:
import os
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from tqdm import tqdm

CONFIG = {
    "SEED": 42,
    "IMG_SIZE": 224,
    "BATCH_SIZE": 32,
    "EPOCHS": 30,
    "LR": 0.002,
    "DEVICE": 'cuda' if torch.cuda.is_available() else 'cpu',
    "NUM_CLASSES": 18
}

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

seed_everything(CONFIG["SEED"])

In [28]:
class OnePieceDataset(Dataset):
    def __init__(self, df, root_dir, transform=None, mode='train'):
        self.df = df
        self.root_dir = root_dir
        self.transform = transform
        self.mode = mode

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        
        if self.mode in ['train', 'val']:
            rel_path = row['image_path'].replace('\\', '/')
            img_path = os.path.join(self.root_dir, rel_path)
            label = int(row['label'])
        else:
            img_id = row['id']
            img_path = os.path.join(self.root_dir, 'test', f"{img_id}") 
            possible_exts = ['.jpg', '.png', '.jpeg']
            for ext in possible_exts:
                temp_path = os.path.join(self.root_dir, 'test', f"{img_id}{ext}")
                if os.path.exists(temp_path):
                    img_path = temp_path
                    break
            label = -1

        try:
            image = Image.open(img_path).convert("RGB")
        except:
            image = Image.new('RGB', (CONFIG["IMG_SIZE"], CONFIG["IMG_SIZE"]))

        if self.transform:
            image = self.transform(image)

        if self.mode == 'test':
            return image, row['id']
        else:
            return image, torch.tensor(label, dtype=torch.long)

In [29]:
train_transforms = transforms.Compose([
    transforms.Resize((CONFIG["IMG_SIZE"], CONFIG["IMG_SIZE"])),
    transforms.RandomHorizontalFlip(),
    transforms.RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

val_transforms = transforms.Compose([
    transforms.Resize((CONFIG["IMG_SIZE"], CONFIG["IMG_SIZE"])),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

csv_root = 'data'
img_root = 'data/splitted'

train_df_full = pd.read_csv(os.path.join(csv_root, 'train_annotations.csv'))

train_df, val_df = train_test_split(
    train_df_full, 
    test_size=0.15, 
    random_state=CONFIG["SEED"], 
    stratify=train_df_full['label']
)

train_dataset = OnePieceDataset(train_df, img_root, transform=train_transforms, mode='train')
val_dataset = OnePieceDataset(val_df, img_root, transform=val_transforms, mode='val')

train_loader = DataLoader(train_dataset, batch_size=CONFIG["BATCH_SIZE"], shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=CONFIG["BATCH_SIZE"], shuffle=False, num_workers=0)

In [30]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = self.relu(out)
        return out

class MiniResNet(nn.Module):
    def __init__(self, num_classes=18):
        super(MiniResNet, self).__init__()
        self.in_channels = 32
        
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU(inplace=True)
        
        self.layer1 = self._make_layer(32, 2, stride=1)
        self.layer2 = self._make_layer(64, 2, stride=2)
        self.layer3 = self._make_layer(128, 2, stride=2)
        self.layer4 = self._make_layer(256, 2, stride=2)
        
        self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(256, num_classes)

    def _make_layer(self, out_channels, blocks, stride):
        layers = []
        layers.append(ResidualBlock(self.in_channels, out_channels, stride))
        self.in_channels = out_channels
        for _ in range(1, blocks):
            layers.append(ResidualBlock(out_channels, out_channels, stride=1))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.avg_pool(out)
        out = torch.flatten(out, 1)
        out = self.fc(out)
        return out

model = MiniResNet(num_classes=CONFIG["NUM_CLASSES"]).to(CONFIG["DEVICE"])

param_size = sum(p.numel() for p in model.parameters()) * 4 / (1024 ** 2)
print(f"Model Size in memory: {param_size:.2f} MB")

Model Size in memory: 10.68 MB


In [31]:
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.AdamW(model.parameters(), lr=CONFIG["LR"], weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CONFIG["EPOCHS"])

def train_epoch(model, loader, optimizer, criterion, device):
    model.train()
    running_loss = 0.0
    all_preds = []
    all_labels = []
    
    for images, labels in tqdm(loader, desc="Train"):
        images, labels = images.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())
        
    return running_loss / len(loader), f1_score(all_labels, all_preds, average='macro')

def val_epoch(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Val"):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            
    return running_loss / len(loader), f1_score(all_labels, all_preds, average='macro')

In [32]:
best_f1 = 0.0
best_model_path = "model_resnet_compact.pth"

for epoch in range(CONFIG["EPOCHS"]):
    print(f"Epoch {epoch+1}/{CONFIG['EPOCHS']}")
    
    train_loss, train_f1 = train_epoch(model, train_loader, optimizer, criterion, CONFIG["DEVICE"])
    val_loss, val_f1 = val_epoch(model, val_loader, criterion, CONFIG["DEVICE"])
    
    scheduler.step()
    
    print(f"Train Loss: {train_loss:.4f} | F1: {train_f1:.4f}")
    print(f"Val Loss: {val_loss:.4f} | F1: {val_f1:.4f}")
    print(f"LR: {optimizer.param_groups[0]['lr']:.6f}")
    
    if val_f1 > best_f1:
        best_f1 = val_f1
        torch.save(model.state_dict(), best_model_path)

print(f"Best Val F1: {best_f1}")
print(f"File Size: {os.path.getsize(best_model_path)/1024/1024:.2f} MB")

Epoch 1/30


Train: 100%|██████████| 78/78 [16:38<00:00, 12.81s/it]
Val: 100%|██████████| 14/14 [00:43<00:00,  3.13s/it]


Train Loss: 2.7410 | F1: 0.1512
Val Loss: 3.0356 | F1: 0.1093
LR: 0.001995
Epoch 2/30


Train: 100%|██████████| 78/78 [2:01:59<00:00, 93.85s/it]   
Val: 100%|██████████| 14/14 [00:43<00:00,  3.09s/it]


Train Loss: 2.5577 | F1: 0.2088
Val Loss: 2.5780 | F1: 0.1926
LR: 0.001978
Epoch 3/30


Train: 100%|██████████| 78/78 [33:48<00:00, 26.01s/it] 
Val: 100%|██████████| 14/14 [00:42<00:00,  3.04s/it]


Train Loss: 2.4606 | F1: 0.2467
Val Loss: 2.4833 | F1: 0.2231
LR: 0.001951
Epoch 4/30


Train: 100%|██████████| 78/78 [1:33:20<00:00, 71.80s/it]   
Val: 100%|██████████| 14/14 [00:42<00:00,  3.06s/it]


Train Loss: 2.4174 | F1: 0.2829
Val Loss: 2.5804 | F1: 0.2294
LR: 0.001914
Epoch 5/30


Train: 100%|██████████| 78/78 [33:36<00:00, 25.85s/it]   
Val: 100%|██████████| 14/14 [00:43<00:00,  3.09s/it]


Train Loss: 2.3608 | F1: 0.3227
Val Loss: 2.3137 | F1: 0.3336
LR: 0.001866
Epoch 6/30


Train: 100%|██████████| 78/78 [1:26:32<00:00, 66.58s/it]   
Val: 100%|██████████| 14/14 [00:43<00:00,  3.09s/it]


Train Loss: 2.2552 | F1: 0.3580
Val Loss: 2.2184 | F1: 0.3279
LR: 0.001809
Epoch 7/30


Train: 100%|██████████| 78/78 [1:17:14<00:00, 59.41s/it] 
Val: 100%|██████████| 14/14 [00:42<00:00,  3.05s/it]


Train Loss: 2.2182 | F1: 0.3882
Val Loss: 2.2849 | F1: 0.3447
LR: 0.001743
Epoch 8/30


Train: 100%|██████████| 78/78 [39:00<00:00, 30.01s/it]  
Val: 100%|██████████| 14/14 [16:10<00:00, 69.33s/it] 


Train Loss: 2.1851 | F1: 0.3967
Val Loss: 2.3811 | F1: 0.3030
LR: 0.001669
Epoch 9/30


Train: 100%|██████████| 78/78 [1:25:56<00:00, 66.11s/it]   
Val: 100%|██████████| 14/14 [01:10<00:00,  5.01s/it]


Train Loss: 2.1262 | F1: 0.4282
Val Loss: 2.2153 | F1: 0.3815
LR: 0.001588
Epoch 10/30


Train: 100%|██████████| 78/78 [20:45<00:00, 15.97s/it]
Val: 100%|██████████| 14/14 [00:59<00:00,  4.23s/it]


Train Loss: 2.0823 | F1: 0.4401
Val Loss: 1.9844 | F1: 0.4554
LR: 0.001500
Epoch 11/30


Train: 100%|██████████| 78/78 [23:15<00:00, 17.90s/it]
Val: 100%|██████████| 14/14 [01:03<00:00,  4.55s/it]


Train Loss: 2.0192 | F1: 0.4700
Val Loss: 1.9518 | F1: 0.4704
LR: 0.001407
Epoch 12/30


Train: 100%|██████████| 78/78 [51:32<00:00, 39.65s/it]   
Val: 100%|██████████| 14/14 [00:44<00:00,  3.18s/it]


Train Loss: 1.9758 | F1: 0.4939
Val Loss: 1.9303 | F1: 0.4740
LR: 0.001309
Epoch 13/30


Train: 100%|██████████| 78/78 [1:03:33<00:00, 48.89s/it] 
Val: 100%|██████████| 14/14 [00:51<00:00,  3.68s/it]


Train Loss: 1.9330 | F1: 0.5120
Val Loss: 1.9241 | F1: 0.4799
LR: 0.001208
Epoch 14/30


Train: 100%|██████████| 78/78 [1:46:32<00:00, 81.95s/it]   
Val: 100%|██████████| 14/14 [00:42<00:00,  3.04s/it]


Train Loss: 1.8976 | F1: 0.5225
Val Loss: 1.8956 | F1: 0.4916
LR: 0.001105
Epoch 15/30


Train: 100%|██████████| 78/78 [56:30<00:00, 43.47s/it]   
Val: 100%|██████████| 14/14 [00:43<00:00,  3.09s/it]


Train Loss: 1.8409 | F1: 0.5516
Val Loss: 1.8374 | F1: 0.5350
LR: 0.001000
Epoch 16/30


Train: 100%|██████████| 78/78 [1:05:23<00:00, 50.30s/it] 
Val: 100%|██████████| 14/14 [00:43<00:00,  3.08s/it]


Train Loss: 1.7978 | F1: 0.5643
Val Loss: 1.9947 | F1: 0.4960
LR: 0.000895
Epoch 17/30


Train:  32%|███▏      | 25/78 [12:21<26:12, 29.67s/it]   


KeyboardInterrupt: 

In [None]:
model.load_state_dict(torch.load(best_model_path))
model.eval()

sample_sub = pd.read_csv(os.path.join(csv_root, 'submission.csv'))
test_dataset = OnePieceDataset(sample_sub, img_root, transform=val_transforms, mode='test')
test_loader = DataLoader(test_dataset, batch_size=CONFIG["BATCH_SIZE"], shuffle=False, num_workers=0)

submission_data = []

with torch.no_grad():
    for images, ids in tqdm(test_loader, desc="Inference"):
        images = images.to(CONFIG["DEVICE"])
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        
        preds = preds.cpu().numpy()
        for img_id, label in zip(ids, preds):
            submission_data.append({'id': img_id, 'label': label})

submission_df = pd.DataFrame(submission_data)
submission_df.to_csv('submission_final.csv', index=False)
submission_df.head()

  model.load_state_dict(torch.load(best_model_path))
Inference: 100%|██████████| 27/27 [00:11<00:00,  2.28it/s]


Unnamed: 0,id,label
0,c41628b1-4781-4392-ac8d-6bfe981f73f9,10
1,f114acb3-fe18-478b-a19a-1f4cbe098851,7
2,d952ecfe-750c-44b2-96c2-1cac1a4ee146,2
3,2c14ec77-44ca-4b3c-b470-96286411c617,14
4,712c3ce9-750a-4cc4-8f94-f8033c31cb2c,0
