In [31]:
import os

from tqdm import tqdm
import pandas as pd
import numpy as np
import cv2

import torch.nn.functional as F
from torch.optim import AdamW
from torch.utils.data import Dataset, DataLoader, SequentialSampler, RandomSampler
import torch.nn as nn
import torch
import timm
import albumentations as A

In [32]:
# Dataset root directory. The default is the original author's local Windows
# folder; set the UNDERFITTING_CV_ROOT environment variable to point the notebook
# at another directory without editing this cell.
root_path = os.environ.get(
    "UNDERFITTING_CV_ROOT", "E:\\IOAI\\kits\\neoai-2025\\underfitting-cv"
)
# NOTE(review): no code visible in this notebook reads `seed`; the training setup
# cell calls seed_everything(230) with its own literal. Confirm which value is intended.
seed = 42

# Data preparation

In [33]:
# Read the training manifest, then turn every entry of its "path" column into a
# full path by prefixing it with <root_path>\train_images\.
train_csv_path = f"{root_path}\\train.csv"
train_image_prefix = f"{root_path}\\train_images\\"

train = pd.read_csv(train_csv_path)
train["path"] = [train_image_prefix + str(name) for name in train["path"]]

train.head()

Unnamed: 0,path,class
0,E:\IOAI\kits\neoai-2025\underfitting-cv\train_...,4
1,E:\IOAI\kits\neoai-2025\underfitting-cv\train_...,53
2,E:\IOAI\kits\neoai-2025\underfitting-cv\train_...,75
3,E:\IOAI\kits\neoai-2025\underfitting-cv\train_...,43
4,E:\IOAI\kits\neoai-2025\underfitting-cv\train_...,54


In [34]:
# Distinct class labels that occur in the training manifest, in order of first appearance.
pd.unique(train["class"])

array([ 4, 53, 75, 43, 54, 92, 56, 68, 28, 87, 85, 19,  8, 38, 12, 32, 14,
       78, 24, 13, 36, 44,  5], dtype=int64)

In [None]:
# Number of output classes of the classifier.
num_classes = 102

# Per-class loss weights: every class starts at 1.0 and each class that occurs in
# the training manifest is raised to 5.0.
class_weights = torch.ones(num_classes, dtype=torch.float32)
seen_classes = torch.as_tensor(train["class"].unique(), dtype=torch.long)
class_weights[seen_classes] = 5.0

# Softmax-normalised copy of the weights. The original statement had stray leading
# whitespace (an IndentationError that stops the whole cell from running) and
# omitted `dim`, which F.softmax expects to be given explicitly.
# NOTE(review): nothing visible in this notebook reads `weight`; the loss is
# constructed from `class_weights` directly. Drop this line if it is a leftover.
weight = F.softmax(class_weights, dim=0)


In [36]:
class TrainDataset(Dataset):
    """Map-style dataset that reads an image file and pairs it with its label.

    Args:
        path: Sequence of image file paths, indexable by integer position.
        target: Sequence of labels aligned with ``path``.
        transform: Callable invoked as ``transform(image=<array>)`` that returns a
            mapping whose ``"image"`` entry is the transformed array.
    """

    def __init__(self, path, target, transform):
        self.path = path
        self.target = target
        self.transform = transform

    def __len__(self):
        """Return the number of samples (one per entry in ``target``)."""
        return len(self.target)

    def __getitem__(self, item):
        """Load, transform and scale the sample at position ``item``.

        Returns:
            A ``(image, target)`` pair. ``image`` is a float32 tensor whose last
            array axis has been moved to the front; values are divided by 255 and
            shifted by -0.5. ``target`` is the stored label, unchanged.

        Raises:
            FileNotFoundError: If ``cv2.imread`` returns ``None`` for the path
                (missing, unreadable or undecodable file).
        """
        image_path = self.path[item]
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than deep inside cvtColor.
            raise FileNotFoundError(f"cv2.imread could not read image: {image_path!r}")
        # OpenCV decodes to BGR channel order; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        target = self.target[item]
        image = self.transform(image=image)["image"]
        # Scale to [0, 1] (for 8-bit input) and centre around zero.
        image = image.astype(np.float32) / 255.0
        image = image - 0.5
        # (H, W, C) array -> (C, H, W) tensor.
        image = torch.from_numpy(image).permute(2, 0, 1)

        return image, target


class PetNet(nn.Module):
    """Thin ``nn.Module`` wrapper that keeps a timm model under ``self.model``.

    Args:
        model_name: Name passed to ``timm.create_model``.
        num_classes: Forwarded to ``timm.create_model`` as ``num_classes``.
    """

    def __init__(self, model_name, num_classes):
        super().__init__()
        backbone = timm.create_model(model_name, num_classes=num_classes)
        self.model = backbone

    def forward(self, image):
        """Return the wrapped model's output for ``image`` unchanged."""
        return self.model(image)


def get_train_transforms(dim=224):
    """Build the albumentations pipeline applied to training images.

    The pipeline chains, in order: ``LongestMaxSize`` to ``dim``, ``PadIfNeeded``
    to ``dim`` x ``dim``, a horizontal flip with probability 0.5, and a colour
    jitter with brightness/contrast/saturation all set to 0.3.

    Args:
        dim: Target side length used for both the resize and the padding step.
    """
    resize = A.LongestMaxSize(max_size=dim, p=1.0)
    pad = A.PadIfNeeded(dim, dim, p=1.0)
    flip = A.HorizontalFlip(p=0.5)
    jitter = A.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3)
    return A.Compose([resize, pad, flip, jitter])


def get_valid_transforms(dim=224):
    """Build the albumentations pipeline applied at inference time.

    Only the two geometric steps are used: ``LongestMaxSize`` to ``dim`` followed
    by ``PadIfNeeded`` to ``dim`` x ``dim``; there is no random augmentation.

    Args:
        dim: Target side length used for both the resize and the padding step.
    """
    resize = A.LongestMaxSize(max_size=dim, p=1.0)
    pad = A.PadIfNeeded(dim, dim, p=1.0)
    return A.Compose([resize, pad])

In [37]:
def seed_everything(seed):
    """Seed every random number generator this notebook can draw from.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and all CUDA devices), and switches cuDNN to deterministic,
    non-benchmarking mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Local import: `random` is not part of the notebook's import cell. The
    # original left this seeding step commented out, so any library drawing from
    # Python's global `random` state was not reproducible.
    import random

    random.seed(seed)
    # PYTHONHASHSEED is read at interpreter start-up, so this assignment only
    # affects child processes launched after this point, not the running kernel.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Trade cuDNN autotuning speed for run-to-run determinism.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [38]:
# NOTE(review): the seed used here is the literal 230, not the `seed = 42` defined
# in the configuration cell. Confirm which one is intended.
seed_everything(230)

# Run configuration.
device = "cuda"
dim = 224
batch_size = 64

# There is deliberately no "shuffle" entry: sample order comes from the explicit
# sampler handed to the DataLoader below, and DataLoader does not accept
# shuffle together with a sampler.
params_train = dict(batch_size=batch_size, drop_last=False)

train_dataset = TrainDataset(
    path=train["path"].tolist(),
    target=train["class"].tolist(),
    transform=get_train_transforms(dim),
)

# Sample with replacement, drawing twice as many items per epoch as the dataset holds.
oversampling_sampler = RandomSampler(
    train_dataset,
    replacement=True,
    num_samples=2 * len(train_dataset),
)
train_loader = DataLoader(train_dataset, sampler=oversampling_sampler, **params_train)

# Model selection

In [39]:
# Build the wrapper around the timm TinyViT architecture with `num_classes` outputs.
model = PetNet(model_name="tiny_vit_5m_224.dist_in22k_ft_in1k", num_classes=num_classes)

# Attach a p=0.5 dropout module to the wrapped timm model under the name `drop`.
# NOTE(review): this only changes the forward pass if the timm model actually reads
# a top-level `drop` attribute; otherwise it merely registers an unused submodule.
# Check the printed architecture (the dropout may live inside the head instead).
model.model.drop = nn.Dropout(p=0.5)

model

PetNet(
  (model): TinyVit(
    (patch_embed): PatchEmbed(
      (conv1): ConvNorm(
        (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (act): GELU(approximate='none')
      (conv2): ConvNorm(
        (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (stages): Sequential(
      (0): ConvLayer(
        (blocks): Sequential(
          (0): MBConv(
            (conv1): ConvNorm(
              (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            )
            (act1): GELU(approximate='none')
            (conv2): ConvNorm(
              (conv): Conv2d(256, 256, kernel_si

In [40]:
# Load the provided checkpoint onto the training device.
# SECURITY: weights_only=False makes torch.load unpickle arbitrary Python objects,
# which can execute code embedded in the file. Only use it on a trusted model.pt.
model_dict = torch.load(
    f"{root_path}\\model.pt",
    map_location=device,
    weights_only=False,
)

# strict=False tolerates key mismatches, so inspect the result instead of
# discarding it: if the checkpoint's key names do not line up with PetNet's,
# nothing is loaded and training would silently start from fresh weights.
load_result = model.load_state_dict(model_dict, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print(
        f"warning: checkpoint/model key mismatch - "
        f"{len(load_result.missing_keys)} missing (e.g. {load_result.missing_keys[:3]}), "
        f"{len(load_result.unexpected_keys)} unexpected (e.g. {load_result.unexpected_keys[:3]})"
    )

model = model.to(device)
model.train()

print("model ready")

model ready


In [None]:
# Maximum gradient norm used by train_model; a value <= 0 disables clipping there.
clip_grad_norm = 1
# Loss scaler for mixed-precision training on CUDA.
scaler = torch.amp.GradScaler("cuda")
# Class-weighted cross-entropy with label smoothing switched off.
criterion = nn.CrossEntropyLoss(weight=class_weights.to(device), label_smoothing=0.0)

def train_model(epochs: int, lr: float):
    """Run mixed-precision training of the global ``model`` on ``train_loader``.

    A new AdamW optimizer is created on every call. The function relies on the
    module-level ``model``, ``train_loader``, ``criterion``, ``scaler`` and
    ``clip_grad_norm`` and reports the running mean loss on a tqdm progress bar.

    Args:
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate given to AdamW.
    """
    optimizer = AdamW(model.parameters(), lr=lr)

    for epoch in range(epochs):
        running_loss = 0
        progress = tqdm(enumerate(train_loader), total=len(train_loader))
        for step, (inputs, labels) in progress:
            optimizer.zero_grad()
            inputs = inputs.cuda()
            labels = labels.cuda().long()

            with torch.amp.autocast("cuda"):
                loss = criterion(model(inputs), labels)

            scaler.scale(loss).backward()

            if clip_grad_norm > 0:
                # Undo the loss scaling first so the clip threshold applies to
                # the real gradient magnitudes.
                scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), clip_grad_norm)
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.detach().cpu().numpy()
            progress.set_postfix(
                loss=running_loss / (step + 1), stage="train", epoch=epoch
            )

In [42]:
# Phase 1: short, low learning-rate pass with every parameter trainable.
print("finetuning whole model:")
train_model(epochs=3, lr=1e-5)

# Phase 2: stop gradient updates for the patch embedding and all stages, so only
# the parameters outside these two modules keep training.
# NOTE(review): requires_grad_(False) does not put the modules in eval mode; the
# BatchNorm layers inside them keep updating their running statistics while the
# model stays in train mode.
for frozen_part in (model.model.stages, model.model.patch_embed):
    frozen_part.requires_grad_(False)

print("finetuning the fully-connected layer:")
train_model(epochs=15, lr=1e-3)

finetuning whole model:


100%|██████████| 8/8 [00:02<00:00,  3.82it/s, epoch=0, loss=4.06, stage=train]
100%|██████████| 8/8 [00:02<00:00,  3.90it/s, epoch=1, loss=3.87, stage=train]
100%|██████████| 8/8 [00:01<00:00,  4.00it/s, epoch=2, loss=3.73, stage=train]


finetuning the fully-connected layer:


100%|██████████| 8/8 [00:01<00:00,  6.08it/s, epoch=0, loss=3.48, stage=train]
100%|██████████| 8/8 [00:01<00:00,  6.07it/s, epoch=1, loss=3.14, stage=train]
100%|██████████| 8/8 [00:01<00:00,  5.68it/s, epoch=2, loss=2.79, stage=train]
100%|██████████| 8/8 [00:01<00:00,  5.55it/s, epoch=3, loss=2.48, stage=train]
100%|██████████| 8/8 [00:01<00:00,  5.57it/s, epoch=4, loss=2.22, stage=train]
100%|██████████| 8/8 [00:01<00:00,  5.64it/s, epoch=5, loss=2.1, stage=train] 
100%|██████████| 8/8 [00:01<00:00,  5.31it/s, epoch=6, loss=1.89, stage=train]
100%|██████████| 8/8 [00:01<00:00,  5.23it/s, epoch=7, loss=1.76, stage=train]
100%|██████████| 8/8 [00:01<00:00,  5.73it/s, epoch=8, loss=1.64, stage=train]
100%|██████████| 8/8 [00:01<00:00,  5.61it/s, epoch=9, loss=1.5, stage=train] 
100%|██████████| 8/8 [00:01<00:00,  5.74it/s, epoch=10, loss=1.33, stage=train]
100%|██████████| 8/8 [00:01<00:00,  5.80it/s, epoch=11, loss=1.24, stage=train]
100%|██████████| 8/8 [00:01<00:00,  5.78it/s, epoc

# Submission

In [43]:
# Collect the test image paths in sorted file-name order.
main_path = f"{root_path}\\test_images"
paths_list = []
for path in sorted(os.listdir(main_path)):
    # Joined with "/" (not "\\") so the file name is the text after the last "/".
    paths_list.append(f"{main_path}/{path}")

test = pd.DataFrame({"path": paths_list})
test["class"] = 0  # placeholder label so TrainDataset can be reused for inference

# Deterministic order, every sample kept.
params_valid = dict(batch_size=batch_size, shuffle=False, drop_last=False)

test_dataset = TrainDataset(
    test["path"].tolist(), test["class"].tolist(), get_valid_transforms(dim)
)
valid_loader = DataLoader(test_dataset, **params_valid)

In [44]:
def make_predict(
    state_dict,
    valid_loader,
    name_csv="submission_1.csv",
    test_ids=None,
):
    """Predict a class for every sample in ``valid_loader`` and write a CSV.

    A fresh timm TinyViT with 102 outputs is created, loaded with ``state_dict``
    and run in eval mode under CUDA autocast. The arg-max class of each sample is
    written to ``name_csv`` with columns ``id`` and ``class``.

    Args:
        state_dict: State dict for the bare timm model (keys without any wrapper prefix).
        valid_loader: Loader yielding ``(inputs, labels)`` batches; labels are ignored.
        name_csv: Output CSV path.
        test_ids: One id per sample, in loader order. When ``None`` (default), the
            file names of the global ``test["path"]`` column are used, read at
            call time. The original computed this default once, when the function
            was defined, so it went stale if ``test`` was rebuilt afterwards.
    """
    if test_ids is None:
        test_ids = [os.path.basename(x) for x in test["path"]]

    model = timm.create_model("tiny_vit_5m_224.dist_in22k_ft_in1k", num_classes=102)
    model.cuda().eval()
    model.load_state_dict(state_dict)

    preds = []
    with torch.no_grad():
        for inputs, _ in tqdm(valid_loader, total=len(valid_loader)):
            inputs = inputs.cuda()

            with torch.amp.autocast("cuda"):
                y_preds = model(inputs)

            preds.append(y_preds.to("cpu").numpy())

    preds = np.concatenate(preds)

    submission = pd.DataFrame()
    submission["id"] = test_ids
    submission["class"] = np.argmax(preds, 1)
    submission.to_csv(name_csv, index=None)

In [45]:
# PetNet keeps the timm network under `.model`; passing that inner state dict gives
# keys without the "model." prefix, matching the bare timm model make_predict builds.
make_predict(state_dict=model.model.state_dict(), valid_loader=valid_loader)

100%|██████████| 80/80 [00:13<00:00,  5.86it/s]
