# Data Preprocessing and Model Training

In [1]:
import os
import time
import math
import random
from pathlib import Path

import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch.utils.tensorboard import SummaryWriter

import torchvision
from torchvision import transforms

# Record the library versions this run was executed with
print(f"torch: {torch.__version__}")
print(f"torchvision: {torchvision.__version__}")

torch: 2.10.0+cpu
torchvision: 0.25.0


In [2]:
# --- Paths (all resolved relative to the notebook's parent directory) ---
PROJECT_ROOT = Path("..").resolve()
DATA_ROOT = (PROJECT_ROOT / "data").resolve()

TRAIN_IMG_DIR = DATA_ROOT / "images" / "train"
TRAIN_CSV = DATA_ROOT / "labels" / "train.csv"

# Output locations for TensorBoard logs and checkpoints; created up front
RUNS_DIR = (PROJECT_ROOT / "runs").resolve()
CKPT_DIR = (PROJECT_ROOT / "checkpoints").resolve()
for _out_dir in (RUNS_DIR, CKPT_DIR):
    _out_dir.mkdir(parents=True, exist_ok=True)

# --- Hyperparameters ---
IMG_SIZE = 320
BATCH_SIZE = 8
EPOCHS = 10
LR = 3e-4
WEIGHT_DECAY = 1e-4
EMB_DIM = 512

# --- Reproducibility: seed every RNG this notebook draws from ---
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# --- Device selection ---
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print("Device:", DEVICE)

Device: cpu


In [3]:
# Read the training label sheet and run basic sanity checks
df = pd.read_csv(TRAIN_CSV)
print("Columns:", df.columns.tolist())

# Use "label" as the identity column name and attach each image's full path
df = df.rename(columns={"ground_truth": "label"}).copy()
df["path"] = df["filename"].map(lambda fname: TRAIN_IMG_DIR / str(fname))

# Rows whose image file is not present on disk
file_exists = df["path"].map(Path.exists)
missing = df[~file_exists]
print("Rows:", len(df))
print("Missing files:", len(missing))
if not missing.empty:
    display(missing.head(10))

# Number of distinct identities, plus the ten most frequent ones
num_ids = df["label"].nunique()
print("Num identities:", num_ids)
top_identities = df["label"].value_counts().head(10)
display(top_identities)

Columns: ['filename', 'ground_truth']
Rows: 1895
Missing files: 0
Num identities: 31


label
Marcela     183
Ousado      179
Medrosa     170
Lua         120
Kwang       113
Kamaikua    105
Jaju        104
Ti           86
Benita       86
Saseka       79
Name: count, dtype: int64

In [4]:
# Label encoding: map each identity name to a contiguous integer id.
# Ids follow the sorted order of the string-cast names, so the mapping is
# deterministic for a given set of labels.
assert df["label"].notna().all(), "train.csv contains rows with a missing label"

# Cast to str once and reuse it for both building and applying the mapping.
# Applying the str-keyed dict to the raw column would produce NaN (and make
# the int cast fail) whenever the labels are not already strings.
label_names = df["label"].astype(str)
labels = sorted(label_names.unique().tolist())
label_to_id = {lab: i for i, lab in enumerate(labels)}
id_to_label = {i: lab for lab, i in label_to_id.items()}

df["label_id"] = label_names.map(label_to_id).astype(int)
df.head()

Unnamed: 0,filename,label,path,label_id
0,train_0001.png,Abril,/workspace/trainer/data/images/train/train_000...,0
1,train_0002.png,Abril,/workspace/trainer/data/images/train/train_000...,0
2,train_0003.png,Abril,/workspace/trainer/data/images/train/train_000...,0
3,train_0004.png,Akaloi,/workspace/trainer/data/images/train/train_000...,1
4,train_0005.png,Akaloi,/workspace/trainer/data/images/train/train_000...,1


In [5]:
# Per-channel mean / std used to normalize image tensors
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

# Both pipelines first resize to 115% of the target size, then crop to IMG_SIZE
_resize_to = int(IMG_SIZE * 1.15)
_normalize = transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)

# Training pipeline: random crop / flip / colour jitter on the PIL image,
# then tensor conversion, normalization and random erasing on the tensor.
_train_steps = [
    transforms.Resize(_resize_to),
    transforms.RandomResizedCrop(IMG_SIZE, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.15, contrast=0.15, saturation=0.15, hue=0.02),
    transforms.ToTensor(),
    _normalize,
    transforms.RandomErasing(p=0.5, scale=(0.02, 0.15), ratio=(0.3, 3.3), value="random"),
]
train_tfms = transforms.Compose(_train_steps)

# Evaluation pipeline: deterministic resize + centre crop, same normalization
_eval_steps = [
    transforms.Resize(_resize_to),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    _normalize,
]
eval_tfms = transforms.Compose(_eval_steps)

In [6]:
# Dataset
class ReIDTrainDataset(Dataset):
    """Image/label dataset backed by a DataFrame with ``path`` and ``label_id`` columns.

    Args:
        df: One row per image. ``path`` is handed to ``PIL.Image.open`` and
            ``label_id`` is cast to ``int``.
        transforms: Optional callable applied to the RGB PIL image before it
            is returned. With ``None`` the PIL image is returned as-is.
    """

    def __init__(self, df: pd.DataFrame, transforms=None):
        # Positional access in __getitem__ should not depend on the caller's index
        self.df = df.reset_index(drop=True)
        self.transforms = transforms

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(image, label_id)``."""
        row = self.df.iloc[idx]
        # The context manager closes the underlying file deterministically;
        # convert() returns a separate, fully loaded RGB image.
        with Image.open(row["path"]) as src:
            img = src.convert("RGB")
        if self.transforms is not None:
            img = self.transforms(img)
        return img, int(row["label_id"])

In [7]:
# Balanced sampling: every image is drawn with a weight of
# 1 / (number of images of its identity), with replacement.
train_ds = ReIDTrainDataset(df, transforms=train_tfms)

class_counts = df["label_id"].value_counts().sort_index()
class_weights = 1.0 / class_counts
sample_weights = df["label_id"].map(class_weights).to_numpy()

sampler = WeightedRandomSampler(
    torch.as_tensor(sample_weights, dtype=torch.double),
    len(sample_weights),
    replacement=True,
)

_loader_kwargs = dict(
    batch_size=BATCH_SIZE,
    sampler=sampler,
    num_workers=2,
    pin_memory=(DEVICE == "cuda"),
    drop_last=True,
)
train_loader = DataLoader(train_ds, **_loader_kwargs)

print("Batches/epoch:", len(train_loader))

Batches/epoch: 236


In [8]:
# Model (EfficientNet-B0 backbone -> embedding -> classifier)
class ReIDNet(nn.Module):
    """EfficientNet-B0 trunk followed by an embedding layer and an identity classifier.

    Args:
        num_classes: Number of outputs of the classifier layer.
        emb_dim: Size of the embedding vector.
        pretrained: If True, load the ``IMAGENET1K_V1`` weights into the backbone.

    ``forward`` returns ``(emb, logits)`` where ``emb`` is L2-normalized along
    dim 1 and ``logits`` is the classifier output computed from ``emb``.
    """

    def __init__(self, num_classes: int, emb_dim: int = 256, pretrained: bool = True):
        super().__init__()

        weights = torchvision.models.EfficientNet_B0_Weights.IMAGENET1K_V1 if pretrained else None
        backbone = torchvision.models.efficientnet_b0(weights=weights)

        # Keep only the convolutional trunk; the stock classification head is not used.
        self.backbone = backbone.features
        self.pool = nn.AdaptiveAvgPool2d(1)

        # Width of the pooled feature vector, read from the stock head's final
        # Linear layer instead of being hard-coded (1280 for EfficientNet-B0).
        self.feat_dim = backbone.classifier[-1].in_features

        self.embed = nn.Linear(self.feat_dim, emb_dim)
        self.bn = nn.BatchNorm1d(emb_dim)
        self.classifier = nn.Linear(emb_dim, num_classes)

    def forward(self, x):
        """Return ``(emb, logits)`` for a batch of images ``x``."""
        x = self.backbone(x)               # [B, feat_dim, H', W']
        x = self.pool(x).flatten(1)        # [B, feat_dim]
        emb = self.embed(x)                # [B, emb_dim]
        emb = self.bn(emb)
        emb = F.normalize(emb, p=2, dim=1) # unit-length embeddings
        # NOTE(review): the classifier sees unit-norm inputs with no scale factor,
        # so logit magnitude is bounded by the classifier weight norm - confirm
        # this is intended.
        logits = self.classifier(emb)      # used for the training loss
        return emb, logits

# Build the network, move it to the selected device and report its size
model = ReIDNet(num_classes=num_ids, emb_dim=EMB_DIM, pretrained=True)
model = model.to(DEVICE)
_n_params = sum(t.numel() for t in model.parameters())
print("Model params:", _n_params/1e6, "M")

        

Model params: 4.680347 M


In [9]:
# Training setup (optimizer + loss + AMP + TensorBoard writer)
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
criterion = nn.CrossEntropyLoss()

# Mixed precision is only enabled on CUDA; with enabled=False the scaler's
# scale/step/update calls pass straight through to the optimizer.
use_amp = (DEVICE == "cuda")
# torch.cuda.amp.GradScaler is deprecated and emits a FutureWarning;
# torch.amp.GradScaler("cuda", ...) is the supported spelling.
scaler = torch.amp.GradScaler("cuda", enabled=use_amp)

# One TensorBoard run directory per launch, disambiguated by a timestamp
run_name = f"reid_effecientNetB0_{IMG_SIZE}px_emb{EMB_DIM}_{time.strftime('%Y%m%d-%H%M%S')}"
writer = SummaryWriter(log_dir=str(RUNS_DIR / run_name))
print("TensorBoard logdir:", (RUNS_DIR / run_name))

TensorBoard logdir: /workspace/trainer/runs/reid_effecientNetB0_320px_emb512_20260219-113435


  scaler = torch.cuda.amp.GradScaler(enabled=use_amp)


In [10]:
# Training loop (+ accuracy + checkpointing)
def accuracy(logits, y):
    """Return the fraction of rows whose highest-scoring class equals the target in ``y``, as a Python float."""
    hits = logits.argmax(dim=1).eq(y)
    return hits.float().mean().item()

In [None]:
# Counts optimizer steps across all epochs; used as the x-axis for per-batch scalars
global_step = 0

for epoch in range(1, EPOCHS + 1):
    model.train()
    epoch_loss = 0.0
    epoch_acc = 0.0
    num_batches = len(train_loader)

    for batch_idx, (x, y) in enumerate(train_loader):
        x = x.to(DEVICE, non_blocking=True)
        y = y.to(DEVICE, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)

        # torch.cuda.amp.autocast is deprecated and emits a FutureWarning;
        # torch.amp.autocast("cuda", ...) is the supported spelling.
        with torch.amp.autocast("cuda", enabled=use_amp):
            emb, logits = model(x)
            loss = criterion(logits, y)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Read the scalar loss once and reuse it for accumulation and logging
        loss_val = loss.item()
        acc = accuracy(logits, y)

        epoch_loss += loss_val
        epoch_acc += acc
        global_step += 1

        # Log every 25th batch to TensorBoard and stdout
        if batch_idx % 25 == 0:
            writer.add_scalar("train/loss", loss_val, global_step)
            writer.add_scalar("train/acc", acc, global_step)
            print(f"epoch {epoch} [{batch_idx:04d}/{num_batches}] loss={loss_val:.4f} acc={acc:.3f}")

    # Per-epoch means over all batches of this epoch
    epoch_loss /= num_batches
    epoch_acc /= num_batches
    writer.add_scalar("epoch/loss", epoch_loss, epoch)
    writer.add_scalar("epoch/acc", epoch_acc, epoch)

    # Save checkpoint each epoch, including what is needed to rebuild the
    # model head (label mapping, image size, embedding size)
    ckpt_path = CKPT_DIR / f"{run_name}_epoch{epoch}.pt"
    torch.save({
        "epoch": epoch,
        "model_state": model.state_dict(),
        "optimizer_state": optimizer.state_dict(),
        "label_to_id": label_to_id,
        "img_size": IMG_SIZE,
        "emb_dim": EMB_DIM,
    }, ckpt_path)

    print(f"Epoch {epoch} done | loss={epoch_loss:.4f} acc={epoch_acc:.3f} | saved {ckpt_path.name}")

writer.close()
print("Done")

  with torch.cuda.amp.autocast(enabled=use_amp):


epoch 1 [0000/236] loss=3.4235 acc=0.000
