In [1]:
!pip install -q git+https://github.com/tcstrength/item-identification.git@main timm


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
import os
import pandas as pd
from pathlib import Path
from hcmus.core import appconfig
from hcmus.lbs import LabelStudioConnector

[32m2025-06-08 21:56:59.517[0m | [1mINFO    [0m | [36mhcmus.core.appconfig[0m:[36m<module>[0m:[36m7[0m - [1mLoad DotEnv: True[0m


In [4]:
lsb_connector = LabelStudioConnector(
    url=appconfig.LABEL_STUDIO_URL,
    api_key=appconfig.LABEL_STUDIO_API_KEY,
    project_id=appconfig.LABEL_STUDIO_PROJECT_MAPPING["train"],
    temp_dir=appconfig.LABEL_STUDIO_TEMP_DIR
)

In [5]:
tasks = lsb_connector.get_tasks()
labels = lsb_connector.extract_labels(tasks)
dataset = lsb_connector.download_dataset(tasks, labels)
dataset = [x for x in dataset if x.get("target").get("labels")]

[32m2025-06-08 21:57:00.625[0m | [1mINFO    [0m | [36mhcmus.lbs._label_studio_connector[0m:[36mget_tasks[0m:[36m125[0m - [1mNew `page_to` applied: 34[0m
Loading tasks: 100%|██████████| 34/34 [00:08<00:00,  3.83it/s]
Downloading images: 100%|██████████| 3375/3375 [00:12<00:00, 278.95it/s] 


In [6]:
def build_classification_dataset(dataset, labels):
    ret_dataset = []
    unknown_idx = labels["unknown"]
    prioritized_label = "8935136865648"
    prioritized_idx = [v for k, v in labels.items() if k.startswith(prioritized_label)][0]

    for item in dataset:
        target_labels = list(set(item.get("target").get("labels")))
        if unknown_idx in labels:
            labels.remove(unknown_idx)

        final_label = -1
        if len(target_labels) == 1:
            final_label = target_labels[0]

        if len(target_labels) > 1:
            for label in item.get("target").get("labels"):
                final_label = label
                if prioritized_idx in target_labels:
                    final_label = prioritized_idx
                    break

        ret_dataset.append({
            "image": item.get("image"),
            "label": final_label
        })
    return ret_dataset

def select_labels_m_samples(cls_dataset, m_samples=12):
    label_stats = [x.get("label") for x in cls_dataset]
    df = pd.DataFrame(label_stats, columns=["label"])
    df = df.groupby("label")["label"].agg(["count"])
    df = df.sort_values("count")
    df = df[df["count"] >= m_samples]
    df = df.reset_index()
    selected_label_idx = df["label"].tolist()
    return selected_label_idx

def filter_cls_dataset(cls_dataset, selected_labels):
    ret_dataset = []
    for item in cls_dataset:
        if item.get("label") in selected_labels:
            ret_dataset.append(item)
    return ret_dataset

In [7]:
cls_dataset = build_classification_dataset(dataset, labels)
selected_labels = select_labels_m_samples(cls_dataset, 12)
filtered_dataset = filter_cls_dataset(cls_dataset, selected_labels)

In [8]:
idx2label = {v: k for k, v in labels.items()}

In [9]:
dataset_dir = Path("dataset")
dataset_dir.mkdir(exist_ok=True)

for item in filtered_dataset:
    label = item.get("label")
    path = item.get("image")
    label_name = idx2label[label]
    label_name = label_name.replace("/", "-")
    label_name = label_name.replace("'", "-")
    label_dir = dataset_dir.joinpath(label_name)
    label_dir.mkdir(exist_ok=True)
    return_code = os.system(f"cp -f '{path}' '{label_dir}'")
    if return_code != 0:
        print(label_name)
        break

### Models

In [10]:
# Install timm for EfficientNet
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split
from timm import create_model
from tqdm import tqdm

# Configuration
DATA_DIR = 'dataset'  # 👈 Path to your single dataset folder
BATCH_SIZE = 32
EPOCHS = 64
VAL_SPLIT = 0.2
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
MODEL_NAMES = [
    # 'resnet50',
    # 'densenet121',
    'densenet169',
    'efficientnet_b0',
    'convnext_base',
    'vit_base_patch16_224',
    'swin_base_patch4_window7_224'
]
# Transforms (same for train and val to keep things simple)
common_transforms = transforms.Compose([
    transforms.Resize((224, 224)),

    # Randomly change brightness, contrast, saturation, and hue
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),

    # Random horizontal and vertical flip
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),

    # Random rotation within ±30 degrees
    transforms.RandomRotation(degrees=30),

    # Random affine transformation (rotation, translation, scale, shear)
    transforms.RandomAffine(
        degrees=15,          # additional rotation control
        translate=(0.05, 0.05),  # 5% translation in both directions
        scale=(0.9, 1.1),    # zoom in/out
        shear=10             # shear angle
    ),

    # Random perspective transformation
    transforms.RandomPerspective(distortion_scale=0.2, p=0.5),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# Load full dataset and split
full_dataset = ImageFolder(DATA_DIR, transform=common_transforms)
NUM_CLASSES = len(full_dataset.classes)
val_size = int(len(full_dataset) * VAL_SPLIT)
train_size = len(full_dataset) - val_size

train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# Training and evaluation functions
def train_model(model, criterion, optimizer):
    model.train()
    total_loss, correct, total = 0, 0, 0
    for images, labels in tqdm(train_loader, desc="Training", leave=False):
        images, labels = images.to(DEVICE), labels.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * images.size(0)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
    return total_loss / total, 100. * correct / total

def evaluate_model(model):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in tqdm(val_loader, desc="Evaluating", leave=False):
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = model(images)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    return 100. * correct / total

# Freeze backbone parameters
def freeze_backbone(model):
    for param in model.parameters():
        param.requires_grad = False

# Build model with frozen backbone + classifier head
def build_feature_extractor(model_name):
    backbone = create_model(model_name, pretrained=True, num_classes=0, global_pool='avg')
    freeze_backbone(backbone)
    model = nn.Sequential(
        backbone,
        nn.Linear(backbone.num_features, NUM_CLASSES)
    )
    return model.to(DEVICE)


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
import mlflow
experiment_name = "backbone-features"
experiment = mlflow.get_experiment_by_name(experiment_name)
experiment_id = None
if not experiment:
    experiment_id = mlflow.create_experiment(experiment_name)
else:
    experiment_id = experiment.experiment_id

: 

In [None]:
# input_example = torch.rand(1, 3, 224, 224).numpy()
# Train loop for all models
for model_name in MODEL_NAMES:
    with mlflow.start_run(experiment_id=experiment_id, run_name=model_name):
        print(f"\n🔍 Training with frozen backbone: {model_name}")
        model = build_feature_extractor(model_name)
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model[-1].parameters(), lr=1e-3)
        best_acc = 0

        for epoch in range(EPOCHS):
            print(f"Epoch {epoch+1}/{EPOCHS}")
            train_loss, train_acc = train_model(model, criterion, optimizer)
            val_acc = evaluate_model(model)
            print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%")
            mlflow.log_metrics({
                "train_accuracy": train_acc,
                "train_loss": train_loss,
                "val_accuracy": val_acc
            }, step=epoch)

            if val_acc > best_acc:
                best_acc = val_acc
                mlflow.pytorch.log_model(model, "model")


🔍 Training with frozen backbone: densenet169
Epoch 1/64


                                                           

Train Loss: 4.9398, Train Acc: 6.72%, Val Acc: 26.16%




Epoch 2/64


                                                           

Train Loss: 3.5456, Train Acc: 35.22%, Val Acc: 45.16%




Epoch 3/64


                                                           

Train Loss: 2.6304, Train Acc: 55.44%, Val Acc: 55.81%




Epoch 4/64


                                                           

Train Loss: 1.9635, Train Acc: 70.39%, Val Acc: 66.67%




Epoch 5/64


                                                           

Train Loss: 1.5425, Train Acc: 78.62%, Val Acc: 67.64%




Epoch 6/64


                                                           

Train Loss: 1.2512, Train Acc: 83.26%, Val Acc: 75.39%




Epoch 7/64


                                                           

Train Loss: 1.0456, Train Acc: 85.44%, Val Acc: 79.26%




Epoch 8/64


                                                           

Train Loss: 0.8858, Train Acc: 88.05%, Val Acc: 78.49%
Epoch 9/64


                                                           

Train Loss: 0.7784, Train Acc: 89.40%, Val Acc: 80.23%




Epoch 10/64


                                                           

Train Loss: 0.6650, Train Acc: 91.10%, Val Acc: 82.75%




Epoch 11/64


                                                           

Train Loss: 0.5826, Train Acc: 92.74%, Val Acc: 83.14%




Epoch 12/64


                                                           

Train Loss: 0.5338, Train Acc: 92.84%, Val Acc: 83.91%




Epoch 13/64


                                                           

Train Loss: 0.4925, Train Acc: 94.10%, Val Acc: 85.85%




Epoch 14/64


                                                           

Train Loss: 0.4327, Train Acc: 94.10%, Val Acc: 82.75%
Epoch 15/64


                                                           

Train Loss: 0.4150, Train Acc: 93.90%, Val Acc: 83.91%
Epoch 16/64


                                                           

Train Loss: 0.3721, Train Acc: 94.68%, Val Acc: 86.05%




Epoch 17/64


                                                           

Train Loss: 0.3432, Train Acc: 95.60%, Val Acc: 84.50%
Epoch 18/64


                                                           

Train Loss: 0.3425, Train Acc: 94.82%, Val Acc: 86.24%




Epoch 19/64


                                                           

Train Loss: 0.3192, Train Acc: 95.50%, Val Acc: 86.05%
Epoch 20/64


                                                           

Train Loss: 0.2740, Train Acc: 96.37%, Val Acc: 86.43%




Epoch 21/64


                                                           

Train Loss: 0.2667, Train Acc: 96.37%, Val Acc: 87.40%




Epoch 22/64


                                                           

Train Loss: 0.2651, Train Acc: 96.27%, Val Acc: 88.18%




Epoch 23/64


                                                           

Train Loss: 0.2415, Train Acc: 96.42%, Val Acc: 88.18%
Epoch 24/64


                                                           

Train Loss: 0.2312, Train Acc: 96.86%, Val Acc: 86.82%
Epoch 25/64


                                                           

Train Loss: 0.2194, Train Acc: 96.71%, Val Acc: 87.21%
Epoch 26/64


                                                           

Train Loss: 0.2096, Train Acc: 96.95%, Val Acc: 87.79%
Epoch 27/64


                                                           

Train Loss: 0.2066, Train Acc: 97.15%, Val Acc: 84.88%
Epoch 28/64


                                                           

Train Loss: 0.1915, Train Acc: 96.90%, Val Acc: 86.43%
Epoch 29/64


                                                           

Train Loss: 0.1804, Train Acc: 97.29%, Val Acc: 87.98%
Epoch 30/64


                                                           

Train Loss: 0.1837, Train Acc: 97.05%, Val Acc: 87.60%
Epoch 31/64


                                                           

Train Loss: 0.1792, Train Acc: 96.86%, Val Acc: 90.31%




Epoch 32/64


                                                           

Train Loss: 0.1602, Train Acc: 97.82%, Val Acc: 88.76%
Epoch 33/64


                                                           

Train Loss: 0.1699, Train Acc: 96.76%, Val Acc: 87.79%
Epoch 34/64


                                                           

Train Loss: 0.1603, Train Acc: 97.48%, Val Acc: 87.40%
Epoch 35/64


                                                           

Train Loss: 0.1599, Train Acc: 97.44%, Val Acc: 88.57%
Epoch 36/64


                                                           

Train Loss: 0.1489, Train Acc: 97.10%, Val Acc: 88.57%
Epoch 37/64


                                                           

Train Loss: 0.1413, Train Acc: 97.73%, Val Acc: 87.60%
Epoch 38/64


                                                           

Train Loss: 0.1458, Train Acc: 97.68%, Val Acc: 88.76%
Epoch 39/64


                                                           

Train Loss: 0.1371, Train Acc: 98.02%, Val Acc: 88.95%
Epoch 40/64


                                                           

Train Loss: 0.1209, Train Acc: 98.50%, Val Acc: 87.79%
Epoch 41/64


                                                           

Train Loss: 0.1258, Train Acc: 97.87%, Val Acc: 86.82%
Epoch 42/64


                                                           

Train Loss: 0.1266, Train Acc: 98.11%, Val Acc: 87.98%
Epoch 43/64


                                                           

Train Loss: 0.1132, Train Acc: 98.31%, Val Acc: 87.98%
Epoch 44/64


                                                           

Train Loss: 0.1238, Train Acc: 97.58%, Val Acc: 87.79%
Epoch 45/64


                                                           

Train Loss: 0.1176, Train Acc: 97.87%, Val Acc: 87.98%
Epoch 46/64


                                                           

Train Loss: 0.1069, Train Acc: 98.31%, Val Acc: 88.57%
Epoch 47/64


                                                           

Train Loss: 0.1054, Train Acc: 97.97%, Val Acc: 88.37%
Epoch 48/64


                                                           

Train Loss: 0.1013, Train Acc: 98.45%, Val Acc: 88.57%
Epoch 49/64


                                                           

Train Loss: 0.1038, Train Acc: 97.97%, Val Acc: 89.15%
Epoch 50/64


                                                           

Train Loss: 0.0991, Train Acc: 98.31%, Val Acc: 89.73%
Epoch 51/64


                                                           

Train Loss: 0.0958, Train Acc: 98.16%, Val Acc: 88.95%
Epoch 52/64


                                                           

Train Loss: 0.0894, Train Acc: 98.26%, Val Acc: 88.76%
Epoch 53/64


                                                           

Train Loss: 0.0887, Train Acc: 98.69%, Val Acc: 88.18%
Epoch 54/64


                                                           

Train Loss: 0.0880, Train Acc: 98.65%, Val Acc: 87.98%
Epoch 55/64


                                                           

Train Loss: 0.0920, Train Acc: 98.21%, Val Acc: 87.79%
Epoch 56/64


                                                           

Train Loss: 0.0888, Train Acc: 98.36%, Val Acc: 89.53%
Epoch 57/64


                                                           

Train Loss: 0.0821, Train Acc: 98.60%, Val Acc: 88.37%
Epoch 58/64


                                                           

Train Loss: 0.0865, Train Acc: 98.21%, Val Acc: 87.79%
Epoch 59/64


                                                           

Train Loss: 0.0744, Train Acc: 98.98%, Val Acc: 88.95%
Epoch 60/64


                                                           

Train Loss: 0.0871, Train Acc: 98.16%, Val Acc: 88.18%
Epoch 61/64


                                                           

Train Loss: 0.0780, Train Acc: 98.60%, Val Acc: 89.53%
Epoch 62/64


                                                           

Train Loss: 0.0800, Train Acc: 98.45%, Val Acc: 88.57%
Epoch 63/64


                                                           

Train Loss: 0.0721, Train Acc: 98.84%, Val Acc: 88.18%
Epoch 64/64


                                                           

Train Loss: 0.0694, Train Acc: 98.55%, Val Acc: 88.37%
🏃 View run densenet169 at: http://jimica.ddns.net:5050/#/experiments/535611829265444121/runs/fc02ef88e7204474aaec0dbb2ad75fa7
🧪 View experiment at: http://jimica.ddns.net:5050/#/experiments/535611829265444121

🔍 Training with frozen backbone: efficientnet_b0
Epoch 1/64


                                                           

Train Loss: 4.6732, Train Acc: 13.79%, Val Acc: 33.33%




Epoch 2/64


                                                           

Train Loss: 3.3895, Train Acc: 46.88%, Val Acc: 51.36%




Epoch 3/64


                                                           

Train Loss: 2.5355, Train Acc: 66.47%, Val Acc: 57.56%




Epoch 4/64


                                                           

Train Loss: 1.9960, Train Acc: 73.00%, Val Acc: 68.60%




Epoch 5/64


                                                           

Train Loss: 1.6433, Train Acc: 79.10%, Val Acc: 69.96%




Epoch 6/64


                                                           

Train Loss: 1.3712, Train Acc: 80.75%, Val Acc: 75.19%




Epoch 7/64


                                                           

Train Loss: 1.1668, Train Acc: 85.44%, Val Acc: 74.81%
Epoch 8/64


                                                           

Train Loss: 1.0070, Train Acc: 87.52%, Val Acc: 75.39%




Epoch 9/64


                                                           

Train Loss: 0.9116, Train Acc: 88.15%, Val Acc: 76.55%




Epoch 10/64


                                                           

Train Loss: 0.8266, Train Acc: 88.63%, Val Acc: 76.16%
Epoch 11/64


                                                           

Train Loss: 0.7333, Train Acc: 89.94%, Val Acc: 78.49%




Epoch 12/64


                                                           

Train Loss: 0.6966, Train Acc: 90.23%, Val Acc: 80.23%




Epoch 13/64


                                                           

Train Loss: 0.6228, Train Acc: 90.90%, Val Acc: 80.81%




Epoch 14/64


                                                           

Train Loss: 0.6073, Train Acc: 90.86%, Val Acc: 83.72%




Epoch 15/64


                                                           

Train Loss: 0.5453, Train Acc: 92.74%, Val Acc: 79.07%
Epoch 16/64


                                                           

Train Loss: 0.4999, Train Acc: 93.28%, Val Acc: 81.40%
Epoch 17/64


                                                           

Train Loss: 0.4716, Train Acc: 93.66%, Val Acc: 81.20%
Epoch 18/64


                                                           

Train Loss: 0.4627, Train Acc: 93.86%, Val Acc: 82.17%
Epoch 19/64


                                                           

Train Loss: 0.4582, Train Acc: 93.23%, Val Acc: 83.91%




Epoch 20/64


                                                           

Train Loss: 0.4234, Train Acc: 93.52%, Val Acc: 81.01%
Epoch 21/64


                                                           

Train Loss: 0.4094, Train Acc: 93.61%, Val Acc: 80.23%
Epoch 22/64


                                                           

Train Loss: 0.3914, Train Acc: 94.05%, Val Acc: 82.56%
Epoch 23/64


                                                           

Train Loss: 0.3532, Train Acc: 95.02%, Val Acc: 82.36%
Epoch 24/64


                                                           

Train Loss: 0.3422, Train Acc: 94.24%, Val Acc: 84.50%




Epoch 25/64


                                                           

Train Loss: 0.3177, Train Acc: 94.97%, Val Acc: 84.88%




Epoch 26/64


                                                           

Train Loss: 0.3150, Train Acc: 95.07%, Val Acc: 82.95%
Epoch 27/64


                                                           

Train Loss: 0.3051, Train Acc: 96.13%, Val Acc: 84.30%
Epoch 28/64


                                                           

Train Loss: 0.3001, Train Acc: 95.16%, Val Acc: 83.72%
Epoch 29/64


                                                           

Train Loss: 0.2919, Train Acc: 95.21%, Val Acc: 87.02%




Epoch 30/64


                                                           

Train Loss: 0.2759, Train Acc: 96.03%, Val Acc: 82.56%
Epoch 31/64


                                                           

Train Loss: 0.2609, Train Acc: 95.79%, Val Acc: 82.56%
Epoch 32/64


                                                           

Train Loss: 0.2367, Train Acc: 96.27%, Val Acc: 84.11%
Epoch 33/64


                                                           

Train Loss: 0.2364, Train Acc: 96.47%, Val Acc: 85.47%
Epoch 34/64


                                                           

Train Loss: 0.2499, Train Acc: 95.45%, Val Acc: 82.36%
Epoch 35/64


                                                           

Train Loss: 0.2382, Train Acc: 95.69%, Val Acc: 83.72%
Epoch 36/64


                                                           

Train Loss: 0.2181, Train Acc: 96.66%, Val Acc: 84.30%
Epoch 37/64


                                                           

Train Loss: 0.2199, Train Acc: 96.57%, Val Acc: 83.91%
Epoch 38/64


                                                           

Train Loss: 0.2247, Train Acc: 96.18%, Val Acc: 82.75%
Epoch 39/64


                                                           

Train Loss: 0.2242, Train Acc: 95.98%, Val Acc: 85.08%
Epoch 40/64


                                                           

Train Loss: 0.2081, Train Acc: 96.23%, Val Acc: 85.47%
Epoch 41/64


                                                           

Train Loss: 0.1938, Train Acc: 97.15%, Val Acc: 85.66%
Epoch 42/64


                                                           

Train Loss: 0.1990, Train Acc: 96.86%, Val Acc: 85.47%
Epoch 43/64


                                                           

Train Loss: 0.1941, Train Acc: 96.76%, Val Acc: 82.95%
Epoch 44/64


Training:  25%|██▍       | 16/65 [00:29<01:19,  1.62s/it]