In [1]:
# clone the ADIS repository
!git clone https://github.com/sathishkumar67/SSD_MobileNetV3_ADIS.git
# move the files to the current directory
!mv /kaggle/working/SSD_MobileNetV3_ADIS/* /kaggle/working/
# upgrade pip
!pip install --upgrade pip
# install the required packages
!pip install  -r requirements.txt --upgrade --upgrade-strategy eager

Cloning into 'SSD_MobileNetV3_ADIS'...
remote: Enumerating objects: 331, done.[K
remote: Counting objects: 100% (99/99), done.[K
remote: Compressing objects: 100% (73/73), done.[K
remote: Total 331 (delta 53), reused 66 (delta 23), pack-reused 232 (from 2)[K
Receiving objects: 100% (331/331), 99.72 MiB | 17.48 MiB/s, done.
Resolving deltas: 100% (187/187), done.
Collecting pip
  Downloading pip-25.1.1-py3-none-any.whl.metadata (3.6 kB)
Downloading pip-25.1.1-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m32.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.1.1
Collecting optuna==4.2.1 (from -r requirements.txt (line 2))
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting huggingface-hub==0.30

In [1]:
# necessary imports
import os
import optuna
import joblib
from tqdm import tqdm
import random
import numpy as np
from tqdm import tqdm
from huggingface_hub import hf_hub_download
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, RandomSampler
from torch.optim.lr_scheduler import LinearLR, CosineAnnealingLR, SequentialLR
from ssdlite_mobnetv3_adis.utils import unzip_file, replace_activation_function
from ssdlite_mobnetv3_adis.dataset import collate_fn, SSDLITEOBJDET_DATASET, CachedSSDLITEOBJDET_DATASET
from ssdlite_mobnetv3_adis.model import SSDLITE_MOBILENET_V3_Large
from ssdlite_mobnetv3_adis.epu import EPU
from ssdlite_mobnetv3_adis.trainer import bohb_tunner, train


# set random seed for reproducibility
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed_all(RANDOM_SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
# set constants
REPO_ID = "pt-sk/ADIS" 
DATASET_NAME = "balanced_dataset"
REPO_TYPE = "dataset"
FILENAME_IN_REPO = f"{DATASET_NAME}.zip"
LOCAL_DIR = os.getcwd()
DATASET_PATH = f"{LOCAL_DIR}/{FILENAME_IN_REPO}"
DATASET_FOLDER_PATH = f"{LOCAL_DIR}/{DATASET_NAME}"                       
CLASSES = ['Cat', 'Cattle', 'Chicken', 'Deer', 'Dog', 'Squirrel', 'Eagle', 'Goat', 'Rodents', 'Snake'] 
NUM_CLASSES = len(CLASSES)
NUM_CLASSES_WITH_BG = NUM_CLASSES + 1    # 1 for background class

# download the dataset and unzip it
hf_hub_download(repo_id=REPO_ID, filename=FILENAME_IN_REPO, repo_type=REPO_TYPE, local_dir=LOCAL_DIR)
unzip_file(DATASET_PATH, LOCAL_DIR)

In [None]:
# set pin memory device
PIN_MEMORY_DEVICE = "cuda:0"
NUM_CORES = os.cpu_count()
BATCH_SIZE = 64

# prepare the dataset
train_dataset = CachedSSDLITEOBJDET_DATASET(
    dataset_class=SSDLITEOBJDET_DATASET,
    root_dir=DATASET_FOLDER_PATH,
    split="train",
    num_classes=NUM_CLASSES_WITH_BG)

val_dataset = CachedSSDLITEOBJDET_DATASET(
    dataset_class=SSDLITEOBJDET_DATASET,
    root_dir=DATASET_FOLDER_PATH,
    split="val",
    num_classes=NUM_CLASSES_WITH_BG)

test_dataset = CachedSSDLITEOBJDET_DATASET(
    dataset_class=SSDLITEOBJDET_DATASET,
    root_dir=DATASET_FOLDER_PATH,
    split="test",
    num_classes=NUM_CLASSES_WITH_BG)


# samplers for reproducibility
train_sampler = RandomSampler(train_dataset, generator=torch.Generator().manual_seed(RANDOM_SEED))
val_sampler = RandomSampler(val_dataset, generator=torch.Generator().manual_seed(RANDOM_SEED))
test_sampler = RandomSampler(test_dataset, generator=torch.Generator().manual_seed(RANDOM_SEED))


# prepare the dataloaders
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    sampler=train_sampler,
    num_workers=NUM_CORES,
    collate_fn=collate_fn,
    pin_memory=True,
    persistent_workers=True,
    prefetch_factor=2,
    pin_memory_device=PIN_MEMORY_DEVICE)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    sampler=val_sampler,
    num_workers=NUM_CORES,
    collate_fn=collate_fn,
    pin_memory=True,
    persistent_workers=True,
    prefetch_factor=2,
    pin_memory_device=PIN_MEMORY_DEVICE)

test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    sampler=test_sampler,
    num_workers=NUM_CORES,
    collate_fn=collate_fn,
    pin_memory=True,
    persistent_workers=True,
    prefetch_factor=2,
    pin_memory_device=PIN_MEMORY_DEVICE)

In [8]:
!ls /kaggle/input/savedckpts

ssdlite_mobnetv3_bestparams_ckpt.pth
ssdlite_mobv3_training_custom_params_ckpt.pth


In [13]:
custom_ckpt = torch.load("/kaggle/input/savedckpts/ssdlite_mobv3_training_custom_params_ckpt.pth", map_location="cpu")
best_ckpt = torch.load("/kaggle/input/savedckpts/ssdlite_mobnetv3_bestparams_ckpt.pth", map_location="cpu")

In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
custom_model = SSDLITE_MOBILENET_V3_Large(num_classes_with_bg=NUM_CLASSES_WITH_BG)
custom_model.load_state_dict(custom_ckpt['model_state_dict'], strict=True)
custom_model.to(device)

best_model = SSDLITE_MOBILENET_V3_Large(num_classes_with_bg=NUM_CLASSES_WITH_BG)
best_model.load_state_dict(best_ckpt['model_state_dict'], strict=True)
best_model.to(device)

SSDLITE_MOBILENET_V3_Large(
  (model): SSD(
    (backbone): SSDLiteFeatureExtractorMobileNet(
      (features): Sequential(
        (0): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
            (1): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
            (2): Hardswish()
          )
          (1): InvertedResidual(
            (block): Sequential(
              (0): Conv2dNormActivation(
                (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
                (1): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
                (2): ReLU(inplace=True)
              )
              (1): Conv2dNormActivation(
                (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (1): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_

In [16]:
from typing import Tuple
import pandas as pd
from torchvision.ops import box_iou
from collections import defaultdict
from torchmetrics.detection import MeanAveragePrecision


def evaluate_model(model, dataloader, device, iou_threshold=0.5):
    metric = MeanAveragePrecision(
        box_format='xyxy',
        iou_type='bbox',
        iou_thresholds=[iou_threshold],
        class_metrics=True,
        extended_summary=True
    )
    
    model.eval()
    with torch.no_grad():
        for images, targets in dataloader:
            # Move images to the device
            outputs = model(images.to(device))
            
            # Convert outputs to TorchMetrics format
            preds = []
            for i, output in enumerate(outputs):
                preds.append({
                    'boxes': output['boxes'].cpu(),
                    'scores': output['scores'].cpu(),
                    'labels': output['labels'].cpu()
                })
            
            # Convert targets to TorchMetrics format
            targs = []
            for target in targets:
                targs.append({
                    'boxes': target['boxes'].cpu(),
                    'labels': target['labels'].cpu()
                })
            
            metric.update(preds, targs)
    
    # Compute metrics
    results = metric.compute()
    return results


def calculate_per_class_with_iou(model, dataloader, device, classes,
                                conf_thresh=0.2, iou_thresh=0.5):
    counters  = defaultdict(lambda: {"tp":0,"fp":0,"fn":0,"support":0})
    iou_sums   = defaultdict(float)
    iou_counts = defaultdict(int)

    model.eval()
    with torch.no_grad():
        for images, targets in dataloader:
            outputs = model([img.to(device) for img in images])
            for output, target in zip(outputs, targets):
                # Prepare tensors
                pred_boxes  = output["boxes"].cpu()
                pred_scores = output["scores"].cpu()
                pred_labels = output["labels"].cpu()
                true_boxes  = target["boxes"]
                true_labels = target["labels"]

                # Filter by confidence
                keep = pred_scores > conf_thresh
                pred_boxes  = pred_boxes[keep]
                pred_labels = pred_labels[keep]

                # Count support
                for lbl in true_labels.tolist():
                    counters[lbl]["support"] += 1

                # No predictions → all GT are FN
                if pred_boxes.numel() == 0:
                    for lbl in true_labels.tolist():
                        counters[lbl]["fn"] += 1
                    continue

                # Compute IoU matrix and find matches
                iou_matrix = box_iou(pred_boxes, true_boxes)
                matches    = torch.nonzero(iou_matrix > iou_thresh, as_tuple=False)

                matched_pred, matched_true = set(), set()
                for pi, ti in matches.tolist():
                    matched_pred.add(pi); matched_true.add(ti)
                    p_lbl = int(pred_labels[pi].item())
                    t_lbl = int(true_labels[ti].item())

                    if p_lbl == t_lbl:
                        counters[p_lbl]["tp"] += 1
                        iou_sums[p_lbl]   += iou_matrix[pi, ti].item()
                        iou_counts[p_lbl] += 1
                    else:
                        counters[p_lbl]["fp"] += 1
                        counters[t_lbl]["fn"] += 1

                # Unmatched → FP or FN
                for pi in range(len(pred_boxes)):
                    if pi not in matched_pred:
                        cls = int(pred_labels[pi].item())
                        counters[cls]["fp"] += 1
                for ti in range(len(true_boxes)):
                    if ti not in matched_true:
                        cls = int(true_labels[ti].item())
                        counters[cls]["fn"] += 1

    # Build results
    results = {}
    for cls, cnt in counters.items():
        tp, fp, fn, sup = cnt["tp"], cnt["fp"], cnt["fn"], cnt["support"]
        prec = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        rec  = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        f1   = 2*prec*rec/(prec+rec) if (prec+rec) > 0 else 0.0
        avg_iou = iou_sums[cls]/iou_counts[cls] if iou_counts[cls]>0 else 0.0
        accuracy = tp / (tp + fp + fn) if (tp + fp + fn) > 0 else 0.0
        
        # Store results
        results[cls] = {
            "count": sup,
            "precision": prec,
            "recall":    rec,
            "f1_score":  f1,
            "accuracy":  accuracy,
            "avg_iou":   avg_iou
        }
        
    # Convert to DataFrame for better readability
    df_metrics = pd.DataFrame(results).T
    df_metrics.index = [classes[idx-1] for idx in df_metrics.index]
    df_metrics = df_metrics.sort_index()
    map_score =  evaluate_model(model, dataloader, device)["map_per_class"].cpu().tolist()
    df_metrics["mAP"] = map_score
    return df_metrics

In [21]:
import time
start_time = time.time()
df_metrics = calculate_per_class_with_iou(custom_model, test_loader, device, classes=CLASSES)
df_metrics.loc["Average"] = df_metrics.mean()
print(f"Per-class metrics for test set:\n{df_metrics}")
end_time = time.time()
print(f"Time taken for test set evaluation: {end_time - start_time:.2f} seconds")

Per-class metrics for test set:
          count  precision    recall  f1_score  accuracy   avg_iou       mAP
Cat       242.0   0.813869  0.761092  0.786596  0.648256  0.845862  0.911833
Cattle    345.0   0.585106  0.675676  0.627138  0.456811  0.819685  0.736752
Chicken   562.0   0.663415  0.643533  0.653323  0.485137  0.776552  0.664035
Deer      357.0   0.716292  0.625000  0.667539  0.500982  0.843896  0.726711
Dog       293.0   0.670270  0.686981  0.678523  0.513458  0.771453  0.794626
Eagle     265.0   0.733108  0.738095  0.735593  0.581769  0.871335  0.799614
Goat      340.0   0.464539  0.615023  0.529293  0.359890  0.806676  0.840363
Rodents   280.0   0.578692  0.698830  0.633113  0.463178  0.806252  0.651600
Snake     252.0   0.738983  0.798535  0.767606  0.622857  0.792800  0.765717
Squirrel  245.0   0.588391  0.714744  0.645441  0.476496  0.792750  0.832217
Average   318.1   0.655267  0.695751  0.672416  0.510883  0.812726  0.772347
Time taken for test set evaluation: 49.37 se

In [22]:
import time
start_time = time.time()
df_metrics = calculate_per_class_with_iou(best_model, test_loader, device, classes=CLASSES)
df_metrics.loc["Average"] = df_metrics.mean()
print(f"Per-class metrics for test set:\n{df_metrics}")
end_time = time.time()
print(f"Time taken for test set evaluation: {end_time - start_time:.2f} seconds")

Per-class metrics for test set:
          count  precision    recall  f1_score  accuracy   avg_iou       mAP
Cat       242.0   0.846743  0.783688  0.813996  0.686335  0.846405  0.917846
Cattle    345.0   0.622685  0.667494  0.644311  0.475265  0.814553  0.730184
Chicken   562.0   0.582790  0.700627  0.636299  0.466597  0.757759  0.683786
Deer      357.0   0.765766  0.643939  0.699588  0.537975  0.834126  0.736504
Dog       293.0   0.648438  0.721739  0.683128  0.518750  0.772396  0.781978
Eagle     265.0   0.749164  0.783217  0.765812  0.620499  0.871900  0.803652
Goat      340.0   0.549884  0.564286  0.556992  0.385993  0.801846  0.869332
Rodents   280.0   0.574519  0.751572  0.651226  0.482828  0.794192  0.634833
Snake     252.0   0.691843  0.860902  0.767169  0.622283  0.784304  0.766913
Squirrel  245.0   0.617978  0.750853  0.677966  0.512821  0.795327  0.840071
Average   318.1   0.664981  0.722832  0.689649  0.530935  0.807281  0.776510
Time taken for test set evaluation: 47.46 se