In [1]:
# clone the ADIS repository
!git clone https://github.com/sathishkumar67/SSD_MobileNetV3_ADIS.git
# move the files to the current directory
!mv /kaggle/working/SSD_MobileNetV3_ADIS/* /kaggle/working/
# upgrade pip
!pip install --upgrade pip
# install the required packages
!pip install  -r requirements.txt --upgrade --upgrade-strategy eager

Cloning into 'SSD_MobileNetV3_ADIS'...
remote: Enumerating objects: 365, done.[K
remote: Counting objects: 100% (133/133), done.[K
remote: Compressing objects: 100% (98/98), done.[K
remote: Total 365 (delta 73), reused 88 (delta 32), pack-reused 232 (from 2)[K
Receiving objects: 100% (365/365), 99.76 MiB | 44.01 MiB/s, done.
Resolving deltas: 100% (207/207), done.
Collecting pip
  Downloading pip-25.1.1-py3-none-any.whl.metadata (3.6 kB)
Downloading pip-25.1.1-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m33.3 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.1.1
Collecting optuna==4.2.1 (from -r requirements.txt (line 2))
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting huggingface-hub==0.30.2 (fro

In [2]:
# necessary imports
import os
import optuna
import joblib
from tqdm import tqdm
import random
import numpy as np
from tqdm import tqdm
from huggingface_hub import hf_hub_download
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, RandomSampler
from torch.optim.lr_scheduler import LinearLR, CosineAnnealingLR, SequentialLR
from ssdlite_mobnetv3_adis.utils import unzip_file, replace_activation_function
from ssdlite_mobnetv3_adis.dataset import collate_fn, SSDLITEOBJDET_DATASET, CachedSSDLITEOBJDET_DATASET
from ssdlite_mobnetv3_adis.model import SSDLITE_MOBILENET_V3_Large
from ssdlite_mobnetv3_adis.epu import EPU
from ssdlite_mobnetv3_adis.trainer import bohb_tunner, train


# set random seed for reproducibility
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed_all(RANDOM_SEED)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# set constants
REPO_ID = "pt-sk/ADIS" 
DATASET_NAME = "balanced_dataset"
REPO_TYPE = "dataset"
FILENAME_IN_REPO = f"{DATASET_NAME}.zip"
LOCAL_DIR = os.getcwd()
DATASET_PATH = f"{LOCAL_DIR}/{FILENAME_IN_REPO}"
DATASET_FOLDER_PATH = f"{LOCAL_DIR}/{DATASET_NAME}"                       
CLASSES = ['Cat', 'Cattle', 'Chicken', 'Deer', 'Dog', 'Squirrel', 'Eagle', 'Goat', 'Rodents', 'Snake'] 
NUM_CLASSES = len(CLASSES)
NUM_CLASSES_WITH_BG = NUM_CLASSES + 1    # 1 for background class

# download the dataset and unzip it
hf_hub_download(repo_id=REPO_ID, filename=FILENAME_IN_REPO, repo_type=REPO_TYPE, local_dir=LOCAL_DIR)
unzip_file(DATASET_PATH, LOCAL_DIR)

balanced_dataset.zip:   0%|          | 0.00/7.04G [00:00<?, ?B/s]

Unzipping: 100%|██████████| 7.07G/7.07G [00:42<00:00, 168MB/s]


Unzipped /kaggle/working/balanced_dataset.zip to /kaggle/working
Removed zip file: /kaggle/working/balanced_dataset.zip


In [4]:
# set pin memory device
PIN_MEMORY_DEVICE = "cuda:0"
NUM_CORES = os.cpu_count()
BATCH_SIZE = 64

# prepare the dataset
train_dataset = CachedSSDLITEOBJDET_DATASET(
    dataset_class=SSDLITEOBJDET_DATASET,
    root_dir=DATASET_FOLDER_PATH,
    split="train",
    num_classes=NUM_CLASSES_WITH_BG)

val_dataset = CachedSSDLITEOBJDET_DATASET(
    dataset_class=SSDLITEOBJDET_DATASET,
    root_dir=DATASET_FOLDER_PATH,
    split="val",
    num_classes=NUM_CLASSES_WITH_BG)

test_dataset = CachedSSDLITEOBJDET_DATASET(
    dataset_class=SSDLITEOBJDET_DATASET,
    root_dir=DATASET_FOLDER_PATH,
    split="test",
    num_classes=NUM_CLASSES_WITH_BG)


# samplers for reproducibility
train_sampler = RandomSampler(train_dataset, generator=torch.Generator().manual_seed(RANDOM_SEED))
val_sampler = RandomSampler(val_dataset, generator=torch.Generator().manual_seed(RANDOM_SEED))
test_sampler = RandomSampler(test_dataset, generator=torch.Generator().manual_seed(RANDOM_SEED))


# prepare the dataloaders
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    sampler=train_sampler,
    num_workers=NUM_CORES,
    collate_fn=collate_fn,
    pin_memory=True,
    persistent_workers=True,
    prefetch_factor=2,
    pin_memory_device=PIN_MEMORY_DEVICE)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    sampler=val_sampler,
    num_workers=NUM_CORES,
    collate_fn=collate_fn,
    pin_memory=True,
    persistent_workers=True,
    prefetch_factor=2,
    pin_memory_device=PIN_MEMORY_DEVICE)

test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    sampler=test_sampler,
    num_workers=NUM_CORES,
    collate_fn=collate_fn,
    pin_memory=True,
    persistent_workers=True,
    prefetch_factor=2,
    pin_memory_device=PIN_MEMORY_DEVICE)

Preprocessing dataset and caching to /kaggle/working/balanced_dataset/train_cache...


100%|██████████| 18139/18139 [03:44<00:00, 80.91it/s] 


Preprocessing dataset and caching to /kaggle/working/balanced_dataset/val_cache...


100%|██████████| 2390/2390 [00:27<00:00, 85.58it/s] 


Preprocessing dataset and caching to /kaggle/working/balanced_dataset/test_cache...


100%|██████████| 2390/2390 [00:31<00:00, 74.82it/s] 


In [5]:
!ls /kaggle/input/savedckpts

ls: cannot access '/kaggle/input/savedckpts': No such file or directory


In [6]:
custom_ckpt = torch.load("/kaggle/input/savedckpts/ssdlite_mobv3_custom_params_ckpt.pth", map_location="cpu")
best_ckpt = torch.load("/kaggle/input/savedckpts/ssdlite_mobnetv3_bestparams_ckpt.pth", map_location="cpu")

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
custom_model = SSDLITE_MOBILENET_V3_Large(num_classes_with_bg=NUM_CLASSES_WITH_BG)
custom_model.load_state_dict(custom_ckpt['model_state_dict'], strict=True)
custom_model.to(device)

best_model = SSDLITE_MOBILENET_V3_Large(num_classes_with_bg=NUM_CLASSES_WITH_BG)
best_model.load_state_dict(best_ckpt['model_state_dict'], strict=True)
best_model.to(device)

Downloading: "https://download.pytorch.org/models/ssdlite320_mobilenet_v3_large_coco-a79551df.pth" to /root/.cache/torch/hub/checkpoints/ssdlite320_mobilenet_v3_large_coco-a79551df.pth
100%|██████████| 13.4M/13.4M [00:00<00:00, 86.5MB/s]


SSDLITE_MOBILENET_V3_Large(
  (model): SSD(
    (backbone): SSDLiteFeatureExtractorMobileNet(
      (features): Sequential(
        (0): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
            (1): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
            (2): Hardswish()
          )
          (1): InvertedResidual(
            (block): Sequential(
              (0): Conv2dNormActivation(
                (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
                (1): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
                (2): ReLU(inplace=True)
              )
              (1): Conv2dNormActivation(
                (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (1): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_

In [15]:
from typing import Tuple
import pandas as pd
from torchvision.ops import box_iou
from collections import defaultdict
from torchmetrics.detection import MeanAveragePrecision


def evaluate_model(model, dataloader, device, iou_threshold=None):
    metric = MeanAveragePrecision(
        box_format='xyxy',
        iou_type='bbox',
        iou_thresholds=[0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95],
        class_metrics=True,
        extended_summary=True
    )
    
    model.eval()
    with torch.no_grad():
        for images, targets in dataloader:
            # Move images to the device
            outputs = model(images.to(device))
            
            # Convert outputs to TorchMetrics format
            preds = []
            for i, output in enumerate(outputs):
                preds.append({
                    'boxes': output['boxes'].cpu(),
                    'scores': output['scores'].cpu(),
                    'labels': output['labels'].cpu()
                })
            
            # Convert targets to TorchMetrics format
            targs = []
            for target in targets:
                targs.append({
                    'boxes': target['boxes'].cpu(),
                    'labels': target['labels'].cpu()
                })
            
            metric.update(preds, targs)
    
    # Compute metrics
    results = metric.compute()
    return results


def calculate_per_class_with_iou(model, dataloader, device, classes,
                                conf_thresh=0.2, iou_thresh=0.5):
    counters  = defaultdict(lambda: {"tp":0,"fp":0,"fn":0,"support":0})
    iou_sums   = defaultdict(float)
    iou_counts = defaultdict(int)

    model.eval()
    with torch.no_grad():
        for images, targets in dataloader:
            outputs = model([img.to(device) for img in images])
            for output, target in zip(outputs, targets):
                # Prepare tensors
                pred_boxes  = output["boxes"].cpu()
                pred_scores = output["scores"].cpu()
                pred_labels = output["labels"].cpu()
                true_boxes  = target["boxes"]
                true_labels = target["labels"]

                # Filter by confidence
                keep = pred_scores > conf_thresh
                pred_boxes  = pred_boxes[keep]
                pred_labels = pred_labels[keep]

                # Count support
                for lbl in true_labels.tolist():
                    counters[lbl]["support"] += 1

                # No predictions → all GT are FN
                if pred_boxes.numel() == 0:
                    for lbl in true_labels.tolist():
                        counters[lbl]["fn"] += 1
                    continue

                # Compute IoU matrix and find matches
                iou_matrix = box_iou(pred_boxes, true_boxes)
                matches    = torch.nonzero(iou_matrix > iou_thresh, as_tuple=False)

                matched_pred, matched_true = set(), set()
                for pi, ti in matches.tolist():
                    matched_pred.add(pi); matched_true.add(ti)
                    p_lbl = int(pred_labels[pi].item())
                    t_lbl = int(true_labels[ti].item())

                    if p_lbl == t_lbl:
                        counters[p_lbl]["tp"] += 1
                        iou_sums[p_lbl]   += iou_matrix[pi, ti].item()
                        iou_counts[p_lbl] += 1
                    else:
                        counters[p_lbl]["fp"] += 1
                        counters[t_lbl]["fn"] += 1

                # Unmatched → FP or FN
                for pi in range(len(pred_boxes)):
                    if pi not in matched_pred:
                        cls = int(pred_labels[pi].item())
                        counters[cls]["fp"] += 1
                for ti in range(len(true_boxes)):
                    if ti not in matched_true:
                        cls = int(true_labels[ti].item())
                        counters[cls]["fn"] += 1

    # Build results
    results = {}
    for cls, cnt in counters.items():
        tp, fp, fn, sup = cnt["tp"], cnt["fp"], cnt["fn"], cnt["support"]
        prec = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        rec  = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        f1   = 2*prec*rec/(prec+rec) if (prec+rec) > 0 else 0.0
        avg_iou = iou_sums[cls]/iou_counts[cls] if iou_counts[cls]>0 else 0.0
        accuracy = tp / (tp + fp + fn) if (tp + fp + fn) > 0 else 0.0
        
        # Store results
        results[cls] = {
            "count": sup,
            "precision": prec,
            "recall":    rec,
            "f1_score":  f1,
            "accuracy":  accuracy,
            "avg_iou":   avg_iou
        }
        
    # Convert to DataFrame for better readability
    df_metrics = pd.DataFrame(results).T
    df_metrics.index = [classes[idx-1] for idx in df_metrics.index]
    df_metrics = df_metrics.sort_index()
    df_metrics["mAP@50"] = evaluate_model(model, dataloader, device)["map_50"].cpu().tolist()
    df_metrics["mAP@[50:95]"] = evaluate_model(model, dataloader, device)["map"].cpu().tolist()
    return df_metrics

In [19]:
# import time
# start_time = time.time()
# df_metrics = calculate_per_class_with_iou(custom_model, train_loader, device, classes=CLASSES)
# df_metrics.loc["Average"] = df_metrics.mean()
# print(f"Per-class metrics for test set:\n{df_metrics}")
# end_time = time.time()
# print(f"Time taken for test set evaluation: {end_time - start_time:.2f} seconds")

In [None]:
custom - test(0.4797) val(4789) train(0.642)
best - test(0.4778) val (0.4795) train(0.639)

In [22]:
import time
start_time = time.time()
df_metrics = calculate_per_class_with_iou(best_model, train_loader, device, classes=CLASSES)
df_metrics.loc["Average"] = df_metrics.mean()
print(f"Per-class metrics for test set:\n{df_metrics}")
end_time = time.time()
print(f"Time taken for test set evaluation: {end_time - start_time:.2f} seconds") 

Per-class metrics for test set:
           count  precision    recall  f1_score  accuracy   avg_iou    mAP@50  \
Cat       1910.0   0.913172  0.897237  0.905134  0.826708  0.872952  0.910199   
Cattle    2405.0   0.715599  0.836236  0.771228  0.627642  0.842956  0.910199   
Chicken   4041.0   0.605005  0.788279  0.684588  0.520436  0.799121  0.910199   
Deer      2558.0   0.866693  0.796249  0.829979  0.709371  0.858143  0.910199   
Dog       2098.0   0.752825  0.891623  0.816367  0.689713  0.815524  0.910199   
Eagle     1981.0   0.855909  0.897521  0.876221  0.779710  0.882491  0.910199   
Goat      2553.0   0.678016  0.742527  0.708806  0.548954  0.838798  0.910199   
Rodents   2094.0   0.714385  0.877358  0.787529  0.649524  0.840460  0.910199   
Snake     1971.0   0.820814  0.955078  0.882871  0.790303  0.835085  0.910199   
Squirrel  1861.0   0.749289  0.876010  0.807709  0.677443  0.850042  0.910199   
Average   2347.2   0.767171  0.855812  0.807043  0.681980  0.843557  0.910199