In [1]:
# clone the ADIS repository
!git clone https://github.com/sathishkumar67/SSD_MobileNetV3_ADIS.git
# move the files to the current directory
!mv /kaggle/working/SSD_MobileNetV3_ADIS/* /kaggle/working/
# upgrade pip
!pip install --upgrade pip
# install the required packages
!pip install  -r requirements.txt --upgrade --upgrade-strategy eager
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126

Cloning into 'SSD_MobileNetV3_ADIS'...
remote: Enumerating objects: 187, done.[K
remote: Counting objects: 100% (187/187), done.[K
remote: Compressing objects: 100% (137/137), done.[K
remote: Total 187 (delta 113), reused 116 (delta 50), pack-reused 0 (from 0)[K
Receiving objects: 100% (187/187), 24.96 MiB | 42.60 MiB/s, done.
Resolving deltas: 100% (113/113), done.
Collecting pip
  Downloading pip-25.0.1-py3-none-any.whl.metadata (3.7 kB)
Downloading pip-25.0.1-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.0.1
Collecting ultralytics (from -r requirements.txt (line 1))
  Downloading ultralytics-8.3.110-py3-none-any.whl.metadata (37 kB)
Collecting albumentatio

In [2]:
# necessary imports
import os
import optuna
import joblib
from typing import Tuple
from tqdm import tqdm
from ssd_mobnetv3_adis import unzip_file
from huggingface_hub import hf_hub_download
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, RandomSampler
from torchmetrics.detection import MeanAveragePrecision
from ssd_mobnetv3_adis import collate_fn, SSDLITEOBJDET_DATASET, CachedSSDLITEOBJDET_DATASET, SSD_MOBILENET_V3_Large

In [3]:
# set constants
# Location of the dataset archive on the Hugging Face Hub
REPO_ID = "pt-sk/ADIS"
DATASET_NAME = "balanced_dataset"
REPO_TYPE = "dataset"
FILENAME_IN_REPO = f"{DATASET_NAME}.zip"

# Local paths: the archive is downloaded into, and extracted inside, the working directory
LOCAL_DIR = os.getcwd()
DATASET_PATH = f"{LOCAL_DIR}/{FILENAME_IN_REPO}"
DATASET_FOLDER_PATH = f"{LOCAL_DIR}/{DATASET_NAME}"

# Object classes handled by the detector
CLASSES = ['Cat', 'Cattle', 'Chicken', 'Deer', 'Dog', 'Squirrel', 'Eagle', 'Goat', 'Rodents', 'Snake']
NUM_CLASSES = len(CLASSES)
MODEL_NUM_CLASSES = NUM_CLASSES + 1    # 1 for background class

# download the dataset and unzip it
# Only do so when the extracted folder is missing: the archive is several GB
# and the zip file is removed after extraction, so an unguarded re-run of this
# cell would download everything again.
if not os.path.isdir(DATASET_FOLDER_PATH):
    hf_hub_download(repo_id=REPO_ID, filename=FILENAME_IN_REPO, repo_type=REPO_TYPE, local_dir=LOCAL_DIR)
    unzip_file(DATASET_PATH, LOCAL_DIR)
else:
    print(f"Dataset folder already exists at {DATASET_FOLDER_PATH}; skipping download.")

# number of cores
num_cores = os.cpu_count()
print(f"Number of CPU cores: {num_cores}")

balanced_dataset.zip:   0%|          | 0.00/7.04G [00:00<?, ?B/s]

Unzipping: 100%|██████████| 7.07G/7.07G [00:42<00:00, 166MB/s]


Unzipped /kaggle/working/balanced_dataset.zip to /kaggle/working
Removed zip file: /kaggle/working/balanced_dataset.zip
Number of CPU cores: 4


In [4]:
# set pin memory device
PIN_MEMORY_DEVICE = "cuda:0"

# settings shared by every split
BATCH_SIZE = 128
SAMPLER_SEED = 42

# Pin host memory only when CUDA is actually present, so this cell also runs
# on a CPU-only machine (the model cell already falls back to the CPU).
USE_PINNED_MEMORY = torch.cuda.is_available()

# os.cpu_count() can return None; treat that as "no worker processes".
NUM_WORKERS = num_cores or 0


def _load_split(split: str):
    """Build the cached detection dataset for one split ("train", "val" or "test")."""
    return CachedSSDLITEOBJDET_DATASET(
        dataset_class=SSDLITEOBJDET_DATASET,
        root_dir=DATASET_FOLDER_PATH,
        split=split,
        num_classes=MODEL_NUM_CLASSES)


def _seeded_sampler(dataset) -> RandomSampler:
    """Return a RandomSampler driven by its own generator seeded with SAMPLER_SEED."""
    return RandomSampler(dataset, generator=torch.Generator().manual_seed(SAMPLER_SEED))


def _build_loader(dataset, sampler) -> DataLoader:
    """Create a DataLoader with the settings shared by the train/val/test splits."""
    use_workers = NUM_WORKERS > 0
    return DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        sampler=sampler,
        num_workers=NUM_WORKERS,
        collate_fn=collate_fn,
        pin_memory=USE_PINNED_MEMORY,
        # persistent_workers / prefetch_factor are only valid with worker processes
        persistent_workers=use_workers,
        prefetch_factor=2 if use_workers else None,
        # an empty string is the DataLoader default, i.e. no explicit pin device
        pin_memory_device=PIN_MEMORY_DEVICE if USE_PINNED_MEMORY else "")


# prepare the dataset
train_dataset = _load_split("train")
val_dataset = _load_split("val")
test_dataset = _load_split("test")

# samplers for reproducibility
train_sampler = _seeded_sampler(train_dataset)
val_sampler = _seeded_sampler(val_dataset)
test_sampler = _seeded_sampler(test_dataset)

# prepare the dataloaders
train_loader = _build_loader(train_dataset, train_sampler)
val_loader = _build_loader(val_dataset, val_sampler)
test_loader = _build_loader(test_dataset, test_sampler)

Preprocessing dataset and caching to /kaggle/working/balanced_dataset/train_cache...


100%|██████████| 18139/18139 [03:40<00:00, 82.12it/s] 


Preprocessing dataset and caching to /kaggle/working/balanced_dataset/val_cache...


100%|██████████| 2390/2390 [00:26<00:00, 88.72it/s] 


Preprocessing dataset and caching to /kaggle/working/balanced_dataset/test_cache...


100%|██████████| 2390/2390 [00:28<00:00, 83.07it/s] 


In [8]:
# Build the detector and restore the trained weights from the checkpoint file.
model = SSD_MOBILENET_V3_Large(num_classes_with_bg=MODEL_NUM_CLASSES)

# Deserialize onto the CPU first; weights_only=True restricts unpickling to
# tensors and plain containers instead of arbitrary Python objects.
checkpoint = torch.load("best_model.pth", map_location="cpu", weights_only=True)

# strict=False tolerates key mismatches, so report them explicitly rather than
# silently evaluating a model whose weights were only partially restored.
load_result = model.load_state_dict(checkpoint["model_state_dict"], strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print(f"Warning: checkpoint/model key mismatch | missing: {load_result.missing_keys} | unexpected: {load_result.unexpected_keys}")

# Use the GPU when one is available, otherwise stay on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

SSD_MOBILENET_V3_Large(
  (model): SSD(
    (backbone): SSDLiteFeatureExtractorMobileNet(
      (features): Sequential(
        (0): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
            (1): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
            (2): Hardswish()
          )
          (1): InvertedResidual(
            (block): Sequential(
              (0): Conv2dNormActivation(
                (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
                (1): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_running_stats=True)
                (2): ReLU(inplace=True)
              )
              (1): Conv2dNormActivation(
                (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (1): BatchNorm2d(16, eps=0.001, momentum=0.03, affine=True, track_runn

In [10]:
# Mean-average-precision metric over bounding boxes, with per-class metrics
# and the extended summary enabled.
metric_options = {
    "iou_type": "bbox",
    "class_metrics": True,
    "extended_summary": True,
}
metric = MeanAveragePrecision(**metric_options)

In [11]:
# Evaluate the model on the validation split and compute the detection metric.

# Start from a clean metric state so that re-running this cell does not stack
# a second pass of predictions on top of the ones already accumulated.
metric.reset()

# Inference mode for layers such as batch norm; gradients are not needed here.
model.eval()
progress_bar = tqdm(val_loader, desc="Evaluating", unit="batch", total=len(val_loader))
with torch.no_grad():
    for images, targets in progress_bar:
        # Move the batch to the same device as the model
        images = images.to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        # Without targets the model is called for predictions only
        predictions = model(images)
        metric.update(predictions, targets)
    # Aggregate everything accumulated over the validation batches
    map_results = metric.compute()

Evaluating: 100%|██████████| 19/19 [00:18<00:00,  1.03batch/s]


In [None]:
# def train(warmup_epochs: int, num_epochs: int, patience: int, initial_lr: float, betas: Tuple[float, float], weight_decay: float, dataloaders: dict[str, torch.utils.data.DataLoader]) -> None:
#     # early stopping parameters
#     best_map = float('-inf')
#     patience_counter = 0
    
#     # get the dataloaders
#     train_loader, val_loader = dataloaders['train'], dataloaders['val']
    
#     # Set device
#     device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
#     print(f"Using device: {device}")
    
#     # Load the model
#     model = SSD_MOBILENET_V3_Large(num_classes_with_bg=MODEL_NUM_CLASSES)
#     model.to(device)
    
#     # Optimizer
#     optimizer = model.configure_optimizers(lr=initial_lr, betas=betas, weight_decay=weight_decay, eps=1e-08, fused=True)
    
#     for epoch in range(num_epochs):
#         # Warmup phase: linearly increase the learning rate over the first `warmup_epochs` epochs
#         if epoch < warmup_epochs:
#             lr = initial_lr * (epoch + 1) / warmup_epochs
#             for param_group in optimizer.param_groups:
#                 param_group['lr'] = lr
#         # Training phase
#         model.train()
#         total_loss = 0.0
#         num_batches = len(train_loader)
        
#         # Progress bar
#         train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")
        
#         for _, (images, targets) in enumerate(train_bar):
#             # Move data to device
#             images = images.to(device)
#             targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
#             # Forward pass
#             loss_dict = model(images, targets)
#             losses = sum(loss for loss in loss_dict.values())
            
#             # Backward pass and optimization
#             optimizer.zero_grad()
#             losses.backward()
#             optimizer.step()
            
#             batch_loss = losses.detach().item()
#             total_loss += batch_loss
            
#             # Update progress bar
#             train_bar.set_postfix(loss=batch_loss)
        
#         avg_loss = total_loss / num_batches
#         print(f"Epoch {epoch+1}/{num_epochs} | Learning Rate: {lr:.6f} | Avg Train Loss: {avg_loss:.4f}")
        
#         # Validation phase
#         model.eval()
#         metric = MeanAveragePrecision()
#         eval_bar = tqdm(val_loader, desc=f"Validating...", unit="batch")
#         with torch.no_grad():
#             for images, targets in eval_bar:
#                 # Move data to device
#                 images = images.to(device)
#                 targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
#                 # Forward pass    
#                 predictions = model(images)
#                 metric.update(predictions, targets)
        
#         map_result = metric.compute()
#         print(f"Epoch {epoch+1} | Val mAP: {map_result['map']:.4f}")
        
#         # Early stopping logic
#         if map_result['map'] > best_map:
#             best_map = map_result['map']
#             best_model_state_dict = {k: v.cpu() for k, v in model.state_dict().items()} 
#             best_optimizer_state_dict = optimizer.state_dict()
#             patience_counter = 0
#         else:
#             patience_counter += 1
#             if patience_counter >= patience:
#                 print("Early stopping triggered at epoch", epoch + 1)
#                 # save the best model
#                 torch.save({"model_state_dict" : best_model_state_dict,
#                     "optimizer_state_dict" : best_optimizer_state_dict,
#                 }, f"{LOCAL_DIR}/best_model.pth")
#                 print(f"Best model saved with mAP: {best_map:.4f}")
#                 break

# # train the model with the suggested hyperparameters
# train(warmup_epochs=4, num_epochs=50, patience=5, initial_lr=0.0001, betas=(0.9, 0.999), weight_decay=0.001, dataloaders={'train': train_loader, 'val': val_loader})

In [None]:
# # bohb tuning parameters
# def train(warmup_epochs: int, num_epochs: int, patience: int, initial_lr: float, betas: Tuple[float, float], weight_decay: float, dataloaders: dict[str, torch.utils.data.DataLoader], callback):
#     # early stopping parameters
#     best_map = float('-inf')
#     patience_counter = 0
    
#     # get the dataloaders
#     train_loader, val_loader = dataloaders['train'], dataloaders['val']
    
#     # Set device
#     device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
#     print(f"Using device: {device}")
    
#     # Load the model
#     model = SSD_MOBILENET_V3_Large(num_classes_with_bg=MODEL_NUM_CLASSES)
#     model.to(device)
    
#     # Optimizer
#     optimizer = model.configure_optimizers(lr=initial_lr, betas=betas, weight_decay=weight_decay, eps=1e-08, fused=True)
    
#     for epoch in range(num_epochs):
#         # Warmup phase: linearly increase the learning rate over the first `warmup_epochs` epochs
#         if epoch < warmup_epochs:
#             lr = initial_lr * (epoch + 1) / warmup_epochs
#             for param_group in optimizer.param_groups:
#                 param_group['lr'] = lr
#         # Training phase
#         model.train()
#         total_loss = 0.0
#         num_batches = len(train_loader)
        
#         # Progress bar
#         train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch")
        
#         for _, (images, targets) in enumerate(train_bar):
#             # Move data to device
#             images = images.to(device)
#             targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
#             # Forward pass
#             loss_dict = model(images, targets)
#             losses = sum(loss for loss in loss_dict.values())
            
#             # Backward pass and optimization
#             optimizer.zero_grad()
#             losses.backward()
#             optimizer.step()
            
#             batch_loss = losses.detach().item()
#             total_loss += batch_loss
            
#             # Update progress bar
#             train_bar.set_postfix(loss=batch_loss)
        
#         avg_loss = total_loss / num_batches
#         print(f"Epoch {epoch+1}/{num_epochs} | Learning Rate: {lr:.6f} | Avg Train Loss: {avg_loss:.4f}")
        
#         # Validation phase
#         model.eval()
#         metric = MeanAveragePrecision()
#         eval_bar = tqdm(val_loader, desc=f"Validating...", unit="batch")
#         with torch.no_grad():
#             for images, targets in eval_bar:
#                 # Move data to device
#                 images = images.to(device)
#                 targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
#                 # Forward pass    
#                 predictions = model(images)
#                 metric.update(predictions, targets)
        
#         map_result = metric.compute()
#         print(f"Epoch {epoch+1} | Val mAP: {map_result['map']:.4f}")
        
#         # Report the validation mAP
#         callback(map_result['map'], epoch+1)
        
#         # Early stopping logic
#         if map_result['map'] > best_map:
#             best_map = map_result['map']
#             patience_counter = 0
#         else:
#             patience_counter += 1
#             if patience_counter >= patience:
#                 print("Early stopping triggered at epoch", epoch + 1)
#                 break
            
#     return best_map

In [None]:
# # constants
# WARMUP_EPOCHS = 3
# NUM_EPOCHS = 15
# PATIENCE = 3

# # define the dataloaders
# dataloaders = {"train":train_loader, "val":val_loader}

# # define the objective function
# def objective(trial):
#     # define a callback to report intermediate results
#     def on_train_epoch_end(score, epoch):
#         trial.report(score, step=epoch)  
#         if trial.should_prune():
#             raise optuna.TrialPruned()
        
#     # suggest hyperparameters for the model
#     lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
#     weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-2, log=True)
#     momentum = trial.suggest_float("momentum", 0.7, 0.99)
    
#     # train the model
#     best_map = train(warmup_epochs=WARMUP_EPOCHS, num_epochs=NUM_EPOCHS, patience=PATIENCE, initial_lr=lr, betas=(momentum, 0.999), weight_decay=weight_decay,
#         dataloaders=dataloaders, callback=on_train_epoch_end)
    
#     # return the best mAP
#     return best_map

In [None]:
# # define the number of trials
# NUM_TRIALS = 5

# # load the study
# study = optuna.create_study(direction='maximize', 
#                             sampler=optuna.samplers.TPESampler(), 
#                             pruner=optuna.pruners.HyperbandPruner(),
#                             study_name="ssd_mobnetv3_adis_tuning",
#                             load_if_exists=True)

# # Run the optimization for NUM_TRIALS trials
# study.optimize(objective, n_trials=NUM_TRIALS)

In [None]:
# joblib.dump(study, f"{LOCAL_DIR}/optuna_study.pkl")