# Custom Mask R-CNN Training

Training notebook for a custom Mask R-CNN with an EfficientNet backbone and CBAM attention.


In [None]:
# Fetch the project repository and switch into its root directory.
# Presumably this is what makes the local packages imported in later cells
# (datasets, models, utils, training) importable -- confirm against the repo.
!git clone https://github.com/michaelo-ponteski/test.git
%cd test/

In [None]:
# NOTE: this cell only runs successfully once compression of the dataset is
# complete (presumably on the hosting side -- confirm).

import kagglehub

# Handle of the dataset to fetch.
DATASET_HANDLE = "michaeloponteski/isaid-patches"

# Download the latest version; `path` is reused when the dataset root is built.
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

In [None]:
import torch
import matplotlib.pyplot as plt
import importlib

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
cuda_available = torch.cuda.is_available()
device = torch.device("cuda" if cuda_available else "cpu")
print(f"Device: {device}")

# Report the name and total memory of GPU 0 when one is present.
if cuda_available:
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"Memory: {gpu_props.total_memory / 1e9:.1f} GB")

In [None]:
# Reload project modules so that edits made on disk are picked up without
# restarting the kernel (development convenience).
import datasets.isaid_dataset
import models.maskrcnn_model
import utils.overfit_test
import training.trainer
import training.transforms

importlib.reload(datasets.isaid_dataset)
importlib.reload(models.maskrcnn_model)
importlib.reload(utils.overfit_test)
importlib.reload(training.trainer)
importlib.reload(training.transforms)
# `Trainer` is imported from the `training` package below. Reloading a
# submodule does not rebind names another module copied from it, so the
# package is reloaded last; otherwise `training.Trainer` could still point at
# the pre-reload class (presumably it is re-exported from `training.trainer`).
importlib.reload(training)

# Import the public names only after reloading so they refer to fresh objects.
from datasets.isaid_dataset import iSAIDDataset, visualize_sample
from models.maskrcnn_model import get_custom_maskrcnn
from utils.overfit_test import overfit_single_image_test
from training import Trainer

## Load Dataset


In [None]:
# Patch directory inside the downloaded dataset, plus run-wide settings.
root_dir = "/".join((path, "iSAID_patches"))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 16  # presumably the iSAID categories plus background -- confirm

# Build the train and validation splits from the same root directory.
train_dataset, val_dataset = (
    iSAIDDataset(root_dir, split=split_name) for split_name in ("train", "val")
)

print(f"Train: {len(train_dataset)}, Val: {len(val_dataset)}")

In [None]:
# Visualize one training sample; the same index is used by the overfit test.
sample_index = 19
visualize_sample(train_dataset, sample_index)

## Create Model


In [None]:
# Build the custom Mask R-CNN with a pretrained backbone.
model_kwargs = {"num_classes": num_classes, "pretrained_backbone": True}
model = get_custom_maskrcnn(**model_kwargs)

# Print a short summary of the model: parameter counts and size.
info = model.get_model_info()
for label, key in (
    ("Total parameters", "total_parameters"),
    ("Trainable parameters", "trainable_parameters"),
):
    print(f"{label}: {info[key]:,}")
print(f"Model size: {info['model_size_mb']:.1f} MB")

## Overfit Single Image Test

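Sanity check: can the model learn to overfit a single image? If it cannot fit even one sample, the model or data pipeline likely has a bug that should be fixed before a full training run.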


In [None]:
# Run the single-image overfit check on training sample 19 for 100 epochs.
overfit_settings = {"idx": 19, "num_epochs": 100, "device": device}
losses, preds = overfit_single_image_test(
    model, train_dataset, **overfit_settings
)

## Training

Full training with the Trainer class.


In [None]:
# Re-create model (fresh weights) so the overfit test above does not leak
# into the full training run.
# NOTE(review): this `model` is not passed to `Trainer` in the cells that
# follow, and later cells use `trainer.model` instead -- presumably `Trainer`
# builds its own model, which would make this instance unused. Confirm.
model = get_custom_maskrcnn(num_classes=num_classes, pretrained_backbone=True)

In [None]:
# Collect the training configuration in one place, then build the trainer.
trainer_config = {
    "data_root": root_dir,
    "num_classes": num_classes,
    "batch_size": 4,
    "lr": 0.005,
    "device": device,
    "use_amp": True,
}
trainer = Trainer(**trainer_config)

In [None]:
# Train for 20 epochs; "checkpoints" is passed as the save directory.
fit_settings = {"epochs": 20, "save_dir": "checkpoints"}
trainer.fit(**fit_settings)

## Load and Test Best Model


In [None]:
# Load the best checkpoint from the training save directory and put the
# model into evaluation mode.
best_checkpoint = "checkpoints/best.pth"
trainer.load_checkpoint(best_checkpoint)
trainer.model.eval()
print("Loaded best model")

In [None]:
# Quick inference: run the trainer's model on one validation sample and
# compare the number of detections with the number of ground-truth boxes.
from training.transforms import get_transforms

idx = 50
image, target = val_dataset[idx]

# Apply the evaluation transforms only when the dataset did not already
# return a tensor.
if isinstance(image, torch.Tensor):
    image_tensor = image
else:
    image_tensor = get_transforms(train=False)(image)

# Gradients are not needed for inference.
with torch.no_grad():
    batch = [image_tensor.to(device)]
    pred = trainer.model(batch)[0]

print(f"Found {len(pred['boxes'])} detections")
print(f"Ground truth has {len(target['boxes'])} objects")