In [None]:
!pip -q install pycocotools
!pip -q install albumentations
!pip -q install torch torchvision
!pip -q install matplotlib seaborn imutils opencv-contrib-python scikit-learn
!pip -q install pandas mapcalc boto3
!sudo apt-get update && sudo apt-get install ffmpeg libsm6 libxext6  -y

In [None]:
import sys
import os

# Make the project's `src` directory importable from either candidate
# location. `list.append` accepts exactly one argument, so the paths are added
# one at a time; the membership check keeps re-runs of this cell from stacking
# duplicate entries on `sys.path`.
for _src_dir in ("../src", "../../src"):
    if _src_dir not in sys.path:
        sys.path.append(_src_dir)

# Debugging aid: makes CUDA kernel launches synchronous so a device-side error
# is reported at the call that caused it. It is set before `torch` is imported
# and slows training, so consider removing it once the pipeline is stable.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

import json
from functools import partial
from pathlib import Path

import numpy

import torch
import torch.nn as nn
import torchvision
from torch.utils.data import ConcatDataset
from torch.utils.data import Dataset, random_split, DataLoader

from contextlib import redirect_stdout
from pathlib import Path
print(torch.cuda.is_available())

In [None]:
from data_loader import CustomDataset, ResizeTransform, ResizeRotateTransform, ResizeColorTransform, ResizeHorzTransform
from engine import train_one_epoch, evaluate
import utils
from utils import Tee

In [None]:
# MODELS - https://github.com/pytorch/vision/tree/main/torchvision/models/detection
from torchvision.models.detection import (
    fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights,
    fasterrcnn_mobilenet_v3_large_fpn, FasterRCNN_MobileNet_V3_Large_FPN_Weights,
    fasterrcnn_mobilenet_v3_large_320_fpn, FasterRCNN_MobileNet_V3_Large_320_FPN_Weights,
    ssd300_vgg16, SSD300_VGG16_Weights,
    ssdlite320_mobilenet_v3_large, SSDLite320_MobileNet_V3_Large_Weights,
    retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights,
    fcos_resnet50_fpn, FCOS_ResNet50_FPN_Weights
)
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.retinanet import RetinaNetClassificationHead
from torchvision.models.detection.fcos import FCOSHead

# Data Prep

### Data Parameters

In [None]:
# --- Input locations ---------------------------------------------------------
data_dir = '../data/images_v1_v2'
coco_path = '../data/annotations_v1_v2/coco_v1_v2.json'

# Annotation subset used for the augmented datasets; 0.2 selects the file
# tagged "0.2" (the 20% annotation sample).
aug_perc = 0.2
sample_coco_path = "../data/annotations_v1_v2/coco_v1_v2_{}.json".format(aug_perc)

# --- Sizing --------------------------------------------------------------------
image_size = (256, 256)  # other options: (128, 128), (512, 512)
batch_size = 2
val_percent = 0.1        # share of the dataset held out for validation
num_classes = 2          # object + background class

### Dataset

Create the dataset from the images and their annotations.

In [None]:
# Base dataset: images rooted at `data_dir`, annotations read from `coco_path`,
# each sample passed through the resize-only transform.
applied_transforms = ResizeTransform(size=image_size)
dataset = CustomDataset(
    root=data_dir,
    annotation=coco_path,
    transforms=applied_transforms,
)

# Report how many samples the dataset exposes.
print(len(dataset))

Split data into train and validation

In [None]:
# Seeded generator so the train/validation partition is the same on every run.
generator1 = torch.Generator().manual_seed(42)

# Hold out `val_percent` of the samples (rounded down) for validation; the
# remainder goes to training.
val_size = int(len(dataset) * val_percent)
train_size = len(dataset) - val_size
print(train_size, val_size)

# Randomly partition the dataset into the two subsets.
train_ds, val_ds = random_split(
    dataset,
    lengths=[train_size, val_size],
    generator=generator1,
)
print(len(train_ds), len(val_ds))

# Sanity check on the first validation sample (image tensor shape, then the
# full sample as returned by the dataset).
print("shape of an image in the dataset:", val_ds[0][0].shape)
print("sample image & annotation bbox:", val_ds[0])

### Apply Augmentations

In [None]:
# Augmented dataset #1: the sampled annotation subset, loaded with the
# resize + rotate transform.
aug_transforms = ResizeRotateTransform(size=image_size)
aug_dataset = CustomDataset(
    root=data_dir,
    annotation=sample_coco_path,
    transforms=aug_transforms,
)

# Append the augmented samples to the training set.
# NOTE: `train_ds` is rebound here, so re-running this cell appends the
# augmented set a second time; re-run from the split cell instead.
train_ds = ConcatDataset([train_ds, aug_dataset])
len(train_ds)

In [None]:
# Augmented dataset #2: the sampled annotation subset, loaded with the
# resize + color transform.
aug_transforms = ResizeColorTransform(size=image_size)
aug_dataset = CustomDataset(
    root=data_dir,
    annotation=sample_coco_path,
    transforms=aug_transforms,
)

# Append the augmented samples to the training set.
# NOTE: `train_ds` is rebound here, so re-running this cell appends the
# augmented set a second time; re-run from the split cell instead.
train_ds = ConcatDataset([train_ds, aug_dataset])
len(train_ds)

In [None]:
# Tag naming the combination of transforms in the final training set; it is
# embedded in the checkpoint file names written by the training loop.
trns = "resize_horz_clrjtr_rot"

# Augmented dataset #3: the sampled annotation subset, loaded with the
# resize + horizontal transform.
aug_transforms = ResizeHorzTransform(size=image_size)
aug_dataset = CustomDataset(
    root=data_dir,
    annotation=sample_coco_path,
    transforms=aug_transforms,
)

# Append the augmented samples to the training set.
# NOTE: `train_ds` is rebound here, so re-running this cell appends the
# augmented set a second time; re-run from the split cell instead.
train_ds = ConcatDataset([train_ds, aug_dataset])
len(train_ds)

### Data Loader

In [None]:
# Train DataLoader.
# `train_ds` is a ConcatDataset (the base training split followed by each
# augmented set), so it must be shuffled: iterating it in index order would
# feed the optimizer long runs of a single transform in the same order every
# epoch.
data_loader_train = torch.utils.data.DataLoader(
    train_ds,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    # Batching is delegated to the project's `utils.collate_fn` (presumably
    # because samples carry a variable number of boxes -- confirm in utils).
    collate_fn=utils.collate_fn,
)

# Val DataLoader.
# Kept in a fixed order so evaluation runs are directly comparable.
data_loader_val = torch.utils.data.DataLoader(
    val_ds,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
    collate_fn=utils.collate_fn,
)

# Modeling

### Fine Tuning
* * * * *

🧠 1. Training hyperparameters
------------------------------

| Parameter | Meaning | Typical effect |
| --- | --- | --- |
| **`num_epochs`** | Number of full passes over the training dataset. | More epochs → better convergence up to a point; too many → overfitting. |
| **`learning_rate`** | Step size for gradient updates. | Too high → unstable or diverges; too low → slow learning or stuck in local minima. |
| **`momentum`** | Controls how much of the previous gradient is added to the current one (acts like inertia). | Helps smooth noisy gradients and escape shallow minima. Typical: 0.9–0.95. |
| **`weight_decay`** | L2 regularization term. Penalizes large weights. | Helps reduce overfitting. Too high → underfits. |
| **`lr_scheduler`** | Reduces the learning rate over time. | Prevents overshooting late in training and helps refine minima. |
| **`StepLR(step_size=3, gamma=0.1)`** | Every 3 epochs, multiply LR by 0.1. | Makes big drops in LR; better for short, sharp fine-tunes than long runs. |

* * * * *

🧩 2. Model structure parameters
--------------------------------

### (a) **ROI Heads** (Region of Interest heads)

These control how the detector classifies and refines proposals after the RPN.

| Parameter | Description | Effect / trade-off |
| --- | --- | --- |
| **`box_fg_iou_thresh`** | IoU threshold above which a proposal is labeled **foreground** (positive). | Higher → fewer, cleaner positives. Lower → more positives but noisier. |
| **`box_bg_iou_thresh`** | IoU threshold below which a proposal is labeled **background** (negative). | Lower → more negatives; higher → fewer but "harder" negatives. |
| **`positive_fraction`** | Fraction of samples per mini-batch that are positives. | Smaller → model sees more background, good when background is confusing. |
| **`batch_size_per_image`** | Number of sampled RoIs per image for training. | More samples → better statistics but higher memory use. |
| **`score_thresh`** | Minimum confidence score to keep a detection at inference. | Lower → more detections (higher recall, lower precision). Higher → fewer false positives but may miss objects. |
| **`nms_thresh`** *(often added)* | IoU threshold for Non-Maximum Suppression between overlapping detections. | Lower → fewer overlapping boxes (more aggressive suppression). Higher → more duplicates. |
| **`detections_per_img`** | Max number of detections returned per image. | Prevents cluttered outputs. |

* * * * *

### (b) **RPN (Region Proposal Network)**

The RPN generates candidate regions likely to contain objects.

| Parameter | Description | Effect / trade-off |
| --- | --- | --- |
| **`foreground_iou_thresh`** | IoU ≥ this → positive anchor. | Higher → cleaner positives, fewer of them. |
| **`background_iou_thresh`** | IoU ≤ this → negative anchor. | Lower → more negatives, helps discriminate similar background. |
| **`pre_nms_top_n_train` / `pre_nms_top_n_test`** | Number of top-scoring anchors kept **before** NMS during training / testing. | Larger → more proposals (higher recall, slower). |
| **`post_nms_top_n_train` / `post_nms_top_n_test`** | Number of proposals kept **after** NMS. | Limits proposals sent to ROI head; smaller → faster, possibly lower recall. |
| **`nms_thresh`** | IoU threshold for NMS in the RPN. | Lower → fewer overlapping proposals; higher → more redundancy. |

* * * * *

⚙️ 3. Optimizer & scheduler internals
-------------------------------------

| Parameter | Description | Why it matters |
| --- | --- | --- |
| **`optimizer = torch.optim.SGD(...)`** | Stochastic Gradient Descent updates weights each mini-batch. | Standard choice for detection; stable and efficient. |
| **`params = [p for p in model.parameters() if p.requires_grad]`** | Only train unfrozen layers. | Useful if you freeze backbone for fine-tuning. |
| **`gamma` in scheduler** | Multiplicative LR decay factor. | 0.1 → LR drops to 10% at each step. |
| **`step_size`** | How often (in epochs) to decay LR. | Smaller → more frequent drops; larger → smoother. |

* * * * *

🧠 4. Conceptual relationships
------------------------------

-   **IoU thresholds** decide what counts as positive/negative. Setting the gap between foreground and background helps avoid ambiguous samples.

-   **Positive fraction** and **batch_size_per_image** together define the ratio of object vs. background regions the model learns from.

-   **RPN thresholds** affect *proposal quality* and recall; **ROI head thresholds** affect *classification precision*.

-   **Score & NMS thresholds** matter mostly at inference — they balance precision vs. recall.

-   **Learning rate & scheduler** govern how fast and steadily weights adapt.

-   **Momentum** and **weight decay** keep optimization stable and generalizable.

* * * * *


### Modeling Parameters

In [None]:
# Train on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("device:", device)

# Checkpoints and the training log share one directory named after the model.
model_type = Path("../models/fasterrcnn_resnet50_fpn_v2")
log_dir = model_type
log_dir.mkdir(parents=True, exist_ok=True)
log_path = log_dir.joinpath("training_log.txt")

# --- Optimisation hyper-parameters ---------------------------------------------
num_epochs = 25
# Rule of thumb: ~0.0025 per image, i.e. LR ≈ 0.0025 × total_batch.
# Examples: total_batch=8 → 0.02; total_batch=4 → 0.01; total_batch=2 → 0.005.
# Suggested sweep: {0.005, 0.01, 0.02}.
# NOTE(review): the value below is 10x smaller than the lowest sweep
# candidate -- confirm this is intentional.
learning_rate = 0.0005
momentum = 0.9          # typical range: 0.9-0.95
weight_decay = 0.0001   # candidates: {0.0001, 0.0005}

### Base Model: Detection Framework + Backbone

In [None]:
# Detection hyper-parameters are passed to the builder, which forwards extra
# keyword arguments to torchvision's `FasterRCNN` constructor.
#
# They must be supplied here rather than assigned afterwards: the RPN and the
# RoI heads turn the IoU thresholds, sampling sizes and top-n limits into
# matcher / sampler objects and private fields at construction time. An
# assignment such as `model.roi_heads.box_fg_iou_thresh = 0.5` or
# `model.rpn.pre_nms_top_n_train = 1200` only creates a new, unread attribute
# and leaves the defaults in effect.
detector_kwargs = dict(
    # --- RoI heads (training-time sampling): ignore the ambiguous 0.3-0.5 IoU band ---
    box_fg_iou_thresh=0.5,            # default 0.5
    box_bg_iou_thresh=0.3,            # default 0.5
    box_positive_fraction=0.35,       # default 0.25
    box_batch_size_per_image=512,     # default 512
    # --- RPN: cleaner positives and fewer noisy proposals ---
    rpn_fg_iou_thresh=0.8,            # default 0.7
    rpn_bg_iou_thresh=0.2,            # default 0.3
    rpn_pre_nms_top_n_train=1200,     # default 2000
    rpn_post_nms_top_n_train=512,     # default 2000
    rpn_pre_nms_top_n_test=600,       # default 1000
    rpn_post_nms_top_n_test=300,      # default 1000
    rpn_nms_thresh=0.6,               # default 0.7
    # --- Inference thresholds (tune via PR curve) ---
    box_nms_thresh=0.5,               # default 0.5
    box_score_thresh=0.3,             # default 0.05
    box_detections_per_img=10,        # default 100
)

# COCO-pretrained Faster R-CNN (ResNet-50 FPN v2) with the settings above.
model = fasterrcnn_resnet50_fpn_v2(
    weights=FasterRCNN_ResNet50_FPN_V2_Weights.COCO_V1,
    **detector_kwargs,
)

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one sized for `num_classes`
# (object + background)
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# move model to the current device
model.to(device)

# Optimizer: SGD over the parameters that still require gradients.
params = [weight for weight in model.parameters() if weight.requires_grad]
optimizer = torch.optim.SGD(
    params,
    lr=learning_rate,
    momentum=momentum,
    weight_decay=weight_decay,
)

# Scheduler: multiply the learning rate by 0.1 after every 3 scheduler steps.
# NOTE(review): the training loop steps this once per epoch, so over
# num_epochs=25 the LR is cut 8 times (factor 1e-8) and later epochs barely
# update the weights -- consider a larger step_size or fewer epochs.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# Training 

In [None]:
# Run training while sending stdout through `Tee(sys.stdout, f)`, so output is
# meant to reach both the notebook and the log file at `log_path` (presumably
# Tee duplicates each write to both streams -- see utils.Tee).
with open(log_path, "w") as f:
    tee = Tee(sys.stdout, f)
    with redirect_stdout(tee):
        for epoch in range(num_epochs):
            # train one epoch
            train_one_epoch(model, optimizer, data_loader_train, device, epoch, print_freq=100)

            # update LR (once per epoch)
            lr_scheduler.step()

            # evaluate on validation set
            evaluate(model, data_loader_val, device=device)

            # save model weights; one checkpoint per epoch, named after the
            # transform tag `trns` ("20" presumably denotes the 20%
            # augmentation subset -- confirm)
            model_name = f"20_{trns}_epoch{epoch}"
            model_save_path = model_type / f"{model_name}.pth"
            torch.save(model.state_dict(), model_save_path)

            # Report the checkpoint only after the write has succeeded, so the
            # log never claims a file that was not actually saved.
            print(f"\nModel saved as: {model_save_path}\n")

# Printed after the redirect has ended, so this line goes to the notebook only.
print(f"\n✅ Training complete. Logs saved to {log_path}")
