# Aleket Faster R-CNN training notebook

In [None]:
%pip install pillow
%pip install numpy<2.0
%pip install torch torchvision --index-url https://download.pytorch.org/whl/cu124
%pip install matplotlib
%pip install pycocotools
%pip install gdown
%pip install tqdm

from IPython.display import clear_output

clear_output(wait=False)

print("ALL DEPENDENCIES INSTALLED")

In [1]:
# IMPORTS

# Standard Library
import random

# Third-Party Libraries
import numpy as np

# Torch
import torch

# Torchvision
import torchvision
import torchvision.models.detection as tv_detection
from torchvision.models.detection import FasterRCNN
import torchvision.transforms.v2 as v2

# Utils
from aleket_dataset import AleketDataset, download_dataset
from utils import split_dataset, create_dataloaders
from metrics import CocoEvaluator
from training_and_evaluation import train

In [None]:
# Helper Functions

# Device Selection: use the CUDA device when torch reports one, otherwise the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {DEVICE}")

# Model Builder
def get_model(num_classes) -> FasterRCNN:
    """Builds a Faster R-CNN (MobileNetV3-Large FPN) with a new box predictor.

    The detector is created with torchvision's "DEFAULT" weights and its box
    predictor is replaced by a new ``FastRCNNPredictor`` that outputs
    ``num_classes`` classes.

    Args:
        num_classes: The number of classes the new box predictor outputs.
            NOTE(review): torchvision detection heads conventionally count
            the background as one class -- confirm against the dataset labels.

    Returns:
        The Faster R-CNN model, moved to ``DEVICE``.
    """
    detector = tv_detection.fasterrcnn_mobilenet_v3_large_fpn(weights="DEFAULT")

    # The replacement head must accept the same feature width as the
    # classification layer it replaces.
    head_in_features = detector.roi_heads.box_predictor.cls_score.in_features
    new_head = tv_detection.faster_rcnn.FastRCNNPredictor(
        head_in_features, num_classes
    )
    detector.roi_heads.box_predictor = new_head

    detector = detector.to(DEVICE)
    return detector


In [None]:
# Prints the architecture of the model.
# A fresh model is built just for this printout (it is not kept in a variable);
# 3 is the same class count that main() passes to get_model.
print(get_model(3))

In [None]:
# Random Seed for Reproducibility
# The same seed is applied to Python's `random`, NumPy's legacy global RNG and
# torch; it is also used for the dedicated NumPy generator created below.
SEED = 1
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Path variables
# NOTE(review): RESUME and RESULT_PATH are not referenced by any code visible
# in this notebook -- presumably consumed elsewhere; confirm or remove.
RESUME = False
RESULT_PATH = "result"
# download_dataset runs at cell execution time and its return value is used as
# the dataset root. NOTE(review): the meaning of the empty second argument is
# not visible here -- check aleket_dataset.download_dataset.
DATASET_ROOT = download_dataset("dataset_patched", "")

# Dataset split
# Both fractions are passed to split_dataset in main().
# NOTE(review): presumably DATASET_FRACTION is the share of the whole dataset
# that is used and VALIDATION_FRACTION the share of that held out -- confirm
# against utils.split_dataset.
VALIDATION_FRACTION = 0.2
DATASET_FRACTION = 0.1
# Passed to create_dataloaders in main() together with BATCH_SIZE.
DATALOADER_WORKERS = 16

# Training Hyperparameters
IMG_SIZE = 1024
BATCH_SIZE = 16
EPOCHS = 10
LR = 0.001
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0001
# NOTE(review): WARMUP_EPOCHS (50) is larger than EPOCHS (10). Whether that is
# intended depends on how train() interprets the warmup argument -- confirm.
WARMUP_EPOCHS = 50

# Data Augmentation Transforms

# Baseline preprocessing: resize, then convert to float32 with value scaling.
# NOTE(review): with an integer `size`, torchvision's Resize matches the
# shorter image edge rather than forcing a square output -- confirm this is
# the intended behavior for IMG_SIZE.
DEFAULT_TRANSFORMS = v2.Compose(
    [v2.Resize(size=IMG_SIZE), v2.ToDtype(torch.float32, scale=True), ]
)
   
# Training-time augmentation: random flips, perspective distortion and a small
# rotation, followed by the baseline preprocessing above. The rotation uses
# expand=True, so the resize in DEFAULT_TRANSFORMS runs after it.
TRAINING_TRANSFORMS = v2.Compose(
    [
        v2.RandomHorizontalFlip(p=0.5),
        v2.RandomVerticalFlip(p=0.5),
        v2.RandomPerspective(distortion_scale=0.2, p=0.2),
        v2.RandomRotation(degrees=(-10, 10), expand=True),
        DEFAULT_TRANSFORMS,
    ]
)

# Dedicated seeded NumPy generator; main() passes it to split_dataset.
np_generator = np.random.default_rng(SEED)

def main(run_name: str = "train1", num_classes: int = 3) -> None:
    """Main training and evaluation loop.

    Builds the dataset, splits it into training and validation parts, creates
    the dataloaders, the COCO evaluator and the model, then hands everything
    to ``train``.

    Args:
        run_name: Identifier passed as the first argument to ``train``.
            Defaults to "train1", the value that was previously hard-coded.
        num_classes: Number of classes passed to ``get_model``. Defaults to 3,
            the value that was previously hard-coded.
    """
    dataset = AleketDataset(DATASET_ROOT, DEFAULT_TRANSFORMS)
    train_set, val_set = split_dataset(
        dataset, DATASET_FRACTION, VALIDATION_FRACTION, np_generator
    )

    print(f"VALIDATION SET: {list(val_set.keys())}\n")

    # Each split is a mapping whose values are groups of dataset indices;
    # flatten the groups into one index list per split.
    train_indices = [idx for group in train_set.values() for idx in group]
    val_indices = [idx for group in val_set.values() for idx in group]

    train_dataloader, val_dataloader = create_dataloaders(
        dataset, train_indices, val_indices, BATCH_SIZE, DATALOADER_WORKERS
    )
    # The evaluator is built from the dataset wrapped by the validation loader.
    coco_eval = CocoEvaluator(val_dataloader.dataset)

    model = get_model(num_classes)

    train(
        run_name,
        model,
        dataset,
        DEFAULT_TRANSFORMS,
        TRAINING_TRANSFORMS,
        train_dataloader,
        val_dataloader,
        EPOCHS,
        WARMUP_EPOCHS,
        LR,
        MOMENTUM,
        WEIGHT_DECAY,
        coco_eval,
        DEVICE,
    )
    
# Entry-point guard: start training only when this code is executed directly
# (not when it is imported as a module).
if __name__ == "__main__":
    main()