In this notebook, we train a custom **RF-DETR** (Roboflow's real-time DEtection TRansformer) model for object detection using a labeled dataset in COCO format. RF-DETR is a transformer-based detector developed by Roboflow that is designed to fine-tune well on custom datasets, making it well-suited for complex scenes with clutter or small objects. To ensure efficient training and avoid overfitting, we incorporate key training callbacks—such as early stopping, model checkpointing, and learning rate scheduling. By the end of this notebook, you’ll have a fully trained RF-DETR model ready for evaluation and deployment.

In [None]:
!pip install -q rfdetr supervision roboflow

In [None]:
# Connect to Google Drive for data access.
import os

from google.colab import drive

drive.mount('/content/gdrive')

# Expose the Drive root under a short, space-free alias (/mydrive).
# os.symlink raises OSError on failure, so the except branch below can actually
# run; a `!ln -s ...` shell escape never raises a Python exception.
drive_root = '/content/gdrive/My Drive/'
alias_path = '/mydrive'

try:
    # Skip creation when the alias already exists (e.g. the cell was re-run),
    # otherwise a second link would be attempted on top of the first.
    if not os.path.islink(alias_path):
        os.symlink(drive_root, alias_path)
    print('Successful')
except OSError as e:
    print(e)
    print('Not successful')

In [None]:
# Import required libraries.
import io
import requests
import supervision as sv
from PIL import Image
from rfdetr import RFDETRLarge
from rfdetr.util.coco_classes import COCO_CLASSES
from typing import Dict, List, Optional, Tuple, Any
import json
import glob
import os
import natsort
import gc
import torch
import weakref
import pandas as pd
import matplotlib.pyplot as plt
import warnings

# Silence every Python warning for the rest of the session.
warnings.filterwarnings("ignore")

# Set the variables on this process via os.environ. A `!export ...` shell
# escape only changes a short-lived subshell and never reaches the kernel.
# NOTE(review): both are read by PyTorch/CUDA when CUDA is first initialised,
# so this must run before any CUDA work. CUDA_LAUNCH_BLOCKING=1 is a debugging
# aid that is expected to slow training — consider removing it once stable.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [None]:
#@title Utils
def read_coco_json(json_path):
    """
    Load a COCO-format annotation file from disk.

    The file is opened as UTF-8 text and parsed as JSON; the content is
    returned as-is without any schema validation.

    Args:
        json_path: Location of the COCO JSON file.

    Returns:
        The parsed JSON document. For a well-formed COCO file this is a
        dictionary holding 'images', 'annotations', and 'categories'.
    """
    with open(json_path, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)


def cleanup_gpu_memory(obj=None, verbose: bool = False):
    """
    Run garbage collection and ask PyTorch to release cached CUDA memory.

    When CUDA is unavailable the function returns immediately. Otherwise it
    synchronizes the device, runs ``gc.collect()``, calls
    ``torch.cuda.empty_cache()`` and ``torch.cuda.ipc_collect()``, and
    synchronizes again.

    Args:
        obj: Optional object (for example a model) the caller wants freed.
            Only this function's own reference is dropped; the caller must
            ``del`` its own references for the memory to be reclaimable.
            The object is used solely to report, in verbose mode, whether
            it is still alive after collection. Objects that do not support
            weak references (list, dict, tuple, ...) are accepted and simply
            skip that report.
        verbose: When True, print allocated/reserved memory before and after
            the cleanup, plus a warning if ``obj`` is still alive.

    Returns:
        None.
    """
    if not torch.cuda.is_available():
        if verbose:
            print("[INFO] CUDA is not available. No GPU cleanup needed.")
        return

    def get_memory_stats():
        # Both values are byte counts reported by torch.cuda.
        allocated = torch.cuda.memory_allocated()
        reserved = torch.cuda.memory_reserved()
        return allocated, reserved

    torch.cuda.synchronize()

    if verbose:
        alloc, reserv = get_memory_stats()
        print(f"[Before] Allocated: {alloc / 1024**2:.2f} MB | Reserved: {reserv / 1024**2:.2f} MB")

    # Keep only a weak handle so this function does not itself keep obj alive.
    ref = None
    if obj is not None:
        try:
            ref = weakref.ref(obj)
        except TypeError:
            # Built-in containers and some other types cannot be weakly
            # referenced; skip the liveness report instead of crashing.
            ref = None
        del obj

    gc.collect()
    torch.cuda.empty_cache()
    torch.cuda.ipc_collect()

    torch.cuda.synchronize()

    # Checked after gc.collect() so objects kept alive only by reference
    # cycles are not reported as leaks.
    if verbose and ref is not None and ref() is not None:
        print("[WARNING] Object not fully garbage collected yet.")

    if verbose:
        alloc, reserv = get_memory_stats()
        print(f"[After]  Allocated: {alloc / 1024**2:.2f} MB | Reserved: {reserv / 1024**2:.2f} MB")

## Load pre-trained model.

In [None]:
# Instantiate RF-DETR Large with its default constructor arguments.
# NOTE(review): per the section heading this is expected to load pre-trained
# weights — confirm against the rfdetr documentation.
model = RFDETRLarge()

## Start TensorBoard to visualize training metrics.

In [None]:
# Directory used both as TensorBoard's --logdir and as output_dir for model.train().
model_output_path = "/mydrive/LLM/rf-detr/data/output/"  # @param {type: "string", placeholder: "[path to the model]", isTemplate: true}

In [None]:
# Load the TensorBoard notebook extension, then open a dashboard that reads
# logs from model_output_path ($name is expanded by IPython).
%load_ext tensorboard
%tensorboard  --logdir $model_output_path

In [None]:
# Per-epoch records collected during training; reset each time this cell runs.
history = []


def callback2(data):
    """Store the payload passed to the "on_fit_epoch_end" hook.

    Args:
        data: Object handed over by the trainer at the end of each epoch; it
            is appended to ``history`` unchanged. The plotting cell later
            reads 'epoch', 'train_loss' and 'test_loss' from these records,
            so it is presumably a dict with those keys — verify against the
            rfdetr callback contract.
    """
    history.append(data)


# Drop any callback2 registered by an earlier run of this cell before adding
# the new one. A stale copy would still resolve the global `history`, so each
# epoch would otherwise be recorded twice.
epoch_end_hooks = model.callbacks["on_fit_epoch_end"]
for registered in list(epoch_end_hooks):
    if getattr(registered, "__name__", None) == callback2.__name__:
        epoch_end_hooks.remove(registered)
epoch_end_hooks.append(callback2)

## Dataset

RF-DETR expects the dataset to be in COCO format. Divide your dataset into three subdirectories: `train`, `valid`, and `test`. Each subdirectory should contain its own `_annotations.coco.json` file that holds the annotations for that particular split, along with the corresponding image files. Below is an example of the directory structure:

```
dataset/
├── train/
│   ├── _annotations.coco.json
│   ├── image1.jpg
│   ├── image2.jpg
│   └── ... (other image files)
├── valid/
│   ├── _annotations.coco.json
│   ├── image1.jpg
│   ├── image2.jpg
│   └── ... (other image files)
└── test/
    ├── _annotations.coco.json
    ├── image1.jpg
    ├── image2.jpg
    └── ... (other image files)
```

The COCO JSON annotation files should follow the format described in the [Roboflow COCO JSON format guide](https://roboflow.com/formats/coco-json?ref=blog.roboflow.com).

In [None]:
# Root folder of the COCO-format dataset; passed to model.train() as dataset_dir.
dataset = "/mydrive/LLM/rf-detr/data/dataset/"  # @param {type: "string", placeholder: "[path to the dataset]", isTemplate: true}

## Training

In [None]:
# All training options are gathered in one mapping and unpacked into
# model.train() so the configuration can be inspected before launching.
train_kwargs = dict(
    # Data and output locations.
    dataset_dir=dataset,
    output_dir=model_output_path,
    # Schedule length and batching.
    epochs=200,
    batch_size=8,
    grad_accum_steps=1,
    # Early stopping: patience of 10 epochs, minimum delta of 0.001,
    # EMA weights not used for the stopping decision.
    early_stopping=True,
    early_stopping_patience=10,
    early_stopping_min_delta=0.001,
    early_stopping_use_ema=False,
    # Optimiser settings.
    lr=1e-4,                 # higher learning rate for the new decoder
    lr_encoder=1e-5,         # lower learning rate for the pre-trained encoder
    weight_decay=1e-4,       # regularisation against overfitting
    lr_scheduler='cosine',   # smoother cosine schedule
    warmup_epochs=2,         # stabilises the start of training
)

model.train(**train_kwargs)

## Plot training metric curves.

In [None]:
# Turn the per-epoch records gathered by the training callback into a table.
df = pd.DataFrame(history)

plt.figure(figsize=(12, 8))

# (column in df, legend label, line style) for each curve, drawn in this order.
loss_series = (
    ('train_loss', 'Training Loss', '-'),
    ('test_loss', 'Validation Loss', '--'),
)

for column, legend_label, line_style in loss_series:
    plt.plot(
        df['epoch'],
        df[column],
        label=legend_label,
        marker='o',
        linestyle=line_style,
    )

plt.title('Train/Validation Loss over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)

plt.show()