## 1. Setup and Import Libraries:

In [1]:
!pip install 'git+https://github.com/facebookresearch/detectron2.git'

Collecting git+https://github.com/facebookresearch/detectron2.git
  Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-9fdawgka
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-9fdawgka
  Resolved https://github.com/facebookresearch/detectron2.git to commit 9131ce0e5bc0c89904541bc0355d933ccd6acbfb
  Preparing metadata (setup.py) ... [?25ldone
Collecting pycocotools>=2.0.2 (from detectron2==0.6)
  Downloading pycocotools-2.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Collecting yacs>=0.1.8 (from detectron2==0.6)
  Downloading yacs-0.1.8-py3-none-any.whl.metadata (639 bytes)
Collecting fvcore<0.1.6,>=0.1.5 (from detectron2==0.6)
  Downloading fvcore-0.1.5.post20221221.tar.gz (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.p

In [2]:
# Import libraries
import os
import cv2
import torch
from torch.cuda.amp import autocast, GradScaler
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.utils.visualizer import Visualizer
import logging

In [3]:
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("detectron2")

## 2. Register COCO Datasets (Training, Validation, Test):

In [4]:
# Register COCO dataset (Train and Validation)
register_coco_instances("coco_train", {}, "/kaggle/input/mscoco/annotations_trainval2017/annotations/instances_train2017.json", "/kaggle/input/mscoco/train2017/train2017")
register_coco_instances("coco_val", {}, "/kaggle/input/mscoco/annotations_trainval2017/annotations/instances_val2017.json", "/kaggle/input/mscoco/val2017/val2017")

## 3. Configuration Setup:

In [5]:
cfg = get_cfg()

# Load the Cascade Mask R-CNN configuration from the model zoo
cfg.merge_from_file(model_zoo.get_config_file("Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml"))

# --- Configuration for dataset ---
cfg.DATASETS.TRAIN = ("coco_train",)  # Specify the training dataset
cfg.DATASETS.TEST = ("coco_val",)    # Specify the validation dataset

# --- DataLoader settings ---
cfg.DATALOADER.NUM_WORKERS = 4  # Number of workers to load the data in parallel

# --- Input settings ---
cfg.INPUT.MIN_SIZE_TRAIN = (800,)  # Minimum size of images during training
cfg.INPUT.MAX_SIZE_TRAIN = 1333  # Maximum size of images during training

# --- Model weights ---
# cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/cascade_mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" # Load pretrained weights from model zoo

# --- Solver settings ---
cfg.SOLVER.IMS_PER_BATCH = 4  # Batch size during training
cfg.SOLVER.BASE_LR = 0.00025  # Learning rate for the optimizer
cfg.SOLVER.MAX_ITER = 30000  # Total number of iterations for training

# --- ROI Head settings ---
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512  # Number of proposals per image during training
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 80  # COCO dataset has 80 object classes

# --- Output directory ---
cfg.OUTPUT_DIR = "/kaggle/working/output"  # Directory to save model checkpoints, logs, and other outputs
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device: {device}")

## 4. Train the Model:

In [7]:
# Initialize the trainer
trainer = DefaultTrainer(cfg)
trainer.model.to(device)  # Move model to GPU

# Mixed precision setup
scaler = GradScaler()

# Log training start
logger.info("Training started...")

# Training loop with mixed precision
trainer.resume_or_load(resume=False)
trainer.train()

  scaler = GradScaler()
R-50.pkl: 102MB [00:00, 176MB/s]                            
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


## 5. Testing the Model:

In [8]:
# # Testing the model after training and logging evaluation results
# eval_results = trainer.test(cfg, trainer.model)
# logger.info("Evaluation Results: %s", eval_results)

## 6. Sample Inference on a Single Image:

In [9]:
# # Set up the predictor for inference
# predictor = DefaultPredictor(cfg)

# # Load a sample image for inference (replace with actual test image path)
# img = cv2.imread("/kaggle/input/mscoco/test2017/sample_image.jpg")

# # Run inference on the image
# outputs = predictor(img)

# # Visualize the predictions on the image
# v = Visualizer(img[:, :, ::-1], metadata=MetadataCatalog.get("coco_train"), scale=1.2)
# out = v.draw_instance_predictions(outputs["instances"].to("cpu"))

# # Save or display the output image
# output_image_path = "/kaggle/working/output/inference_result.jpg"
# cv2.imwrite(output_image_path, out.get_image()[:, :, ::-1])
# logger.info(f"Inference result saved at {output_image_path}")