[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/khetansarvesh/CV/blob/main/Faster_RCNN/runner.ipynb)

In [None]:
import sys
import threading
import argparse
import os
from math import ceil
from enum import Enum
import imageio
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from dataclasses import dataclass
from PIL import Image
from typing import List, Tuple
from pathlib import Path
import random
import xml.etree.ElementTree as ET
import torch as t
from torch import nn
from torch.nn import functional as F
import torchvision
# Fetch the repository that provides the CV.Faster_RCNN package imported by later cells.
!git clone https://github.com/khetansarvesh/CV.git

# Dataset

In [None]:
'''train+validation dataset'''
!wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar # downloading the VOC2007 tar file
!tar -xf VOCtrainval_06-Nov-2007.tar # extracting the above tar file

--2024-03-13 15:18:41--  http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
Resolving host.robots.ox.ac.uk (host.robots.ox.ac.uk)... 129.67.94.152
Connecting to host.robots.ox.ac.uk (host.robots.ox.ac.uk)|129.67.94.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 460032000 (439M) [application/x-tar]
Saving to: ‘VOCtrainval_06-Nov-2007.tar’


2024-03-13 15:19:23 (10.7 MB/s) - ‘VOCtrainval_06-Nov-2007.tar’ saved [460032000/460032000]



'test dataset'

In [None]:
@dataclass
class PreprocessingParams:
  """
  Settings describing how an image is normalised before it is fed to the network.

  As described by the original author, the scaling factor is applied first and the
  result is then standardised with the per-channel means and standard deviations,
  which are listed in the order given by channel_order.
  """
  channel_order: str  # this notebook passes "RGB"; NOTE(review): other accepted values are decided by the loader - confirm
  scaling: float      # multiplicative factor applied to the pixel values
  means: List[float]  # per-channel means, in channel_order
  stds: List[float]   # per-channel standard deviations, in channel_order

# TODO: read these values from the pretrained weights' own preprocessing configuration
# (a newer PyTorch/torchvision feature) instead of hard-coding them here.
image_preprocessing_params = PreprocessingParams(
  channel_order = "RGB",
  scaling = 1.0 / 255.0,
  means = [ 0.485, 0.456, 0.406 ],
  stds = [ 0.229, 0.224, 0.225 ]
)

In [None]:
# Build the training set with the repository's custom loader.
# TODO: check whether the standard PyTorch Dataset/DataLoader could replace it.
from CV.Faster_RCNN.dataloader import Dataset
from CV.Faster_RCNN.foundation_model_resnet50 import ResNetBackbone

# The loader needs the backbone's feature-map helpers, but the notebook only creates
# `backbone` further down in the Modelling section. Creating it here keeps this cell
# runnable on a fresh kernel (Restart & Run All) instead of failing with a NameError.
backbone = ResNetBackbone()

training_data = Dataset(
  dir = "VOCdevkit/VOC2007",
  split = "trainval", #Dataset split to use for training
  image_preprocessing_params = image_preprocessing_params,
  compute_feature_map_shape_fn = backbone.compute_feature_map_shape,
  feature_pixels = backbone.feature_pixels,
  augment = False,
  shuffle = True,
  cache = False
)

In [None]:
# Visualize the anchors and ground-truth boxes of the first sample the dataset yields.
from CV.Faster_RCNN.util_visualization import show_anchors

output_path = "/content/anchors_temp.png"

# next() pulls exactly one sample (the previous counter loop fetched a second one just
# to break out); the None default keeps an empty dataset from raising StopIteration.
first_sample = next(iter(training_data), None)
if first_sample is not None:
  show_anchors(
    output_path = output_path,
    image = first_sample.image,
    anchor_map = first_sample.anchor_map,
    anchor_valid_map = first_sample.anchor_valid_map,
    gt_rpn_map = first_sample.gt_rpn_map,
    gt_boxes = first_sample.gt_boxes,
    display = True
  )

  data = imageio.imread(url, pilmode = "RGB")
  data = imageio.imread(url, pilmode = "RGB")


# Modelling

In [None]:
# Disabled helper, kept for reference only: a decorator that would run the wrapped
# function inside a t.no_grad() context. Nothing in this cell is executed.
# def no_grad(func):
#   def wrapper_nograd(*args, **kwargs):
#     with t.no_grad():
#       return func(*args, **kwargs)
#   return wrapper_nograd

In [None]:
# foundation model
# NOTE(review): the wildcard import hides which names this cell adds to the namespace;
# ResNetBackbone is the only one used here. Later cells may rely on other names it
# brings in, so confirm before narrowing it to an explicit import.
from CV.Faster_RCNN.foundation_model_resnet50 import *
backbone = ResNetBackbone()  # also read by the Dataset construction cell and passed to FasterRCNNModel

In [None]:
# Construct the detector on top of the backbone and move it to the GPU.
# NOTE(review): no checkpoint is loaded in this cell - any initial weights would have to
# come from the ResNetBackbone / FasterRCNNModel constructors; confirm.
# NOTE(review): wildcard import; FasterRCNNModel is the only name used in this cell.
from CV.Faster_RCNN.model import *
model = FasterRCNNModel(num_classes = Dataset.num_classes, backbone = backbone).cuda()

# Training

In [None]:
# Optimisation hyperparameters read by the optimizer and training-loop cells.
epochs = 10            # number of passes over training_data
learning_rate = 0.001  # SGD learning rate
momentum = 0.9         # SGD momentum
weight_decay = 0.0005  # weight decay attached to the optimizer's parameter groups

In [None]:
# Create the SGD optimizer.
#
# Every trainable parameter must be handed to the optimizer, otherwise it is never
# updated. Previously only parameters whose name contains "weight" were registered, so
# trainable biases (and any other tensor without "weight" in its name) stayed frozen
# at their initial values. They are now registered too, in a group without weight decay.
decayed_params = []    # names containing "weight": regularised with weight_decay
undecayed_params = []  # everything else that is trainable (e.g. biases): no weight decay
for name, param in model.named_parameters():
  if not param.requires_grad:
    continue  # frozen parameters are left out of the optimizer entirely
  if "weight" in name:
    decayed_params.append(param)
  else:
    undecayed_params.append(param)

param_groups = [
  { "params": decayed_params, "weight_decay": weight_decay },
  { "params": undecayed_params, "weight_decay": 0.0 }
]
# Skip a group that ended up empty so the optimizer only sees populated groups.
params = [ group for group in param_groups if group["params"] ]

optimizer = t.optim.SGD(params, lr = learning_rate, momentum = momentum)

In [None]:
def _loss_postfix(rpn_class, rpn_regression, detector_class, detector_regression):
  """Format the four loss components and their sum as the tqdm postfix dictionary."""
  return {
    "rpn_class_loss": "%1.4f" % rpn_class,
    "rpn_regr_loss": "%1.4f" % rpn_regression,
    "detector_class_loss": "%1.4f" % detector_class,
    "detector_regr_loss": "%1.4f" % detector_regression,
    "total_loss": "%1.2f" % (rpn_class + rpn_regression + detector_class + detector_regression)
  }

# Train for `epochs` passes over training_data, one image per step, showing the
# per-epoch mean of each loss component in the progress bar.
for epoch in range(1, 1 + epochs):
  print("Epoch %d/%d" % (epoch, epochs))

  # Displayed means; +inf until the first step of the epoch has produced a loss.
  rpn_class_loss = float("inf")
  rpn_regression_loss = float("inf")
  detector_class_loss = float("inf")
  detector_regression_loss = float("inf")

  # Running sums give the same epoch means without re-averaging the full history
  # of losses on every step.
  _rpn_class_sum = 0.0
  _rpn_regression_sum = 0.0
  _detector_class_sum = 0.0
  _detector_regression_sum = 0.0

  progbar = tqdm(
    iterable = iter(training_data),
    total = training_data.num_samples,
    postfix = _loss_postfix(rpn_class_loss, rpn_regression_loss, detector_class_loss, detector_regression_loss)
  )

  for step, sample in enumerate(progbar, start = 1):
    loss = model.train_step(  # don't retain any tensors we don't need (helps memory usage)
      optimizer = optimizer,
      image_data = t.from_numpy(sample.image_data).unsqueeze(dim = 0).cuda(),
      anchor_map = sample.anchor_map,
      anchor_valid_map = sample.anchor_valid_map,
      gt_rpn_map = t.from_numpy(sample.gt_rpn_map).unsqueeze(dim = 0).cuda(),
      gt_rpn_object_indices = [ sample.gt_rpn_object_indices ],
      gt_rpn_background_indices = [ sample.gt_rpn_background_indices ],
      gt_boxes = [ sample.gt_boxes ]
    )

    _rpn_class_sum += loss.rpn_class
    _rpn_regression_sum += loss.rpn_regression
    _detector_class_sum += loss.detector_class
    _detector_regression_sum += loss.detector_regression

    # Mean over the steps completed so far in this epoch.
    rpn_class_loss = _rpn_class_sum / step
    rpn_regression_loss = _rpn_regression_sum / step
    detector_class_loss = _detector_class_sum / step
    detector_regression_loss = _detector_regression_sum / step

    progbar.set_postfix(_loss_postfix(rpn_class_loss, rpn_regression_loss, detector_class_loss, detector_regression_loss))

Epoch 1/10


  data = imageio.imread(url, pilmode = "RGB")
100%|██████████| 5011/5011 [13:34<00:00,  6.16it/s, rpn_class_loss=0.1942, rpn_regr_loss=0.0562, detector_class_loss=0.4086, detector_regr_loss=0.3549, total_loss=1.01]


Epoch 2/10


100%|██████████| 5011/5011 [13:38<00:00,  6.13it/s, rpn_class_loss=0.1420, rpn_regr_loss=0.0499, detector_class_loss=0.2937, detector_regr_loss=0.3237, total_loss=0.81]


Epoch 3/10


100%|██████████| 5011/5011 [13:38<00:00,  6.12it/s, rpn_class_loss=0.1255, rpn_regr_loss=0.0474, detector_class_loss=0.2465, detector_regr_loss=0.2795, total_loss=0.70]


Epoch 4/10


100%|██████████| 5011/5011 [13:38<00:00,  6.13it/s, rpn_class_loss=0.1136, rpn_regr_loss=0.0460, detector_class_loss=0.2171, detector_regr_loss=0.2504, total_loss=0.63]


Epoch 5/10


100%|██████████| 5011/5011 [13:37<00:00,  6.13it/s, rpn_class_loss=0.1056, rpn_regr_loss=0.0444, detector_class_loss=0.1975, detector_regr_loss=0.2305, total_loss=0.58]


Epoch 6/10


100%|██████████| 5011/5011 [13:39<00:00,  6.11it/s, rpn_class_loss=0.0972, rpn_regr_loss=0.0432, detector_class_loss=0.1796, detector_regr_loss=0.2146, total_loss=0.53]


Epoch 7/10


100%|██████████| 5011/5011 [13:40<00:00,  6.10it/s, rpn_class_loss=0.0897, rpn_regr_loss=0.0424, detector_class_loss=0.1660, detector_regr_loss=0.1996, total_loss=0.50]


Epoch 8/10


100%|██████████| 5011/5011 [13:39<00:00,  6.12it/s, rpn_class_loss=0.0845, rpn_regr_loss=0.0411, detector_class_loss=0.1597, detector_regr_loss=0.1888, total_loss=0.47]


Epoch 9/10


100%|██████████| 5011/5011 [13:38<00:00,  6.12it/s, rpn_class_loss=0.0790, rpn_regr_loss=0.0407, detector_class_loss=0.1493, detector_regr_loss=0.1782, total_loss=0.45]


Epoch 10/10


100%|██████████| 5011/5011 [13:43<00:00,  6.09it/s, rpn_class_loss=0.0736, rpn_regr_loss=0.0398, detector_class_loss=0.1376, detector_regr_loss=0.1684, total_loss=0.42]


# Inference

In [None]:
# Download the demo image and preprocess it with the same parameters used for training.
# The call returns four values; the last two are not needed here and are discarded.
# NOTE(review): load_image is not imported by name in any visible cell - presumably it
# is exposed through one of the wildcard imports above; confirm.
image_data, image_obj, _, _ = load_image(
  url = "https://trzy.org/files/fasterrcnn/gary.jpg",
  preprocessing = image_preprocessing_params,
  min_dimension_pixels = 600
)

NameError: name 'model' is not defined

In [None]:
# Add a leading batch dimension and move the image to the GPU.
# The tensor gets its own name so `image_data` stays a NumPy array: rebinding it made
# this cell fail on a second run, because t.from_numpy() does not accept a tensor.
image_tensor = t.from_numpy(image_data).unsqueeze(dim = 0).cuda()
scored_boxes_by_class_index = model.predict(image_data = image_tensor, score_threshold = 0.7)

In [None]:
# Draw the scored boxes on the original image and display it inline; output_path is
# None, so no path is handed to the helper.
# NOTE(review): show_detections is not imported by name in any visible cell - confirm
# where it comes from.
show_detections(
  image = image_obj,
  scored_boxes_by_class_index = scored_boxes_by_class_index,
  class_index_to_name = Dataset.class_index_to_name,
  show_image = True,
  output_path = None
)