# Faster R - CNN


## Install detectron2

In [1]:
!pip install pyyaml==5.1

import torch
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/$CUDA_VERSION/torch$TORCH_VERSION/index.html

torch:  1.10 ; cuda:  cu111
Looking in links: https://dl.fbaipublicfiles.com/detectron2/wheels/cu111/torch1.10/index.html


## Basic setup

In [2]:
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow

from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

## Download the dataset

In [3]:
!wget http://images.cocodataset.org/zips/val2017.zip
!unzip val2017.zip > /dev/null

--2021-11-26 16:55:51--  http://images.cocodataset.org/zips/val2017.zip
Resolving images.cocodataset.org (images.cocodataset.org)... 52.217.161.121
Connecting to images.cocodataset.org (images.cocodataset.org)|52.217.161.121|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 815585330 (778M) [application/zip]
Saving to: ‘val2017.zip’


2021-11-26 16:55:59 (93.2 MB/s) - ‘val2017.zip’ saved [815585330/815585330]



## Download annotations for Coco dataset

In [4]:
 !wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
 !unzip annotations_trainval2017.zip > /dev/null

--2021-11-26 16:56:13--  http://images.cocodataset.org/annotations/annotations_trainval2017.zip
Resolving images.cocodataset.org (images.cocodataset.org)... 52.216.129.251
Connecting to images.cocodataset.org (images.cocodataset.org)|52.216.129.251|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 252907541 (241M) [application/zip]
Saving to: ‘annotations_trainval2017.zip’


2021-11-26 16:56:16 (88.4 MB/s) - ‘annotations_trainval2017.zip’ saved [252907541/252907541]



## Register Coco dataset to detectron2

In [5]:
from detectron2.data.datasets import register_coco_instances
register_coco_instances("my_dataset", {}, "/content/annotations/instances_val2017.json", "/content/val2017")

## Train

We are doing it in order to receive model_final.pth file

In [6]:
from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("my_dataset",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 300    # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512   # faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 80
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrect uses num_classes+1 here.

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()

[32m[11/26 16:57:42 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

model_final_280758.pkl: 167MB [00:06, 26.7MB/s]                           


[32m[11/26 16:57:55 d2.engine.train_loop]: [0mStarting training from iteration 0


  max_size = (max_size + (stride - 1)) // stride * stride
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[11/26 16:58:21 d2.utils.events]: [0m eta: 0:05:56  iter: 19  total_loss: 0.5201  loss_cls: 0.1766  loss_box_reg: 0.2324  loss_rpn_cls: 0.01863  loss_rpn_loc: 0.03851  time: 1.2584  data_time: 0.0240  lr: 1.6068e-05  max_mem: 2554M
[32m[11/26 16:58:46 d2.utils.events]: [0m eta: 0:05:28  iter: 39  total_loss: 0.6324  loss_cls: 0.1829  loss_box_reg: 0.2445  loss_rpn_cls: 0.02056  loss_rpn_loc: 0.04908  time: 1.2483  data_time: 0.0125  lr: 3.2718e-05  max_mem: 2554M
[32m[11/26 16:59:11 d2.utils.events]: [0m eta: 0:05:04  iter: 59  total_loss: 0.5408  loss_cls: 0.1907  loss_box_reg: 0.2535  loss_rpn_cls: 0.02146  loss_rpn_loc: 0.04879  time: 1.2488  data_time: 0.0081  lr: 4.9367e-05  max_mem: 2643M
[32m[11/26 16:59:37 d2.utils.events]: [0m eta: 0:04:41  iter: 79  total_loss: 0.5164  loss_cls: 0.1916  loss_box_reg: 0.2406  loss_rpn_cls: 0.04338  loss_rpn_loc: 0.05476  time: 1.2649  data_time: 0.0127  lr: 6.6017e-05  max_mem: 2734M
[32m[11/26 17:00:02 d2.utils.events]: [0m eta:

In [7]:
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth") 
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
predictor = DefaultPredictor(cfg)

## Evaluate the performance by using AP metric implementation

In [8]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
evaluator = COCOEvaluator("my_dataset", output_dir="./output")
val_loader = build_detection_test_loader(cfg, "my_dataset")
print(inference_on_dataset(predictor.model, val_loader, evaluator))

Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[32m[11/26 17:04:58 d2.data.datasets.coco]: [0mLoaded 5000 images in COCO format from /content/annotations/instances_val2017.json
[32m[11/26 17:04:58 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[11/26 17:04:58 d2.data.common]: [0mSerializing 5000 elements to byte tensors and concatenating them all ...
[32m[11/26 17:04:59 d2.data.common]: [0mSerialized dataset takes 19.07 MiB
[32m[11/26 17:04:59 d2.evaluation.evaluator]: [0mStart inference on 5000 batches


  max_size = (max_size + (stride - 1)) // stride * stride


[32m[11/26 17:05:03 d2.evaluation.evaluator]: [0mInference done 11/5000. Dataloading: 0.0018 s/iter. Inference: 0.3274 s/iter. Eval: 0.0003 s/iter. Total: 0.3295 s/iter. ETA=0:27:23
[32m[11/26 17:05:08 d2.evaluation.evaluator]: [0mInference done 27/5000. Dataloading: 0.0023 s/iter. Inference: 0.3247 s/iter. Eval: 0.0004 s/iter. Total: 0.3276 s/iter. ETA=0:27:09
[32m[11/26 17:05:13 d2.evaluation.evaluator]: [0mInference done 43/5000. Dataloading: 0.0023 s/iter. Inference: 0.3264 s/iter. Eval: 0.0003 s/iter. Total: 0.3293 s/iter. ETA=0:27:12
[32m[11/26 17:05:18 d2.evaluation.evaluator]: [0mInference done 59/5000. Dataloading: 0.0023 s/iter. Inference: 0.3268 s/iter. Eval: 0.0003 s/iter. Total: 0.3297 s/iter. ETA=0:27:08
[32m[11/26 17:05:23 d2.evaluation.evaluator]: [0mInference done 74/5000. Dataloading: 0.0023 s/iter. Inference: 0.3279 s/iter. Eval: 0.0003 s/iter. Total: 0.3309 s/iter. ETA=0:27:09
[32m[11/26 17:05:29 d2.evaluation.evaluator]: [0mInference done 90/5000. Datal

# SSD

## Import some libraries 

In [9]:
import torch
import torchvision
from torchvision import transforms
from PIL import Image, ImageFile
import zipfile

## Install Coco API

In [10]:
pip install git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI

Collecting git+https://github.com/philferriere/cocoapi.git#subdirectory=PythonAPI
  Cloning https://github.com/philferriere/cocoapi.git to /tmp/pip-req-build-8emdajx2
  Running command git clone -q https://github.com/philferriere/cocoapi.git /tmp/pip-req-build-8emdajx2
Building wheels for collected packages: pycocotools
  Building wheel for pycocotools (setup.py) ... [?25l[?25hdone
  Created wheel for pycocotools: filename=pycocotools-2.0-cp37-cp37m-linux_x86_64.whl size=264143 sha256=62953e0e766dd3518ad6d9fc0001322508ed5d1cd9f8ba5286aac4dd7eea5160
  Stored in directory: /tmp/pip-ephem-wheel-cache-3udcs31f/wheels/6b/c6/c5/cb6da4cb793a6cb1ab91f6578d76c42686422127eb4dbcea94
Successfully built pycocotools
Installing collected packages: pycocotools
  Attempting uninstall: pycocotools
    Found existing installation: pycocotools 2.0.2
    Uninstalling pycocotools-2.0.2:
      Successfully uninstalled pycocotools-2.0.2
[31mERROR: pip's dependency resolver does not currently take into acco

In [11]:
pip install pycocotools



## Pre procassing the data

In [12]:
data = "/content/val2017"

In [13]:
transform = transforms.Compose([
    transforms.Resize((400,400)),    
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225] )
    ])
batch_size=20

In [14]:
def collate_fn(batch):
    return tuple(zip(*batch))

In [15]:
test_data = torchvision.datasets.CocoDetection(data,"/content/annotations/instances_val2017.json",transform=transform)

loading annotations into memory...
Done (t=0.84s)
creating index...
index created!


In [16]:
testloader = torch.utils.data.DataLoader(test_data, batch_size=64, collate_fn=collate_fn)

## Download the model and validate it on dataset

In [22]:
SSD = torchvision.models.detection.ssd300_vgg16(pretrained=True)
SSD.eval()
x = [torch.rand(3, 300, 300), torch.rand(3, 500, 400)]
predictionsSSD = SSD(x)

In [19]:
if torch.cuda.is_available():
    device = torch.device("cuda") 
else:
    device = torch.device("cpu")

In [23]:
for batch in testloader:
  inputs, targets = batch
  inputs = inputs[1].to(device)
  output = predictionsSSD

## Study predictions

boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.

labels (Int64Tensor[N]): the predicted labels for each detection

scores (Tensor[N]): the scores for each detection

In [24]:
predictionsSSD

[{'boxes': tensor([[ 67.9313,   2.3697, 255.0805, 179.3450],
          [ 70.1910,   1.6638, 222.7966,  89.3027],
          [ 39.1785,   5.3961, 115.3067, 155.9398],
          [  6.3562,  20.9728,  83.6169, 200.1223],
          [103.3574,  39.4626, 218.0388, 273.1003],
          [  9.4098,   1.6629, 152.5855,  92.7701],
          [ 28.8267,   0.0000, 145.2865, 220.9718],
          [ 69.8480,  10.5413, 154.1235, 145.3918],
          [107.2577,   6.3449, 181.2944, 158.6008],
          [  1.5354,   1.6396, 299.8727, 292.3865],
          [132.9502,   9.9965, 219.6029, 145.3501],
          [115.9080,  18.8337, 189.8596,  93.8772],
          [ 44.1532,  61.3865, 112.9935, 220.7560],
          [ 79.7251,  33.1362, 213.6867, 119.4597],
          [  0.0000,  45.8275, 117.2950, 268.8766],
          [142.8425,  55.5459, 300.0000, 259.3199],
          [169.5291,   3.5140, 245.6282, 157.3768],
          [ 84.6675,  18.6952, 157.5719,  93.7868],
          [ 31.3205,  17.3290, 108.8149,  94.5086],
   

# Retina Net

## Download the model and validate it on dataset

In [25]:
Retina = torchvision.models.detection.retinanet_resnet50_fpn(pretrained=True)
Retina.eval()
x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
predictionsRetina = Retina(x)

Downloading: "https://download.pytorch.org/models/retinanet_resnet50_fpn_coco-eeacb38b.pth" to /root/.cache/torch/hub/checkpoints/retinanet_resnet50_fpn_coco-eeacb38b.pth


  0%|          | 0.00/130M [00:00<?, ?B/s]

In [26]:
for batch in testloader:
  inputs, targets = batch
  inputs = inputs[1].to(device)
  output = predictionsRetina

## Study predictions

boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.

labels (Int64Tensor[N]): the predicted labels for each detection

scores (Tensor[N]): the scores of each detection

In [27]:
predictionsRetina

[{'boxes': tensor([[  0.0000,   2.3714, 400.0000, 295.8163],
          [  0.0000,   7.1562, 400.0000, 299.1739],
          [  0.8063,   0.0000, 400.0000, 300.0000],
          [  0.8063,   0.0000, 400.0000, 300.0000],
          [  0.8063,   0.0000, 400.0000, 300.0000],
          [  0.8063,   0.0000, 400.0000, 300.0000],
          [  0.0000,   2.3714, 400.0000, 295.8163]], grad_fn=<StackBackward0>),
  'labels': tensor([72, 28,  9,  7,  3, 65, 64]),
  'scores': tensor([0.1348, 0.1051, 0.1036, 0.0859, 0.0725, 0.0641, 0.0606],
         grad_fn=<IndexBackward0>)},
 {'boxes': tensor([[  6.9416,   4.3319, 400.0000, 500.0000],
          [  6.9416,   4.3319, 400.0000, 500.0000]], grad_fn=<StackBackward0>),
  'labels': tensor([72, 64]),
  'scores': tensor([0.0638, 0.0541], grad_fn=<IndexBackward0>)}]

# Custom IoU function

In [28]:
SMOOTH = 1e-6

def iou_pytorch(outputs: torch.Tensor, labels: torch.Tensor):
    # You can comment out this line if you are passing tensors of equal shape
    # But if you are passing output from UNet or something it will most probably
    # be with the BATCH x 1 x H x W shape
    outputs = outputs.squeeze(1)  # BATCH x 1 x H x W => BATCH x H x W
    
    intersection = (outputs & labels).float().sum((1, 2))  # Will be zero if Truth=0 or Prediction=0
    union = (outputs | labels).float().sum((1, 2))         # Will be zzero if both are 0
    
    iou = (intersection + SMOOTH) / (union + SMOOTH)  # We smooth our devision to avoid 0/0
    
    thresholded = torch.clamp(20 * (iou - 0.5), 0, 10).ceil() / 10  # This is equal to comparing with thresolds
    
    return thresholded  # Or thresholded.mean() if you are interested in average across the batch