<a href="https://colab.research.google.com/github/spiderb59/github-projects-playground/blob/main/frcnn_resnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Faster R-CNN object detection using a ResNet-50 backbone
# Done by: Shital Mainali


In [None]:
# To show the image
import matplotlib.pyplot as plt

# Pytorch object detection pretrained models
from torchvision.models import detection

# To change the download method
import ssl

# For array and matrix operations
import numpy as np

# Needed to load the pickled class labels from disk
import pickle

# Pytorch modules
import torch

# Opencv for image processing
import cv2

# To change the weight download location
import os
from os.path import exists

import glob

# Disable SSL certificate verification: replaces the factory used for default
# HTTPS contexts with the unverified one, since verification can (sometimes)
# throw an error if enabled.
# NOTE(review): this is process-wide and removes protection against tampered
# downloads; prefer fixing the local certificate store where possible.
ssl._create_default_https_context = ssl._create_unverified_context

In [None]:
def plt_imshow(title, image):
    """Display an OpenCV image inline with matplotlib.

    Args:
        title: Text shown as the plot title.
        image: Image array in BGR channel order; it is converted to RGB
            before being handed to matplotlib.
    """
    # matplotlib expects RGB, so swap the channel order first
    rgb_frame = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    plt.imshow(rgb_frame)
    plt.grid(False)
    plt.title(title)
    plt.show()

In [None]:
# Settings for a test run: input image path, model identifier, pickled class
# labels file, and the minimum score a detection needs in order to be kept.
model_test_parameters = dict(
    image="images/example_07.jpg",
    model="frcnn-resnet",
    labels="coco_classes.pickle",
    probability=0.5,
)

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
HARDWARE = "cuda" if torch.cuda.is_available() else "cpu"

# Load the COCO dataset classes available for detection.
# The file is opened in a `with` block so the handle is closed after reading.
# NOTE: unpickling can execute arbitrary code - only load a labels file that
# comes from a trusted source.
with open(model_test_parameters["labels"], "rb") as labels_file:
    CLASSES = pickle.load(labels_file)

# One random 3-channel colour per class, used when drawing that class's
# bounding boxes and labels
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

In [None]:
# Keep downloaded weight files under the local 'models' directory.
# HOME is still redirected as before, but that only moves the torch cache
# indirectly (through '~' expansion), which does not consult HOME on Windows
# with Python 3.8+. TORCH_HOME is the variable torch.hub documents for its
# cache location, so set it explicitly as well.
os.environ['HOME'] = r"models"
os.environ['TORCH_HOME'] = r"models"

In [None]:
# Download (or load from the local cache) the pretrained model
# Classifier: FasterRCNN
# Backbone: ResNet50

# Make sure the 'models' directory exists instead of skipping the load when it
# is missing: the previous fallback (`model = model`) raised NameError on a
# fresh kernel because `model` had never been defined.
os.makedirs("models", exist_ok=True)

model = detection.fasterrcnn_resnet50_fpn(pretrained=True,
                                          progress=True,
                                          num_classes=len(CLASSES),
                                          pretrained_backbone=True).to(HARDWARE)

In [None]:
# Switch the model to evaluation (inference) mode; this does not run an evaluation.
# As the last expression of the cell, the returned module is shown as the cell output.
model.eval()

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=0.0)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=0.0)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=0.0)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=0.0)
          (relu): ReLU(

In [None]:
def read_image(img):
    """Load an image from disk and run it through the detection model.

    Args:
        img: Path of the image file, passed to ``cv2.imread``.

    Returns:
        A ``(detections, orig)`` tuple. ``detections`` is the first element of
        the model output for the one-image batch, and ``orig`` is a copy of the
        image exactly as OpenCV loaded it (before any preprocessing).

    Raises:
        FileNotFoundError: If OpenCV could not read ``img`` (missing file or a
            format it cannot decode).
    """
    # cv2.imread signals failure by returning None rather than raising
    image = cv2.imread(img)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {img!r}")
    orig = image.copy()

    # BGR -> RGB, then channels-last -> channels-first
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.transpose((2, 0, 1))

    # batch dimension and scaling of pixel values to [0, 1]
    image = np.expand_dims(image, axis=0)
    image = image / 255.0
    image = torch.FloatTensor(image)

    # move the image to the selected hardware and run inference; gradients are
    # not needed for prediction, so skip building the autograd graph
    image = image.to(HARDWARE)
    with torch.no_grad():
        detections = model(image)[0]
    return detections, orig

In [None]:
def recognize_objects(detections, probability_threshold, orig):
    """Print, draw and display every detection scoring above the threshold.

    Args:
        detections: Mapping with "boxes", "labels" and "scores" entries, indexed
            in parallel (one entry per detection).
        probability_threshold: Detections whose score is not strictly greater
            than this value are skipped.
        orig: Image array to draw on. It is modified in place.
    """
    for i in range(len(detections["boxes"])):
        # score of this detection as a plain Python float
        probability = float(detections["scores"][i])
        if probability <= probability_threshold:
            continue

        # class label index and box corners; values are converted to plain
        # Python numbers because some OpenCV builds reject NumPy scalars
        index = int(detections["labels"][i])
        box = detections["boxes"][i].detach().cpu().numpy()
        init_x, init_y, end_x, end_y = (int(v) for v in box)
        color = tuple(float(c) for c in COLORS[index])

        # display the prediction to our terminal
        label = "{}: {:.2f}%".format(CLASSES[index], probability * 100)
        print("[INFO] {}".format(label))

        # draw the bounding box, and put the label above it unless that would
        # fall too close to the top edge of the image
        cv2.rectangle(orig, (init_x, init_y), (end_x, end_y), color, 2)
        y = init_y - 15 if init_y - 15 > 15 else init_y + 15
        cv2.putText(orig, label, (init_x, y),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # show the output image inline
    plt_imshow("Output Image", orig)

    # Also try a native OpenCV window. This is best-effort: hosted or headless
    # kernels may not support GUI windows, and the image has already been
    # rendered inline above, so a failure here is reported and skipped.
    try:
        cv2.imshow('Output Image', orig)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    except Exception as err:
        print("[INFO] OpenCV window not shown ({})".format(type(err).__name__))

In [None]:
# Run detection on the first JPEG found in 'images/'. Slicing (instead of
# indexing with [0]) yields an empty list rather than IndexError when the
# folder has no matching files.
img_list = glob.glob('images/*.jpg')[:1]
if not img_list:
    print("[INFO] no .jpg images found in 'images/'")

for img in img_list:
    # The path returned by glob is used as-is: splitting it on a backslash and
    # rejoining raised IndexError for any path without a backslash in it.
    print(f"==========IMAGE: {img} =====================")
    detections, orig = read_image(img)
    recognize_objects(detections, model_test_parameters["probability"], orig)



IndexError: ignored