# Preparation

Let's start by installing nnabla and cloning the nnabla-examples repository. If you're running on Colab, make sure that the runtime type is set to GPU (from the top menu: Runtime → Change runtime type), and click Connect at the top right-hand side of the screen before you start.

In [None]:
# Install nnabla with its CUDA extension package and fetch the example code.
# Colab's default Python environment may show warnings for newly imported packages;
# if it does, click `RESTART RUNTIME` so that the following cells run correctly.
# Error messages about package conflicts are acceptable.
!pip install nnabla-ext-cuda116
!git clone https://github.com/sony/nnabla-examples.git

In [None]:
# Work from the CenterNet example directory of the cloned repository.
%cd nnabla-examples/object-detection/centernet

In [None]:
import cv2
import sys
import numpy as np
import nnabla as nn
import matplotlib.pyplot as plt
from nnabla.utils.image_utils import imread
from nnabla.ext_utils import get_extension_context
from PIL import Image

# Request the 'cudnn' extension context and make it the default context for nnabla.
ctx = get_extension_context('cudnn')
nn.set_default_context(ctx)

Next, choose the backbone network architecture and the dataset that the pretrained model was trained on. Click each dropdown list and select one option, then execute the cell.

In [None]:
#@title Choose backbone architecture and dataset which the model is trained on.
architecture = 'dlav0' #@param ['resnet', 'dlav0']

#@title Choose dataset
dataset = 'coco' #@param ['coco', 'pascal']

# The layer count is part of the weight file name: 18 for 'resnet', 34 for
# every other choice.
num_layer = 18 if architecture == "resnet" else 34

# NOTE(review): the 'ceneternet' path segment looks like a typo but is kept
# exactly as it was; confirm against the hosted file before changing it.
param_url = f"https://nnabla.org/pretrained-models/nnabla-examples/object-detection/ceneternet/ctdet/{architecture}_{num_layer}_{dataset}_fp.h5"
# Local file name of the weights = last path component of the URL.
param_path = param_url.rsplit("/", 1)[-1]

We will now download the pre-trained weight parameters for the selected neural network.

In [None]:
# Download the selected weight file; `param_path` holds the file name the later cells expect.
!wget $param_url

### Get CenterNet detector

In [None]:
# Make the modules under ./src importable for the imports in the next cell.
sys.path.append("./src")

In [None]:
import _init_paths
from opts import opts
from detectors.detector_factory import detector_factory
from datasets.dataset.pascal_config import PascalVOCDefaultParams
from datasets.dataset.coco_config import COCODefaultParams

In [None]:
# Build the detector options from command-line style arguments.
o = opts()
# The second value returned by parse_known_args (presumably the arguments the
# parser did not recognise — confirm that o.parser is an argparse parser) is
# discarded.
opt, _ = o.parser.parse_known_args(["--task", "ctdet", "--arch", architecture, "--dataset", dataset, "--checkpoint", param_path, "--save_dir", "./", "--debug", "1",])
# head_conv is 256 when the architecture name contains 'dlav0', otherwise 64.
opt.head_conv = 256 if 'dlav0' in opt.arch else 64
# Fixed settings for this demo.
# NOTE(review): the exact meaning of down_ratio / pad / num_stacks / fix_res
# is defined in the project's opts module — confirm there before changing.
opt.down_ratio = 4
opt.pad = 31
opt.num_stacks = 1
opt.test_scales = [1.0]
opt.fix_res = True
# Map the dataset chosen in the form to its default-parameter class.
default_dataset_info = {
  'coco': COCODefaultParams,
  'pascal': PascalVOCDefaultParams,
}
dataset_para = default_dataset_info[dataset]
# Must run after the fields above are set: it receives the already-modified opt.
opt = o.update_dataset_info_and_set_heads(opt, dataset_para)

In [None]:
# Turn on nnabla's auto-forward mode before the detector is built.
nn.set_auto_forward(True) 
# Look up the detector class registered for the selected task and instantiate it.
Detector = detector_factory[opt.task]
detector = Detector(opt)

# Upload Image
Run the following cell to upload your own image. Note that images that are too small may give poor results.


In [None]:
from google.colab import files

# Ask the user for a file; the keys of the returned mapping are used as file names by the next cell.
img = files.upload()

In [None]:
import os
# Rename the most recently listed upload to "input_image<ext>" so that the
# later cells can refer to it by a fixed name.
uploaded_name = list(img)[-1]
ext = os.path.splitext(uploaded_name)[1]
renamed = "input_image" + ext
os.rename(uploaded_name, renamed)
input_img = renamed

# Object Detection
Now let's run CenterNet on your image and see how it performs object detection!

In [None]:
# Run the detector on the uploaded image.
# NOTE(review): with `--debug 1` this presumably also writes ./ctdet.jpg, which the next cell opens — confirm in the detector.
ret = detector.run(input_img)

In [None]:
# Open ./ctdet.jpg; as the last expression of the cell it is rendered by the notebook.
Image.open("./ctdet.jpg")

# Visualization using Eigen-CAM

In [None]:
def eigencam(middle_layer, eigenvector_index=0):
    """
    Compute the Eigen-CAM heatmap of one layer.

    The layer's data is projected with ``get_2d_projection`` and the result
    is min-max scaled to [0, 1] (skipped when all values are equal).

    Parameters
    ----------
    middle_layer: nn.Variable
        The layer of interest; its ``.d`` array is used.
    eigenvector_index: int
        Forwarded to ``get_2d_projection`` to select the singular vector.
    Returns
    ----------
    heatmap: ndarray
        First entry of the (scaled) projection batch.
    """
    projected = get_2d_projection(middle_layer.d, eigenvector_index)
    highest = np.max(projected)
    lowest = np.min(projected)
    if highest != lowest:
        # Scaling uses the extrema of the whole batch, as before.
        projected = (projected - lowest) / (highest - lowest)
    return projected[0]

def get_2d_projection(activation_batch, eigenvector_index=0):
    """
    Project every sample's activations onto one right singular vector.

    Adapted from
    https://github.com/jacobgil/pytorch-grad-cam/blob/master/pytorch_grad_cam/utils/svd_on_activations.py

    Parameters
    ----------
    activation_batch: array-like
        Activations indexed as (sample, channel, *spatial). The input is
        not modified; NaN and infinite entries are treated as 0.
    eigenvector_index: int
        Row of ``VT`` to project onto (0 is the vector with the largest
        singular value). Must be smaller than
        min(number of spatial positions, number of channels).
    Returns
    ----------
    projections: ndarray
        float32 array of shape (sample, *spatial).
    """
    # Work on a copy: the caller usually passes the live ``.d`` buffer of a
    # network variable, which must not be altered by the clean-up below.
    cleaned = np.array(activation_batch, copy=True)
    cleaned[~np.isfinite(cleaned)] = 0

    projections = []
    for activations in cleaned:
        # (channel, *spatial) -> (positions, channel)
        flattened = activations.reshape(activations.shape[0], -1).transpose()
        # Centering before the SVD seems to be important here,
        # otherwise the image returned is negative
        flattened = flattened - flattened.mean(axis=0)
        # Only VT is needed. The reduced SVD avoids building the
        # (positions x positions) U matrix, which is huge for large maps.
        _, _, vt = np.linalg.svd(flattened, full_matrices=False)
        projection = flattened @ vt[eigenvector_index, :]
        projections.append(projection.reshape(activations.shape[1:]))
    return np.asarray(projections, dtype=np.float32)

In [None]:
def overlay_images(base_img, overlay_img, overlay_coef=1.0):
    """
    Blend a JET-coloured heatmap onto an image.

    Parameters
    ----------
    base_img: ndarray
        Image to draw on. Its first and third channels are swapped before
        blending and the result is swapped back, so an RGB input yields an
        RGB result (the colour map produced by OpenCV is in BGR order).
    overlay_img: ndarray
        Heatmap; it is resized to the width and height of ``base_img``.
    overlay_coef: float
        Weight of the coloured heatmap relative to the base image.
    Returns
    ----------
    ret_img: ndarray
        uint8 image with the same width and height as ``base_img``.
    """
    # resize (cv2.resize takes the target size as (width, height))
    heat = cv2.resize(
        overlay_img, (base_img.shape[1], base_img.shape[0]))

    # normalize to 0..255; an all-zero heatmap is kept as it is instead of
    # being divided by zero (which produced NaN before the uint8 cast)
    heat_peak = heat.max()
    if heat_peak != 0:
        heat = 255 * heat / heat_peak
    heat = heat.astype('uint8')

    # color adjust
    heat_colour = cv2.applyColorMap(heat, cv2.COLORMAP_JET)
    swapped_base = cv2.cvtColor(base_img, cv2.COLOR_BGR2RGB)

    # overlay; float() keeps the sum out of uint8 arithmetic (which would
    # wrap around) when an integer coefficient is passed
    blended = heat_colour * float(overlay_coef) + swapped_base
    blended_peak = blended.max()
    if blended_peak != 0:
        blended = 255 * blended / blended_peak
    blended = blended.astype('uint8')

    return cv2.cvtColor(blended, cv2.COLOR_BGR2RGB)

In [None]:
from collections import OrderedDict
class get_middle_variables:
    """
    Callable visitor that collects the first output of every function it sees.

    Each visited function ``f`` is stored in ``middle_vars_dict`` under the
    key ``"<f.name>_<n>"``, where ``n`` counts (from 1) how many functions
    with that name have been visited so far. The per-name counts are kept
    in ``middle_layer_count_dict``.
    """

    def __init__(self):
        self.middle_vars_dict = OrderedDict()
        self.middle_layer_count_dict = OrderedDict()

    def __call__(self, f):
        name = f.name
        occurrence = self.middle_layer_count_dict.get(name, 0) + 1
        self.middle_layer_count_dict[name] = occurrence
        self.middle_vars_dict[name + '_' + str(occurrence)] = f.outputs[0]

In [None]:
# Force three RGB channels: grayscale or palette images would otherwise load
# as 2-D arrays, which the colour conversion in overlay_images cannot handle.
img_orig = np.array(Image.open(input_img).convert("RGB"))

In [None]:
# Read the image again with OpenCV and run the network once so that its
# intermediate variables can be inspected afterwards.
image = cv2.imread(input_img)
# NOTE(review): pre_process presumably returns the network input batch and
# resize metadata — confirm in the detector implementation.
images, meta = detector.pre_process(image, opt.test_scales[0], None)
inputs = nn.Variable.from_numpy_array(images)
outputs = detector.model(inputs)

# Get middle layer activations

In [None]:
# Despite its name this is an instance: a visitor that stores the first output
# of every function it is called with under the key "<function name>_<n>".
GET_MIDDLE_VARIABLES_CLASS = get_middle_variables()
# Apply the visitor to the functions behind the first network output.
outputs[0].visit(GET_MIDDLE_VARIABLES_CLASS)
middle_vars = GET_MIDDLE_VARIABLES_CLASS.middle_vars_dict
# Shown as the cell result so a key can be picked for the cells below.
middle_vars

In [None]:
# One row of four panels: the original image followed by the Eigen-CAM
# overlays for the first three singular vectors.
fig = plt.figure(figsize=(20, 5))

ax = fig.add_subplot(1, 4, 1)
ax.imshow(img_orig)
ax.axis("off")
plt.title("original image")

# Spelled-out ordinals; formatting "{i+1}st" produced "2st" and "3st".
ordinals = ("1st", "2nd", "3rd")
for i, ordinal in enumerate(ordinals):
    ax = fig.add_subplot(1, 4, i+2)
    # NOTE(review): the key depends on the functions in the visited graph and
    # may differ for another backbone; pick one from `middle_vars`.
    heatmap = eigencam(middle_vars['ConvolutionCudaCudnn_36'], eigenvector_index=i) #set variables key
    overlaid_img = overlay_images(img_orig, heatmap)
    plt.title(f"using\n  {ordinal} singular vector")
    ax.imshow(overlaid_img)
    ax.axis("off")
plt.show()

# Real-time Visualization for CenterNet

In [None]:
def decode_img_str(img_str):
    """
    Decode a base64 data URL into an image array.

    The part after the first comma is base64-decoded (strictly validated),
    opened with PIL, converted to a uint8 array, and its first and third
    channels are swapped. PIL presumably decodes to RGB for the JPEG frames
    sent by the browser, which would make the result BGR.
    """
    payload = img_str.split(',')[1]
    raw_bytes = base64.b64decode(payload, validate=True)
    pixels = np.array(Image.open(BytesIO(raw_bytes)), dtype=np.uint8)
    return cv2.cvtColor(pixels, cv2.COLOR_BGR2RGB)

def encode_img(img):
    """
    Encode an image as a JPEG (quality 80) base64 data URL.

    The image is handed to ``cv2.imencode`` unchanged; the success flag
    returned by OpenCV is not checked.
    """
    jpeg_params = [int(cv2.IMWRITE_JPEG_QUALITY), 80]
    _ok, buffer = cv2.imencode(".jpg", img, jpeg_params)
    payload = base64.b64encode(buffer.tobytes()).decode('utf-8')
    return "data:image/jpeg;base64," + payload

In [None]:
import IPython
import base64
from google.colab import output
from io import BytesIO

def run(img_str):
    """
    Kernel-side callback: overlay the Eigen-CAM heatmap on one webcam frame.

    Parameters
    ----------
    img_str: str
        Frame as a base64 data URL.
    Returns
    ----------
    IPython.display.JSON
        Object whose ``img_str`` entry is the overlaid frame as a JPEG
        data URL.
    """
    # decode to image (channels are in the order produced by decode_img_str,
    # which swaps the PIL channels so the frame matches cv2.imread's layout)
    decimg = decode_img_str(img_str)

    ##### detection and visualization
    images, _ = detector.pre_process(decimg, opt.test_scales[0], None)
    inputs = nn.Variable.from_numpy_array(images)
    outputs = detector.model(inputs)

    collector = get_middle_variables()
    outputs[0].visit(collector)
    middle_vars = collector.middle_vars_dict

    # NOTE(review): layer key and singular-vector index differ from the static
    # example ('..._36', indices 0-2); kept as they were.
    heatmap = eigencam(middle_vars["ConvolutionCudaCudnn_35"], eigenvector_index=1)

    # overlay_images swaps the base image's channels before adding OpenCV's
    # colour map and swaps the result back, i.e. it works RGB in / RGB out.
    # Passing the swapped frame directly mixed the two channel orders and
    # exchanged red and blue in the heatmap. Convert around the call so that
    # both the frame and the colour map reach cv2.imencode in the same order.
    frame_rgb = cv2.cvtColor(decimg, cv2.COLOR_BGR2RGB)
    out_rgb = overlay_images(frame_rgb, heatmap)
    out_img = cv2.cvtColor(out_rgb, cv2.COLOR_RGB2BGR)

    #encode to string
    img_str = encode_img(out_img)

    return IPython.display.JSON({'img_str': img_str})

# Expose run() to the browser under the name 'notebook.run', the name the webcam JavaScript invokes.
output.register_callback('notebook.run', run)

In [None]:
from IPython.display import display, Javascript
from google.colab.output import eval_js

def use_webcam(quality=0.8):
  """
  Show a live webcam view next to its Eigen-CAM overlay in the cell output.

  Injects a JavaScript routine that captures frames from the browser's
  camera, sends one frame at a time to the kernel callback registered as
  'notebook.run' and draws the returned image beside the mirrored source
  frame. One button per camera allows switching devices; 'Exit' stops the
  loop and releases the camera.

  Parameters
  ----------
  quality: float
      JPEG quality passed to ``canvas.toDataURL`` for the frames sent to
      the kernel.
  """
  js = Javascript('''
    async function useCam(quality) {
      const div = document.createElement('div');
      document.body.appendChild(div);

      // camera buttons
      var current_deviceId = "test";
      var new_deviceId = "test";
      const camera_div = document.createElement('div');
      document.body.appendChild(camera_div);

      // release every track of a stream (turns the camera off)
      function stopStream(s) {
        s.getTracks().forEach(function(track) { track.stop(); });
      }

      //video element
      const video = document.createElement('video');
      video.style.display = 'None';
      let stream = await navigator.mediaDevices.getUserMedia({video: { deviceId: current_deviceId } });
      div.appendChild(video);
      video.srcObject = stream;
      await video.play();

      //get deviceIds. Done after getUserMedia because browsers hide device
      //ids until camera permission has been granted, and awaited so that the
      //buttons exist before the output height is measured below.
      try {
        const devices = await navigator.mediaDevices.enumerateDevices();
        devices.forEach(function(device, i) {
            //one button per camera (audio devices are skipped)
            if (device.kind == "videoinput" && device.deviceId != "") {
              const camera_btn = document.createElement('button');
              camera_btn.textContent = "camera" + i;
              camera_btn.onclick = function() {
                new_deviceId = device.deviceId
              };
              camera_div.appendChild(camera_btn);
            }
        });
      } catch (err) {
        console.log(err.name + ": " + err.message);
      }

      //canvas for display. frame rate is depending on display size and jpeg quality.
      const display_size = 500
      const src_canvas = document.createElement('canvas');
      src_canvas.width  = display_size;
      src_canvas.height = display_size * video.videoHeight / video.videoWidth;
      const src_canvasCtx = src_canvas.getContext('2d');
      src_canvasCtx.translate(src_canvas.width, 0);
      src_canvasCtx.scale(-1, 1);
      div.appendChild(src_canvas);

      const dst_canvas = document.createElement('canvas');
      dst_canvas.width  = src_canvas.width;
      dst_canvas.height = src_canvas.height;
      const dst_canvasCtx = dst_canvas.getContext('2d');
      div.appendChild(dst_canvas);

      //exit button
      const btn_div = document.createElement('div');
      document.body.appendChild(btn_div);
      const exit_btn = document.createElement('button');
      exit_btn.textContent = 'Exit';
      // true while the loop should keep running; Exit sets it to false
      var exit_flg = true
      exit_btn.onclick = function() {exit_flg = false};
      btn_div.appendChild(exit_btn);


      // Resize the output to fit the video element.
      google.colab.output.setIframeHeight(document.documentElement.scrollHeight, true);

      // number of frames currently being processed by the kernel (0 or 1)
      var send_num = 0
      // loop
      _canvasUpdate();
      async function _canvasUpdate() {

            src_canvasCtx.drawImage(video, 0, 0, video.videoWidth, video.videoHeight, 0, 0, src_canvas.width, src_canvas.height);
            if (send_num<1){
                send_num += 1
                const img = src_canvas.toDataURL('image/jpeg', quality);
                google.colab.kernel.invokeFunction('notebook.run', [img], {})
                .then(function(value) {
                    const image = new Image()
                    image.onload = function(){dst_canvasCtx.drawImage(image, 0, 0)}
                    image.src = value.data["application/json"]["img_str"];
                })
                .catch(function(err) {
                    console.log(err);
                })
                .then(function() {
                    // free the slot even when the kernel call failed,
                    // otherwise no further frame would ever be sent
                    send_num -= 1
                });
            }
            if (exit_flg){
                requestAnimationFrame(_canvasUpdate);
            }else{
                stopStream(stream);
            }
            if (exit_flg && new_deviceId != current_deviceId) {
              console.log("change camera!");
              current_deviceId = new_deviceId;
              // release the previous camera before opening the next one
              stopStream(stream);
              stream = await navigator.mediaDevices.getUserMedia({video: { deviceId: current_deviceId } });
              if (exit_flg) {
                video.srcObject = stream;
                await video.play();
              } else {
                // Exit was pressed while the new camera was opening
                stopStream(stream);
              }
            }
      };
    }
    ''')
  display(js)
  eval_js('useCam({})'.format(quality))

In [None]:
# Start the live demo with the default JPEG quality; the browser needs access to a camera.
use_webcam()