<a href="https://colab.research.google.com/github/mzdwedar/Bicycle-Tracking/blob/main/bicycle_tracking.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! pip install norfair

In [2]:
!pip install pyyaml==5.1

import torch
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)

!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/$CUDA_VERSION/torch$TORCH_VERSION/index.html

Collecting pyyaml==5.1
  Downloading PyYAML-5.1.tar.gz (274 kB)
[?25l[K     |█▏                              | 10 kB 25.7 MB/s eta 0:00:01[K     |██▍                             | 20 kB 8.5 MB/s eta 0:00:01[K     |███▋                            | 30 kB 7.4 MB/s eta 0:00:01[K     |████▉                           | 40 kB 7.0 MB/s eta 0:00:01[K     |██████                          | 51 kB 5.1 MB/s eta 0:00:01[K     |███████▏                        | 61 kB 5.2 MB/s eta 0:00:01[K     |████████▍                       | 71 kB 5.4 MB/s eta 0:00:01[K     |█████████▋                      | 81 kB 6.0 MB/s eta 0:00:01[K     |██████████▊                     | 92 kB 4.9 MB/s eta 0:00:01[K     |████████████                    | 102 kB 5.4 MB/s eta 0:00:01[K     |█████████████▏                  | 112 kB 5.4 MB/s eta 0:00:01[K     |██████████████▍                 | 122 kB 5.4 MB/s eta 0:00:01[K     |███████████████▌                | 133 kB 5.4 MB/s eta 0:00:01[K     |█████

In [3]:
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

import numpy as np
import os, cv2 
  
from typing import List

from norfair import Detection, Tracker, Video, draw_tracked_objects, draw_boxes

from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

In [4]:
# Build a Detectron2 predictor from the COCO-pretrained RetinaNet R101-FPN (3x) model.
MODEL_CONFIG = "COCO-Detection/retinanet_R_101_FPN_3x.yaml"

cfg = get_cfg()

cfg.merge_from_file(model_zoo.get_config_file(MODEL_CONFIG))
# RetinaNet reads its inference score threshold from MODEL.RETINANET, not from
# MODEL.ROI_HEADS (which only applies to R-CNN style models), so the threshold
# has to be set on the RETINANET node to have any effect.
cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.5

# Use the pretrained checkpoint that belongs to the same model-zoo config.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MODEL_CONFIG)

predictor = DefaultPredictor(cfg)

Loading config /usr/local/lib/python3.7/dist-packages/detectron2/model_zoo/configs/COCO-Detection/../Base-RetinaNet.yaml with yaml.unsafe_load. Your machine may be at risk if the file contains malicious content.
model_final_971ab9.pkl: 228MB [00:05, 40.2MB/s]                           
The checkpoint state_dict contains keys that are not used by the model:
  [35mpixel_mean[0m
  [35mpixel_std[0m


In [56]:
def centroid_distance(detection, tracked_object):
    """
    Distance function handed to the Norfair tracker.

    Subtracts the tracked object's `estimate` from the detection's `points`
    and returns `np.linalg.norm` of that difference (called with default
    arguments, so the norm is taken over the whole difference array).

    args:
      detection: object exposing a `points` array
      tracked_object: object exposing an `estimate` array

    returns:
      the norm of `detection.points - tracked_object.estimate`
    """
    offset = detection.points - tracked_object.estimate
    return np.linalg.norm(offset)

def get_norfair_detections(retina, target_class=1, score_threshold=0.52):
  """
  Keep only the detections of one class and convert them to Norfair
  'Detection' objects.

  args:
    retina: predictor output; retina['instances'] must expose
      `pred_boxes.tensor`, `scores` and `pred_classes`
    target_class: class index to keep (default 1, which this notebook
      treats as the bicycle class)
    score_threshold: minimum score (inclusive) a detection needs to be kept

  returns:
    the number of kept objects
    the list of Detection objects; each one holds the box corners
    [[xmin, ymin], [xmax, ymax]] as `points` and the detection score
    repeated once per corner as `scores`
  """
  instances = retina['instances']
  bboxes = instances.pred_boxes.tensor.cpu().numpy()
  scores = instances.scores.cpu().numpy()
  classes = instances.pred_classes.cpu().numpy()

  norfair_detections: List[Detection] = []

  for box, score, cls in zip(bboxes, scores, classes):
    if cls == target_class and score >= score_threshold:

      # [[xmin, ymin], [xmax, ymax]]
      corners = np.array(
          [
              [float(box[0]), float(box[1])],
              [float(box[2]), float(box[3])]
          ]
      )
      # One score per corner point. A separate name is used on purpose so the
      # `scores` array that is being iterated is never rebound inside the loop.
      corner_scores = np.array([float(score), float(score)])
      norfair_detections.append(
          Detection(points=corners, scores=corner_scores)
      )

  return len(norfair_detections), norfair_detections

In [None]:
# Run detection + tracking over the input clip and write the annotated frames out.
video = Video(input_path="./sample_10s.mp4")
tracker = Tracker(distance_function=centroid_distance, distance_threshold=20)

font = cv2.FONT_HERSHEY_SIMPLEX

for frame in video:
    # DefaultPredictor is documented to take an image in BGR channel order and
    # does any conversion the model needs itself. The frame is therefore passed
    # as-is; converting it to RGB first would feed the model swapped channels.
    retina_detections = predictor(frame)
    counts, detections = get_norfair_detections(retina_detections)

    # Overlay the number of objects detected in this frame.
    cv2.putText(frame,
                f'Counts: {counts}',
                (50, 50),
                font, 1,
                (0, 255, 255),
                2,
                cv2.LINE_4)

    # Update the tracker with this frame's detections, then draw both the raw
    # detection boxes and the tracked objects before writing the frame.
    tracked_objects = tracker.update(detections=detections)
    draw_boxes(frame, detections)
    draw_tracked_objects(frame, tracked_objects)
    video.write(frame)