<a href="https://colab.research.google.com/github/stevearonson/VB-Video-Tracking/blob/master/Detectron2_with_DeepSort.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install Detectron2

In [1]:
# install dependencies: (use cu101 because colab has CUDA 10.1)
!pip install -U torch==1.5 torchvision==0.6 -f https://download.pytorch.org/whl/cu101/torch_stable.html 
!pip install pyyaml==5.1 pycocotools>=2.0.1
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())
!gcc --version
# opencv is pre-installed on colab

Looking in links: https://download.pytorch.org/whl/cu101/torch_stable.html
Requirement already up-to-date: torch==1.5 in /usr/local/lib/python3.6/dist-packages (1.5.0+cu101)
Requirement already up-to-date: torchvision==0.6 in /usr/local/lib/python3.6/dist-packages (0.6.0+cu101)
1.5.0+cu101 True
gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.



In [2]:
# Install the prebuilt detectron2 wheel from the index matching CUDA 10.1 / torch 1.5.
!pip install detectron2==0.1.3 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.5/index.html

Looking in links: https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.5/index.html


In [3]:
import numpy as np
import pandas as pd
import random

import cv2
from google.colab.patches import cv2_imshow
from tqdm import tqdm 


## Access My Google Drive

In [4]:
# Mount Google Drive at /gdrive so the video files and checkpoint stored there are reachable.
from google.colab import drive
drive.mount('/gdrive')
# List the project folder to confirm the mount worked and the expected files are present.
!ls '/gdrive/My Drive/VB/Video'

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).
 Cheshire_Halasz_Perin.pdf   court-diagram-vertical.jpg   panopt.mp4
 Cheshire_Halasz_Perin.zip   demo.avi			  people.mp4
 ckpt.t7		    'MBVF M7 S3.mp4'		  tracker.mp4
 court-diagram.jpg	     OpenCV.ipynb		  video-clip.mp4


## Install DeepSort

In [5]:
# Clone the Detectron2 + DeepSort integration repo together with its submodules.
!git clone --recurse-submodules https://github.com/sayef/detectron2-deepsort-pytorch.git
# Copy the DeepSort checkpoint (ckpt.t7) from Drive into the repo's checkpoint directory.
!cp '/gdrive/My Drive/VB/Video/ckpt.t7' detectron2-deepsort-pytorch/deep_sort/deep/checkpoint/

Cloning into 'detectron2-deepsort-pytorch'...
remote: Enumerating objects: 374, done.[K
remote: Total 374 (delta 0), reused 0 (delta 0), pack-reused 374[K
Receiving objects: 100% (374/374), 18.97 MiB | 28.40 MiB/s, done.
Resolving deltas: 100% (183/183), done.
Submodule 'detectron2' (https://github.com/facebookresearch/detectron2.git) registered for path 'detectron2'
Cloning into '/content/detectron2-deepsort-pytorch/detectron2'...
remote: Enumerating objects: 6065, done.        
remote: Total 6065 (delta 0), reused 0 (delta 0), pack-reused 6065        
Receiving objects: 100% (6065/6065), 2.77 MiB | 14.98 MiB/s, done.
Resolving deltas: 100% (4407/4407), done.
Submodule path 'detectron2': checked out 'eef3ab14c0777da7114e25689c703bb5d6094737'


In [6]:
# Rename the clone so it can be imported as a Python package: hyphens are not
# valid in module names, and later cells import detectron2_deepsort_pytorch.*
!mv detectron2-deepsort-pytorch detectron2_deepsort_pytorch

## Run the Detectron2-DeepSort Demo

In [None]:
%cd detectron2-deepsort-pytorch
!python demo_detectron2_deepsort.py '/gdrive/My Drive/VB/Video/video-clip.mp4' --ignore_display
!cp demo.avi '/gdrive/My Drive/VB/Video'

Flow of video processing:

1. Iterate over video frames
2. Detect objects in frame
3. Identify players (people) inside court playing area
4. Move player detection data into deep sort data structure
5. Update deepsort algorithm
6. Draw bbox on original frame
7. Map bbox to player location on court diagram
8. Insert court diagram into original frame
9. Write new frame to output video file

## Create a detection class using Detectron2

In [34]:
# from detectron2.utils.logger import setup_logger
# setup_logger()

import detectron2
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg


class Detectron2:
    """Person detector built on a pretrained Detectron2 model-zoo network.

    The model is created once in ``__init__``; ``detect`` runs it on a single
    frame and returns only the detections whose class id equals
    ``PERSON_CLASS_ID``.
    """

    # Class id kept by detect(). NOTE(review): 0 is assumed to be "person" in
    # the COCO label order used by the model zoo - confirm if the model changes.
    PERSON_CLASS_ID = 0

    def __init__(self,
                 config_path="COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml",
                 score_thresh=0.5):
        """Build the predictor.

        Args:
            config_path: model-zoo config name used for both the config file
                and the pretrained weights.
            score_thresh: minimum detection score kept by the model at test time.
        """
        self.cfg = get_cfg()
        self.cfg.merge_from_file(model_zoo.get_config_file(config_path))
        self.cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh  # set threshold for this model
        self.cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_path)
        self.predictor = DefaultPredictor(self.cfg)

    def bbox(self, img):
        """Return the tight bounding box ``(cmin, rmin, cmax, rmax)`` of the
        non-zero entries of a 2-D array.

        Raises:
            IndexError: if ``img`` has no non-zero entry.
        """
        rows = np.any(img, axis=1)
        cols = np.any(img, axis=0)
        rmin, rmax = np.where(rows)[0][[0, -1]]
        cmin, cmax = np.where(cols)[0][[0, -1]]
        return cmin, rmin, cmax, rmax

    def detect(self, im):
        """Run the predictor on one frame and return the person detections.

        Args:
            im: image passed unchanged to the predictor.

        Returns:
            Tuple ``(bbox_xcycwh, cls_conf, cls_ids)``:
            ``bbox_xcycwh`` is a float64 array of shape ``(N, 4)`` holding
            centre-x, centre-y, width and height of each kept box;
            ``cls_conf`` and ``cls_ids`` are the matching scores and class ids.
            When nothing is kept the box array still has shape ``(0, 4)`` so
            callers can index it with two indices.
        """
        instances = self.predictor(im)["instances"]
        boxes = instances.pred_boxes.tensor.cpu().numpy()
        classes = instances.pred_classes.cpu().numpy()
        scores = instances.scores.cpu().numpy()

        keep = classes == self.PERSON_CLASS_ID
        # reshape guards the zero-detection case so the unpacking below works
        x0, y0, x1, y1 = boxes[keep].reshape(-1, 4).T

        # corner format (x0, y0, x1, y1) -> centre/size format
        bbox_xcycwh = np.stack(
            [(x1 + x0) / 2, (y1 + y0) / 2, x1 - x0, y1 - y0], axis=1
        ).astype(np.float64)

        return bbox_xcycwh, scores[keep], classes[keep]


## Create class for handling court geometries

In [56]:
from shapely.geometry import Point, Polygon

class Court:
    """Court geometry: relates video-frame pixels to a top-down court diagram.

    Holds the court outline and playing-area polygon in video pixel
    coordinates, the matching court corners on the diagram image, and the
    homography that maps the former onto the latter.
    """

    def __init__(self):

        # court outline as it appears in the video frame (pixel coordinates)
        self.video_court = np.array([[[467, 249], [781, 242],
                                      [1274, 588], [630, 656], [3, 619]]])
        # polygon (video pixels) used by in_playing_area() to accept detections
        self.video_playing = Polygon(np.array([[426, 235], [824, 230],
                                               [1272, 512], [1253, 714],
                                               [9, 713], [15, 470]]))
        # the same court points expressed in diagram-image coordinates
        self.diagram_court = np.array([[[100, 100], [400, 100], [400, 700],
                                        [250, 700], [100, 700]]])
        # homography video -> diagram; findHomography returns (matrix, mask)
        self.M = cv2.findHomography(self.video_court, self.diagram_court)[0]

        self.court_diagram_file = '/gdrive/My Drive/VB/Video/court-diagram-vertical.jpg'
        # (path, image) of the last diagram read from disk, so the file is not
        # decoded again for every video frame
        self._court_diagram_cache = (None, None)

        # defaults for drawing on video frames
        self.font = cv2.FONT_HERSHEY_SIMPLEX
        self.fontScale = 1

        self.radius = 10
        self.color = [255, 0, 0]
        self.thickness = 2

    def draw_court(self, frame):
        """Draw the closed court outline on ``frame`` and return the result of
        ``cv2.polylines``."""
        return cv2.polylines(frame, [self.video_court], isClosed=True,
                             color=self.color, thickness=self.thickness)

    def in_playing_area(self, bbox_xcycwh):
        """Return True when the bottom-centre of a box lies inside the playing area.

        Args:
            bbox_xcycwh: one box as (centre-x, centre-y, width, height).
        """
        xc, yc, w, h = bbox_xcycwh
        # bottom-centre of the box: centre shifted down by half the height
        player_pos = (xc, yc + h/2)
        return Point(player_pos).within(self.video_playing)

    def map_pos_to_diagram(self, player_positions):
        """Project video-frame positions onto the court diagram.

        Args:
            player_positions: array-like of ``(x, y)`` video coordinates,
                shape ``(N, 2)``.

        Returns:
            Integer array of shape ``(N, 2)`` with diagram coordinates. The
            result is always 2-D, including for a single position, so callers
            can iterate over it point by point.
        """
        src_pts = np.asarray(player_positions, dtype='float32').reshape(1, -1, 2)
        if src_pts.shape[1] == 0:
            return np.empty((0, 2), dtype='int')
        dst_pts = cv2.perspectiveTransform(src_pts, self.M)
        # reshape rather than squeeze: squeeze would collapse a single point
        # to shape (2,)
        return dst_pts.reshape(-1, 2).astype('int')

    def _load_court_diagram(self):
        """Return a fresh copy of the court diagram image, reading the file
        only when the configured path has not been loaded yet."""
        cached_path, cached_image = self._court_diagram_cache
        if cached_image is None or cached_path != self.court_diagram_file:
            cached_image = cv2.imread(self.court_diagram_file)
            if cached_image is None:
                # cv2.imread signals failure by returning None
                raise FileNotFoundError(
                    'Could not read court diagram: ' + self.court_diagram_file)
            self._court_diagram_cache = (self.court_diagram_file, cached_image)
        return cached_image.copy()

    def create_mini_map(self, court_positions, identities):
        """Render the court diagram with each identity written at its position.

        Args:
            court_positions: iterable of ``(x, y)`` diagram coordinates.
            identities: iterable of labels, paired with ``court_positions``.

        Returns:
            The annotated diagram resized to 250 x 400 (width x height).

        Raises:
            FileNotFoundError: if the court diagram image cannot be read.
        """
        court_diagram = self._load_court_diagram()

        for pt, tag in zip(court_positions, identities):
            # plain Python ints, so the point is accepted regardless of the
            # numpy integer type of the coordinates
            origin = tuple(int(v) for v in pt)
            cv2.putText(court_diagram, str(tag), origin, self.font, self.fontScale,
                        self.color, self.thickness, cv2.LINE_AA)

        mini_map = cv2.resize(court_diagram, (250, 400), interpolation=cv2.INTER_AREA)

        return mini_map


## Main Loop

In [9]:
# Time log of the rallies in the video: one row per rally, with the rally's
# start offset into the video and its duration.
time_log = pd.DataFrame({'Rally': [1, 2, 3, 4]}).assign(
    Start=pd.to_timedelta(['00:00:11', '00:00:33', '00:00:52', '00:01:18']),
    Length=pd.to_timedelta(['00:00:06', '00:00:08', '00:00:14', '00:00:11']),
)

In [36]:
# Sanity check: list the working directory to confirm the renamed clone is present.
!ls

'=2.0.1'   detectron2_deepsort_pytorch	 sample_data


In [57]:
import importlib  

from detectron2_deepsort_pytorch.deep_sort import DeepSort
from detectron2_deepsort_pytorch.util import draw_bboxes

# Open the input video and create an output writer with the same frame size and rate.
cap = cv2.VideoCapture('/gdrive/My Drive/VB/Video/MBVF M7 S3.mp4')
if not cap.isOpened():
    raise IOError('Could not open input video')
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
frames_per_second = cap.get(cv2.CAP_PROP_FPS)
num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', 'V')
videoOut = cv2.VideoWriter('/gdrive/My Drive/VB/Video/tracker.mp4', fourcc,
                           frames_per_second, (width, height))

# Named `detector` rather than `detectron2` so the instance does not shadow the
# imported detectron2 module.
detector = Detectron2()
deepsort = DeepSort('detectron2_deepsort_pytorch/deep_sort/deep/checkpoint/ckpt.t7', use_cuda=True)
court = Court()

video_time = 0   # seconds of video consumed so far
all_pos = []     # placeholder for collected player positions (not populated yet)
end_of_video = False

try:
    for _, rally in time_log.iterrows():

        # skip to the beginning of the rally, copying the frames through unchanged
        skip_frames = int((rally['Start'].seconds - video_time) * frames_per_second)
        for _ in range(skip_frames):
            ret, frame = cap.read()
            if not ret:
                # ran out of frames before the rally started
                end_of_video = True
                break
            videoOut.write(frame)
        if end_of_video:
            break
        video_time = rally['Start'].seconds

        # process rally frames
        rally_frames = int(rally['Length'].seconds * frames_per_second)
        for _ in tqdm(range(rally_frames)):
            ret, frame = cap.read()
            if not ret:
                end_of_video = True
                break

            bbox_xcycwh, cls_conf, cls_ids = detector.detect(frame)
            # normalise to (N, 4) so the zero-detection case can be indexed below
            bbox_xcycwh = np.asarray(bbox_xcycwh, dtype=np.float64).reshape(-1, 4)
            cls_conf = np.asarray(cls_conf)

            # add the court outline
            frame_poly = court.draw_court(frame)

            # boolean mask of the detections that are players inside the playing area
            in_play = np.array([court.in_playing_area(bbox) for bbox in bbox_xcycwh],
                               dtype=bool)

            # limit detections to players
            player_xcycwh = bbox_xcycwh[in_play]
            player_conf = cls_conf[in_play]

            outputs = deepsort.update(player_xcycwh, player_conf, frame)
            if len(outputs) > 0:
                bbox_xyxy = outputs[:, :4]
                identities = outputs[:, -1]
                frame_poly = draw_bboxes(frame_poly, bbox_xyxy, identities)

                # create the mini map: each player is placed at the
                # bottom-centre of their tracked box
                video_pos = np.array([[(x1 + x2) / 2, y2] for x1, y1, x2, y2 in bbox_xyxy])
                # reshape keeps a single tracked player as one (x, y) row
                diagram_pos = np.asarray(court.map_pos_to_diagram(video_pos)).reshape(-1, 2)
                miniMap = court.create_mini_map(diagram_pos, identities)

                # insert the mini map into the top-left corner of the frame
                map_rows, map_cols = miniMap.shape[:2]
                frame_poly[0:map_rows, 0:map_cols, :] = miniMap

            videoOut.write(frame_poly)

        if end_of_video:
            break
        video_time += rally['Length'].seconds
finally:
    # always release, so the output file is finalised even if processing fails
    videoOut.release()
    cap.release()

Loading weights from detectron2_deepsort_pytorch/deep_sort/deep/checkpoint/ckpt.t7... Done!


100%|██████████| 168/168 [01:25<00:00,  1.97it/s]
100%|██████████| 224/224 [01:56<00:00,  1.93it/s]
100%|██████████| 393/393 [03:24<00:00,  1.92it/s]
100%|██████████| 309/309 [02:40<00:00,  1.93it/s]
