## Import Libraries

In [2]:
# Extra packages needed by ByteTrack, installed quietly (-qq).
# NOTE(review): $PIP_INSTALL is not defined anywhere in this notebook; it is
# presumably a pip command line provided by the environment — confirm it is set.
!$PIP_INSTALL -qq loguru thop lap tqdm cython_bbox

In [4]:
# (Re)load the watermark and autoreload extensions, then turn on autoreload mode 2.
%reload_ext watermark
%reload_ext autoreload
%autoreload 2
# Print the installed versions of the packages listed after -p.
%watermark -p jupyterlab,ipykernel,IPython
%watermark -p numpy,sklearn,pandas
%watermark -p ipywidgets,cv2,PIL,matplotlib,plotly,netron
%watermark -p torch,torchvision,torchaudio

# Show matplotlib figures inline in the cell output (widget backend left disabled).
%matplotlib inline
# %matplotlib widget

import sys, os, io, logging, time, random, math
import json, base64, requests, shutil, gc
import argparse, shlex, signal
import numpy as np
import cv2
import torch
import matplotlib.pyplot as plt
from tqdm import tqdm
from dataclasses import dataclass
from cython_bbox import bbox_overlaps as bbox_ious

jupyterlab: 4.0.11
ipykernel : 6.29.0
IPython   : 8.20.0

numpy  : 1.26.3
sklearn: 1.4.0
pandas : 2.2.0

ipywidgets: 8.1.1
cv2       : 4.9.0
PIL       : 10.2.0
matplotlib: 3.8.2
plotly    : 5.18.0
netron    : 7.3.9

torch      : 2.1.1+cu121
torchvision: 0.16.1+cu121
torchaudio : 2.1.1+cu121



In [5]:
# Notebook-wide formatting for printed NumPy arrays.
np.set_printoptions(
    precision=6,     # digits shown after the decimal point
    suppress=True,   # fixed-point output instead of scientific notation for small values
    threshold=100,   # arrays with more elements than this are summarised
    edgeitems=3,     # items kept at each end of a summarised dimension
    linewidth=75,    # characters per printed line
    nanstr='nan',
    infstr='inf',
    formatter=None,
)


def _DIR(x, dumps=True, ret=True):
    attrs = sorted([y for y in dir(x) if not y.startswith('_')])
    result = '%s: %s' % (str(type(x))[8:-2], json.dumps(attrs) if dumps else attrs)
    if ret:
        return result

## Global Settings

In [6]:
# Project layout. The root can be overridden through the PROJ_TOP_PATH
# environment variable; without it the original hard-coded location is used.
PROJ_TOP_PATH = os.environ.get('PROJ_TOP_PATH', '/data/source/hzcsai_com/hzcsbet')
DATASETS_PATH = f'{PROJ_TOP_PATH}/gamebet/datasets'
EXTERNAL_LIBS_PATH = f'{PROJ_TOP_PATH}/gamebet/thirdparts'
TEST_VIDEO_PATH = f'{DATASETS_PATH}/0bfacc_5.mp4'
TEST_OUTPUT_PATH = f'{DATASETS_PATH}/track_output.mp4'
YOLO_WEIGHTS_PATH = f'{PROJ_TOP_PATH}/gamebet/checkpoints/gamebet_yolov8x.pt'

# Make the bundled ByteTrack / ultralytics checkouts importable. The membership
# check keeps the cell idempotent: re-running it does not append duplicates.
for _lib_name in ('ByteTrack', 'ultralytics'):
    _lib_path = f'{EXTERNAL_LIBS_PATH}/{_lib_name}'
    if _lib_path not in sys.path:
        sys.path.append(_lib_path)

In [7]:
from yolox.tracker.byte_tracker import BYTETracker, STrack
from ultralytics import YOLO

In [8]:
# Class index -> label. draw_detections uses it to turn a predicted class
# index into the text drawn next to a box.
IND_TO_CLS = {
    0: "ball",
    1: "player",
    2: "referee",
    3: "goalkeeper",
}

# One colour triple per class label.
# NOTE(review): nothing in the visible notebook reads COLORS; the channel order
# is presumably BGR like the other OpenCV colours here — confirm before use.
COLORS = {
    "ball": (0, 200, 200),
    "player": (255, 0, 0),
    "goalkeeper": (255, 0, 255),
    "referee": (0, 0, 255),
}

# Colour ranges scanned by detect_color_cv2. Each entry is (lower, colour, upper):
# lower and upper are handed to cv2.inRange as the range bounds, and the middle
# triple is the value detect_color_cv2 returns for that range (draw_detections
# then draws with it).
# NOTE(review): values are presumably in OpenCV's BGR channel order — confirm.
BOUNDARIES = [
    ([43, 31, 4], [128, 0, 0], [250, 88, 50]),        # blue
    ([0, 100, 0], [0, 128, 0], [50, 255, 50]),        # green
    ([17, 15, 100], [0, 0, 255], [50, 56, 200]),      # red
    ([192, 192, 0], [192, 192, 0], [255, 255, 128]),  # cyan
    ([192, 0, 192], [192, 0, 192], [255, 128, 255]),  # magenta
    ([0, 192, 192], [0, 192, 192], [128, 255, 255]),  # yellow
    ([0, 0, 0], [0, 0, 0], [50, 50, 50]),             # black
    ([187, 169, 112], [255, 255, 255], [255, 255, 255]),  # white
]

## YOLOv8

In [18]:
def get_video_frames(video_path):
    """Read every frame of a video into memory.

    Prints the width, height, fps and frame count reported by OpenCV, then
    reads frames until a read fails.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        list: The frames in reading order; empty when nothing could be read.
    """
    video = cv2.VideoCapture(video_path)
    frames = []
    try:
        print('width: %d, height: %d, fps: %d, count: %d' % (
            video.get(cv2.CAP_PROP_FRAME_WIDTH),
            video.get(cv2.CAP_PROP_FRAME_HEIGHT),
            video.get(cv2.CAP_PROP_FPS),
            video.get(cv2.CAP_PROP_FRAME_COUNT)))
        while video.isOpened():
            success, frame = video.read()
            if not success:
                break
            frames.append(frame)
    finally:
        # Release the capture handle even if reading or appending raises
        # (for example when the decoded frames exhaust memory).
        video.release()
    return frames

def detect_color_cv2(image):
    """Pick the BOUNDARIES colour whose range covers the most pixels of ``image``.

    Every ``(lower, colour, upper)`` entry of ``BOUNDARIES`` is applied with
    ``cv2.inRange`` and the number of matching *pixels* is counted on the
    resulting mask. Counting on the mask (rather than on the masked image)
    keeps pure-black pixels from being counted as zero and stops a pixel from
    being counted once per non-zero channel.

    Args:
        image: Image crop to classify (presumably a BGR array cut from an
            OpenCV frame — confirm against the caller).

    Returns:
        The middle ``colour`` triple of the best-matching entry. When the crop
        is empty or no range matches any pixel, the colour of the last
        ``BOUNDARIES`` entry is returned.
    """
    fallback_color = BOUNDARIES[-1][1]
    # A degenerate box (zero width or height) yields an empty crop; there is
    # nothing to measure, so skip OpenCV entirely.
    if image is None or image.size == 0:
        return fallback_color

    best_pixels = 0
    best_color = fallback_color
    for lower, display_color, upper in BOUNDARIES:
        mask = cv2.inRange(image, np.array(lower), np.array(upper))
        matched_pixels = np.count_nonzero(mask)
        # Strict comparison: on a tie the earlier entry wins.
        if matched_pixels > best_pixels:
            best_pixels = matched_pixels
            best_color = display_color
    return best_color

def draw_detections(image, detections, draw_tacker_id: bool = False):
    """Return a copy of ``image`` with one box and one label per detection.

    The box colour comes from ``detect_color_cv2`` applied to the detection's
    crop. The crop is taken from the untouched input ``image`` rather than
    from the copy being drawn on, so boxes and labels already drawn for
    earlier detections cannot leak into the colour estimate of later,
    overlapping ones.

    Args:
        image: Frame to annotate; it is not modified.
        detections: Iterable of per-box predictions exposing ``boxes.xyxy``
            and ``boxes.cls`` (and ``tracker_id`` when ``draw_tacker_id`` is
            true).
        draw_tacker_id: When true the label is ``str(pred.tracker_id)``;
            otherwise it is the class name from ``IND_TO_CLS``.

    Returns:
        The annotated copy of ``image``.
    """
    annotated = image.copy()
    for pred in detections:
        x1, y1, x2, y2 = pred.boxes.xyxy.int().tolist()[0]
        color = detect_color_cv2(image[y1:y2, x1:x2])
        # Resolve the class name only when it is actually drawn.
        if draw_tacker_id:
            label = str(pred.tracker_id)
        else:
            label = IND_TO_CLS[pred.boxes.cls.int().item()]
        cv2.rectangle(img=annotated, pt1=(x1, y1), pt2=(x2, y2), color=color, thickness=3)
        cv2.putText(annotated, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 3)
    return annotated

In [10]:
# Read the whole test clip into memory; the trailing expression shows how many
# frames were read.
test_frames = get_video_frames(TEST_VIDEO_PATH)
len(test_frames)

width: 1920, height: 1080, fps: 25, count: 750


750

In [11]:
# Build the detector from the checkpoint at YOLO_WEIGHTS_PATH.
yolo_model = YOLO(YOLO_WEIGHTS_PATH)

In [12]:
# Smoke-test the detector on the first frame, passing conf=0.15 (the same value
# the tracking loop passes).
test_0_prediction = yolo_model(test_frames[0], conf=0.15)


0: 384x640 19 players, 2 referees, 214.5ms
Speed: 13.9ms preprocess, 214.5ms inference, 935.4ms postprocess per image at shape (1, 3, 384, 640)


In [57]:
# image = draw_detections(test_frames[0], test_0_prediction[0])
# 
# plt.figure(figsize=(20, 20))
# plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
# plt.grid(False)
# plt.show()

In [86]:
# Drop the reference to the detector, run the garbage collector and ask PyTorch
# to empty its CUDA cache.
# NOTE(review): the ByteTrack loop later in this notebook calls yolo_model(...);
# when the cells are run top to bottom this cell leaves yolo_model as None
# before that loop is reached.
yolo_model = None
gc.collect()
torch.cuda.empty_cache()

## ByteTrack

In [13]:
@dataclass(frozen=True)
class BYTETrackerArgs:
    """Immutable bundle of settings handed to ``BYTETracker``.

    NOTE(review): the field names presumably mirror the attributes that
    ``yolox.tracker.byte_tracker.BYTETracker`` reads from its ``args``
    object — confirm against the ByteTrack source before renaming or
    retuning any of them.
    """
    track_thresh: float = 0.25
    track_buffer: int = 25
    match_thresh: float = 0.8
    aspect_ratio_thresh: float = 3.0
    min_box_area: float = 1.0
    mot20: bool = False


def get_video_writer(output_video_path, fps, width, height):
    """Create a colour ``cv2.VideoWriter`` that uses the "mp4v" FourCC.

    Args:
        output_video_path: Destination file path.
        fps: Frame rate passed to the writer.
        width: Frame width; combined with ``height`` into ``frameSize``.
        height: Frame height.

    Returns:
        The constructed ``cv2.VideoWriter``.
    """
    codec = cv2.VideoWriter_fourcc(*"mp4v")
    frame_size = (width, height)
    writer = cv2.VideoWriter(
        output_video_path,
        fourcc=codec,
        fps=fps,
        frameSize=frame_size,
        isColor=True,
    )
    return writer


def box_iou_batch(atlbrs, btlbrs):
    """Pairwise IoU between two collections of boxes.

    Args:
        atlbrs: First collection of boxes (presumably rows of
            ``x1, y1, x2, y2`` — confirm against the callers).
        btlbrs: Second collection of boxes, same layout.

    Returns:
        A float array of shape ``(len(atlbrs), len(btlbrs))``. When either
        collection is empty an all-zero array of that shape is returned
        without calling ``bbox_ious``.
    """
    n_a, n_b = len(atlbrs), len(btlbrs)
    if n_a * n_b == 0:
        return np.zeros((n_a, n_b), dtype=float)

    # bbox_ious is given C-contiguous float arrays.
    boxes_a = np.ascontiguousarray(atlbrs, dtype=float)
    boxes_b = np.ascontiguousarray(btlbrs, dtype=float)
    return bbox_ious(boxes_a, boxes_b)


def format_predictions(predictions, with_conf: bool = True):
    """Convert per-box predictions into a 2-D float array.

    Args:
        predictions: Iterable of per-box predictions exposing ``boxes.xyxy``
            (and ``boxes.conf`` when ``with_conf`` is true).
        with_conf: When true each row is ``[x1, y1, x2, y2, conf]``; otherwise
            ``[x1, y1, x2, y2]``. Coordinates are truncated to integers before
            being stored as floats.

    Returns:
        ``numpy.ndarray`` of shape ``(N, 5)`` or ``(N, 4)``. With no
        predictions the result is an empty array that still has the right
        number of columns (``(0, 5)`` / ``(0, 4)``), so callers that index
        columns or read ``shape[1]`` keep working.
    """
    n_cols = 5 if with_conf else 4
    rows = []
    for pred in predictions:
        row = pred.boxes.xyxy.int().tolist()[0]
        # Read the confidence only when it is part of the output.
        if with_conf:
            row = row + [pred.boxes.conf.item()]
        rows.append(row)

    if not rows:
        # np.array([]) would be 1-D with shape (0,); keep the column dimension.
        return np.zeros((0, n_cols), dtype=float)
    return np.array(rows, dtype=float)


def match_detections_with_tracks(detections, tracks):
    """Copy track ids onto the detections they overlap most.

    For every track, the detection with the highest IoU against the track's
    ``tlbr`` box is looked up; if that IoU is non-zero, the detection's
    ``tracker_id`` is set to the track's ``track_id``. When several tracks
    pick the same detection, the last one processed wins.

    Args:
        detections: Sequence of per-box predictions (mutated in place).
        tracks: Sequence of track objects exposing ``tlbr`` and ``track_id``.

    Returns:
        The same ``detections`` object that was passed in.
    """
    # With no detections the IoU matrix has zero columns and np.argmax along
    # that axis raises; with no tracks there is nothing to assign either.
    if len(detections) == 0 or len(tracks) == 0:
        return detections

    detections_bboxes = format_predictions(detections, with_conf=False)
    tracks_bboxes = np.array([track.tlbr for track in tracks], dtype=float)
    iou = box_iou_batch(tracks_bboxes, detections_bboxes)
    best_detection_per_track = np.argmax(iou, axis=1)

    for tracker_index, detection_index in enumerate(best_detection_per_track):
        # A zero IoU means the track overlaps no detection at all.
        if iou[tracker_index, detection_index] != 0:
            detections[detection_index].tracker_id = tracks[tracker_index].track_id
    return detections


In [14]:
# Hand BYTETracker an instance of the settings dataclass rather than the class
# object itself.
byte_tracker = BYTETracker(BYTETrackerArgs())
# Take the writer's frame size from the decoded frames instead of hard-coding
# it, so it always matches the frames that will be written. The frame rate
# stays at 25, the value printed when the test clip was read.
_frame_height, _frame_width = test_frames[0].shape[:2]
video_writer = get_video_writer(TEST_OUTPUT_PATH, 25, _frame_width, _frame_height)

In [19]:
# An earlier cell sets yolo_model to None to free GPU memory; rebuild the
# detector here so the notebook still works when run top to bottom.
if yolo_model is None:
    yolo_model = YOLO(YOLO_WEIGHTS_PATH)

try:
    for frame in tqdm(test_frames):
        # Detect on the current frame; [0] takes the result for this single image.
        detections = yolo_model(frame, conf=0.15, verbose=False)[0]

        # Give every detection an empty tracker id; matched ones are filled in below.
        detections_with_tracker = []
        for detection in detections:
            detection.tracker_id = ""
            detections_with_tracker.append(detection)

        tracks = byte_tracker.update(
            output_results=format_predictions(detections_with_tracker, with_conf=True),
            img_info=frame.shape,
            img_size=frame.shape
        )
        detections_with_tracker = match_detections_with_tracks(detections_with_tracker, tracks)
        image = draw_detections(frame, detections_with_tracker, True)
        video_writer.write(image)
finally:
    # Always release the writer, even if a frame fails part-way through.
    video_writer.release()

100% 750/750 [01:43<00:00,  7.23it/s]


## References

### CUDA out of memory

```python
yolo_model = None
import gc
gc.collect()
torch.cuda.empty_cache()
```