01. ByteTracker 
- very easy to implement since its integrated to supervision - the open-source computer vision toolkit by Roboflow


In [None]:
#imports and paths
import os
from inference import get_model
import supervision as sv
from ultralytics import YOLO
import numpy as np
import sys
sys.path.append(os.getenv("PROJECT_PATH"))

VIDEO_PATH = "../../data/videos/testvid.mp4"

PLAYER_DETECTION_MODEL_ID = 'football-players-detection-3zvbc/2'
ROBOFLOW_API_KEY = os.getenv("ROBOFLOW_API_KEY")
PLAYER_DETECTION_MODEL = get_model(PLAYER_DETECTION_MODEL_ID, ROBOFLOW_API_KEY)
tracker = sv.ByteTrack()


ByteTracker

In [None]:
#Team Assignment based on colours
#use of the SigLIP, UMAP, and KMeans combo
import supervision as sv
from tqdm import tqdm
from utils.teamclassifier import TeamClassifier

PLAYER_ID = 2
STRIDE = 30
frame_generator = sv.get_video_frames_generator(
    source_path=VIDEO_PATH, stride=STRIDE
)

crops = []
for frame in tqdm(frame_generator, desc="collecting crops"):
    result = PLAYER_DETECTION_MODEL.infer(frame, confidence=0.3)[0]
    detections = sv.Detections.from_inference(result)
    players_detections = detections[detections.class_id == PLAYER_ID]
    
    # Skip the frame if no players are detected
    if len(players_detections.xyxy) == 0:
        continue
    
    players_crops = [sv.crop_image(frame, xyxy) for xyxy in players_detections.xyxy]
    crops += players_crops

# Ensure there are enough crops for clustering
if len(crops) < 2:
    print("Not enough player crops detected. Skipping team classification.")
else:
    team_classifier = TeamClassifier(device="cpu")
    team_classifier.fit(crops)
    
print(f"Number of players detected: {len(crops)}")

In [9]:
import math
from utils.resolveteamgk import resolve_goalkeepers_team_id

tracker = sv.ByteTrack()

bounding_box_annotator = sv.BoundingBoxAnnotator()
label_annotator = sv.LabelAnnotator()

BALL_ID = 0
GOALKEEPER_ID = 1
PLAYER_ID = 2

def callback(frame: np.ndarray, index: int) -> np.ndarray:
    results = PLAYER_DETECTION_MODEL.infer(frame, confidence=0.3)[0]
    detections = sv.Detections.from_inference(results)

    # Filter out the ball and apply NMS
    all_detections = detections[detections.class_id != BALL_ID]
    all_detections = all_detections.with_nms(threshold=0.5, class_agnostic=True)
    all_detections = tracker.update_with_detections(detections=all_detections)

    # Separate detections
    goalkeepers_detections = all_detections[all_detections.class_id == GOALKEEPER_ID]
    players_detections = all_detections[all_detections.class_id == PLAYER_ID]

    # Team assignment
    players_crops = [sv.crop_image(frame, xyxy) for xyxy in players_detections.xyxy]
    players_detections.class_id = team_classifier.predict(players_crops)

    if len(goalkeepers_detections.xyxy) > 0:
        goalkeepers_detections.class_id = resolve_goalkeepers_team_id(
            players_detections, goalkeepers_detections)
    else:
        goalkeepers_detections.class_id = np.array([])

    # Merge player and goalkeeper detections
    all_detections = sv.Detections.merge([players_detections, goalkeepers_detections])

    # Extract and sanitize tracker_ids
    tracker_ids = all_detections.tracker_id
    if tracker_ids is None or len(tracker_ids) == 0:
        tracker_ids = [-1] * len(all_detections.xyxy)
    else:
        tracker_ids = [
            int(tid) if not isinstance(tid, float) or not math.isnan(tid) else -1
            for tid in tracker_ids
        ]
    tracker_ids = np.array(tracker_ids, dtype=int)

    # Filter out invalid (-1) tracker IDs
    valid_mask = tracker_ids != -1
    tracker_ids = tracker_ids[valid_mask]

    all_detections = sv.Detections(
        xyxy=all_detections.xyxy[valid_mask],
        class_id=all_detections.class_id[valid_mask],
        confidence=all_detections.confidence[valid_mask],
        tracker_id=tracker_ids
    )

    # Create labels
    labels = [f"#{tid}" for tid in tracker_ids]

    # Annotate
    annotated_frame = bounding_box_annotator.annotate(
        scene=frame.copy(), detections=all_detections)
    annotated_frame = label_annotator.annotate(
        scene=annotated_frame, detections=all_detections, labels=labels)

    return annotated_frame




In [10]:
from tqdm import tqdm
import supervision as sv
import cv2

def process_video_with_progress(source_path, target_path, callback):
    # Get video info
    video_info = sv.VideoInfo.from_video_path(source_path)

    # Create reader and writer
    frame_generator = sv.get_video_frames_generator(source_path)
    with sv.VideoSink(target_path, video_info) as sink:
        for index, frame in enumerate(tqdm(frame_generator, total=video_info.total_frames, desc="Processing Video")):
            result_frame = callback(frame, index)
            sink.write_frame(result_frame)
            
process_video_with_progress(
    source_path=VIDEO_PATH,
    target_path="../../data/tracker_outputs/testvid-v1.mp4",
    callback=callback
)

Processing Video:   0%|          | 0/165 [00:00<?, ?it/s]
Embedding extraction: 0it [00:00, ?it/s]
Embedding extraction: 1it [00:07,  7.24s/it]
Processing Video:   1%|          | 1/165 [00:08<23:32,  8.61s/it]
Embedding extraction: 0it [00:00, ?it/s]
Embedding extraction: 1it [00:07,  7.26s/it]
Processing Video:   1%|          | 2/165 [00:17<23:31,  8.66s/it]
Embedding extraction: 0it [00:00, ?it/s]
Embedding extraction: 1it [00:08,  8.20s/it]
Processing Video:   2%|▏         | 3/165 [00:26<24:20,  9.02s/it]
Embedding extraction: 0it [00:00, ?it/s]
Embedding extraction: 1it [00:06,  6.93s/it]
Processing Video:   2%|▏         | 4/165 [00:35<23:39,  8.82s/it]
Embedding extraction: 0it [00:00, ?it/s]
Embedding extraction: 1it [00:05,  5.13s/it]
Processing Video:   3%|▎         | 5/165 [00:41<20:45,  7.79s/it]
Embedding extraction: 0it [00:00, ?it/s]
Embedding extraction: 1it [00:07,  7.36s/it]
Processing Video:   4%|▎         | 6/165 [00:49<21:09,  7.99s/it]
Embedding extraction: 0it [00:

TypeError: list indices must be integers or slices, not numpy.float64