01. ByteTracker 
- Very easy to implement, since it's integrated into supervision, the open-source computer vision toolkit by Roboflow


In [None]:
#imports and paths
import os
from inference import get_model
import supervision as sv
from ultralytics import YOLO
import numpy as np
import sys
# Make project-local packages (e.g. `utils`) importable.
# Only a real, not-yet-present path is appended: a None entry would be ignored
# by the import system, and re-running this cell must not add duplicates.
PROJECT_PATH = os.getenv("PROJECT_PATH")
if PROJECT_PATH:
    if PROJECT_PATH not in sys.path:
        sys.path.append(PROJECT_PATH)
else:
    print("PROJECT_PATH is not set; imports of project-local modules may fail.")

VIDEO_PATH = "../../data/videos/source10s.mp4"

# Roboflow model identifiers
PLAYER_DETECTION_MODEL_ID = 'football-players-detection-3zvbc/2'
FIELD_DETECTION_MODEL_ID = "football-field-detection-f07vi/14"

# Read from the environment so the key is never stored in the notebook.
ROBOFLOW_API_KEY = os.getenv("ROBOFLOW_API_KEY")

# Both models are loaded the same way, with explicit keyword arguments.
PLAYER_DETECTION_MODEL = get_model(model_id=PLAYER_DETECTION_MODEL_ID, api_key=ROBOFLOW_API_KEY)
FIELD_DETECTION_MODEL = get_model(model_id=FIELD_DETECTION_MODEL_ID, api_key=ROBOFLOW_API_KEY)
tracker = sv.ByteTrack()


ByteTracker

In [None]:
#Supervision annotators
from utils.pitchconfig import SoccerPitchConfiguration

# Project pitch configuration; its `edges` attribute feeds the edge annotator.
CONFIG = SoccerPitchConfiguration()

# Three-colour palette shared by the ellipse and label annotators.
_PALETTE_HEX = ['#00BFFF', '#FF1493', '#FFD700']  # blue, pink, yellow

# --- Detection annotators ---
ellipse_annotator = sv.EllipseAnnotator(
    color=sv.ColorPalette.from_hex(_PALETTE_HEX),
    thickness=2,
)
label_annotator = sv.LabelAnnotator(
    color=sv.ColorPalette.from_hex(_PALETTE_HEX),
    text_color=sv.Color.from_hex('#000000'),  # black text
    text_position=sv.Position.BOTTOM_CENTER,
)
triangle_annotator = sv.TriangleAnnotator(
    color=sv.Color.from_hex('#FFD700'),  # yellow
    base=25,
    height=21,
    outline_thickness=1,
)

# --- Keypoint / edge annotators (visualization) ---
edge_annotator = sv.EdgeAnnotator(
    color=sv.Color.from_hex('#00BFFF'),  # blue
    thickness=2,
    edges=CONFIG.edges,
)
vertex_annotator = sv.VertexAnnotator(
    color=sv.Color.from_hex('#FF1493'),  # pink
    radius=8,
)
vertex_annotator_2 = sv.VertexAnnotator(
    color=sv.Color.from_hex('#00BFFF'),  # blue
    radius=8,
)

In [None]:
#Team Assignment based on colours
#use of the SigLIP, UMAP, and KMeans combo
import supervision as sv
from tqdm import tqdm
from utils.teamclassifier import TeamClassifier

PLAYER_ID = 2   # class id used below to keep only player detections
STRIDE = 30     # sample every 30th frame of the video

# Always bind the name, so a re-run with too few crops cannot silently reuse a
# classifier fitted earlier in the kernel session; it stays None when fitting
# is skipped.
team_classifier = None

frame_generator = sv.get_video_frames_generator(
    source_path=VIDEO_PATH, stride=STRIDE
)

# Collect one image crop per detected player across the sampled frames.
crops = []
for frame in tqdm(frame_generator, desc="collecting crops"):
    result = PLAYER_DETECTION_MODEL.infer(frame, confidence=0.3)[0]
    detections = sv.Detections.from_inference(result)
    players_detections = detections[detections.class_id == PLAYER_ID]
    # A frame without players simply contributes no crops.
    crops.extend(sv.crop_image(frame, xyxy) for xyxy in players_detections.xyxy)

# Fitting is skipped when fewer than two crops were collected.
if len(crops) < 2:
    print("Not enough player crops detected. Skipping team classification.")
else:
    team_classifier = TeamClassifier(device="cpu")
    team_classifier.fit(crops)

# This counts crops over all sampled frames, not distinct players.
print(f"Number of player crops collected: {len(crops)}")

Detections with Team Assignment & GK Association

In [None]:
import math
import numpy as np
from utils.resolveteamgk import resolve_goalkeepers_team_id

# Class ids used by the callback below to filter the detection results.
BALL_ID, GOALKEEPER_ID, PLAYER_ID = 0, 1, 2

# Fresh tracker for this pass over the video.
tracker = sv.ByteTrack()

# Default-styled annotators; `label_annotator` rebinds the name that the
# annotator-setup cell assigned earlier.
bounding_box_annotator = sv.BoundingBoxAnnotator()
label_annotator = sv.LabelAnnotator()

def callback(frame: np.ndarray, index: int) -> np.ndarray:
    """Detect, track and team-label players and goalkeepers on one frame.

    Args:
        frame: Video frame passed to the detection model and annotated.
        index: Frame index supplied by the video loop; not used here.

    Returns:
        A copy of ``frame`` with a box and a ``#<tracker_id>`` label drawn for
        every detection that carries a valid tracker id.
    """
    results = PLAYER_DETECTION_MODEL.infer(frame, confidence=0.3)[0]
    detections = sv.Detections.from_inference(results)

    # Drop the ball, apply class-agnostic NMS, then update the tracker.
    tracked = detections[detections.class_id != BALL_ID]
    tracked = tracked.with_nms(threshold=0.5, class_agnostic=True)
    tracked = tracker.update_with_detections(detections=tracked)

    # Separate detections by role.
    goalkeepers_detections = tracked[tracked.class_id == GOALKEEPER_ID]
    players_detections = tracked[tracked.class_id == PLAYER_ID]

    # Team assignment: class_id is overwritten with the predicted team id.
    # The classifier is only called when there is something to classify, and
    # the result is stored as integers so merged class ids keep an int dtype.
    if len(players_detections) > 0:
        players_crops = [sv.crop_image(frame, xyxy) for xyxy in players_detections.xyxy]
        players_detections.class_id = np.asarray(
            team_classifier.predict(players_crops), dtype=int)

    # With no goalkeepers the (already empty, integer) class_id is left as is
    # instead of being replaced by a float-typed empty array.
    if len(goalkeepers_detections) > 0:
        goalkeepers_detections.class_id = np.asarray(
            resolve_goalkeepers_team_id(players_detections, goalkeepers_detections),
            dtype=int)

    # Merge player and goalkeeper detections.
    all_detections = sv.Detections.merge([players_detections, goalkeepers_detections])

    # Keep only detections with a usable tracker id (not missing, NaN or -1).
    if all_detections.tracker_id is None:
        keep = np.zeros(len(all_detections), dtype=bool)
    else:
        raw_ids = np.asarray(all_detections.tracker_id, dtype=float)
        keep = np.isfinite(raw_ids) & (raw_ids != -1)
    all_detections = all_detections[keep]

    # Normalise id dtypes to int for labelling and colour lookup.
    if all_detections.tracker_id is not None:
        all_detections.tracker_id = np.asarray(all_detections.tracker_id).astype(int)
    if all_detections.class_id is not None:
        all_detections.class_id = np.asarray(all_detections.class_id).astype(int)

    if all_detections.tracker_id is None:
        labels = []
    else:
        labels = [f"#{tid}" for tid in all_detections.tracker_id]

    # Annotate a copy so the input frame is left untouched.
    annotated_frame = bounding_box_annotator.annotate(
        scene=frame.copy(), detections=all_detections)
    annotated_frame = label_annotator.annotate(
        scene=annotated_frame, detections=all_detections, labels=labels)

    return annotated_frame

from tqdm import tqdm
import supervision as sv
import cv2

def process_video_with_progress(source_path, target_path, callback):
    """Run ``callback`` on every frame of a video and write the results.

    Args:
        source_path: Path of the input video.
        target_path: Path of the output video. Missing parent directories are
            created before writing starts.
        callback: Callable invoked as ``callback(frame, index)``; its return
            value is written to the output video as the processed frame.
    """
    # Video metadata is reused for the writer and for the progress bar total.
    video_info = sv.VideoInfo.from_video_path(source_path)

    # Ensure the destination directory exists; otherwise the writer may not be
    # able to create the output file.
    target_dir = os.path.dirname(target_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    frame_generator = sv.get_video_frames_generator(source_path)
    with sv.VideoSink(target_path, video_info) as sink:
        progress = tqdm(
            frame_generator,
            total=video_info.total_frames,
            desc="Processing Video",
        )
        for index, frame in enumerate(progress):
            sink.write_frame(callback(frame, index))
            
# Render the team-assignment pass over the source clip to disk.
team_assignment_output = "../../data/tracker_outputs/10s-team-assignment.mp4"
process_video_with_progress(VIDEO_PATH, team_assignment_output, callback)


Keypoint Detection

In [None]:
import math
import numpy as np
from utils.resolveteamgk import resolve_goalkeepers_team_id
import supervision as sv
import numpy as np
from utils.viewtransformer import ViewTransformer
from utils.drawpitch import draw_pitch, draw_points_on_pitch
from utils.resolveteamgk import resolve_goalkeepers_team_id

# Class ids, re-declared for this cell.
BALL_ID, GOALKEEPER_ID, PLAYER_ID = 0, 1, 2

# Fresh tracker and default-styled annotators; `label_annotator` rebinds the
# name assigned by earlier cells.
tracker = sv.ByteTrack()
bounding_box_annotator = sv.BoundingBoxAnnotator()
label_annotator = sv.LabelAnnotator()

def callback(frame: np.ndarray, index: int) -> np.ndarray:
    """Draw the confidently detected field keypoints on one frame.

    Args:
        frame: Video frame passed to the field-detection model and annotated.
        index: Frame index supplied by the video loop; not used here.

    Returns:
        A copy of ``frame`` with the keypoints whose confidence exceeds 0.5
        drawn on it, or an unannotated copy when nothing was detected.
    """
    result = FIELD_DETECTION_MODEL.infer(frame, confidence=0.3)[0]
    key_points = sv.KeyPoints.from_inference(result)

    annotated_frame = frame.copy()

    # No keypoint detection on this frame: there is nothing to index into, so
    # return the plain copy instead of failing on `confidence[0]`.
    if key_points.confidence is None or len(key_points.xy) == 0:
        return annotated_frame

    # Keep only the confident keypoints of the first detected object.
    confident_mask = key_points.confidence[0] > 0.5
    frame_reference_points = key_points.xy[0][confident_mask]
    # KeyPoints is given a leading axis, so re-add it after filtering.
    frame_reference_key_points = sv.KeyPoints(
        xy=frame_reference_points[np.newaxis, ...])

    annotated_frame = vertex_annotator.annotate(
        scene=annotated_frame,
        key_points=frame_reference_key_points)

    return annotated_frame

from tqdm import tqdm
import supervision as sv
import cv2

def process_video_with_progress(source_path, target_path, callback):
    """Run ``callback`` on every frame of a video and write the results.

    Args:
        source_path: Path of the input video.
        target_path: Path of the output video. Missing parent directories are
            created before writing starts.
        callback: Callable invoked as ``callback(frame, index)``; its return
            value is written to the output video as the processed frame.
    """
    # Video metadata is reused for the writer and for the progress bar total.
    video_info = sv.VideoInfo.from_video_path(source_path)

    # Ensure the destination directory exists; otherwise the writer may not be
    # able to create the output file.
    target_dir = os.path.dirname(target_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    frame_generator = sv.get_video_frames_generator(source_path)
    with sv.VideoSink(target_path, video_info) as sink:
        progress = tqdm(
            frame_generator,
            total=video_info.total_frames,
            desc="Processing Video",
        )
        for index, frame in enumerate(progress):
            sink.write_frame(callback(frame, index))
            
# Render the keypoint-detection pass over the source clip to disk.
keypoints_output = "../../data/tracker_outputs/10s-detection-keypoints.mp4"
process_video_with_progress(VIDEO_PATH, keypoints_output, callback)