In [None]:
!pip install ultralytics tqdm torch torchvision

In [1]:
import random
from deep_sort.tracker import Tracker
from deep_sort.deep.extractor import Extractor
from deep_sort.deep.configuration import ResNetConfiguration
from deep_sort.deep.weights import RESNET18_WEIGHTS


# Configuration handed to the feature extractor: ResNet-18 base with the
# weights referenced by RESNET18_WEIGHTS.
# NOTE(review): use_cuda=True presumably requires a CUDA-capable device --
# confirm how ResNetConfiguration behaves on a CPU-only machine.
resnet = ResNetConfiguration(base="resnet18", weights_path=RESNET18_WEIGHTS, use_cuda=True)

# The extractor is built from that configuration with a batch size of 1.
extractor = Extractor(model=resnet, batch_size=1)

# The tracker receives the extractor as its feature extractor.
tracker = Tracker(feature_extractor=extractor)

# Palette of 10 random 3-channel colors (each channel in 0..255). Tracks pick
# their drawing color from it via `colors[track_id % len(colors)]`.
colors = [
    tuple(random.randint(0, 255) for _channel in range(3))
    for _slot in range(10)
]

In [2]:
from ultralytics import YOLO

# Minimum confidence for a detection to be kept. It is passed to
# `model.predict(conf=...)` and also re-checked per box before tracking.
detection_threshold = 0.5

# Detector loaded from the "yolov8x.pt" checkpoint via ultralytics.
model = YOLO("yolov8x.pt")

In [3]:
from tqdm import tqdm

import cv2

# Detect objects on each frame, feed the detections to the tracker, draw the
# tracked boxes with their ID/class label, and write the annotated frames to
# a new video file.
video_path = "data/running.mp4"
video_out_path = "out_running.mp4"

# Upper bound on the number of frames to process (keeps the run short).
max_frames = 100

cap = cv2.VideoCapture(video_path)
cap_out = None
progress_bar = None

try:
    # Fail early with a clear message instead of an AttributeError on
    # `frame.shape` when the input cannot be opened or decoded.
    if not cap.isOpened():
        raise OSError(f"Could not open input video: {video_path}")

    ret, frame = cap.read()
    if not ret or frame is None:
        raise OSError(f"Could not read the first frame of: {video_path}")

    # The writer uses the input's frame rate and the first frame's size.
    frame_height, frame_width = frame.shape[:2]
    cap_out = cv2.VideoWriter(
        video_out_path,
        # Lowercase "mp4v" is the tag used for .mp4 output; the uppercase
        # spelling makes OpenCV's FFmpeg backend warn and fall back to it.
        cv2.VideoWriter_fourcc(*"mp4v"),
        cap.get(cv2.CAP_PROP_FPS),
        (frame_width, frame_height),
    )
    # A writer that failed to open would otherwise silently write nothing.
    if not cap_out.isOpened():
        raise OSError(f"Could not open output video for writing: {video_out_path}")

    # Progress-bar total: the smaller of the reported frame count and the
    # processing limit. Some inputs report no usable frame count, in which
    # case the limit alone is used.
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_count = min(total_frames, max_frames) if total_frames > 0 else max_frames

    progress_bar = tqdm(total=frame_count, desc="Processing frames", unit="frame")

    # Count processed frames locally rather than relying on the capture's
    # reported position.
    processed_frames = 0
    while ret and frame is not None and processed_frames < max_frames:
        results = model.predict(
            frame,
            verbose=False,
            conf=detection_threshold,
        )
        result = results[0]

        # Each row is unpacked as (x1, y1, x2, y2, score, class_id); box
        # corners and class id are truncated to int before tracking.
        detections = []
        for x1, y1, x2, y2, score, class_id in result.boxes.data.tolist():
            if score > detection_threshold:
                detections.append(
                    [int(x1), int(y1), int(x2), int(y2), score, int(class_id)]
                )

        tracker.update(frame, detections)

        for track in tracker.tracks:
            x1, y1, x2, y2 = track.to_tlbr()
            track_id = track.track_id
            class_id = track.class_id

            # Same palette slot for a given track ID on every frame.
            color = colors[track_id % len(colors)]

            cv2.rectangle(
                frame,
                (int(x1), int(y1)),
                (int(x2), int(y2)),
                color,
                3,
            )
            cv2.putText(
                frame,
                f"ID: {track_id} | Class: {model.names[class_id]}",
                (int(x1), int(y1) - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                color,
                2,
            )

        cap_out.write(frame)

        processed_frames += 1
        progress_bar.update(1)

        ret, frame = cap.read()
finally:
    # Always release the capture/writer and close the progress bar, even if
    # detection or tracking raised part-way through the video.
    cap.release()
    if cap_out is not None:
        cap_out.release()
    if progress_bar is not None:
        progress_bar.close()