In [None]:
pip install yolox --no-deps  # onnxruntime==1.8.0 is putdated, hence --no-deps

In [None]:
from pathlib import Path

import cv2
import gdown
import numpy as np
import torch
from yolox.exp import get_exp
from yolox.utils import postprocess
from yolox.utils.model_utils import fuse_model

from boxmot import BotSort
from boxmot.utils.ops import yolox_preprocess

# YOLOX weight filename -> Google Drive download URL.
# Every entry shares the same URL prefix, so only the file IDs are listed.
_GDRIVE_URL_PREFIX = 'https://drive.google.com/uc?id='
_YOLOX_FILE_IDS = (
    ('yolox_n.pt', '1AoN2AxzVwOLM0gJ15bcwqZUpFjlDV1dX'),
    ('yolox_s.pt', '1uSmhXzyV1Zvb4TJJCzpsZOIcw7CCJLxj'),
    ('yolox_m.pt', '11Zb0NN_Uu7JwUd9e6Nk8o2_EUfxWqsun'),
    ('yolox_l.pt', '1XwfUuCBF4IgWBWK2H7oOhQgEj9Mrb3rz'),
    ('yolox_x.pt', '1P4mY0Yyd3PPTybgZkjMYhFri88nTmJX5'),
)
YOLOX_ZOO = {
    weights_name: _GDRIVE_URL_PREFIX + file_id
    for weights_name, file_id in _YOLOX_FILE_IDS
}

# Runtime configuration
input_size = [800, 1440]  # handed to yolox_preprocess; presumably (height, width) - confirm
device = torch.device('cpu')
yolox_model = 'yolox_s.pt'  # must be one of the keys of YOLOX_ZOO
yolox_model_path = Path(yolox_model)

# Download model if not present
if not yolox_model_path.exists():
    gdown.download(YOLOX_ZOO[yolox_model], output=str(yolox_model_path), quiet=False)

# Initialize YOLOX model.
# The experiment (architecture) name is derived from the selected weights file so
# that changing `yolox_model` above cannot leave the architecture and checkpoint
# mismatched. YOLOX names its nano experiment 'yolox_nano', while the zoo file is
# called 'yolox_n.pt', hence the special case.
exp_name = 'yolox_nano' if yolox_model_path.stem == 'yolox_n' else yolox_model_path.stem
exp = get_exp(None, exp_name)
exp.num_classes = 1  # detection head is built for a single class
ckpt = torch.load(yolox_model_path, map_location=device)

model = exp.get_model()
model.load_state_dict(ckpt["model"])
# Fuse layers for inference, move to the target device and switch to eval mode
model = fuse_model(model).to(device).eval()

# Initialize tracker
tracker = BotSort(reid_weights=Path('osnet_x0_25_msmt17.pt'), device=device, half=False)

In [None]:
# Video capture setup (device index 0)
vid = cv2.VideoCapture(0)

# Arguments for YOLOX's postprocess(prediction, num_classes, conf_thre, nms_thre, ...)
NUM_CLASSES = 1
CONF_THRESHOLD = 0.5
NMS_THRESHOLD = 0.7

# The loop is wrapped in try/finally so the camera and the OpenCV window are
# released even when the cell is interrupted or a frame raises an exception.
try:
    while True:
        ret, frame = vid.read()
        if not ret:
            # No frame could be read: stop
            break

        # Preprocess frame
        frame_img, ratio = yolox_preprocess(frame, input_size=input_size)
        frame_tensor = torch.Tensor(frame_img).unsqueeze(0).to(device)

        # Detection with YOLOX
        with torch.no_grad():
            dets = model(frame_tensor)
        # postprocess returns one entry per image; take the single image of the batch
        dets = postprocess(dets, NUM_CLASSES, CONF_THRESHOLD, NMS_THRESHOLD, class_agnostic=True)[0]

        if dets is not None:
            # Rescale the four box coordinates from the preprocessed image back to
            # the original frame size
            dets[:, :4] /= ratio
            # Combine columns 4 and 5 into a single score (presumably objectness
            # and class confidence, per YOLOX's postprocess output - confirm)
            dets[:, 4] *= dets[:, 5]
            # Keep box (0-3), combined score (4) and column 6 (presumably class id)
            dets = dets[:, [0, 1, 2, 3, 4, 6]].cpu().numpy()
        else:
            # No detections: hand the tracker an empty (0, 6) array
            dets = np.empty((0, 6))

        # Update tracker
        res = tracker.update(dets, frame)

        # Plot results and display
        tracker.plot_results(frame, show_trajectories=True)
        cv2.imshow('BoXMOT + YOLOX', frame)

        # Quit when 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources
    vid.release()
    cv2.destroyAllWindows()