In [None]:
pip install yolox --no-deps  # onnxruntime==1.8.0 is outdated, hence --no-deps

In [None]:
import cv2
import gdown
import torch
import numpy as np
from pathlib import Path
from torchvision import transforms
from ultralytics.utils import ops

from yolox.exp import get_exp
from yolox.utils import postprocess
from yolox.utils.model_utils import fuse_model
from boxmot import BotSort

# Download links for the YOLOX checkpoints, keyed by checkpoint file name
YOLOX_ZOO = {
    "yolox_n.pt": "https://drive.google.com/uc?id=1AoN2AxzVwOLM0gJ15bcwqZUpFjlDV1dX",
    "yolox_s.pt": "https://drive.google.com/uc?id=1uSmhXzyV1Zvb4TJJCzpsZOIcw7CCJLxj",
    "yolox_m.pt": "https://drive.google.com/uc?id=11Zb0NN_Uu7JwUd9e6Nk8o2_EUfxWqsun",
    "yolox_l.pt": "https://drive.google.com/uc?id=1XwfUuCBF4IgWBWK2H7oOhQgEj9Mrb3rz",
    "yolox_x.pt": "https://drive.google.com/uc?id=1P4mY0Yyd3PPTybgZkjMYhFri88nTmJX5",
}

# Frame-to-tensor conversion used before every inference call
preprocess = transforms.Compose([transforms.ToTensor()])

# Everything (detector and tracker) runs on the CPU
device = torch.device("cpu")

# Checkpoint to use; the name doubles as the key into YOLOX_ZOO and the local file name
yolox_model = "yolox_s.pt"
yolox_model_path = Path(yolox_model)

# Show which file is expected and whether a local copy is already there
print(yolox_model_path, yolox_model_path.exists(), sep="\n")

# Fetch the checkpoint only when no local copy exists
if not yolox_model_path.exists():
    gdown.download(
        YOLOX_ZOO[yolox_model],
        output=str(yolox_model_path),
        quiet=False,
    )

# Build the experiment description and restrict it to a single class
exp = get_exp(None, "yolox_s")
exp.num_classes = 1

# NOTE(review): torch.load unpickles the file, so only load checkpoints
# obtained from a trusted source.
ckpt = torch.load(yolox_model_path, map_location=device)

# Instantiate the network, restore the weights stored under the "model" key,
# then fuse, move to the target device and switch to evaluation mode
model = exp.get_model()
model.load_state_dict(ckpt["model"])
model = fuse_model(model)
model = model.to(device)
model = model.eval()

# Tracker with ReID weights, full precision, on the same device as the detector
tracker = BotSort(
    reid_weights=Path("osnet_x0_25_msmt17.pt"),
    device=device,
    half=False,
)

# Function for letterbox resizing
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
    """Resize ``img`` towards ``new_shape`` and pad the borders with ``color``.

    Args:
        img: Image array whose first two axes are (height, width).
        new_shape: Target size as (height, width), or a single int for a
            square target.
        color: Border value handed to ``cv2.copyMakeBorder``.
        auto: When true, only the remainder of the padding modulo 32 is
            kept, so the output may be smaller than ``new_shape``.
        scaleFill: Only consulted when ``auto`` is false. When true, the
            image is stretched to exactly ``new_shape`` and no padding is
            added (aspect ratio is not preserved).
        scaleup: When false, the image is never enlarged (scale capped at 1).

    Returns:
        A tuple ``(img, ratio, (dw, dh))`` where ``ratio`` is the
        (width, height) scale factors that were applied and ``dw``/``dh``
        are the padding added on each side along width/height (half of the
        total padding, possibly fractional).
    """
    shape = img.shape[:2]  # current (height, width)
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Uniform scale that makes the image fit inside new_shape
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:
        r = min(r, 1.0)

    ratio = r, r
    # cv2 sizes are (width, height), hence the swapped indices
    new_unpad = (int(round(shape[1] * r)), int(round(shape[0] * r)))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]
    if auto:
        dw, dh = np.mod(dw, 32), np.mod(dh, 32)
    elif scaleFill:
        dw, dh = 0.0, 0.0
        # Must be (width, height) like the unstretched case; using new_shape
        # directly would transpose non-square targets. Building a tuple also
        # keeps the size comparison below meaningful when new_shape is a list.
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]

    # Split the padding evenly between the two sides
    dw /= 2
    dh /= 2

    if shape[::-1] != new_unpad:
        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The +/-0.1 offsets send an odd leftover pixel to the bottom/right side
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return img, ratio, (dw, dh)

# Video capture setup (device index 0)
vid = cv2.VideoCapture(0)

# try/finally guarantees the camera and any windows are released even if
# detection or tracking raises part-way through the stream.
try:
    while True:
        ret, frame = vid.read()
        if not ret:
            # No frame could be read (stream ended or camera unavailable)
            break

        # Preprocess frame: stretch to 640x640. With scaleFill the returned
        # ratio holds separate (width, height) scale factors and dw/dh are 0.
        frame_letterbox, ratio, (dw, dh) = letterbox(frame, new_shape=[640, 640], auto=False, scaleFill=True)
        frame_tensor = preprocess(frame_letterbox).unsqueeze(0).to(device)

        # Detection with YOLOX
        with torch.no_grad():
            dets = model(frame_tensor)
        # Positional arguments: 1 class, confidence threshold 0.5, NMS threshold 0.2
        # (assumes yolox.utils.postprocess(prediction, num_classes, conf_thre, nms_thre) — confirm)
        dets = postprocess(dets, 1, 0.5, 0.2, class_agnostic=True)[0]

        if dets is not None:
            # Assumes rows are (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
            # — confirm against yolox.utils.postprocess.
            # Undo the padding, then map the boxes back to original frame pixels.
            dets[:, [0, 2]] -= dw
            dets[:, [1, 3]] -= dh
            # x and y were scaled by different factors, so each axis has to be
            # divided by its own ratio (dividing everything by ratio[0] skews
            # the y coordinates on non-square frames).
            dets[:, [0, 2]] /= ratio[0]
            dets[:, [1, 3]] /= ratio[1]
            # Fold column 5 into column 4 to get one score per box
            dets[:, 4] *= dets[:, 5]
            dets = dets[:, [0, 1, 2, 3, 4, 6]].cpu().numpy()
        else:
            # Nothing detected: hand the tracker an empty array with 6 columns
            dets = np.empty((0, 6))

        # Update tracker
        res = tracker.update(dets, frame)

        # Plot results and display
        tracker.plot_results(frame, show_trajectories=True)
        cv2.imshow('BoXMOT + YOLOX', frame)

        # Quit when 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources
    vid.release()
    cv2.destroyAllWindows()