In [1]:
import sys
import argparse
import os
import os.path as osp
import time
import cv2
import torch

from loguru import logger

sys.path.append('.')

from yolox.data.data_augment import preproc
from yolox.exp import get_exp
from yolox.utils import fuse_model, get_model_info, postprocess
from yolox.utils.visualize import plot_tracking

from tracker.mc_bot_sort import BoTSORT
from tracker.tracking_utils.timer import Timer



# Build the detector described by the experiment file and keep it on the CPU.
exp = get_exp('../yolox/exps/example/mot/yolox_s_mix_det.py', 'yolox_s_mix_det')
model = exp.get_model().to('cpu')

# NOTE(review): torch.load unpickles the file, which can execute arbitrary code;
# only point this at checkpoints from a trusted source.
ckpt_file = '../../yolox_s_coco.pth'
ckpt = torch.load(ckpt_file, map_location="cpu")

# Load the weights exactly once and keep the returned report so that missing or
# unexpected keys are visible in the cell output.
msg = model.load_state_dict(ckpt["model"])

# Switch to inference mode after the weights are in place.
model.eval()
print(msg)


<All keys matched successfully>


In [None]:
import argparse

# Demo settings exposed as attributes on an argparse.Namespace, standing in for
# parsed command-line arguments.
# Keys stay in their original (alphabetical) order so that the Namespace repr,
# and therefore the logged "Args: ..." line, is unchanged.
# Within this notebook, `path`, `exp_file`, `name`, `conf`, `nms`, `tsize`,
# `device`, `fp16`, `fps`, `fuse_score` and `min_box_area` are read directly;
# the remaining entries are presumably consumed by BoTSORT, which receives the
# whole namespace - TODO confirm against the tracker implementation.
args = argparse.Namespace(**{
    'appearance_thresh': 0.25,
    'camid': 0,
    # NOTE(review): the first cell loads '../../yolox_s_coco.pth'; this '.pt'
    # value is not read by any visible code - confirm which one is intended.
    'ckpt': '../../yolox_s_coco.pt',
    'cmc_method': 'sparseOptFlow',
    'conf': 0.1,
    'demo': 'images',
    'device': 'cpu',
    'exp_file': '../yolox/exps/example/mot/yolox_s_mix_det.py',
    'experiment_name': None,
    'fast_reid_config': '../fast_reid/configs/MOT17/sbs_S50.yml',
    'fast_reid_weights': '../pretrained/mot17_sbs_S50.pth',
    'fp16': False,
    'fps': 30,
    'fuse': False,
    'fuse_score': False,
    'match_thresh': 0.8,
    'min_box_area': 10,
    'name': None,
    'new_track_thresh': 0.1,
    'nms': None,
    'path': '../../data/images',
    'proximity_thresh': 0.5,
    'save_result': True,
    'track_buffer': 30,
    'track_high_thresh': 0.3,
    'track_low_thresh': 0.1,
    'trt': False,
    'tsize': None,
    'with_reid': False,
})


In [3]:
# File extensions (lower-case, including the dot) that are treated as images.
IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"]


def get_image_list(path):
    """Recursively collect the image files found under ``path``.

    Args:
        path: Directory to walk. A path that does not exist yields no files.

    Returns:
        A list of file paths (``maindir`` joined with the file name) whose
        extension, compared case-insensitively, is in ``IMAGE_EXT``. The list
        is sorted so callers get a reproducible order; ``os.walk`` alone makes
        no ordering guarantee, which matters when the files are consumed as
        consecutive frames.
    """
    image_names = []
    for maindir, _subdirs, file_name_list in os.walk(path):
        for filename in file_name_list:
            apath = osp.join(maindir, filename)
            # Lower-case the extension so files such as "IMG_0001.JPG" are kept.
            ext = osp.splitext(apath)[1].lower()
            if ext in IMAGE_EXT:
                image_names.append(apath)
    image_names.sort()
    return image_names

# Extra attributes added to the settings namespace; both are assigned again in
# the tracking cell before the tracker is built (presumably BoTSORT reads
# them - TODO confirm).
args.ablation = False
args.mot20 = None

# The images are fed to the tracker as consecutive frames, so the order must be
# reproducible. Sorting by path assumes frame names sort chronologically
# (e.g. zero-padded frame numbers) - verify for the dataset in use.
files = sorted(get_image_list(args.path))

In [4]:
class Predictor(object):
    """Runs a detection model on one image, including pre- and post-processing.

    Thresholds and the network input size are taken from ``exp``
    (``num_classes``, ``test_conf``, ``nmsthre``, ``test_size``).
    """

    def __init__(
        self,
        model,
        exp,
        trt_file=None,
        decoder=None,
        device=torch.device("cpu"),
        fp16=False
    ):
        """Store the model and the inference settings.

        Args:
            model: Callable detection model applied to the preprocessed batch.
            exp: Experiment object providing ``num_classes``, ``test_conf``,
                ``nmsthre`` and ``test_size``.
            trt_file: Optional path to a torch2trt state dict. When given, the
                TensorRT module replaces ``model`` after one warm-up call.
            decoder: Optional callable applied to the raw model output before
                post-processing.
            device: Device the input tensor is moved to.
            fp16: When true, the input tensor is converted to half precision.
        """
        self.model = model
        self.decoder = decoder
        self.num_classes = exp.num_classes
        self.confthre = exp.test_conf
        self.nmsthre = exp.nmsthre
        self.test_size = exp.test_size
        self.device = device
        self.fp16 = fp16
        if trt_file is not None:
            # Imported lazily so torch2trt is only required when TensorRT is used.
            from torch2trt import TRTModule

            model_trt = TRTModule()
            model_trt.load_state_dict(torch.load(trt_file))

            # One forward pass through the original model on a dummy input
            # before it is swapped for the TensorRT module.
            x = torch.ones((1, 3, exp.test_size[0], exp.test_size[1]), device=device)
            self.model(x)
            self.model = model_trt
        # Per-channel normalisation constants handed to ``preproc``.
        self.rgb_means = (0.485, 0.456, 0.406)
        self.std = (0.229, 0.224, 0.225)

    def inference(self, img, timer):
        """Detect objects in a single image.

        Args:
            img: Either a path to an image file or an already-loaded image
                array (anything exposing ``.shape``).
            timer: Object with a ``tic()`` method; it is started right before
                the forward pass and must be stopped by the caller.

        Returns:
            A tuple ``(outputs, img_info)`` where ``outputs`` is the result of
            ``postprocess`` and ``img_info`` holds ``id``, ``file_name``,
            ``height``, ``width``, ``raw_img`` and ``ratio``.

        Raises:
            OSError: If ``img`` is a path that OpenCV cannot read.
        """
        img_info = {"id": 0}
        if isinstance(img, str):
            img_path = img
            img_info["file_name"] = osp.basename(img_path)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals a missing/undecodable file by returning
                # None; fail here with the path instead of on ``img.shape``.
                raise OSError(f"cv2.imread could not read image: {img_path!r}")
        else:
            img_info["file_name"] = None

        height, width = img.shape[:2]
        img_info["height"] = height
        img_info["width"] = width
        img_info["raw_img"] = img

        img, ratio = preproc(img, self.test_size, self.rgb_means, self.std)
        img_info["ratio"] = ratio
        # Add a leading batch dimension and move the tensor to the target device.
        img = torch.from_numpy(img).unsqueeze(0).float().to(self.device)
        if self.fp16:
            img = img.half()  # to FP16

        with torch.no_grad():
            timer.tic()
            outputs = self.model(img)
            if self.decoder is not None:
                outputs = self.decoder(outputs, dtype=outputs.type())
            outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)
        return outputs, img_info



        

In [5]:
# Tracking driver: configure the experiment from `args`, then run the detector
# and the BoT-SORT tracker over every image in `files`, collecting one text
# line per tracked box in `results`.

# Rebuild the experiment from the settings namespace (args.name is None here).
exp = get_exp(args.exp_file, args.name)

args.ablation = False
# mot20 is set to the negation of fuse_score, so it is True with the current
# settings (fuse_score=False).
args.mot20 = not args.fuse_score

logger.info("Args: {}".format(args))

# Override the experiment's thresholds / input size only where a value was given.
if args.conf is not None:
    exp.test_conf = args.conf
if args.nms is not None:
    exp.nmsthre = args.nms
if args.tsize is not None:
    exp.test_size = (args.tsize, args.tsize)
    
# `model` is the network created and loaded in the first cell.
logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size)))

# No TensorRT engine and no separate decoder are used in this notebook.
trt_file = None
decoder = None

predictor = Predictor(model, exp, trt_file, decoder, args.device, args.fp16)

# The tracker receives the whole settings namespace plus the frame rate.
tracker = BoTSORT(args, frame_rate=args.fps)

timer = Timer()
# One formatted text line per tracked box that passes the area filter below.
results = []

# Frame ids start at 1; frames are processed in the order of `files`.
for frame_id, img_path in enumerate(files, 1):

    # Detect objects (this also calls timer.tic() just before the forward pass).
    outputs, img_info = predictor.inference(img_path, timer)
    # Smaller of the two test-size / image-size ratios; presumably the resize
    # factor applied by `preproc` - TODO confirm against img_info['ratio'].
    scale = min(exp.test_size[0] / float(img_info['height'], ), exp.test_size[1] / float(img_info['width']))

    # Stays an empty list when the detector returned nothing for this image.
    detections = []
    if outputs[0] is not None:
        outputs = outputs[0].cpu().numpy()
        # Keep the first seven columns and divide the first four by `scale`
        # (presumably box coordinates mapped back to the original image - confirm).
        detections = outputs[:, :7]
        detections[:, :4] /= scale

    # Run tracker on this frame's detections and the original image.
    online_targets = tracker.update(detections, img_info['raw_img'])

    online_tlwhs = []
    online_ids = []
    online_scores = []
    online_cls = []
    for t in online_targets:
        tlwh = t.tlwh
        tid = t.track_id
        # Skip boxes whose width * height does not exceed args.min_box_area.
        if tlwh[2] * tlwh[3] > args.min_box_area:
            online_tlwhs.append(tlwh)
            online_ids.append(tid)
            online_scores.append(t.score)
            online_cls.append(t.cls)

            # Record "frame,id,<four tlwh values>,score,-1,-1,-1" for this box
            # (presumably MOTChallenge result format - confirm).
            results.append(
                f"{frame_id},{tid},{tlwh[0]:.2f},{tlwh[1]:.2f},{tlwh[2]:.2f},{tlwh[3]:.2f},{t.score:.2f},-1,-1,-1\n"
            )
    # Stops the timer started inside predictor.inference.
    timer.toc()
    # NOTE(review): online_im is overwritten every frame and neither it nor
    # `results` is written to disk in this cell, although args.save_result is True.
    online_im = plot_tracking(
        img_info['raw_img'], online_tlwhs, online_ids, frame_id=frame_id, fps=1. / timer.average_time, ids2=online_cls
    )



[32m2025-06-18 00:54:28.244[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mArgs: Namespace(ablation=False, appearance_thresh=0.25, camid=0, ckpt='../../yolox_s_coco.pt', cmc_method='sparseOptFlow', conf=0.1, demo='images', device='cpu', exp_file='../yolox/exps/example/mot/yolox_s_mix_det.py', experiment_name=None, fast_reid_config='../fast_reid/configs/MOT17/sbs_S50.yml', fast_reid_weights='../pretrained/mot17_sbs_S50.pth', fp16=False, fps=30, fuse=False, fuse_score=False, match_thresh=0.8, min_box_area=10, mot20=True, name=None, new_track_thresh=0.1, nms=None, path='../../data/images', proximity_thresh=0.5, save_result=True, track_buffer=30, track_high_thresh=0.3, track_low_thresh=0.1, trt=False, tsize=None, with_reid=False)[0m
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]
[32m2025-06-18 00:54:28.468[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m15[0m - [1mModel Summary: Params: 8.97M, Gflops: 75.73[0m


In [6]:
results 

['1,1,219.68,336.83,154.47,60.42,0.41,-1,-1,-1\n',
 '2,1,209.27,333.38,122.37,63.71,0.30,-1,-1,-1\n',
 '3,1,182.76,333.56,166.75,72.42,0.45,-1,-1,-1\n',
 '4,1,136.28,332.62,200.91,85.17,0.39,-1,-1,-1\n',
 '5,1,77.34,336.88,230.28,92.84,0.16,-1,-1,-1\n',
 '108,2,931.95,254.14,102.46,282.72,0.43,-1,-1,-1\n',
 '109,3,1002.96,234.87,127.26,312.55,0.84,-1,-1,-1\n',
 '157,4,1121.56,293.82,132.95,96.26,0.32,-1,-1,-1\n',
 '170,5,764.01,301.94,97.01,54.41,0.37,-1,-1,-1\n',
 '171,5,776.93,301.64,106.23,60.07,0.45,-1,-1,-1\n',
 '172,5,790.11,301.58,117.38,61.40,0.42,-1,-1,-1\n',
 '173,5,821.90,298.03,110.61,64.03,0.19,-1,-1,-1\n',
 '174,5,824.19,296.88,132.19,69.57,0.18,-1,-1,-1\n']