In [1]:
import numpy as np
import cv2
from pathlib import Path
from yolov11_onnx_wrapper import YOLOv11

In [2]:
# --- Configuration: detector, input videos, output directory, drawing colours ---

# ONNX detector; the class filter keeps only label ids 1..8.
model_path = Path("../yolo11s.onnx")
model = YOLOv11(
    model_path=model_path,
    valid_class_checker=lambda lbl_id, _: 1 <= lbl_id <= 8 # only detect vehicles
)
# vid_path_l = [ Path("../input_videos/5473765-uhd_3840_2160_24fps.mp4") ]
# Materialise the glob into a sorted list: a bare generator is exhausted after
# one pass (re-running the processing cell would then silently do nothing) and
# yields files in an arbitrary, filesystem-dependent order.
vid_path_l = sorted(Path("../input_videos").glob("*.mp4"))
out_vid_dir = Path("./out_vid_dir")
out_vid_dir.mkdir(exist_ok=True)

test_b = True  # NOTE(review): not referenced in the visible cells
# Colours are passed to OpenCV drawing calls on BGR frames, so (255, 0, 0) is blue.
roi_color = (255, 0, 0)
bbox_color = (0, 0, 255)  # NOTE(review): not referenced in the visible cells

[0;93m2025-02-09 09:43:10.038565 [W:onnxruntime:, coreml_execution_provider.cc:115 GetCapability] CoreMLExecutionProvider::GetCapability, number of partitions supported by CoreML: 15 number of nodes in the graph: 320 number of nodes supported by CoreML: 304[m


In [3]:
# Running counter of colours handed out so far, and the number of distinct
# hues the 0..179 hue range is divided into.
color_idx = 0
total_colors = 30

def generate_unique_colors():
    """Return the next colour of the palette as a ``(b, g, r)`` tuple of ints.

    Each call advances the module-level ``color_idx`` counter by one and picks
    the hue ``color_idx * 180 / total_colors`` (wrapped into ``[0, 180)``),
    with fixed saturation 200 and value 255. Because of the wrap-around, hues
    repeat after ``total_colors`` calls.
    """
    global color_idx
    step = color_idx
    color_idx = step + 1

    hue = int((step * 180 / total_colors) % 180)
    # A 1x1 HSV image is the smallest input cvtColor accepts.
    hsv_pixel = np.array([[[hue, 200, 255]]], dtype=np.uint8)
    blue, green, red = cv2.cvtColor(hsv_pixel, cv2.COLOR_HSV2BGR)[0, 0]
    # Plain Python ints (not numpy scalars) so the tuple can be used as a colour argument.
    return int(blue), int(green), int(red)

def format_time(secs):
    """Format a duration given in seconds as text.

    Args:
        secs: Duration in seconds (int or float).

    Returns:
        ``"<seconds>"`` with two decimals when the duration is shorter than a
        minute, otherwise ``"<whole minutes> <seconds>"`` where the seconds
        part is the remainder after removing whole minutes.
    """
    # divmod yields *whole* minutes; plain `/` produced a fractional value that
    # was > 0 for any positive duration, so e.g. 30 s was rendered as "0 30.00".
    mins, secs = divmod(secs, 60)
    ret_str = f"{secs:.2f}"
    if mins > 0:
        ret_str = f"{int(mins)} {ret_str}"
    return ret_str

In [4]:
# Process every input video: detect objects inside the per-video ROI, carry a
# colour and a frame counter across frames by nearest-centre matching, draw the
# boxes with the time each object has been tracked, then write and show the frame.
# Keys in the preview window: 'n' skips to the next video, 'q' stops everything.
exit_b = False
for vid_path in vid_path_l:

    # str() keeps this working on OpenCV builds that do not accept path-like objects.
    cap = cv2.VideoCapture(str(vid_path))
    if not cap.isOpened():
        print(f"Error: Reading {vid_path.name} video failed.")
        continue

    out_vid = None
    try:
        vid_w, vid_h = (
            int(cap.get(prop))
            for prop in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT)
        )
        # Keep the frame rate as a float: truncating e.g. 23.976 to 23 would make
        # the written video play at the wrong speed and skew the on-screen timers.
        fps = cap.get(cv2.CAP_PROP_FPS)
        print(f"Video WxH:{vid_w}x{vid_h} FPS:{int(fps)}")
        if fps <= 0:
            # Without a frame rate neither the writer nor the timers can work.
            print(f"Error: {vid_path.name} reports no valid FPS.")
            continue

        # Read ROI: "<video stem>_roi.txt" next to the video, holding x1 y1 x2 y2.
        roi_path = str(vid_path.parent / vid_path.stem) + "_roi.txt"
        roi = np.loadtxt(roi_path, dtype=np.int32)
        assert roi.shape == (4,), f"Invalid ROI {roi}"
        # The ROI is constant for the whole video, so unpack it once as plain ints.
        roi_x1, roi_y1, roi_x2, roi_y2 = (int(v) for v in roi)

        out_vid_path = out_vid_dir / vid_path.name
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out_vid = cv2.VideoWriter(str(out_vid_path), fourcc, fps, (vid_w, vid_h))

        det_l = []        # detections of the previous frame
        dist_thresh = 40  # max centre distance (in bbox coordinate units) to count as the same object
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Detection runs on the ROI crop only.
            img = frame[roi_y1:roi_y2, roi_x1:roi_x2, :]
            bbox_l = model.detect(img)
            cv2.rectangle(frame, (roi_x1, roi_y1), (roi_x2, roi_y2), roi_color, 2)
            new_det_l = []
            for bbox_idx, bbox in enumerate(bbox_l):
                center = np.array([(bbox.x1 + bbox.x2) / 2, (bbox.y1 + bbox.y2) / 2])
                # calc dist from all known objects
                min_idx, min_dist = -1, np.inf
                for idx, det in enumerate(det_l):
                    dist = np.linalg.norm(center - det['center'])
                    if dist < min_dist:
                        min_idx, min_dist = idx, dist
                if min_dist < dist_thresh:
                    # Close enough to a previous detection: inherit its colour and timer.
                    color = det_l[min_idx]['color']
                    frame_count = det_l[min_idx]['frame_count'] + 1
                else:
                    color = generate_unique_colors()
                    frame_count = 1
                new_det_l.append({
                    'color': color,
                    'frame_count': frame_count,
                    'center': center,
                    'bbox_idx': bbox_idx,
                })

            det_l = new_det_l
            for det in det_l:
                bbox = bbox_l[det['bbox_idx']]
                color = det['color']
                # Shift the box by the ROI origin to get full-frame coordinates
                # (assumes the detector reports coordinates relative to the crop -- TODO confirm).
                disp_bbox = np.array([bbox.x1, bbox.y1, bbox.x2, bbox.y2], dtype=np.int32).reshape(2, 2) + roi[:2]
                x1, y1, x2, y2 = (int(v) for v in disp_bbox.reshape(-1))
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                # time the object has been tracked, rendered as m:ss.ss
                t = det['frame_count'] / fps
                mins, secs = int(t // 60), t % 60
                t_str = f"{mins}:{secs:.2f}"
                bbox_center = (x1 + x2) // 2, (y1 + y2) // 2
                bbox_height = y2 - y1
                # Scale the label with the box height so it stays readable.
                font_scale = max(0.5, bbox_height / 200)
                font_thickness = max(1, int(font_scale * 2))
                txt_size, _ = cv2.getTextSize(t_str, cv2.FONT_HERSHEY_SIMPLEX, font_scale, font_thickness)
                txt_pos = int(bbox_center[0] - txt_size[0] * .5), int(bbox_center[1] - txt_size[1] * .5)
                cv2.putText(frame, t_str, txt_pos, cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, font_thickness, cv2.LINE_AA)

            out_vid.write(frame)
            cv2.imshow('show', frame)
            key = cv2.waitKey(1) & 0xFF
            if key == ord('n'):
                break
            if key == ord('q'):
                exit_b = True
                break
    finally:
        # Always free the capture/writer and close the preview window, even when
        # ROI loading or frame processing raises.
        cap.release()
        if out_vid is not None:
            out_vid.release()
        cv2.destroyAllWindows()
        cv2.waitKey(1)

    if exit_b:
        break

Video WxH:3840x2160 FPS:23


2025-02-09 09:43:13.055 Python[4243:49529] +[IMKClient subclass]: chose IMKClient_Modern
2025-02-09 09:43:13.055 Python[4243:49529] +[IMKInputSession subclass]: chose IMKInputSession_Modern


Video WxH:3840x2160 FPS:15
Video WxH:3840x2160 FPS:30
Video WxH:1280x720 FPS:50
Video WxH:3840x2160 FPS:23


In [5]:
# Manual cleanup cell: close any OpenCV preview window still open (e.g. after
# the processing cell was interrupted).
cv2.destroyAllWindows()
# Pump the GUI event loop once; the returned key code is the cell's displayed
# output (-1 in the recorded run).
cv2.waitKey(1)

-1