In [1]:
import cv2
import torch
from ultralytics import YOLO
from baseballcv.functions import LoadTools
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

# Inference device, named once so the model placement below and the
# predict() calls elsewhere in the notebook refer to the same value.
device = 'cuda:0'

# Load the model
load_tools = LoadTools()
model_path = 'https://data.balldatalab.com/index.php/s/YkGBwbFtsf34ky3/download/ball_tracking_v4-YOLOv11.pt'
model = YOLO(model_path)

# Speed-up settings
model.fuse()  # layer fusion
model.to(device)
model.half()  # half-precision inference (requires a GPU with FP16 support)

def _best_boxes_for_batch(model, frames, frame_indices):
    """Run one batched inference and keep the first box reported per frame.

    Args:
        model: Object exposing an Ultralytics-style ``predict`` method.
        frames: Frames in video order, exactly as returned by
            ``cv2.VideoCapture.read``.
        frame_indices: Frame number of each entry in ``frames``.

    Returns:
        list: One ``(frame_index, (x1, y1, x2, y2))`` tuple per frame, or
        ``(frame_index, None)`` when the frame produced no box.
    """
    results = model.predict(
        source=frames,
        imgsz=640,
        device=device,  # module-level device string defined next to the model
        verbose=False,
        half=True,  # force half-precision inference
        stream=False,
    )

    batch_boxes = []
    for frame_index, result in zip(frame_indices, results):
        boxes = result.boxes
        if boxes is not None and len(boxes) > 0:
            # Only the first box is kept.
            # NOTE(review): this presumes predict() orders boxes by
            # confidence, so index 0 is the best detection - confirm.
            x1, y1, x2, y2 = boxes[0].xyxy[0].tolist()
            batch_boxes.append((frame_index, (x1, y1, x2, y2)))
        else:
            batch_boxes.append((frame_index, None))
    return batch_boxes


@torch.no_grad()
def predict_pitch_boxes_from_video_batch(video_path, batch_size=16, model=model):
    """Detect one box per frame of a video, running inference in batches.

    Frames are read sequentially with OpenCV and sent to ``model.predict``
    in groups of ``batch_size``; a final smaller group is sent for whatever
    frames remain when the video ends.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        batch_size: Number of frames per ``predict`` call. A batch is flushed
            when it holds exactly this many frames, so a value below 1 never
            triggers a flush and every frame is sent in one final batch.
        model: Detector to use; defaults to the module-level ``model``.

    Returns:
        list: ``(frame_index, (x1, y1, x2, y2))`` for each frame with at
        least one box and ``(frame_index, None)`` otherwise, in frame order.
        If the capture reports that it is not opened, no frames are read and
        the list is empty.
    """
    cap = cv2.VideoCapture(video_path)
    box_results = []
    batch_frames = []
    frame_indices = []
    frame_idx = 0

    # try/finally guarantees the capture handle is released even when
    # decoding or inference raises part-way through the video.
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Frames are queued exactly as OpenCV decoded them; no tensor
            # conversion happens here. half=True on predict() is what
            # requests FP16 inference.
            batch_frames.append(frame)
            frame_indices.append(frame_idx)
            frame_idx += 1

            if len(batch_frames) == batch_size:
                box_results.extend(
                    _best_boxes_for_batch(model, batch_frames, frame_indices)
                )
                batch_frames = []
                frame_indices = []

        # Final, partially filled batch
        if batch_frames:
            box_results.extend(
                _best_boxes_for_batch(model, batch_frames, frame_indices)
            )
    finally:
        cap.release()

    return box_results


def draw_boxes_on_video_batch(input_path, output_path, box_results, batch_size=16):
    """Copy a video to ``output_path`` with the detected boxes drawn on it.

    Every frame that can be read from ``input_path`` is written to the
    output, encoded with the ``mp4v`` fourcc at the source's reported size
    and frame rate. A green rectangle, 2 px thick, is drawn on each frame
    that has a non-``None`` box.

    Args:
        input_path: Path of the source video.
        output_path: Path of the annotated video to create.
        box_results: Iterable of ``(frame_index, box)`` pairs where ``box``
            is ``(x1, y1, x2, y2)`` or ``None``. Boxes are matched to frames
            by the stored ``frame_index``; frames without an entry are
            written unchanged.
        batch_size: Accepted for backward compatibility and otherwise
            unused; frames are streamed one at a time because drawing gains
            nothing from batching.

    Returns:
        None.
    """
    # Match on the frame index carried in each tuple rather than on list
    # position, so sparse or reordered results still land on the right frame.
    boxes_by_frame = dict(box_results)

    cap = cv2.VideoCapture(input_path)
    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        # Read until the decoder runs dry instead of trusting
        # CAP_PROP_FRAME_COUNT: an over-reported count would leave a loop
        # bounded by it spinning forever once read() starts failing, and an
        # under-reported count would drop trailing frames.
        frame_idx = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            box = boxes_by_frame.get(frame_idx)
            if box is not None:
                # Pixel coordinates must be integers for cv2.rectangle.
                x1, y1, x2, y2 = map(int, box)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

            out.write(frame)
            frame_idx += 1
    finally:
        # Release both handles even if reading, drawing or writing raised.
        cap.release()
        if out is not None:
            out.release()


Found https://data.balldatalab.com/index.php/s/YkGBwbFtsf34ky3/download/ball_tracking_v4-YOLOv11.pt locally at weights/ball_tracking_v4-YOLOv11.pt
YOLO11x summary (fused): 190 layers, 56,831,644 parameters, 0 gradients, 194.4 GFLOPs


In [2]:
import time

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() follows the wall clock, which can be
# adjusted while the cell is running.
s = time.perf_counter()
boxes = predict_pitch_boxes_from_video_batch("/content/drive/MyDrive/Baseball Movies/CH_videos_4s/pitch_0001.mp4")
e = time.perf_counter()
print(e-s)  # elapsed seconds for the whole prediction pass

7.254924058914185


In [3]:
# Display the (frame_index, box) tuples returned by the prediction pass;
# box is (x1, y1, x2, y2) or None for frames without a detection.
boxes

[(0, None),
 (1, None),
 (2, None),
 (3, None),
 (4, None),
 (5, None),
 (6, (620.0, 210.5, 638.0, 229.0)),
 (7, None),
 (8, None),
 (9, None),
 (10, None),
 (11, None),
 (12, None),
 (13, None),
 (14, None),
 (15, None),
 (16, None),
 (17, None),
 (18, None),
 (19, None),
 (20, None),
 (21, None),
 (22, None),
 (23, None),
 (24, None),
 (25, (657.0, 239.5, 675.0, 257.0)),
 (26, (657.0, 239.75, 675.0, 257.25)),
 (27, (657.0, 239.5, 675.0, 257.5)),
 (28, (657.0, 238.75, 675.0, 256.25)),
 (29, None),
 (30, None),
 (31, None),
 (32, None),
 (33, None),
 (34, None),
 (35, None),
 (36, None),
 (37, None),
 (38, None),
 (39, None),
 (40, None),
 (41, None),
 (42, None),
 (43, None),
 (44, None),
 (45, None),
 (46, None),
 (47, None),
 (48, None),
 (49, None),
 (50, None),
 (51, None),
 (52, (617.0, 226.875, 637.0, 246.0)),
 (53, (618.0, 228.25, 640.0, 247.75)),
 (54, (619.0, 229.375, 641.0, 248.5)),
 (55, (621.0, 230.0, 641.0, 249.0)),
 (56, (621.5, 231.25, 642.5, 249.75)),
 (57, None),
 (58

In [4]:
# Write a copy of the source clip with the predicted boxes drawn on it.
draw_boxes_on_video_batch(
    "/content/drive/MyDrive/Baseball Movies/CH_videos_4s/pitch_0001.mp4",
    "output_with_boxes.mp4",
    boxes,
)