In [24]:
# Cell 0: Setup
import os
import random
import cv2  # OpenCV for video processing
import time

# For inference with YOLO (assuming you use ultralytics)
from ultralytics import YOLO

# Locations used throughout the notebook (adjust to your folder layout):
#   test_videos_dir      - where the input test videos are read from
#   extracted_frames_dir - where the sampled frames are written
test_videos_dir = 'data/test'
extracted_frames_dir = 'data/test_frames'

# Make sure the output location exists; a no-op when it is already there
os.makedirs(name=extracted_frames_dir, exist_ok=True)


In [71]:
# Cell 1: Randomly choose 3 candidate videos
# os.listdir returns entries in arbitrary order, so the listing is sorted first:
# a seeded sample is only reproducible when it draws from the same sequence
# on every run and every machine.
all_videos = sorted(
    f for f in os.listdir(test_videos_dir)
    if f.lower().endswith(('.mp4', '.avi', '.mov'))
)
print(f"Total videos found: {len(all_videos)}")

# Seed right before sampling so re-running this cell selects the same videos
random.seed(162)

# Select up to 3 random videos (all of them if fewer than 3 are available)
num_candidates = 3
candidate_videos = random.sample(all_videos, min(num_candidates, len(all_videos)))
print("Selected candidate videos:")
for vid in candidate_videos:
    print(vid)


Total videos found: 10
Selected candidate videos:
bicycle_test.mp4
car_test.mp4
people_test.mp4


In [72]:
# Cell 2: Extract one frame from each segment
# Segment layout: every segment covers `segment_length` seconds, and consecutive
# segment starts are `segment_length + segment_gap` seconds apart.
segment_length = 6   # seconds per segment
segment_gap = 1      # seconds skipped between segments, so segments start at 0, 7, 14, etc.

# We'll choose the middle of each segment. For a 6-sec segment, the midpoint is 3 seconds after the segment's start.
def extract_segment_frames(video_path, output_dir, seg_length, seg_gap):
    """Save the midpoint frame of each fixed-length segment of a video as a JPEG.

    Segments start at 0, (seg_length + seg_gap), 2 * (seg_length + seg_gap), ...
    seconds; only segments that fit entirely inside the video are used. For each
    one, the frame at ``start + seg_length / 2`` seconds is written to
    ``<output_dir>/frame_<frame index>.jpg``. Progress and failures are reported
    with ``print``; nothing is returned.

    Args:
        video_path: Path of the video file to read.
        output_dir: Directory that receives the JPEG files (not created here).
        seg_length: Segment length in seconds.
        seg_gap: Seconds between the end of one segment and the start of the next.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Could not open video, skipping: {video_path}")
            return

        # Get FPS and total frame count
        video_fps = cap.get(cv2.CAP_PROP_FPS)
        # A zero (or NaN) FPS would otherwise surface as a ZeroDivisionError below
        if not video_fps > 0:
            print(f"Video reports no valid FPS, skipping: {video_path}")
            return
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration_sec = total_frames / video_fps
        print(f"Video: {os.path.basename(video_path)} | FPS: {video_fps:.2f} | Duration: {duration_sec:.2f}s")

        # Segment starts are spaced one full interval (length + gap) apart;
        # a trailing segment that would run past the end of the video is dropped.
        segment_interval = seg_length + seg_gap
        segment_starts = [
            t for t in range(0, int(duration_sec), segment_interval)
            if t + seg_length <= duration_sec
        ]
        print(f"Number of segments to extract: {len(segment_starts)}")

        for seg_start in segment_starts:
            # Midpoint of the segment, converted from seconds to a frame index
            target_time = seg_start + seg_length / 2
            target_frame_index = int(target_time * video_fps)

            # Seek to the target frame and decode it
            cap.set(cv2.CAP_PROP_POS_FRAMES, target_frame_index)
            ret, frame = cap.read()
            if not ret:
                print(f"Failed to capture frame at {target_time:.1f}s (Frame {target_frame_index})")
                continue

            frame_filename = os.path.join(output_dir, f"frame_{target_frame_index:04d}.jpg")
            # imwrite reports failure through its return value instead of raising,
            # so only announce success when the file was actually written.
            if cv2.imwrite(frame_filename, frame):
                print(f"Saved frame at {target_time:.1f}s (Frame {target_frame_index}) -> {frame_filename}")
            else:
                print(f"Failed to write frame at {target_time:.1f}s (Frame {target_frame_index}) -> {frame_filename}")
    finally:
        # Release the capture handle on every exit path, including errors
        cap.release()

# Run the extraction once per selected video
for vid_name in candidate_videos:
    # Frames for each video go into their own folder, named after the video file without its extension
    video_frames_dir = os.path.join(extracted_frames_dir, os.path.splitext(vid_name)[0])
    os.makedirs(video_frames_dir, exist_ok=True)

    # Sample one frame per segment from the source video
    vid_path = os.path.join(test_videos_dir, vid_name)
    extract_segment_frames(vid_path, video_frames_dir, segment_length, segment_gap)

    # Blank line between the logs of consecutive videos
    print()


Video: bicycle_test.mp4 | FPS: 28.00 | Duration: 79.11s
Number of segments to extract: 11
Saved frame at 3.0s (Frame 84) -> data/test_frames\bicycle_test\frame_0084.jpg
Saved frame at 10.0s (Frame 280) -> data/test_frames\bicycle_test\frame_0280.jpg
Saved frame at 17.0s (Frame 476) -> data/test_frames\bicycle_test\frame_0476.jpg
Saved frame at 24.0s (Frame 672) -> data/test_frames\bicycle_test\frame_0672.jpg
Saved frame at 31.0s (Frame 868) -> data/test_frames\bicycle_test\frame_0868.jpg
Saved frame at 38.0s (Frame 1064) -> data/test_frames\bicycle_test\frame_1064.jpg
Saved frame at 45.0s (Frame 1260) -> data/test_frames\bicycle_test\frame_1260.jpg
Saved frame at 52.0s (Frame 1456) -> data/test_frames\bicycle_test\frame_1456.jpg
Saved frame at 59.0s (Frame 1652) -> data/test_frames\bicycle_test\frame_1652.jpg
Saved frame at 66.0s (Frame 1848) -> data/test_frames\bicycle_test\frame_1848.jpg
Saved frame at 73.0s (Frame 2044) -> data/test_frames\bicycle_test\frame_2044.jpg

Video: car_tes

# Inference


In [None]:
# Cell 4: Run inference on the extracted frames and measure inference time

# Weights file of the pre-trained YOLO model -- replace the placeholder with your own model file
weights_path = "path/to/your/trained_model.pt"
model = YOLO(weights_path)

def run_inference_on_frame(image_path, conf=0.25, imgsz=640):
    """Run the loaded YOLO model on one image file and time the prediction call.

    Only the ``model.predict`` call is timed; reading the image from disk is
    excluded from the measurement.

    Args:
        image_path: Path of the image file to load with ``cv2.imread``.
        conf: Confidence threshold forwarded to ``model.predict``.
        imgsz: Inference image size forwarded to ``model.predict``.

    Returns:
        A ``(results, elapsed)`` tuple: whatever ``model.predict`` returned and
        the elapsed time of that call in seconds.

    Raises:
        OSError: If the image could not be read.
    """
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising. Passing
    # None on to predict would not measure this frame.
    # NOTE(review): ultralytics appears to fall back to its bundled sample
    # images when source is None -- confirm.
    if img is None:
        raise OSError(f"Could not read image: {image_path}")

    # perf_counter is monotonic and high-resolution, unlike wall-clock time.time()
    start_time = time.perf_counter()
    results = model.predict(source=img, conf=conf, imgsz=imgsz)
    elapsed = time.perf_counter() - start_time
    return results, elapsed

# Time inference on every saved frame, video by video, collecting the per-frame timings
all_inference_times = []
for vid_name in candidate_videos:
    # Frames of a video live in the folder named after the video file without its extension
    video_frames_dir = os.path.join(extracted_frames_dir, os.path.splitext(vid_name)[0])
    frame_files = sorted(
        f for f in os.listdir(video_frames_dir)
        if f.endswith('.jpg')
    )
    print(f"Running inference on {len(frame_files)} frames for video {vid_name}:")

    for frame_file in frame_files:
        frame_path = os.path.join(video_frames_dir, frame_file)
        # Keep only the timing for the summary; the detections are not used further here
        results, elapsed = run_inference_on_frame(frame_path)
        all_inference_times.append(elapsed)
        print(f"Frame {frame_file}: Inference time: {elapsed*1000:.2f} ms")

    print()

# Report the mean over all frames; skipped when no frame was processed
if all_inference_times:
    avg_time = sum(all_inference_times) / len(all_inference_times)
    print(f"Average inference time per frame: {avg_time*1000:.2f} ms")


In [None]:
# Cell 5: (Optional) Evaluate mAP using the YOLO test command (run in a terminal or via subprocess)
!yolo task=detect mode=test model=path/to/your/trained_model.pt data=path/to/dataset.yaml imgsz=640 conf=0.25
