
Step 2: Split Traffic Videos into Fixed-Length Scenes
This notebook performs two main tasks:
1. Automatically converts all input videos (.mp4, .mov, .avi) into a standardized H.264 .mp4 format using FFmpeg.
2. Splits each video into fixed-length scenes (e.g., 5 seconds each) for CLIP-based semantic search.
   It also logs the time taken to process each video.

Setup and configuration

In [1]:
import os
import cv2
import subprocess
import time

# --- Pipeline folders (relative to the notebook's working directory) ---
INPUT_DIR = 'video_data/raw_videos'        # where the original .mp4/.avi/.mov files live
TEMP_DIR = 'video_data/converted_videos'   # FFmpeg writes the re-encoded .mp4 files here
OUTPUT_DIR = 'video_data/scenes'           # the fixed-length scene clips end up here

# --- Splitting parameters ---
CLIP_DURATION = 5  # length of every scene, in seconds

# Create the folders this notebook writes into (no-op when they already exist).
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(TEMP_DIR, exist_ok=True)

Function: Convert video file formats into standard .mp4 for uniformity

In [2]:
def convert_to_mp4(file_path, out_dir):
    '''
    Uses FFmpeg to re-encode a video file as H.264/AAC .mp4 for better compatibility.

    The output keeps the input's base name and is placed in out_dir. If that
    file already exists, the conversion is skipped.

    FFmpeg writes to a temporary file that is renamed to the final name only
    after a successful run, so a failed or interrupted conversion never leaves
    a truncated .mp4 behind that a later run would mistake for a finished one.

    Args:
        file_path: Path of the video to convert.
        out_dir: Directory that receives the .mp4 (created if missing).

    Returns:
        Path of the converted .mp4, or None if FFmpeg could not be started
        or reported an error.
    '''
    base_name = os.path.splitext(os.path.basename(file_path))[0]
    output_path = os.path.join(out_dir, base_name + '.mp4')
    if os.path.exists(output_path):
        return output_path  # Already converted

    os.makedirs(out_dir, exist_ok=True)
    # Keep the .mp4 extension on the temporary name: FFmpeg infers the
    # container format from the output file's extension.
    partial_path = os.path.join(out_dir, base_name + '.partial.mp4')

    print(f"Converting {file_path} to .mp4")
    command = [
        'ffmpeg', '-y', '-i', file_path,
        '-c:v', 'libx264', '-preset', 'fast', '-crf', '23',
        '-c:a', 'aac', '-strict', 'experimental',
        partial_path
    ]
    try:
        # stderr is captured (not discarded) so the failure reason can be shown.
        subprocess.run(command, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE)
        # Publish the result in one step; replaces any stale file of the same name.
        os.replace(partial_path, output_path)
        print(f"Converted: {output_path}")
    except subprocess.CalledProcessError as err:
        print(f"FFmpeg failed to convert {file_path}")
        stderr_text = (err.stderr or b'').decode('utf-8', errors='replace')
        tail = [line for line in stderr_text.splitlines() if line.strip()][-5:]
        if tail:
            print('\n'.join(tail))
        return None
    except OSError as err:
        # Raised when the ffmpeg executable cannot be started (e.g. it is not
        # installed / not on PATH) or when the final rename fails.
        print(f"FFmpeg failed to convert {file_path}: {err}")
        return None
    finally:
        # After a successful rename the partial file no longer exists; after a
        # failure this removes whatever FFmpeg managed to write.
        if os.path.exists(partial_path):
            os.remove(partial_path)

    return output_path

Function: Split one video into clips

In [3]:
def split_video(file_path, out_dir, clip_duration):
    '''
    Splits the input video into multiple clips of fixed duration.
    Saves each scene with a numbered filename
    (<video name>_clip_000.mp4, <video name>_clip_001.mp4, ...).

    Frames are read until the decoder reports the end of the stream rather
    than trusting the container's frame count, which is only used for the
    progress message. A clip file is created only once there is a frame to put
    in it, so no empty clips are written; the last clip may be shorter than
    clip_duration.

    Args:
        file_path: Path of the video to split.
        out_dir: Directory that receives the clips (created if missing).
        clip_duration: Length of each clip in seconds; must be positive.

    Raises:
        ValueError: If clip_duration is not positive.
    '''
    if not clip_duration > 0:
        raise ValueError(f"clip_duration must be positive, got {clip_duration!r}")

    video_name = os.path.splitext(os.path.basename(file_path))[0]
    cap = cv2.VideoCapture(file_path)

    if not cap.isOpened():
        print(f"Failed to open {file_path}")
        return

    writer = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Without a usable frame rate the clip length in frames is undefined.
            print(f"Cannot split {file_path}: invalid frame rate ({fps})")
            return

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Round (not truncate) so e.g. 23.976 fps * 5 s gives 120 frames, and
        # never go below one frame per clip.
        clip_frames = max(1, round(fps * clip_duration))
        # Frame size and codec are the same for every clip: query them once.
        frame_size = (
            int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        )
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        os.makedirs(out_dir, exist_ok=True)

        print(f"🎬 Processing: {video_name} ({total_frames} frames @ {fps:.2f} fps)")
        clip_idx = 0        # number of clips completed so far
        frames_in_clip = 0  # frames written to the clip currently open
        start_time = time.time()  # Start timing the process

        while True:
            ret, frame = cap.read()
            if not ret:
                break  # end of stream (or decode error): stop instead of spinning

            if writer is None:
                # Open the next clip lazily, now that a frame is available for it.
                out_path = os.path.join(out_dir, f"{video_name}_clip_{clip_idx:03d}.mp4")
                writer = cv2.VideoWriter(out_path, fourcc, fps, frame_size)
                if not writer.isOpened():
                    print(f"Failed to create {out_path}")
                    break

            writer.write(frame)
            frames_in_clip += 1

            if frames_in_clip >= clip_frames:
                writer.release()
                writer = None
                frames_in_clip = 0
                clip_idx += 1

        if frames_in_clip > 0:
            clip_idx += 1  # count the trailing, shorter clip

        elapsed = time.time() - start_time
        print(f"Done: {video_name} → {clip_idx} clips (⏱ {elapsed:.2f} sec)")
    finally:
        # Release handles even if reading or writing raised.
        if writer is not None:
            writer.release()
        cap.release()

Run the splitter for all the videos

In [4]:
# Collect the input videos. os.listdir() returns names in arbitrary order, so
# sort them to make the processing order (and the log below) reproducible, and
# keep only regular files so a directory named like a video is not picked up.
video_files = sorted(
    f for f in os.listdir(INPUT_DIR)
    if f.lower().endswith(('.mp4', '.avi', '.mov'))
    and os.path.isfile(os.path.join(INPUT_DIR, f))
)

# Converted files are named after the input's base name, so two inputs such as
# "a.mov" and "a.avi" would map to the same "a.mp4". Process only the first one
# and report the rest instead of silently re-splitting the same video.
seen_stems = set()

for video_file in video_files:
    stem = os.path.normcase(os.path.splitext(video_file)[0])
    if stem in seen_stems:
        print(f"Skipping {video_file}: another input with the same base name was already processed")
        continue
    seen_stems.add(stem)

    full_path = os.path.join(INPUT_DIR, video_file)
    mp4_path = convert_to_mp4(full_path, TEMP_DIR)
    if mp4_path:  # None means the conversion failed; move on to the next video
        split_video(mp4_path, OUTPUT_DIR, CLIP_DURATION)


Converting video_data/raw_videos\vecteezy_car-and-truck-traffic-on-the-highway-in-europe-poland_7957364.mp4 to .mp4
Converted: video_data/converted_videos\vecteezy_car-and-truck-traffic-on-the-highway-in-europe-poland_7957364.mp4
🎬 Processing: vecteezy_car-and-truck-traffic-on-the-highway-in-europe-poland_7957364 (307 frames @ 24.00 fps)
Done: vecteezy_car-and-truck-traffic-on-the-highway-in-europe-poland_7957364 → 3 clips (⏱ 26.20 sec)
Converting video_data/raw_videos\vecteezy_third-transportation-ring-of-moscow_28261175.mov to .mp4
Converted: video_data/converted_videos\vecteezy_third-transportation-ring-of-moscow_28261175.mp4
🎬 Processing: vecteezy_third-transportation-ring-of-moscow_28261175 (443 frames @ 23.98 fps)
Done: vecteezy_third-transportation-ring-of-moscow_28261175 → 4 clips (⏱ 9.29 sec)
