# Keyframe Extraction with Byte Offsets

This notebook extracts keyframes from a video and stores their byte offsets for efficient sequential decoding.

In [17]:
import subprocess
import json
import os
from pathlib import Path

In [18]:
# Location of the input video - change this to point at your own file
video_path = "/Users/kuntalsuman/Downloads/demo.mov"

# Stop right away if nothing exists at that location
video_is_present = os.path.exists(video_path)
if not video_is_present:
    raise FileNotFoundError(f"Video file not found: {video_path}")

# Echo the path and the on-disk size as a quick sanity check
print(f"Video file: {video_path}")
size_in_bytes = os.path.getsize(video_path)
print(f"File size: {size_in_bytes:,} bytes")

Video file: /Users/kuntalsuman/Downloads/demo.mov
File size: 18,712,861 bytes


## Extract Keyframe Information with Byte Offsets

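We'll use ffprobe to list the packets of the first video stream and record, for each keyframe:
- Frame type (keyframes, i.e. I-frames, are the packets whose flags contain `K`)
- Byte position (offset of the packet in the file)
- Presentation timestamp (PTS), in seconds
- Packet size, in bytes
- Frame number (the packet's index in ffprobe's output, starting at 0)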

In [19]:
def extract_keyframe_offsets(video_path):
    """
    List the keyframe packets of a video's first video stream.

    ffprobe is asked for every packet's timestamp, byte position, flags and
    size as JSON; only packets whose flags contain 'K' are kept.

    Args:
        video_path: Path of the video file handed to ffprobe.

    Returns:
        list: One dictionary per keyframe packet with the keys
            'frame_number' (zero-based index of the packet in ffprobe's output),
            'byte_offset' (packet position, -1 when ffprobe reports none),
            'pts_time' (seconds, 0 when ffprobe reports none) and
            'packet_size' (bytes, 0 when ffprobe reports none).

    Raises:
        subprocess.CalledProcessError: ffprobe exited with a non-zero status.
        json.JSONDecodeError: ffprobe's output was not valid JSON.
    """
    ffprobe_args = [
        'ffprobe',
        '-v', 'error',
        '-select_streams', 'v:0',  # first video stream only
        '-show_entries', 'packet=pts_time,pos,flags,size',  # fields read below
        '-of', 'json',  # machine-readable output
        video_path
    ]

    try:
        completed = subprocess.run(ffprobe_args, capture_output=True, text=True, check=True)
        probe = json.loads(completed.stdout)
    except subprocess.CalledProcessError as e:
        print(f"Error running ffprobe: {e}")
        print(f"stderr: {e.stderr}")
        raise
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON output: {e}")
        raise

    # The index counts every packet, keyframe or not, so 'frame_number'
    # is the packet's position in the stream as ffprobe listed it.
    return [
        {
            'frame_number': index,
            'byte_offset': int(pkt.get('pos', -1)),
            'pts_time': float(pkt.get('pts_time', 0)),
            'packet_size': int(pkt.get('size', 0))
        }
        for index, pkt in enumerate(probe.get('packets', []))
        if 'K' in pkt.get('flags', '')
    ]

In [20]:
# Run the extraction, then preview the first few keyframes
print("Extracting keyframe information...")
keyframes = extract_keyframe_offsets(video_path)

print(f"\nFound {len(keyframes)} keyframes")
print("\nFirst 5 keyframes:")
preview = keyframes[:5]
for position, kf in enumerate(preview, start=1):
    # position is 1-based purely for display
    print(f"\nKeyframe {position}:")
    print(f"  Frame Number: {kf['frame_number']}")
    print(f"  Byte Offset: {kf['byte_offset']:,}")
    print(f"  Timestamp: {kf['pts_time']:.3f}s")
    print(f"  Packet Size: {kf['packet_size']:,} bytes")

Extracting keyframe information...

Found 24 keyframes

First 5 keyframes:

Keyframe 1:
  Frame Number: 0
  Byte Offset: 36
  Timestamp: 0.000s
  Packet Size: 115,518 bytes

Keyframe 2:
  Frame Number: 33
  Byte Offset: 715,662
  Timestamp: 1.376s
  Packet Size: 79,365 bytes

Keyframe 3:
  Frame Number: 77
  Byte Offset: 1,513,552
  Timestamp: 3.212s
  Packet Size: 73,328 bytes

Keyframe 4:
  Frame Number: 164
  Byte Offset: 2,956,882
  Timestamp: 6.840s
  Packet Size: 38,628 bytes

Keyframe 5:
  Frame Number: 221
  Byte Offset: 3,460,108
  Timestamp: 9.218s
  Packet Size: 117,067 bytes


## Save Keyframe Metadata

Save the keyframe information to a JSON file for later use.

In [39]:
def seconds_to_mmss_ms(seconds):
    """
    Convert seconds (float) to MM:SS:mmm format.

    The value is first rounded to the nearest whole millisecond and then
    split with integer arithmetic. Splitting the float directly and
    truncating loses a millisecond whenever the binary representation sits
    just below the decimal value (e.g. 2.001 would come out as "00:02:000").

    Args:
        seconds (float): Time in seconds (e.g., 54.179125). A negative value
            is formatted from its magnitude with a leading "-".

    Returns:
        str: Time in MM:SS:mmm format (e.g., "00:54:179"). Minutes are not
            folded into hours, so they can exceed 59.

    Examples:
        >>> seconds_to_mmss_ms(54.179125)
        '00:54:179'
        >>> seconds_to_mmss_ms(125.5)
        '02:05:500'
        >>> seconds_to_mmss_ms(3661.123)
        '61:01:123'
        >>> seconds_to_mmss_ms(2.001)
        '00:02:001'
    """
    # Work in integer milliseconds from here on to stay clear of float error
    total_ms = round(seconds * 1000)
    sign = "-" if total_ms < 0 else ""

    minutes, remainder_ms = divmod(abs(total_ms), 60_000)
    secs, milliseconds = divmod(remainder_ms, 1000)

    # Format as MM:SS:mmm
    return f"{sign}{minutes:02d}:{secs:02d}:{milliseconds:03d}"

In [40]:
# Save to JSON file
output_file = "keyframe_offsets.json"

# This cell used to read `video_file_size` and `bounded_keyframes`, which no
# visible cell defines, so it failed with NameError on a fresh kernel. The size
# is computed here and the extracted `keyframes` list is saved directly.
# NOTE(review): if `bounded_keyframes` was meant to carry extra fields, rebuild
# it in its own cell and swap it back in here.
video_file_size = os.path.getsize(video_path)

metadata = {
    'video_path': video_path,
    'video_file_size': video_file_size,
    'total_keyframes': len(keyframes),
    'keyframes': keyframes
}

with open(output_file, 'w') as f:
    json.dump(metadata, f, indent=2)

print(f"\nKeyframe metadata saved to: {output_file}")
print(f"Total keyframes: {len(keyframes)}")


Keyframe metadata saved to: keyframe_offsets.json
Total keyframes: 24


In [42]:
# Inspect the last keyframe. A negative index works for any number of
# keyframes, whereas a fixed index only fits a video with at least that many.
keyframes[-1]

{'frame_number': 1299,
 'byte_offset': 17421683,
 'pts_time': 54.179125,
 'packet_size': 48278}

# Decode and display frames

In [43]:
import ffmpeg

In [44]:
def seconds_to_mmss_ms(seconds):
    """
    Convert seconds (float) to MM:SS:mmm format.

    The value is first rounded to the nearest whole millisecond and then
    split with integer arithmetic. Splitting the float directly and
    truncating loses a millisecond whenever the binary representation sits
    just below the decimal value (e.g. 2.001 would come out as "00:02:000").

    Args:
        seconds (float): Time in seconds (e.g., 54.179125). A negative value
            is formatted from its magnitude with a leading "-".

    Returns:
        str: Time in MM:SS:mmm format (e.g., "00:54:179"). Minutes are not
            folded into hours, so they can exceed 59.

    Examples:
        >>> seconds_to_mmss_ms(54.179125)
        '00:54:179'
        >>> seconds_to_mmss_ms(125.5)
        '02:05:500'
        >>> seconds_to_mmss_ms(3661.123)
        '61:01:123'
        >>> seconds_to_mmss_ms(2.001)
        '00:02:001'
    """
    # Work in integer milliseconds from here on to stay clear of float error
    total_ms = round(seconds * 1000)
    sign = "-" if total_ms < 0 else ""

    minutes, remainder_ms = divmod(abs(total_ms), 60_000)
    secs, milliseconds = divmod(remainder_ms, 1000)

    # Format as MM:SS:mmm
    return f"{sign}{minutes:02d}:{secs:02d}:{milliseconds:03d}"

In [51]:
# Presentation timestamp of the first keyframe, echoed as the cell output
first_keyframe = keyframes[0]
time = first_keyframe.get("pts_time")
time

0.0

In [68]:
def get_keyframe_image(time, number):
    """
    Grab one frame at `time` and save it as `frame{number}.png`.

    ffmpeg is launched without a shell: the arguments are passed as a list,
    so a video path containing spaces or shell metacharacters reaches ffmpeg
    intact. The PNG that ffmpeg emits on stdout is written straight into
    the output file.

    Reads the notebook globals `video_path` (input file) and `folder_name`
    (output directory, which must already exist). ffmpeg's exit status is
    not checked.

    Args:
        time: Seek position handed to ffmpeg's -ss option.
        number: Number used in the output file name.
    """
    output_path = f"{folder_name}/frame{number}.png"
    ffmpeg_args = [
        "ffmpeg",
        "-ss", str(time),
        "-i", video_path,
        "-frames:v", "1",
        "-c:v", "png",
        "-f", "image2",
        "-",  # write the encoded image to stdout
    ]
    with open(output_path, "wb") as image_file:
        subprocess.call(ffmpeg_args, stdout=image_file)

In [69]:
# Try the helper once: the frame at the very start of the video becomes frame1.png
get_keyframe_image(time=0.0, number=1)

ffmpeg version 8.0.1 Copyright (c) 2000-2025 the FFmpeg developers
  built with Apple clang version 17.0.0 (clang-1700.4.4.1)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/8.0.1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags= --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --

## Make a folder to store the keyframes

In [59]:
import os

In [61]:
# Output directory for the extracted keyframe images; created if missing
folder_name = "keyframes"
Path(folder_name).mkdir(parents=True, exist_ok=True)

Delete all the files in the `keyframes` folder before extracting new images.

In [70]:
# Remove every regular file directly inside the folder; sub-directories are left alone
for entry in Path(folder_name).iterdir():
    if not entry.is_file():
        continue
    entry.unlink()

In [71]:
# Export one PNG per keyframe, numbering the files from 1
no = 0  # remains 0 when there are no keyframes
for no, keyframe in enumerate(keyframes, start=1):
    time = keyframe.get("pts_time")
    get_keyframe_image(time, no)
    

ffmpeg version 8.0.1 Copyright (c) 2000-2025 the FFmpeg developers
  built with Apple clang version 17.0.0 (clang-1700.4.4.1)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/8.0.1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags= --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --

In [72]:
# Directory that receives the clips cut between consecutive keyframes
video_folder = "video_between_keyframes"

In [73]:
# Create the clip directory (and any missing parents); a no-op if it already exists
Path(video_folder).mkdir(parents=True, exist_ok=True)

In [74]:
# Remove every regular file directly inside the clip folder; sub-directories are left alone
for entry in Path(video_folder).iterdir():
    if not entry.is_file():
        continue
    entry.unlink()

In [78]:
def _format_ffmpeg_time(value):
    """Render a time for ffmpeg's -ss/-to: numbers as plain seconds, anything else as `00:<value>`."""
    if isinstance(value, (int, float)):
        # Plain seconds are valid for any duration. Prefixing "00:" to a
        # float only worked below 60 s ("00:75.3" is not a valid MM:SS value).
        return f"{value:.6f}"
    # Non-numeric input keeps the earlier "00:" prefix behavior.
    return f"00:{value}"


def slice_videos(t0, t1, part):
    """
    Copy the part of the video between `t0` and `t1` into `slice{part}.mp4`.

    Streams are copied without re-encoding (`-c copy`). ffmpeg is launched
    without a shell: the arguments are passed as a list, so a video path
    containing spaces or shell metacharacters reaches ffmpeg intact.

    Reads the notebook globals `video_path` (input file) and `video_folder`
    (output directory). ffmpeg's exit status is not checked.

    Args:
        t0: Start of the clip. Numbers are taken as seconds.
        t1: End of the clip. Numbers are taken as seconds.
        part: Number used in the output file name.
    """
    output_path = f"{video_folder}/slice{part}.mp4"
    ffmpeg_args = [
        "ffmpeg",
        "-ss", _format_ffmpeg_time(t0),
        "-to", _format_ffmpeg_time(t1),
        "-i", video_path,
        "-c", "copy",
        "-avoid_negative_ts", "make_zero",
        output_path,
    ]
    subprocess.call(ffmpeg_args)



In [81]:
# Cut one clip per pair of consecutive keyframes. Pairing the list with itself
# shifted by one adapts to however many keyframes were found; a fixed
# range(0, 23) raises IndexError with fewer keyframes and skips clips with more.
for part, (start_keyframe, end_keyframe) in enumerate(zip(keyframes, keyframes[1:]), start=1):
    t0 = start_keyframe.get("pts_time")
    t1 = end_keyframe.get("pts_time")
    slice_videos(t0, t1, part)

ffmpeg version 8.0.1 Copyright (c) 2000-2025 the FFmpeg developers
  built with Apple clang version 17.0.0 (clang-1700.4.4.1)
  configuration: --prefix=/opt/homebrew/Cellar/ffmpeg/8.0.1 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags= --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --