# Keyframe Extraction with Byte Offsets

This notebook extracts keyframes from a video and stores their byte offsets for efficient sequential decoding.

In [17]:
import subprocess
import json
import os
from pathlib import Path

In [18]:
# Video path - set the VIDEO_PATH environment variable to point at your own
# file; when it is unset, the original demo location below is used.
video_path = os.environ.get("VIDEO_PATH", "/Users/kuntalsuman/Downloads/demo.mov")

# Verify the path is a regular file. A plain existence check would also accept
# a directory, which would only fail later inside ffprobe.
if not os.path.isfile(video_path):
    raise FileNotFoundError(f"Video file not found: {video_path}")

print(f"Video file: {video_path}")
print(f"File size: {os.path.getsize(video_path):,} bytes")

Video file: /Users/kuntalsuman/Downloads/demo.mov
File size: 18,712,861 bytes


## Extract Keyframe Information with Byte Offsets

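We'll use ffprobe to list the packets of the first video stream and keep the ones flagged as keyframes. For each keyframe packet we record:
- Keyframe flag (packets whose `flags` contain `K`; these are the I-frames)
- Byte position (offset of the packet in the file)
- Presentation timestamp (PTS), in seconds
- Packet size in bytes
- Frame number (the packet's zero-based index in the order ffprobe lists the packets)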

In [19]:
def extract_keyframe_offsets(video_path):
    """
    Extract keyframe information including byte offsets from a video file.

    Runs ``ffprobe`` on the first video stream, lists every packet as JSON and
    keeps the packets whose ``flags`` field contains ``K``.

    Args:
        video_path: Path of the video file, passed to ffprobe as its input.

    Returns:
        list: One dictionary per keyframe packet, in the order ffprobe
        reported the packets, with the keys:
            - 'frame_number': zero-based index of the packet among all
              packets of the selected stream (not only keyframes)
            - 'byte_offset': packet ``pos`` as int, or -1 if ffprobe did not
              report it
            - 'pts_time': presentation timestamp in seconds as float, or 0.0
              if ffprobe did not report it
            - 'packet_size': packet ``size`` in bytes as int, or 0 if ffprobe
              did not report it

    Raises:
        FileNotFoundError: If the ffprobe executable cannot be launched.
        subprocess.CalledProcessError: If ffprobe exits with a non-zero status.
        json.JSONDecodeError: If ffprobe's output is not valid JSON.
    """
    # Use ffprobe to get packet information
    cmd = [
        'ffprobe',
        '-v', 'error',
        '-select_streams', 'v:0',  # Select first video stream
        '-show_entries', 'packet=pts_time,pos,flags,size',  # Get timestamp, position, flags, and size
        '-of', 'json',  # Output as JSON
        video_path
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        data = json.loads(result.stdout)
    except FileNotFoundError as e:
        # subprocess raises this when the executable itself is missing; say so
        # explicitly, in the same print-then-re-raise style as the handlers below.
        print(f"Error running ffprobe: executable not found ({e})")
        print("Install FFmpeg and make sure 'ffprobe' is on your PATH.")
        raise
    except subprocess.CalledProcessError as e:
        print(f"Error running ffprobe: {e}")
        print(f"stderr: {e.stderr}")
        raise
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON output: {e}")
        raise

    keyframes = []
    # enumerate() numbers every packet, so 'frame_number' is the packet's
    # position in the whole stream rather than its position among keyframes.
    for frame_num, packet in enumerate(data.get('packets', [])):
        # The 'K' flag indicates a keyframe (I-frame)
        if 'K' in packet.get('flags', ''):
            keyframes.append({
                'frame_number': frame_num,
                'byte_offset': int(packet.get('pos', -1)),
                'pts_time': float(packet.get('pts_time', 0)),
                'packet_size': int(packet.get('size', 0))
            })

    return keyframes

In [20]:
# Scan the video with ffprobe, then preview the first few keyframes found
print("Extracting keyframe information...")
keyframes = extract_keyframe_offsets(video_path)

print(f"\nFound {len(keyframes)} keyframes")
print("\nFirst 5 keyframes:")
for position, keyframe in enumerate(keyframes[:5], start=1):
    # Build the whole report for one keyframe, then emit it in a single print
    summary_lines = [
        f"\nKeyframe {position}:",
        f"  Frame Number: {keyframe['frame_number']}",
        f"  Byte Offset: {keyframe['byte_offset']:,}",
        f"  Timestamp: {keyframe['pts_time']:.3f}s",
        f"  Packet Size: {keyframe['packet_size']:,} bytes",
    ]
    print("\n".join(summary_lines))

Extracting keyframe information...

Found 24 keyframes

First 5 keyframes:

Keyframe 1:
  Frame Number: 0
  Byte Offset: 36
  Timestamp: 0.000s
  Packet Size: 115,518 bytes

Keyframe 2:
  Frame Number: 33
  Byte Offset: 715,662
  Timestamp: 1.376s
  Packet Size: 79,365 bytes

Keyframe 3:
  Frame Number: 77
  Byte Offset: 1,513,552
  Timestamp: 3.212s
  Packet Size: 73,328 bytes

Keyframe 4:
  Frame Number: 164
  Byte Offset: 2,956,882
  Timestamp: 6.840s
  Packet Size: 38,628 bytes

Keyframe 5:
  Frame Number: 221
  Byte Offset: 3,460,108
  Timestamp: 9.218s
  Packet Size: 117,067 bytes


## Calculate Bounding Offsets

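For each keyframe, we'll calculate the bounding byte offsets (start and end positions): a segment starts at the keyframe's own byte offset and ends at the next keyframe's byte offset, or at the end of the file for the last keyframe.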

In [21]:
def calculate_bounding_offsets(keyframes, video_file_size):
    """
    Attach start/end byte bounds and a segment size to every keyframe.

    A keyframe's segment begins at its own 'byte_offset' and stops where the
    following keyframe begins; the final keyframe's segment stops at the end
    of the file. The input dictionaries are not modified.

    Args:
        keyframes: List of keyframe dictionaries, each with a 'byte_offset'
        video_file_size: Total size of the video file in bytes

    Returns:
        list: Copies of the keyframes with 'offset_start', 'offset_end' and
        'segment_size' added
    """
    # End bound for keyframe i is the start of keyframe i + 1; the file size
    # closes the last segment.
    segment_ends = [nxt['byte_offset'] for nxt in keyframes[1:]]
    segment_ends.append(video_file_size)

    return [
        {
            **current,
            'offset_start': current['byte_offset'],
            'offset_end': end,
            'segment_size': end - current['byte_offset'],
        }
        for current, end in zip(keyframes, segment_ends)
    ]

In [38]:
# The total file size closes the last keyframe's segment
video_file_size = os.path.getsize(video_path)

# Add start/end offsets and segment sizes to every keyframe
bounded_keyframes = calculate_bounding_offsets(keyframes, video_file_size)

print(f"Video file size: {video_file_size:,} bytes")
print("\nBounding offsets for first 5 keyframes:")
for position, segment in enumerate(bounded_keyframes[:5], start=1):
    # Build the whole report for one segment, then emit it in a single print
    report_lines = [
        f"\nKeyframe {position}:",
        f"  Frame Number: {segment['frame_number']}",
        f"  Timestamp: {segment['pts_time']:.3f}",
        f"  Offset Start: {segment['offset_start']:,} bytes",
        f"  Offset End: {segment['offset_end']:,} bytes",
        f"  Segment Size: {segment['segment_size']:,} bytes",
    ]
    print("\n".join(report_lines))

Video file size: 18,712,861 bytes

Bounding offsets for first 5 keyframes:

Keyframe 1:
  Frame Number: 0
  Timestamp: 0.000
  Offset Start: 36 bytes
  Offset End: 715,662 bytes
  Segment Size: 715,626 bytes

Keyframe 2:
  Frame Number: 33
  Timestamp: 1.376
  Offset Start: 715,662 bytes
  Offset End: 1,513,552 bytes
  Segment Size: 797,890 bytes

Keyframe 3:
  Frame Number: 77
  Timestamp: 3.212
  Offset Start: 1,513,552 bytes
  Offset End: 2,956,882 bytes
  Segment Size: 1,443,330 bytes

Keyframe 4:
  Frame Number: 164
  Timestamp: 6.840
  Offset Start: 2,956,882 bytes
  Offset End: 3,460,108 bytes
  Segment Size: 503,226 bytes

Keyframe 5:
  Frame Number: 221
  Timestamp: 9.218
  Offset Start: 3,460,108 bytes
  Offset End: 5,774,047 bytes
  Segment Size: 2,313,939 bytes


## Save Keyframe Metadata

Save the keyframe information to a JSON file for later use.

In [39]:
def seconds_to_mmss_ms(seconds):
    """
    Convert seconds (float) to MM:SS:mmm format.

    The value is rounded to the nearest millisecond before it is split into
    fields. This keeps binary floating-point noise from dropping a millisecond
    (1.001 -> "00:01:001") and lets a value that rounds up carry into the
    seconds and minutes fields (59.9996 -> "01:00:000").

    NOTE(review): the notebook defines this function a second time in the
    "Decode and display frames" section; after Run All the later definition is
    the one in effect. Keep the two in sync or remove one.

    Args:
        seconds (float): Time in seconds (e.g., 54.179125). Negative values
            are formatted from their magnitude with a leading "-".

    Returns:
        str: Time in MM:SS:mmm format (e.g., "00:54:179"). Minutes are not
        folded into hours, so the minutes field can exceed 59.

    Examples:
        >>> seconds_to_mmss_ms(54.179125)
        '00:54:179'
        >>> seconds_to_mmss_ms(125.5)
        '02:05:500'
        >>> seconds_to_mmss_ms(3661.123)
        '61:01:123'
        >>> seconds_to_mmss_ms(1.001)
        '00:01:001'
    """
    # Do all splitting on integer milliseconds so no field is derived from a
    # lossy float subtraction.
    total_ms = int(round(abs(seconds) * 1000))

    # Only show a sign when something non-zero remains after rounding
    sign = "-" if seconds < 0 and total_ms else ""

    total_secs, milliseconds = divmod(total_ms, 1000)
    minutes, secs = divmod(total_secs, 60)

    # Format as MM:SS:mmm
    return f"{sign}{minutes:02d}:{secs:02d}:{milliseconds:03d}"

In [40]:
# Persist the bounded keyframes, together with file-level context, as JSON
output_file = "keyframe_offsets.json"

metadata = dict(
    video_path=video_path,
    video_file_size=video_file_size,
    total_keyframes=len(bounded_keyframes),
    keyframes=bounded_keyframes,
)

# Serialize first, then write the finished document in one go
with open(output_file, 'w') as json_handle:
    json_handle.write(json.dumps(metadata, indent=2))

print(f"\nKeyframe metadata saved to: {output_file}")
print(f"Total keyframes: {len(bounded_keyframes)}")


Keyframe metadata saved to: keyframe_offsets.json
Total keyframes: 24


In [42]:
# Inspect the last keyframe. A negative index avoids hard-coding the keyframe
# count (24 for the demo video), so the cell keeps working for other videos.
keyframes[-1]

{'frame_number': 1299,
 'byte_offset': 17421683,
 'pts_time': 54.179125,
 'packet_size': 48278}

# Decode and display frames

In [43]:
import ffmpeg

In [None]:
def seconds_to_mmss_ms(seconds):
    """
    Convert seconds (float) to MM:SS:mmm format.

    The value is rounded to the nearest millisecond before it is split into
    fields. This keeps binary floating-point noise from dropping a millisecond
    (1.001 -> "00:01:001") and lets a value that rounds up carry into the
    seconds and minutes fields (59.9996 -> "01:00:000").

    NOTE(review): this duplicates the definition in the "Save Keyframe
    Metadata" section; as the later definition, this is the one in effect
    after Run All. Keep the two in sync or remove one.

    Args:
        seconds (float): Time in seconds (e.g., 54.179125). Negative values
            are formatted from their magnitude with a leading "-".

    Returns:
        str: Time in MM:SS:mmm format (e.g., "00:54:179"). Minutes are not
        folded into hours, so the minutes field can exceed 59.

    Examples:
        >>> seconds_to_mmss_ms(54.179125)
        '00:54:179'
        >>> seconds_to_mmss_ms(125.5)
        '02:05:500'
        >>> seconds_to_mmss_ms(3661.123)
        '61:01:123'
        >>> seconds_to_mmss_ms(1.001)
        '00:01:001'
    """
    # Do all splitting on integer milliseconds so no field is derived from a
    # lossy float subtraction.
    total_ms = int(round(abs(seconds) * 1000))

    # Only show a sign when something non-zero remains after rounding
    sign = "-" if seconds < 0 and total_ms else ""

    total_secs, milliseconds = divmod(total_ms, 1000)
    minutes, secs = divmod(total_secs, 60)

    # Format as MM:SS:mmm
    return f"{sign}{minutes:02d}:{secs:02d}:{milliseconds:03d}"

In [None]:
def get_keyframe_image(time, input_path=None, output_path="frame.png"):
    """
    Extract one video frame at ``time`` and save it as a PNG image.

    Runs ffmpeg with the seek option placed before the input and writes a
    single PNG-encoded video frame to ``output_path``, overwriting any
    existing file. The command is passed as an argument list, so no shell is
    involved and paths containing spaces need no quoting.

    NOTE(review): the original body only built a shell command string (with a
    hard-coded ``video.mp4`` input) and never ran it. This implementation is
    inferred from that command; confirm it matches the intended behaviour.

    Args:
        time: Seek position handed to ffmpeg's ``-ss`` option (seconds as a
            number, or a time string ffmpeg accepts).
        input_path: Video file to read. Defaults to the notebook-level
            ``video_path``.
        output_path: Image file to write. Defaults to "frame.png", the file
            the original command redirected its output to.

    Returns:
        The ``output_path`` that was written.

    Raises:
        FileNotFoundError: If the ffmpeg executable cannot be launched.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    if input_path is None:
        # Fall back to the video configured at the top of the notebook
        input_path = video_path

    ffmpeg_command = [
        'ffmpeg',
        '-v', 'error',
        '-y',                  # Overwrite output_path if it already exists
        '-ss', str(time),      # Seek before opening the input
        '-i', str(input_path),
        '-frames:v', '1',      # Emit exactly one video frame
        '-c:v', 'png',
        '-f', 'image2',
        '-update', '1',        # Treat output_path as one file, not a sequence pattern
        str(output_path),
    ]

    try:
        subprocess.run(ffmpeg_command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error running ffmpeg: {e}")
        print(f"stderr: {e.stderr}")
        raise

    return output_path

[in#0 @ 0x152f04530] Error opening input: No such file or directory
Error opening input file video.mp4.
Error opening input files: No such file or directory


Error: ffmpeg error (see stderr output for detail)