# Keyframe Extraction with Byte Offsets

This notebook extracts keyframe metadata from a video and stores each keyframe's byte offset for efficient sequential decoding.

In [None]:
import subprocess
import json
import os
from pathlib import Path

In [None]:
# Video path - update this to your video file
video_path = "/Users/kuntalsuman/Downloads/demo.mov"

# Fail early unless the path points at a regular file. A plain existence
# check would also accept a directory, which would then be handed to ffprobe.
if not os.path.isfile(video_path):
    raise FileNotFoundError(f"Video file not found: {video_path}")

print(f"Video file: {video_path}")
print(f"File size: {os.path.getsize(video_path):,} bytes")

## Extract Keyframe Information with Byte Offsets

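We'll use FFprobe to extract per-packet information for the first video stream, including:
- Packet flags (packets flagged `K` are keyframes, i.e. I-frames)
- Byte position (offset in the file)
- Presentation timestamp (PTS)
- Packet size
- Frame number (derived by counting packets in the order FFprobe reports them)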

In [None]:
def extract_keyframe_offsets(video_path):
    """
    Collect metadata for every keyframe packet of a video's first video stream.

    Runs ffprobe on ``video_path`` and keeps the packets whose flags contain 'K'.

    Args:
        video_path: Path of the video file handed to ffprobe.

    Returns:
        list: One dictionary per keyframe packet with the keys
            'frame_number' (0-based index of the packet in ffprobe's packet list),
            'byte_offset' (-1 when the packet has no 'pos' entry),
            'pts_time' (0.0 when the packet has no 'pts_time' entry) and
            'packet_size' (0 when the packet has no 'size' entry).

    Raises:
        subprocess.CalledProcessError: ffprobe exited with a non-zero status.
        json.JSONDecodeError: ffprobe's output could not be parsed as JSON.
    """
    probe_cmd = [
        'ffprobe',
        '-v', 'error',                      # only report errors
        '-select_streams', 'v:0',           # first video stream only
        '-show_entries', 'packet=pts_time,pos,flags,size',
        '-of', 'json',                      # machine-readable output
        video_path,
    ]

    # Only the external call and the parse can hit the handlers below, so the
    # packet filtering lives outside the try block.
    try:
        completed = subprocess.run(probe_cmd, capture_output=True, text=True, check=True)
        probe_output = json.loads(completed.stdout)
    except subprocess.CalledProcessError as e:
        print(f"Error running ffprobe: {e}")
        print(f"stderr: {e.stderr}")
        raise
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON output: {e}")
        raise

    # The index counts every packet (keyframe or not), so 'frame_number' is the
    # packet's position in the full packet list.
    return [
        {
            'frame_number': index,
            'byte_offset': int(pkt.get('pos', -1)),
            'pts_time': float(pkt.get('pts_time', 0)),
            'packet_size': int(pkt.get('size', 0)),
        }
        for index, pkt in enumerate(probe_output.get('packets', []))
        if 'K' in pkt.get('flags', '')
    ]

In [None]:
# Run the ffprobe scan and keep the resulting keyframe list for later cells
print("Extracting keyframe information...")
keyframes = extract_keyframe_offsets(video_path)

print(f"\nFound {len(keyframes)} keyframes")

# Preview: at most the first five entries, numbered from 1
print("\nFirst 5 keyframes:")
for ordinal, entry in enumerate(keyframes[:5], start=1):
    print(f"\nKeyframe {ordinal}:")
    print(f"  Frame Number: {entry['frame_number']}")
    print(f"  Byte Offset: {entry['byte_offset']:,}")
    print(f"  Timestamp: {entry['pts_time']:.3f}s")
    print(f"  Packet Size: {entry['packet_size']:,} bytes")

## Calculate Bounding Offsets

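For each keyframe, we'll calculate the bounding byte offsets: the segment starts at the keyframe's own byte offset and ends at the byte offset of the next keyframe (or at the end of the file for the last keyframe).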

In [None]:
def calculate_bounding_offsets(keyframes, video_file_size):
    """
    Attach start/end byte bounds and a segment size to every keyframe.

    A segment begins at its keyframe's 'byte_offset' and ends where the next
    keyframe begins; the last segment ends at ``video_file_size``. The input
    dictionaries are copied, not modified.

    Args:
        keyframes: List of keyframe dictionaries, each with a 'byte_offset' key
        video_file_size: Total size of the video file in bytes

    Returns:
        list: Copies of the keyframes with added 'offset_start', 'offset_end'
            and 'segment_size' fields
    """
    # End offset of segment k is the start of keyframe k + 1; the file size
    # closes the final segment.
    end_offsets = [following['byte_offset'] for following in keyframes[1:]]
    end_offsets.append(video_file_size)

    segments = []
    for current, end in zip(keyframes, end_offsets):
        start = current['byte_offset']

        segment = current.copy()
        segment['offset_start'] = start
        segment['offset_end'] = end
        segment['segment_size'] = end - start
        segments.append(segment)

    return segments

In [None]:
# Total size of the source file in bytes; passed on as the file-size bound
video_file_size = os.path.getsize(video_path)

# Derive start/end offsets and segment sizes for every keyframe
bounded_keyframes = calculate_bounding_offsets(keyframes, video_file_size)

print(f"Video file size: {video_file_size:,} bytes")

# Preview: at most the first five segments, numbered from 1
print("\nBounding offsets for first 5 keyframes:")
for ordinal, segment in enumerate(bounded_keyframes[:5], start=1):
    print(f"\nKeyframe {ordinal}:")
    print(f"  Frame Number: {segment['frame_number']}")
    print(f"  Timestamp: {segment['pts_time']:.3f}s")
    print(f"  Offset Start: {segment['offset_start']:,} bytes")
    print(f"  Offset End: {segment['offset_end']:,} bytes")
    print(f"  Segment Size: {segment['segment_size']:,} bytes")

## Save Keyframe Metadata

Save the keyframe information to a JSON file for later use.

In [None]:
# Destination for the keyframe table (relative to the current working directory)
output_file = "keyframe_offsets.json"

# Bundle the per-keyframe records with the facts needed to interpret them
metadata = dict(
    video_path=video_path,
    video_file_size=video_file_size,
    total_keyframes=len(bounded_keyframes),
    keyframes=bounded_keyframes,
)

with open(output_file, 'w') as out_fh:
    json.dump(metadata, out_fh, indent=2)

print(f"\nKeyframe metadata saved to: {output_file}")
print(f"Total keyframes: {metadata['total_keyframes']}")

## Summary Statistics

In [None]:
# Summarize segment sizes and keyframe spacing (skipped when no keyframes were found)
if bounded_keyframes:
    segment_sizes = [kf['segment_size'] for kf in bounded_keyframes]
    avg_segment_size = sum(segment_sizes) / len(segment_sizes)
    min_segment_size = min(segment_sizes)
    max_segment_size = max(segment_sizes)

    timestamps = [kf['pts_time'] for kf in bounded_keyframes]
    first_keyframe_time = min(timestamps)
    last_keyframe_time = max(timestamps)

    # Only keyframe timestamps are available here, so the duration is
    # approximated by the latest keyframe timestamp; frames after that
    # keyframe are not counted.
    video_duration = last_keyframe_time

    # N keyframes are separated by N - 1 gaps, and the spacing is measured
    # from the first keyframe, not from time zero. A single keyframe has no
    # gap, so the interval is reported as 0.
    gap_count = len(timestamps) - 1
    if gap_count > 0:
        avg_keyframe_interval = (last_keyframe_time - first_keyframe_time) / gap_count
    else:
        avg_keyframe_interval = 0

    print("\n" + "="*50)
    print("KEYFRAME EXTRACTION SUMMARY")
    print("="*50)
    print(f"Video Duration: {video_duration:.2f} seconds")
    print(f"Total Keyframes: {len(bounded_keyframes)}")
    print(f"Average Keyframe Interval: {avg_keyframe_interval:.2f} seconds")
    print("\nSegment Size Statistics:")
    print(f"  Average: {avg_segment_size:,.0f} bytes")
    print(f"  Minimum: {min_segment_size:,} bytes")
    print(f"  Maximum: {max_segment_size:,} bytes")
    print("="*50)