# Auto-Generate Video Captions

Add subtitles to videos using AI transcription.


In [None]:
%pip install -qU pixeltable openai


In [None]:
import getpass
import os

# Prompt for the OpenAI key only when the environment does not already
# provide one, so the secret never has to be written into the notebook.
if os.environ.get('OPENAI_API_KEY') is None:
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API Key:')


In [None]:
import pixeltable as pxt
from pixeltable.functions import openai


In [None]:
# Step 1: Create table and add video
pxt.create_dir('captions', if_exists='ignore')
videos = pxt.create_table('captions.videos', {'video': pxt.Video}, if_exists='ignore')

# The column is typed pxt.Video, so the sample must be an actual video file
# with an audio track (an audio-only file such as .flac does not belong here).
# Swap in any video URL or local path that contains speech.
SAMPLE_VIDEO_URL = (
    'https://github.com/pixeltable/pixeltable/raw/release/docs/resources/'
    'audio-transcription-demo/Lex-Fridman-Podcast-430-Excerpt-0.mp4'
)

# Only insert into an empty table: the table persists across runs
# (if_exists='ignore'), so an unconditional insert would add a duplicate row,
# and trigger another transcription, every time this cell is re-executed.
if videos.count() == 0:
    videos.insert([{'video': SAMPLE_VIDEO_URL}])


In [None]:
# Step 2: Extract audio and transcribe
# Each add_computed_column call uses if_exists='ignore' so the cell can be
# re-run against a table that already has the column.

# 2a. Audio track derived from the video column.
audio_track = videos.video.extract_audio()
videos.add_computed_column(audio=audio_track, if_exists='ignore')

# 2b. Whisper transcription of that audio. The 'verbose_json' response format
#     is requested because the next step reads the `segments` field.
whisper_output = openai.transcriptions(
    audio=videos.audio,
    model='whisper-1',
    model_kwargs={'response_format': 'verbose_json'},
)
videos.add_computed_column(transcript=whisper_output, if_exists='ignore')

# 2c. Expose the per-segment entries of the transcript as their own column.
videos.add_computed_column(captions=videos.transcript.segments, if_exists='ignore')


In [None]:
# Step 3: View captions with timestamps
result = videos.select(videos.captions).head(1)

# Captions of the first (and only fetched) row; preview just the first three.
first_row_segments = result['captions'][0]
for seg in first_row_segments[:3]:
    begin, finish, spoken = seg['start'], seg['end'], seg['text']
    print(f"[{begin:.2f}s - {finish:.2f}s]: {spoken}")


**What's Happening:**
- Video → audio extraction (automatic)
- Audio → transcription with timestamps
- Segments contain start/end times + text
- Perfect for SRT/VTT caption file generation

**Variation:** Export to SRT format:
```python
def format_timestamp(seconds: float) -> str:
    """Format a time offset in seconds as an SRT timestamp (HH:MM:SS,mmm)."""
    # Work in whole milliseconds so rounding cannot yield e.g. "00:00:59,1000".
    total_ms = round(seconds * 1000)
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


@pxt.udf
def to_srt(segments: list[dict]) -> str:
    """Render transcript segments as the text of an SRT subtitle file.

    Args:
        segments: Dicts with 'start' and 'end' (seconds) and 'text' keys,
            as stored in the `captions` column.

    Returns:
        Numbered SRT cues separated by blank lines; '' for an empty list.
    """
    lines = []
    for i, seg in enumerate(segments, 1):
        start = format_timestamp(seg['start'])
        end = format_timestamp(seg['end'])
        # Trim surrounding whitespace so the cue text does not start with a space.
        lines.append(f"{i}\n{start} --> {end}\n{seg['text'].strip()}\n")
    # Each cue already ends in a newline; joining on '\n' adds the blank
    # separator line that SRT requires between cues.
    return '\n'.join(lines)

# Usage: videos.add_computed_column(srt=to_srt(videos.captions), if_exists='ignore')
```

**Next:** `index-video-meetings-for-search.ipynb` • `extract-insights-from-earnings-calls.ipynb`
