# Audio File Batch Transcription

Transcribe audio files using OpenAI Whisper with GPU acceleration.

**Supported formats:** .m4a, .mp3, .wav, .flac

**Output formats:** .txt, .srt, .vtt, .tsv, .json

In [None]:
# Report whether PyTorch can see a CUDA device and, if so, which one
import torch

cuda_ok = torch.cuda.is_available()
print("CUDA available:", cuda_ok)
print("GPU name:", torch.cuda.get_device_name(0) if cuda_ok else "N/A")

In [None]:
# Load the Whisper model once; the cells below reuse `model`
import whisper

# Model options: tiny, base, small, medium, large
# "medium" is chosen as the faster option on an RTX 3070 Ti (8GB VRAM).
# NOTE(review): the original note says this card can also handle 'large' —
# confirm against the VRAM requirements listed in the Whisper README.
MODEL_SIZE = "medium"

print(f"Loading Whisper {MODEL_SIZE} model...")
model = whisper.load_model(name=MODEL_SIZE)
print("Model loaded!")

In [None]:
# Configure paths
from pathlib import Path

# Input: the single audio file to transcribe — point this at your recording
AUDIO_FILE = Path("recordings/test_recording.wav")

# Output folder: defaults to the folder that holds the input file
OUTPUT_DIR = AUDIO_FILE.parent

# Echo the configuration, including whether the input is actually on disk
input_found = AUDIO_FILE.exists()
print(f"Audio file: {AUDIO_FILE}")
print(f"Output dir: {OUTPUT_DIR}")
print(f"File exists: {input_found}")

In [None]:
# Transcribe the configured audio file and report how long it took
import time

# Fail fast with a clear message; otherwise a missing file only surfaces as
# an error raised from inside the transcribe call below.
if not AUDIO_FILE.is_file():
    raise FileNotFoundError(f"Audio file not found: {AUDIO_FILE}")

print(f"Transcribing: {AUDIO_FILE.name}")
# perf_counter() is monotonic, so the measured duration cannot be skewed by
# a system clock adjustment during a long transcription.
start = time.perf_counter()

result = model.transcribe(
    str(AUDIO_FILE),
    language="en",  # Set to None for auto-detect
    verbose=True,   # Show progress
)

elapsed = time.perf_counter() - start
print(f"\nDone! Took {elapsed:.1f} seconds")

In [None]:
# Show the full transcript text under a banner
rule = "=" * 60
print(rule)
print("TRANSCRIPTION")
print(rule)
print(result["text"])

In [None]:
# Save the transcription in every supported output format
from whisper.utils import get_writer

base_name = AUDIO_FILE.stem

# Pass the subtitle options explicitly (with their "off" values) instead of
# an empty dict. NOTE(review): some Whisper releases appear to index these
# keys directly in the srt/vtt writers, which would raise KeyError on {} —
# confirm against whisper/utils.py for the installed version.
writer_options = {
    "max_line_width": None,
    "max_line_count": None,
    "highlight_words": False,
}

# OUTPUT_DIR may have been changed to a folder that does not exist yet
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Save each format
for fmt in ["txt", "srt", "vtt", "tsv", "json"]:
    writer = get_writer(fmt, str(OUTPUT_DIR))
    writer(result, str(AUDIO_FILE), writer_options)
    print(f"Saved: {OUTPUT_DIR / base_name}.{fmt}")

print("\nAll formats saved!")

---
## Batch Transcription

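Process every supported audio file in a folder, one after another. Files that already have a `.txt` transcript are skipped; each remaining file is saved as `.txt`, `.srt`, and `.json`.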

In [None]:
# Batch transcribe all audio files in a folder
from pathlib import Path
import time
from whisper.utils import get_writer

INPUT_FOLDER = Path("recordings")
EXTENSIONS = [".m4a", ".mp3", ".wav", ".flac"]


def find_audio_files(folder, extensions):
    """Return the audio files directly inside *folder*, sorted by path.

    Args:
        folder: Directory to scan; sub-folders are not searched.
        extensions: Lowercase suffixes (including the dot) to accept.

    Returns:
        A sorted list of ``Path`` objects for regular files whose suffix
        matches one of *extensions*, compared case-insensitively. The list
        is empty when *folder* is not an existing directory.
    """
    folder = Path(folder)
    if not folder.is_dir():
        return []
    wanted = frozenset(extensions)
    # iterdir() yields entries in arbitrary order; sorting keeps the batch
    # numbering stable between runs. is_file() drops directories that
    # happen to be named like an audio file.
    return sorted(
        entry for entry in folder.iterdir()
        if entry.is_file() and entry.suffix.lower() in wanted
    )


# Find all audio files
if not INPUT_FOLDER.is_dir():
    print(f"Input folder not found: {INPUT_FOLDER.resolve()}")
audio_files = find_audio_files(INPUT_FOLDER, EXTENSIONS)

print(f"Found {len(audio_files)} audio files:")
for f in audio_files:
    print(f"  - {f.name}")

In [None]:
# Process each file (run this cell to start batch transcription)

# Pass the subtitle options explicitly (with their "off" values) instead of
# an empty dict. NOTE(review): some Whisper releases appear to index these
# keys directly in the srt writer, which would raise KeyError on {} —
# confirm against whisper/utils.py for the installed version.
writer_options = {
    "max_line_width": None,
    "max_line_count": None,
    "highlight_words": False,
}

# Files whose transcription raised, reported in the summary at the end
failed = []

for i, audio_file in enumerate(audio_files, 1):
    print(f"\n[{i}/{len(audio_files)}] {audio_file.name}")
    print("-" * 50)

    # Outputs go next to the source file, the same folder the skip check
    # below looks in.
    output_dir = audio_file.parent

    # Skip if already transcribed
    txt_file = audio_file.with_suffix(".txt")
    if txt_file.exists():
        print("  Already transcribed, skipping...")
        continue

    # perf_counter() is monotonic, so clock adjustments cannot skew timing
    start = time.perf_counter()
    try:
        result = model.transcribe(str(audio_file), language="en")
    except RuntimeError as err:
        # One failing file (e.g. audio that cannot be decoded) should not
        # abort the rest of the batch; record it and move on.
        failed.append(audio_file)
        print(f"  FAILED: {err}")
        continue
    elapsed = time.perf_counter() - start

    # Save srt and json first and txt last: the .txt file is the "already
    # transcribed" marker, so it must only appear once the others exist.
    for fmt in ["srt", "json", "txt"]:
        writer = get_writer(fmt, str(output_dir))
        writer(result, str(audio_file), writer_options)

    print(f"  Done in {elapsed:.1f}s")
    text = result["text"]
    # Only show an ellipsis when the preview really was cut short
    ellipsis = "..." if len(text) > 100 else ""
    print(f"  Preview: {text[:100]}{ellipsis}")

print("\n" + "=" * 50)
if failed:
    print(f"{len(failed)} file(s) failed:")
    for audio_file in failed:
        print(f"  - {audio_file.name}")
print("Batch transcription complete!")