In [1]:
#!/usr/bin/env python3
"""
Speech-to-Text Analysis with MELD Dataset

This script demonstrates how to use the PitchPerfect speech-to-text capabilities
with the processed MELD dataset WAV files.
"""

from pitchperfect.speech_to_text import AudioTranscriber
from pitchperfect.data import meld_loader as ml
from pitchperfect.config.settings import MELD_WAV_DIR
from pathlib import Path
import os

In [2]:
# Locate the processed MELD WAV files for one dataset split.
print("Using config paths for consistent directory resolution")

# Keep the configured value under its original name, but run every filesystem
# check through a Path so the cell works whether the setting is a str or a Path.
DEFAULT_MELD_WAV_OUT = MELD_WAV_DIR
base = Path(DEFAULT_MELD_WAV_OUT)
print(f"WAV output directory: {DEFAULT_MELD_WAV_OUT}")
print(f"Directory exists: {base.exists()}")

# List available WAV files
split = 'dev'  # Can be 'train', 'dev', or 'test'
split_dir = base / split
print(f"Looking for WAV files in: {split_dir}")
print(f"Split directory absolute path: {split_dir.absolute()}")

# is_dir() rather than exists(): a regular file that happens to be named like
# the split must not pass this check, because it cannot contain WAV files.
if not split_dir.is_dir():
    # Print enough context to diagnose a wrong path before failing.
    print(f"❌ Split directory not found: {split_dir}")
    print(f"❌ Absolute path: {split_dir.absolute()}")
    print(f"❌ Parent directory exists: {split_dir.parent.exists()}")
    # Only list the parent when it is a directory; iterdir() raises otherwise.
    print(f"❌ Available in parent: {list(split_dir.parent.iterdir()) if split_dir.parent.is_dir() else 'N/A'}")
    raise FileNotFoundError(f"Split directory not found: {split_dir}")

# Recursive search, sorted so the listing is stable across runs.
# The order is lexicographic on the path (e.g. dia100 sorts before dia2).
wav_files = sorted(split_dir.rglob("*.wav"))
print(f"Found {len(wav_files)} WAV files")
print(f"First 5 files: {[f.name for f in wav_files[:5]]}")

# Build the transcriber first, then smoke-test it on a single file
t = AudioTranscriber(model="whisper-1", language="en", use_cache=True)

if not wav_files:
    print("No WAV files found to test")
else:
    # The first entry of the sorted listing is the sample file
    test_file = wav_files[0]
    print(f"Testing transcription with: {test_file.name}")
    try:
        res = t.transcribe(str(test_file))
        # 'text' is read as a required key; 'language' and 'model' are
        # optional and fall back to 'Unknown' when absent.
        print(f"Transcript: {res['text']}")
        print(f"Language: {res.get('language', 'Unknown')}")
        print(f"Model: {res.get('model', 'Unknown')}")
    except Exception as err:
        # Report the failure and let the rest of the notebook continue.
        print(f"Error transcribing {test_file.name}: {err}")

Using config paths for consistent directory resolution
WAV output directory: /Users/sunaina/code/tanzania2025/pitch_perfect/data/processed/meld_wav
Directory exists: True
Looking for WAV files in: /Users/sunaina/code/tanzania2025/pitch_perfect/data/processed/meld_wav/dev
Split directory absolute path: /Users/sunaina/code/tanzania2025/pitch_perfect/data/processed/meld_wav/dev
Found 1112 WAV files
First 5 files: ['dia0_utt0.wav', 'dia0_utt1.wav', 'dia100_utt0.wav', 'dia101_utt0.wav', 'dia102_utt0.wav']
Testing transcription with: dia0_utt0.wav
Transcript: Oh my God, he's lost it, he's totally lost it.
Language: english
Model: whisper-1


In [3]:
# Show the full path of the WAV file chosen for the transcription test.
test_file

PosixPath('/Users/sunaina/code/tanzania2025/pitch_perfect/data/processed/meld_wav/dev/dia0_utt0.wav')

In [3]:
import whisper

# Transcribe the same sample file with a locally loaded Whisper model so the
# result can be compared with the AudioTranscriber output above.
# "tiny" is the checkpoint loaded here; other checkpoint names such as
# "small", "medium" or "large" can be substituted.
model = whisper.load_model("tiny")

# fp16=False is passed explicitly; this is the setting intended for CPU-only
# machines.
result = model.transcribe(str(test_file), fp16=False)
print(result["text"])

 My God, he's lost, he's totally lost.
