# TTS Debug Notebook

This notebook experiments with piper-tts to get clear audio output, trying playback through sounddevice, a saved WAV file, and `aplay`.

In [1]:
from pathlib import Path
import numpy as np
import sounddevice as sd
from piper import PiperVoice

# Location of the Piper voice model. The path is relative, so it is interpreted
# against the kernel's current working directory.
model_path = Path('../models/en_GB-alan-medium.onnx')
if not model_path.is_file():
    # Fail fast and report the absolute path that was tried, so a wrong working
    # directory is obvious before the loader is even called.
    raise FileNotFoundError(f'Piper voice model not found: {model_path.resolve()}')

# Load the voice once; later cells reuse `voice` for synthesis and for its
# configured sample rate.
voice = PiperVoice.load(str(model_path))
print(f'Sample rate: {voice.config.sample_rate}')

Sample rate: 22050


In [4]:
# Synthesize a short greeting and play it back with sounddevice
text = 'Hello Romilly'

# Decode the raw bytes of every synthesized chunk as int16 samples
audio_segments = [
    np.frombuffer(chunk.audio_int16_bytes, dtype=np.int16)
    for chunk in voice.synthesize(text)
]

# Join the per-chunk arrays into one contiguous signal
audio_data = np.concatenate(audio_segments)

# Quick sanity check on the signal before playing it
print(f'Audio shape: {audio_data.shape}, dtype: {audio_data.dtype}')
print(f'Min/max: {audio_data.min()} / {audio_data.max()}')

# Start playback at the voice's sample rate, then wait for it to finish
sd.play(audio_data, samplerate=voice.config.sample_rate)
sd.wait()

Audio shape: (23296,), dtype: int16
Min/max: -32767 / 32235


In [None]:
# Alternative: wrap the already-synthesized samples in a WAV container and
# write it to a file. This reuses `audio_data` from the synthesis cell; it does
# not call synthesize_wav.
import io
import wave

wav_buffer = io.BytesIO()
with wave.open(wav_buffer, 'wb') as writer:
    # Header fields must be set before any frames are written
    writer.setframerate(voice.config.sample_rate)
    writer.setsampwidth(2)  # 2 bytes per sample = 16-bit
    writer.setnchannels(1)  # single channel
    writer.writeframes(audio_data.tobytes())

# Copy the in-memory WAV to disk so it can be played by an external tool
wav_bytes = wav_buffer.getvalue()
with open('/tmp/test_hello.wav', 'wb') as out_file:
    out_file.write(wav_bytes)
print('Saved to /tmp/test_hello.wav')

In [None]:
# Play the saved WAV with the external `aplay` command rather than sounddevice
import subprocess

aplay_command = ['aplay', '/tmp/test_hello.wav']
# Left as the last expression so the notebook displays the CompletedProcess
subprocess.run(aplay_command)