# Language Testing

Test TTS rendering across different languages and backends.

**What you can do here:**
- Render the same sentence in multiple languages
- Compare how each backend handles a given language

Run cells 0–2 (setup) and the section 1 cell that defines `SAMPLES` first, then jump to any later section.

### Language coverage (European — Kokoro ∩ Chatterbox)

| Language | Code | Kokoro | Chatterbox |
|---|---|---|---|
| English | `en` | yes | yes |
| French | `fr` | yes | yes |
| Spanish | `es` | yes | yes |
| Italian | `it` | yes | yes |
| Portuguese | `pt` | yes | yes |

In [None]:
%load_ext autoreload
%autoreload 2

# Setup — add src/ to path so we can import all packages
import sys
import time
from pathlib import Path

import numpy as np
from IPython.display import Audio, display

# Path.cwd().parents[1] is the grandparent of the current working directory,
# so this assumes the notebook is started from a folder two levels below the
# directory that contains src/. Inserting at index 0 puts src/ ahead of every
# other entry already on sys.path.
sys.path.insert(0, str(Path.cwd().parents[1] / "src"))

from shared.providers import (
    KokoroTTS, ChatterboxTTS,
    KOKORO_VOICE_PRESETS,
    get_tts_runtime,
)
from audiobook.render import (
    load_tts_model,
    render_section,
    SAMPLE_RATE,
)

# Sanity report: show the value returned by get_tts_runtime() and the keys of
# KOKORO_VOICE_PRESETS, then confirm that every import above succeeded.
print(f"TTS runtime: {get_tts_runtime()}")
print(f"Kokoro voice presets: {[*KOKORO_VOICE_PRESETS.keys()]}")
print("Imports OK")


In [None]:
# Helper — print duration stats for an audio array and show an inline player

def play(audio: np.ndarray, sr: int = SAMPLE_RATE, label: str = ""):
    """Print a one-line summary of *audio* and embed a player for it.

    Args:
        audio: Samples to play; ``len(audio)`` is reported as the sample count.
        sr: Sample rate in Hz used for the duration and the player.
            Defaults to ``SAMPLE_RATE``.
        label: Optional tag. When non-empty it is printed in front of the
            stats, followed by ``": "``.
    """
    n_samples = len(audio)
    seconds = n_samples / sr

    stats = f"{seconds:.1f}s, {n_samples:,} samples @ {sr} Hz"
    if label:
        stats = f"{label}: {stats}"

    print(stats)
    display(Audio(audio, rate=sr))

print("play() helper defined")


---
# 1. Sample Sentences

Short narration samples in each supported language for quick testing.

In [None]:
# 1.1 Sample sentences per language (European — Kokoro ∩ Chatterbox)
#
# Every sample has the same shape: an opening sentence, the literal
# "[PAUSE_SHORT]" marker, and a closing sentence, separated by single spaces.
# The inner table stores only the two sentences per language code; the
# comprehension inserts the marker so it is written once.
SAMPLES = {
    code: f"{opening} [PAUSE_SHORT] {closing}"
    for code, (opening, closing) in {
        "en": (
            "Deep learning has transformed nearly every field of computer science.",
            "From natural language processing to computer vision, "
            "neural networks now achieve state of the art results.",
        ),
        "fr": (
            "L'apprentissage profond a transformé presque tous les domaines de l'informatique.",
            "Du traitement du langage naturel à la vision par ordinateur, "
            "les réseaux de neurones atteignent désormais des résultats de pointe.",
        ),
        "es": (
            "El aprendizaje profundo ha transformado prácticamente todos los campos de la informática.",
            "Desde el procesamiento del lenguaje natural hasta la visión por computador, "
            "las redes neuronales logran ahora resultados de vanguardia.",
        ),
        "it": (
            "Il deep learning ha trasformato quasi ogni campo dell'informatica.",
            "Dall'elaborazione del linguaggio naturale alla visione artificiale, "
            "le reti neurali raggiungono ormai risultati all'avanguardia.",
        ),
        "pt": (
            "A aprendizagem profunda transformou praticamente todos os campos da ciência da computação.",
            "Do processamento de linguagem natural à visão computacional, "
            "as redes neurais alcançam agora resultados de ponta.",
        ),
    }.items()
}

print(f"Languages available: {list(SAMPLES)}")


---
# 2. Kokoro — Multi-language

Kokoro supports 8 languages; the cell below renders the five that have an entry in `SAMPLES`. Voices are auto-selected from `KOKORO_VOICE_PRESETS`.

In [None]:
# 2.1 Pick languages to test with Kokoro
# Edit this list freely: codes without a Kokoro preset or without a sample
# sentence are reported and skipped rather than aborting the whole run.
kokoro_langs = ["en", "fr", "es", "it", "pt"]

print(f"Testing Kokoro with: {kokoro_langs}")
print(f"Available presets: {list(KOKORO_VOICE_PRESETS.keys())}\n")

for lang in kokoro_langs:
    # Guard 1: Kokoro has no voice preset for this code.
    if lang not in KOKORO_VOICE_PRESETS:
        print(f"\n--- {lang} --- SKIPPED (no Kokoro preset)")
        continue

    # Guard 2: SAMPLES has no text for this code. Without this check the
    # lookup below raises KeyError after the model has already been loaded.
    if lang not in SAMPLES:
        print(f"\n--- {lang} --- SKIPPED (no sample sentence)")
        continue

    tts = KokoroTTS(lang=lang)
    print(f"\n--- {lang} --- voices: {tts.voices}")

    model = load_tts_model(tts)

    # Time only the render call (model loading is excluded). perf_counter()
    # is monotonic, so the interval is not affected by system clock changes.
    t0 = time.perf_counter()
    audio = render_section(SAMPLES[lang], tts, model=model)
    elapsed = time.perf_counter() - t0

    print(f"Render time: {elapsed:.1f}s")
    play(audio, label=f"Kokoro ({lang})")


---
# 3. Cross-backend Comparison

Render the same sample text in one language with both backends (Kokoro and Chatterbox), then compare render time and audio duration.

In [None]:
# 3.1 Pick a language and compare backends
compare_lang = "fr"  # change to any language both backends support

# Resolve the sample text before building anything, so an unsupported code
# fails immediately instead of after both models have been loaded.
if compare_lang not in SAMPLES:
    raise KeyError(
        f"No sample text for {compare_lang!r}; available: {list(SAMPLES)}"
    )
text = SAMPLES[compare_lang]

tts_kokoro = KokoroTTS(lang=compare_lang)
tts_chatterbox = ChatterboxTTS(lang=compare_lang)

model_kokoro = load_tts_model(tts_kokoro)
model_chatterbox = load_tts_model(tts_chatterbox)

print(f"Language: {compare_lang}")
print(f"Text: {text[:80]}...\n")

# Each timing covers only the render_section() call; model loading happened
# above. perf_counter() is monotonic, so the intervals are not affected by
# system clock adjustments.

# Kokoro
t0 = time.perf_counter()
audio_kokoro = render_section(text, tts_kokoro, model=model_kokoro)
time_kokoro = time.perf_counter() - t0

# Chatterbox
t0 = time.perf_counter()
audio_chatterbox = render_section(text, tts_chatterbox, model=model_chatterbox)
time_chatterbox = time.perf_counter() - t0

# Stats
# NOTE(review): both durations assume the returned audio is at SAMPLE_RATE —
# confirm that render_section() yields that rate for both backends.
dur_kokoro = len(audio_kokoro) / SAMPLE_RATE
dur_chatterbox = len(audio_chatterbox) / SAMPLE_RATE

# Fixed-width table: 20-char left-aligned metric name, two 16-char
# right-aligned value columns.
print(f"{'Metric':<20} {'Kokoro':>16} {'Chatterbox':>16}")
print(f"{'-' * 20} {'-' * 16} {'-' * 16}")
print(f"{'Render time (s)':<20} {time_kokoro:>16.1f} {time_chatterbox:>16.1f}")
print(f"{'Audio duration (s)':<20} {dur_kokoro:>16.1f} {dur_chatterbox:>16.1f}")

print(f"\n--- Kokoro ({compare_lang}) ---")
play(audio_kokoro)
print(f"\n--- Chatterbox ({compare_lang}) ---")
play(audio_chatterbox)
