In [None]:
import gc
import os
# Environment switches are set before torch / chatterbox are imported in later
# cells. (Presumably they are read by the debugger and by transformers at
# import/load time -- TODO confirm that each variable is actually honoured.)
os.environ.update(
    {
        "PYDEVD_DISABLE_FILE_VALIDATION": "1",
        "TRANSFORMERS_ATTENTION_IMPLEMENTATION": "eager",
    }
)

In [None]:
import torch
from datetime import datetime
from pathlib import Path
import warnings

# Install a blanket "ignore" filter: every warning category is silenced for the
# rest of the session to keep the notebook log short. NOTE(review): this also
# hides deprecation notices from the libraries imported in later cells.
warnings.filterwarnings("ignore")

In [None]:
# Install chatterbox
!pip install -q git+https://github.com/rsxdalv/chatterbox.git@004a0214c308f544f4276e74ccf516c96c271f5b soundfile

In [None]:
# Configuration parameters (will be updated by deploy script)
# NOTE(review): presumably the deploy script rewrites these assignments
# textually -- confirm before changing their layout.
# Text passed to the model for synthesis.
TEXT = "Hello, this is a test."
# "chatterbox-default" (or an empty value) skips the custom-voice lookup; any
# other value is looked up as "<VOICE_ID>.pt" in the voices cache directory.
VOICE_ID = "chatterbox-default"
# Path the generated audio is written to.
OUTPUT_FILE = "output.wav"

In [None]:
# Echo the run parameters so the notebook log records what was requested.
print(
    "Starting Chatterbox TTS generation...",
    f"Text: {TEXT}",
    f"Voice: {VOICE_ID}",
    f"Output: {OUTPUT_FILE}",
    sep="\n",
)

In [None]:
# Load Chatterbox T3 model
# NOTE(review): confirm that `chatterbox.models.t3.t3_model.T3Model` exists at
# the pinned chatterbox commit; the upstream README documents
# `chatterbox.tts.ChatterboxTTS.from_pretrained(device=...)` instead.
from chatterbox.models.t3.t3_model import T3Model

# The model (and any custom voice embedding) is placed on "cuda", so fail early
# with an actionable message instead of an obscure error deep inside the model.
if not torch.cuda.is_available():
    raise RuntimeError(
        "CUDA is not available: enable a GPU accelerator before running this notebook."
    )

print("Loading Chatterbox T3 model...")
model = T3Model(device="cuda")
print("Model loaded successfully")

In [None]:
# Resolve the voice embedding. It stays None unless a custom voice file is
# found; None is passed through to the model as-is (expected to mean "use the
# default voice" -- verify against the model API).
voice_embed = None
if VOICE_ID and VOICE_ID != "chatterbox-default":
    # Custom voices are looked up as "<VOICE_ID>.pt" in the cache directory.
    voice_path = Path("/kaggle/working/cache/chatterbox/voices").joinpath(f"{VOICE_ID}.pt")
    if not voice_path.exists():
        print("Custom voice not found, using default")
    else:
        print(f"Using custom voice: {voice_path}")
        # NOTE(review): torch.load is pickle-based -- only load voice files
        # that come from a trusted source.
        voice_embed = torch.load(voice_path, map_location="cuda")

In [None]:
# Generate audio
# Sampling settings forwarded unchanged to the model.
synthesis_options = {"temperature": 0.7, "cfg_strength": 3.0}

# Log only the first 50 characters of the text; "..." is always appended.
print(f"Generating audio for: {TEXT[:50]}...")

# Run T3 synthesis; voice_embed is None when no custom voice was loaded.
audio_output = model.synthesize(text=TEXT, voice_embed=voice_embed, **synthesis_options)

print("Audio generation complete")

In [None]:
# Save audio to file
import soundfile as sf
import numpy as np

# Convert the model output to a host-side NumPy array.
if isinstance(audio_output, torch.Tensor):
    # detach(): Tensor.numpy() raises on tensors that still require grad.
    audio_tensor = audio_output.detach().cpu()
    # Widen half-precision samples to float32 before conversion: NumPy has no
    # bfloat16, and soundfile accepts only float32/float64/int16/int32 data.
    if audio_tensor.dtype in (torch.float16, torch.bfloat16):
        audio_tensor = audio_tensor.float()
    audio_data = audio_tensor.numpy()
else:
    # Accept plain array-likes (e.g. lists) as well as ndarrays; an ndarray is
    # returned unchanged.
    audio_data = np.asarray(audio_output)

# Drop singleton dimensions, e.g. (1, N) -> (N,)
if audio_data.ndim > 1:
    audio_data = audio_data.squeeze()

# An empty result would otherwise be written as a header-only file and be
# reported as a success by the verification cell.
if audio_data.size == 0:
    raise ValueError("Model returned no audio samples; nothing to write")

sample_rate = 24000  # Chatterbox sample rate

print(f"Audio shape: {audio_data.shape}, dtype: {audio_data.dtype}")
print(f"Sample rate: {sample_rate}")

# Save to file, creating the destination directory first if it is missing
Path(OUTPUT_FILE).parent.mkdir(parents=True, exist_ok=True)
sf.write(OUTPUT_FILE, audio_data, sample_rate)
print(f"Audio saved to: {OUTPUT_FILE}")

In [None]:
# Confirm the output file is on disk and report its size. A missing file is
# only reported in the log; no exception is raised here.
output_path = Path(OUTPUT_FILE)
if not output_path.exists():
    print(f"✗ ERROR: Output file not found: {OUTPUT_FILE}")
else:
    file_size = output_path.stat().st_size
    print(f"✓ Output file created: {OUTPUT_FILE} ({file_size} bytes)")

In [None]:
# Cleanup
# Drop the notebook's reference to the model. Tolerate a missing name so the
# cell can be re-run (or run after a failed load) without raising NameError.
try:
    del model
except NameError:
    pass

# Collect first so that objects kept alive only by reference cycles are freed,
# then hand the now-unused cached blocks back to the GPU driver.
# NOTE(review): `voice_embed` and `audio_output` may still hold GPU tensors and
# are not released here.
gc.collect()
torch.cuda.empty_cache()
print("Cleanup complete")