In [1]:
# Install dependencies once, in their own cell, so they land in this kernel's environment:
# %pip install TTS soundfile librosa

from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
import soundfile as sf
import librosa
from IPython.display import Audio

In [3]:
# 1. Fetch the YourTTS model files through ModelManager.
# NOTE(review): a later cell treats download_model()'s return value as possibly
# a tuple, so `model_path` here may hold more than a single path -- confirm
# before using it directly.
manager = ModelManager()
model_name = "tts_models/multilingual/multi-dataset/your_tts"
model_path = manager.download_model(model_name)

 > Downloading model to /Users/ravindudinuththara/Library/Application Support/tts/tts_models--multilingual--multi-dataset--your_tts
 > Model's license - CC BY-NC-ND 4.0
 > Check https://creativecommons.org/licenses/by-nc-nd/4.0/ for more info.


In [13]:
import os
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer

# 2. Resolve the checkpoint/config files for the model and build the synthesizer
model_name = "tts_models/multilingual/multi-dataset/your_tts"
manager = ModelManager()

result = manager.download_model(model_name)


def _resolve_model_files(download_result):
    """Pick the checkpoint and config paths out of a ``download_model`` result.

    Args:
        download_result: Value returned by ``manager.download_model``. A single
            string or a tuple/list is accepted; non-string entries are ignored.

    Returns:
        tuple: ``(checkpoint_path, config_path)``. Either element is ``None``
        when no matching file could be identified.
    """
    if isinstance(download_result, str):
        candidates = [download_result]
    elif isinstance(download_result, (tuple, list)):
        candidates = [item for item in download_result if isinstance(item, str)]
    else:
        candidates = []

    checkpoint_path = None
    resolved_config = None
    for candidate in candidates:
        if candidate.endswith(".pth"):
            checkpoint_path = candidate
        elif candidate.endswith(".json"):
            resolved_config = candidate

    # Fallback: look for a config next to the checkpoint.
    if resolved_config is None and checkpoint_path is not None:
        # dirname() is "" for a bare file name, and os.listdir("") raises.
        folder = os.path.dirname(checkpoint_path) or "."
        # os.listdir() order is arbitrary, so sort and prefer the conventional
        # "config.json" rather than taking whichever .json comes back first
        # (the folder may presumably hold other .json files as well).
        json_files = sorted(
            name for name in os.listdir(folder) if name.endswith(".json")
        )
        if "config.json" in json_files:
            resolved_config = os.path.join(folder, "config.json")
        elif json_files:
            resolved_config = os.path.join(folder, json_files[0])

    return checkpoint_path, resolved_config


model_path, config_path = _resolve_model_files(result)

# Fail early with a clear message instead of letting Synthesizer receive None.
if model_path is None:
    raise RuntimeError("Model checkpoint (.pth) file not found.")
if config_path is None:
    raise RuntimeError("Config (.json) file not found.")

print("Model checkpoint:", model_path)
print("Config path:", config_path)

# Now initialize synthesizer with these paths
synthesizer = Synthesizer(tts_checkpoint=model_path, tts_config_path=config_path)

 > tts_models/multilingual/multi-dataset/your_tts is already downloaded.
Model checkpoint: /Users/ravindudinuththara/Library/Application Support/tts/tts_models--multilingual--multi-dataset--your_tts/model_file.pth
Config path: /Users/ravindudinuththara/Library/Application Support/tts/tts_models--multilingual--multi-dataset--your_tts/config.json
 > Using model: vits
 > Setting up Audio Processor...
 | > sample_rate:16000
 | > resample:False
 | > num_mels:80
 | > log_func:np.log10
 | > min_level_db:0
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:None
 | > fft_size:1024
 | > power:None
 | > preemphasis:0.0
 | > griffin_lim_iters:None
 | > signal_norm:None
 | > symmetric_norm:None
 | > mel_fmin:0
 | > mel_fmax:None
 | > pitch_fmin:None
 | > pitch_fmax:None
 | > spec_gain:20.0
 | > stft_pad_mode:reflect
 | > max_norm:1.0
 | > clip_norm:True
 | > do_trim_silence:False
 | > trim_db:60
 | > do_sound_norm:False
 | > do_amp_to_db_linear:True
 | > do_amp_to_db_mel:True
 | >

In [14]:
# 3. Load your parent voice sample and resample to 16kHz
# Single source of truth for the rate; it matches the `sample_rate:16000`
# reported by the audio processor when the synthesizer was set up.
TARGET_SAMPLE_RATE = 16000

input_voice_path = "/Users/ravindudinuththara/Desktop/Sliit/University of Colombo.wav"
# librosa resamples on load when `sr` is given and returns that rate as `sr`.
y, sr = librosa.load(input_voice_path, sr=TARGET_SAMPLE_RATE)
resampled_path = "parent_voice_16k.wav"
# Write with the rate librosa returned so the file header always matches the samples.
sf.write(resampled_path, y, sr)

In [15]:
# 4. Text to speak: the sentence handed to `synthesizer.tts` in a later cell.
text = "Hello baby, mommy is here. Everything is okay."

In [17]:
# 5. Synthesize the sentence, using the resampled recording as the speaker reference.
# `text` is assigned again here, so this cell does not depend on the previous one.
text = "Hello baby, mommy is here. Everything is okay."
audio = synthesizer.tts(
    text,
    speaker_wav=resampled_path,
    language_name="en",
)

 > Text splitted to sentences.
['Hello baby, mommy is here.', 'Everything is okay.']
 > Processing time: 1.0667810440063477
 > Real-time factor: 0.2514806798694832


In [18]:
# 6. Save and play
# One path for both steps: previously the file was written to this absolute
# location but played back from the relative "output_voice.wav", which only
# resolves when the notebook's working directory is that same folder.
output_path = "/Users/ravindudinuththara/Desktop/Sliit/output_voice.wav"
synthesizer.save_wav(audio, output_path)
# `filename=` makes IPython read the file explicitly rather than guessing
# whether a positional string is a path, a URL, or raw data.
Audio(filename=output_path)