In [9]:
import os
import traceback

import torch
import torch.serialization # Import torch.serialization
from TTS.api import TTS

# --- Import the necessary config and args classes ---
# Import the specific classes that PyTorch needs to load safely
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import XttsAudioConfig, XttsArgs # <-- Import XttsArgs here
from TTS.config.shared_configs import BaseDatasetConfig

# --- Add the classes to PyTorch's safe globals ---
# Register the XTTS config/args classes as safe globals so PyTorch is
# allowed to unpickle them when the checkpoint is loaded.
# add_safe_globals only exists in newer PyTorch releases (2.4+). Older
# releases are assumed not to need the allowlist (their torch.load does not
# default to weights-only mode), so the call is skipped there instead of
# failing with AttributeError.
if hasattr(torch.serialization, "add_safe_globals"):
    torch.serialization.add_safe_globals([
        XttsConfig,
        XttsAudioConfig,
        BaseDatasetConfig,
        XttsArgs,
    ])
# --------------------------------------------------------

# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# --- Define your audio path ---
# Reference recording whose voice is cloned. Set the XTTS_SPEAKER_WAV
# environment variable to point at a different file without editing this
# cell; when it is unset or empty the original location is used.
speaker_wav_path = (
    os.environ.get("XTTS_SPEAKER_WAV")
    or "/home/tdamen/jasmin-data/Data/data/audio/wav/comp-q/vl/fv170099.wav"
)

# --- Define your output path ---
# Destination of the synthesized audio. Override with XTTS_OUTPUT_WAV;
# defaults to a file in the current working directory.
output_wav_path = os.environ.get("XTTS_OUTPUT_WAV") or "output_speech.wav"

# --- Check if the speaker audio file exists ---
# isfile (rather than exists) also rejects a directory at that path, which
# cannot serve as the reference recording.
if not os.path.isfile(speaker_wav_path):
    print(f"ERROR: Speaker audio file not found at: {speaker_wav_path}")
    print("Please make sure the path is correct and the file exists.")
else:
    print(f"Found speaker audio file: {speaker_wav_path}")

    # --- Initialize TTS ---
    # Load the XTTS v2 model by name and move it to the selected device.
    print("Initializing TTS model...")
    try:
        tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
        print("TTS model initialized successfully.")

        # --- Run TTS for voice cloning ---
        # Synthesize the sentence with speaker_wav_path as the voice
        # reference and write the result to output_wav_path.
        print(f"Synthesizing speech to: {output_wav_path}")
        tts.tts_to_file(
            text="Doormiddel van dit programma wil ik er achter komen welke phonemen ik moeite heb",
            speaker_wav=speaker_wav_path,
            language="nl",  # Set language to Dutch
            file_path=output_wav_path,
        )
        print(f"Speech successfully saved to {output_wav_path}")

    except Exception as e:
        # Report the failure without aborting the cell. The message alone
        # drops the exception type and origin, so the full traceback is
        # printed as well to keep the error diagnosable.
        print(f"An error occurred during TTS initialization or synthesis: {e}")
        traceback.print_exc()



Using device: cuda
Found speaker audio file: /home/tdamen/jasmin-data/Data/data/audio/wav/comp-q/vl/fv170099.wav
Initializing TTS model...
 > tts_models/multilingual/multi-dataset/xtts_v2 is already downloaded.
 > Using model: xtts
TTS model initialized successfully.
Synthesizing speech to: output_speech.wav
 > Text splitted to sentences.
['Doormiddel van dit programma wil ik er achter komen welke phonemen ik moeite heb']
 > Processing time: 2.1252331733703613
 > Real-time factor: 0.27156578275855625
Speech successfully saved to output_speech.wav
