In [1]:
import os
print(os.path.exists(".../dataset/dataset_train/"))

False


In [2]:
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

from myxtts.config.config import XTTSConfig,ModelConfig,DataConfig,TrainingConfig
from myxtts import get_xtts_model, get_trainer, get_inference_engine
from myxtts.utils.performance import start_performance_monitoring
start_performance_monitoring()

# Complete Model Configuration
m=ModelConfig(
    # Text encoder settings
    text_encoder_dim=512,           # Increased for better performance
    text_encoder_layers=6,
    text_encoder_heads=8,
    text_vocab_size=256_256,
    
    # Audio encoder settings
    audio_encoder_dim=512,
    audio_encoder_layers=6,
    audio_encoder_heads=8,
    
    # Decoder settings
    decoder_dim=1024,              # Increased for better quality
    decoder_layers=12,
    decoder_heads=16,
    
    # Mel spectrogram settings
    n_mels=80,
    n_fft=1024,
    hop_length=256,
    win_length=1024,
    sample_rate=22050,
    
    # Voice conditioning
    speaker_embedding_dim=256,
    use_voice_conditioning=True,
    
    # Language support
    languages=["en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh", "ja", "hu", "ko"],
    max_text_length=500,
    
    # Tokenizer settings
    tokenizer_type="nllb",
    tokenizer_model="facebook/nllb-200-distilled-600M"
)

# Complete Training Configuration
t=TrainingConfig(
    # Training parameters
    epochs=200,
    learning_rate=5e-5,
    warmup_steps=2000,
    weight_decay=1e-6,
    gradient_clip_norm=1.0,
    
    # Optimizer settings
    optimizer="adamw",
    beta1=0.9,
    beta2=0.999,
    eps=1e-8,
    
    # Scheduler
    scheduler="noam",
    scheduler_params={},
    
    # Loss weights
    mel_loss_weight=45.0,
    kl_loss_weight=1.0,
    duration_loss_weight=1.0,
    
    # Checkpointing
    save_step=5000,               # Save more frequently for 200 epochs
    checkpoint_dir="./checkpoints",
    
    # Validation
    val_step=1000,                # Validate more frequently
    
    # Logging
    log_step=100,
    use_wandb=False,              # Disable wandb for simple training
    wandb_project="myxtts",
    
    # Device / distribution
    multi_gpu=False,              # Single GPU training
    visible_gpus=None
)

# Complete Data Configuration
d=DataConfig(
    # Dataset paths
    dataset_path="../dataset",     # Main dataset directory
    dataset_name="custom_dataset",
    
    # Custom metadata and wav paths
    metadata_train_file="metadata_train.csv",
    metadata_eval_file="metadata_eval.csv",
    wavs_train_dir="wavs",
    wavs_eval_dir="wavs",
    
    # Audio processing
    sample_rate=22050,
    trim_silence=True,
    normalize_audio=True,
    
    # Text processing
    text_cleaners=["english_cleaners"],
    language="en",
    add_blank=True,
    
    # Training data splits
    train_split=0.9,
    val_split=0.1,
    
    # Batch and workers
    batch_size=4,                 # Optimized for GPU memory
    num_workers=16,               # High for good CPU utilization
    
    # Voice conditioning
    reference_audio_length=3.0,
    min_audio_length=1.0,
    max_audio_length=11.0,
    
    # Performance optimization
    prefetch_buffer_size=8,
    shuffle_buffer_multiplier=20,
    enable_memory_mapping=True,
    cache_verification=True,
    
    # Sequence length caps
    max_mel_frames=512,
    
    # GPU optimizations
    enable_xla=True,
    enable_tensorrt=False,
    mixed_precision=True,
    pin_memory=True,
    persistent_workers=True,
    
    # Preprocessing control
    preprocessing_mode="auto"
)

# Load configuration with complete parameters
config = XTTSConfig(
    model=m,
    training=t, 
    data=d,
    multi_gpu=False
)

# Create checkpoints directory if it doesn't exist
import os
os.makedirs('./checkpoints', exist_ok=True)

print("Configuration completed with all parameters!")
print(f"Model parameters: {sum(1 for _ in config.model.__dict__.items())}")
print(f"Training parameters: {sum(1 for _ in config.training.__dict__.items())}")
print(f"Data parameters: {sum(1 for _ in config.data.__dict__.items())}")



2025-09-13 02:48:16.279394: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1757719096.296785 1390661 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1757719096.302163 1390661 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1757719096.317527 1390661 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1757719096.317545 1390661 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1757719096.317547 1390661 computation_placer.cc:177] computation placer alr

Performance monitoring started


In [None]:
# Training
model = get_xtts_model()(config.model)
trainer = get_trainer()(config, model)
train_dataset, val_dataset = trainer.prepare_datasets(train_data_path="../dataset/dataset_train",val_data_path="../dataset/dataset_eval")
trainer.train(train_dataset, val_dataset)


I0000 00:00:1757719100.776982 1390661 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 20786 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4090, pci bus id: 0000:4d:00.0, compute capability: 8.9
2025-09-13 02:48:22,250 - MyXTTS - INFO - Using device: CPU
2025-09-13 02:48:22,251 - MyXTTS - INFO - Using strategy: OneDeviceStrategy


GPU setup error: Physical devices cannot be modified after being initialized
Using single GPU strategy
Loaded 20509 items for train subset
Loaded 2591 items for val subset
Precomputing mel spectrograms to ../dataset/dataset_train/processed/mels_sr22050_n80_hop256 (overwrite=False)...
All mel spectrograms already cached.
Precomputing mel spectrograms to ../dataset/dataset_eval/processed/mels_sr22050_n80_hop256 (overwrite=False)...
All mel spectrograms already cached.


2025-09-13 02:48:29,641 - MyXTTS - INFO - Cache verify: train {'checked': 20509, 'fixed': 0, 'failed': 0}, val {'checked': 2591, 'fixed': 0, 'failed': 0}
2025-09-13 02:48:30,746 - MyXTTS - INFO - Using cached items - train: 20509, val: 2591
2025-09-13 02:48:32,109 - MyXTTS - INFO - Training samples: 20509
2025-09-13 02:48:32,109 - MyXTTS - INFO - Validation samples: 2591
2025-09-13 02:48:32,110 - MyXTTS - INFO - Data loading performance:
2025-09-13 02:48:32,110 - MyXTTS - INFO - === Data Loading Profile ===

Cache Efficiency: 0.0%
  Hits: 0
  Misses: 0
  Errors: 0


2025-09-13 02:48:32,110 - MyXTTS - INFO - Starting training for 200 epochs
2025-09-13 02:48:32,110 - MyXTTS - INFO - Current step: 0


Performance monitoring started


Epoch 0:   0%|          | 3/5128 [01:28<37:31:15, 26.36s/it, loss=169.8851, step=3, data_ms=1.4, comp_ms=20471.7, mel=3.77, stop=0.170] 

In [None]:

# Enhanced Inference Section
import os

# Ensure checkpoint directory exists
checkpoint_path = "./checkpoints/best.ckpt"
if not os.path.exists(checkpoint_path):
    print(f"Warning: Checkpoint not found at {checkpoint_path}")
    print("Make sure to train the model first or provide a valid checkpoint path")
    # Use the latest checkpoint if available
    checkpoint_dir = "./checkpoints"
    if os.path.exists(checkpoint_dir):
        checkpoints = [f for f in os.listdir(checkpoint_dir) if f.endswith('.ckpt')]
        if checkpoints:
            # Sort by modification time and get the latest
            latest_checkpoint = max(checkpoints, key=lambda x: os.path.getmtime(os.path.join(checkpoint_dir, x)))
            checkpoint_path = os.path.join(checkpoint_dir, latest_checkpoint)
            print(f"Using latest checkpoint: {checkpoint_path}")

# Initialize inference engine
try:
    inference = get_inference_engine()(config, checkpoint_path=checkpoint_path)
    
    # Test synthesis with sample text
    test_texts = [
        "Hello, this is a test of the MyXTTS model!",
        "The weather is beautiful today.",
        "Artificial intelligence is transforming the world."
    ]
    
    # Synthesize each test text
    for i, text in enumerate(test_texts):
        print(f"\nSynthesizing text {i+1}: {text}")
        try:
            result = inference.synthesize(text)
            output_path = f"output_sample_{i+1}.wav"
            inference.save_audio(result["audio"], output_path)
            print(f"Audio saved to: {output_path}")
        except Exception as e:
            print(f"Error synthesizing text {i+1}: {e}")
    
    print("\nInference testing completed!")
    
except Exception as e:
    print(f"Failed to initialize inference engine: {e}")
    print("This is expected if no trained model checkpoint is available yet.")
