# MiniMax Selenium Provider - Usage Examples

This notebook demonstrates how to use the **MiniMax Selenium Provider** for voice cloning and text-to-speech synthesis.

## Features
- ✅ Voice cloning from reference audio
- ✅ Vietnamese language support
- ✅ Batch processing with TextFileLoader
- ✅ Google OAuth authentication
- ✅ Comprehensive error handling

## Requirements
- Valid Google account for MiniMax authentication
- Reference audio file for voice cloning
- Chrome browser installed

In [None]:
import os
import sys
from pathlib import Path
from pprint import pprint

# Add speech-synth-engine to path
# NOTE(review): this is a machine-specific absolute path; point it at your own
# checkout. It has to run before the project imports below, which presumably
# rely on this sys.path entry to be resolvable.
sys.path.insert(0, "/home/nampv1/projects/tts/speech-synth-engine")

# Import MiniMax provider and text loaders
from speech_synth_engine.providers.minimax_selenium_provider import MiniMaxSeleniumProvider
from speech_synth_engine.dataset.text_loaders import TextFileLoader

# Reached only if every import above succeeded.
print("‚úÖ Imports successful")

## 1. Configuration

Set up your MiniMax credentials and configuration:

In [None]:
# Settings handed to the MiniMax provider. The Google credentials come from
# the environment; the literal fallbacks are placeholders to be replaced.
minimax_config = dict(
    base_url="https://www.minimax.io/audio/voices-cloning",
    google_email=os.getenv("MINIMAX_GOOGLE_EMAIL", "your_email@gmail.com"),
    google_password=os.getenv("MINIMAX_GOOGLE_PASSWORD", "your_password"),
    headless=False,  # flip to True for headless mode
    sample_rate=22050,
    language="Vietnamese",
    timeout=60,
    download_timeout=180,
    max_wait_time=300,
    batch_processing=True,
    max_batch_size=10,
    batch_delay=2,
)

# Build the provider, registered under the name "minimax_selenium"
provider = MiniMaxSeleniumProvider("minimax_selenium", minimax_config)

print(f"‚úÖ Provider created: {provider.name}")
print(f"üìä Provider capabilities: {provider.provider_info}")

## 2. Single Text Synthesis

Basic text-to-speech synthesis (requires uploaded reference audio):

In [None]:
# Directory that collects the audio produced by these examples
output_dir = Path("examples_output") / "minimax"
output_dir.mkdir(exist_ok=True, parents=True)

# Sentence to synthesize and the WAV path it should be written to
test_text = "Xin ch√†o, ƒë√¢y l√† MiniMax voice cloning v·ªõi ti·∫øng Vi·ªát"
output_file = output_dir.joinpath("single_test.wav")

print(f"üé§ Text: {test_text}", f"üìÅ Output: {output_file}", sep="\n")

# Note: a reference audio must already have been uploaded for this call.
# On first use, provide a reference audio file (see the voice cloning section).
result = provider.synthesize_with_metadata(test_text, "cloned_voice", output_file)

# Blank separator line, then the raw result returned by the provider
print()
print("üìä Result:")
pprint(result)

## 3. Voice Cloning

Clone voice from reference audio and synthesize text:

In [None]:
# Candidate reference recordings, tried in order (replace with your own files)
reference_audio_sources = [
    Path("/home/nampv1/projects/tts/speech-synth-engine/test_output/audio/test.wav"),
    Path("/media/nampv1/hdd/data/m·∫´u-gi·ªçng-nh√¢n-vi√™n-nh·∫≠p-li·ªáu-b∆∞u-c·ª•c-thƒÉng-long-24-10-20251024T103708Z-1-001/m·∫´u-gi·ªçng-nh√¢n-vi√™n-nh·∫≠p-li·ªáu-b∆∞u-c·ª•c-thƒÉng-long-24-10/spk2_1.m4a")
]

# Use the first candidate that exists and is larger than 1000 bytes;
# None means no usable reference recording was found.
reference_audio = next(
    (
        source
        for source in reference_audio_sources
        if source.exists() and source.stat().st_size > 1000
    ),
    None,
)

if reference_audio is None:
    print("‚ö†Ô∏è No reference audio found. Please provide a valid audio file.")
    print("üìù Expected formats: WAV, MP3 (5-30 seconds, good quality)")
else:
    print(f"‚úÖ Using reference audio: {reference_audio}")
    print(f"üìä File size: {reference_audio.stat().st_size / 1024:.1f} KB")

    # Clone voice and synthesize
    clone_text = "T√¥i r·∫•t vui ƒë∆∞·ª£c l√†m vi·ªác v·ªõi MiniMax voice cloning"
    clone_output = output_dir / "cloned_voice.wav"

    print(f"\nüé≠ Cloning voice for text: {clone_text}")
    print(f"üìÅ Output: {clone_output}")

    success = provider.clone(clone_text, reference_audio, clone_output)

    # A truthy return value alone does not prove the file was written, and
    # stat() on a missing path raises FileNotFoundError. Require the file to
    # exist before reporting its size, as safe_synthesis() does.
    if success and clone_output.exists():
        print(f"‚úÖ Voice cloning successful: {clone_output}")
        print(f"üìä File size: {clone_output.stat().st_size / 1024:.1f} KB")
    else:
        print("‚ùå Voice cloning failed")

## 4. Batch Processing

Process multiple texts from a file using TextFileLoader:

In [None]:
# Location of the sample input consumed by the batch examples
sample_texts_file = Path("examples_output") / "sample_texts.txt"
sample_texts_file.parent.mkdir(exist_ok=True, parents=True)

# One "<id><TAB><sentence>" record per entry (Vietnamese sample sentences)
vietnamese_texts = [
    "1\tXin ch√†o, ƒë√¢y l√† MiniMax voice cloning",
    "2\tT√¥i r·∫•t vui ƒë∆∞·ª£c l√†m vi·ªác v·ªõi b·∫°n",
    "3\tC·∫£m ∆°n b·∫°n ƒë√£ s·ª≠ d·ª•ng d·ªãch v·ª• c·ªßa ch√∫ng t√¥i",
    "4\tCh√∫c b·∫°n m·ªôt ng√†y t·ªët l√†nh",
    "5\tH·∫πn g·∫∑p l·∫°i b·∫°n l·∫ßn sau"
]

# Store the records newline-separated, encoded as UTF-8
sample_texts_file.write_text(data="\n".join(vietnamese_texts), encoding="utf-8")
print(f"‚úÖ Created sample text file: {sample_texts_file}")

# Read the file back through TextFileLoader
loader = TextFileLoader(sample_texts_file)
loaded_texts = loader.load()

# Blank separator line, then every (id, text) pair the loader returned
print()
print(f"üìÑ Loaded {len(loaded_texts)} texts:")
for text_id, text in loaded_texts:
    print(f"  {text_id}: {text}")

In [None]:
# Batch cloning depends on the reference recording selected earlier;
# without one on disk the whole step is skipped.
if not (reference_audio and reference_audio.exists()):
    print("‚ö†Ô∏è Skipping batch cloning - no reference audio available")
else:
    batch_output_dir = output_dir / "batch_cloning"
    batch_output_dir.mkdir(exist_ok=True, parents=True)

    print("\nüé≠ Starting batch voice cloning...")
    print(f"üìÑ Input file: {sample_texts_file}")
    print(f"üéµ Reference audio: {reference_audio}")
    print(f"üìÅ Output directory: {batch_output_dir}")

    # Run the provider over every record in the sample file
    batch_result = provider.clone_batch(sample_texts_file, reference_audio, batch_output_dir)

    # Aggregate counters; keys missing from the result are shown as 0
    print("\nüìä Batch processing results:")
    print(f"   Total texts: {batch_result.get('total_texts', 0)}")
    print(f"   Processed: {batch_result.get('processed', 0)}")
    print(f"   Failed: {batch_result.get('failed', 0)}")
    print(f"   Success rate: {batch_result.get('success_rate', 0):.1f}%")

    # One line per record, followed by the error text for failed records
    print("\nüìã Individual results:")
    for result in batch_result.get("results", []):
        if result.get("success"):
            status = "‚úÖ"
        else:
            status = "‚ùå"
        print(f"   {status} {result.get('id', 'unknown')}: {result.get('output_file', 'no_file')}")
        if not result.get("success"):
            print(f"      Error: {result.get('error', 'Unknown error')}")

## 5. TextFileLoader Examples

Different ways to use TextFileLoader:

In [None]:
# Scratch directory holding one sample file per input layout
formats_dir = Path("examples_output") / "text_formats"
formats_dir.mkdir(exist_ok=True, parents=True)

# 1. Plain lines without IDs (auto ID generation)
simple_text = formats_dir.joinpath("simple.txt")
simple_text.write_text(
    "Hello world\n"
    "This is a test\n"
    "Multiple lines",
    encoding="utf-8",
)

# 2. Tab-separated records (ID + Text)
tab_text = formats_dir.joinpath("tab_separated.txt")
tab_text.write_text(
    "001\tHello from tab format\n"
    "002\tSecond line with tab\n"
    "003\tThird line",
    encoding="utf-8",
)

# 3. CSV with an "id,text" header row
csv_text = formats_dir.joinpath("data.csv")
csv_text.write_text(
    "id,text\n"
    "101,CSV first line\n"
    "102,CSV second line\n"
    "103,CSV third line",
    encoding="utf-8",
)

print("‚úÖ Created different text formats")

# Load each file through the factory with loader_type="auto"
from speech_synth_engine.dataset.text_loaders import TextLoaderFactory

for file_path in (simple_text, tab_text, csv_text):
    print()
    print(f"üìÑ Loading {file_path.name}:")
    loader = TextLoaderFactory.create_loader(file_path, loader_type="auto")
    texts = loader.load()
    print(f"   Loaded {len(texts)} texts:")
    # Preview at most three entries, each cut to 50 characters
    for text_id, text in texts[:3]:
        print(f"     {text_id}: {text[:50]}...")

## 6. Advanced Usage

Custom configurations and error handling:

In [None]:
# Advanced configuration example
# Credentials are read from the environment and are None when unset.
advanced_config = {
    "base_url": "https://www.minimax.io/audio/voices-cloning",
    "google_email": os.getenv("MINIMAX_GOOGLE_EMAIL"),
    "google_password": os.getenv("MINIMAX_GOOGLE_PASSWORD"),
    "headless": False,
    "sample_rate": 22050,
    "language": "Vietnamese",
    "timeout": 120,  # Longer timeout for complex texts
    "download_timeout": 300,  # Longer download timeout
    "max_wait_time": 600,  # 10 minutes max wait
    "batch_processing": True,
    "max_batch_size": 5,  # Smaller batches for stability
    "batch_delay": 5,  # Longer delay between requests
    "chars_per_second": 15,  # Slower speech rate
    "min_duration": 1.0,
    "max_duration": 30.0
}

print("‚úÖ Advanced configuration:")
# Print a copy with the password masked: cell output is stored in the
# notebook, so echoing the real value would leak the credential to anyone
# the notebook is shared with. An unset password still shows as None.
pprint({
    key: ("***" if key == "google_password" and value else value)
    for key, value in advanced_config.items()
})

# Create provider with advanced config (the provider gets the real values)
advanced_provider = MiniMaxSeleniumProvider("minimax_advanced", advanced_config)
print(f"\nüìä Provider info: {advanced_provider.provider_info}")

## 7. Error Handling

Proper error handling and recovery:

In [None]:
def safe_synthesis(provider, text, output_file, reference_audio=None):
    """
    Synthesize ``text`` into ``output_file`` and report the outcome as a bool.

    When ``reference_audio`` is given and exists on disk, ``provider.clone`` is
    used; otherwise ``provider.synthesize_with_metadata`` is called with the
    "cloned_voice" voice. ``provider.cleanup()`` is always invoked afterwards.
    No exception escapes this function, including one raised by the cleanup.

    Args:
        provider: Object exposing ``clone``, ``synthesize_with_metadata`` and
            ``cleanup``.
        text: Text to synthesize.
        output_file: ``pathlib.Path`` where the audio is expected to appear.
        reference_audio: Optional ``pathlib.Path`` of the voice to clone.

    Returns:
        True only if the provider reported success and ``output_file`` exists;
        False otherwise, including when any exception was raised.
    """
    try:
        print(f"üé§ Synthesizing: {text[:50]}...")

        if reference_audio and reference_audio.exists():
            success = provider.clone(text, reference_audio, output_file)
        else:
            result = provider.synthesize_with_metadata(text, "cloned_voice", output_file)
            success = result.get('success', False)

        # Guard clause: a reported success without a file on disk is a failure.
        if not (success and output_file.exists()):
            print(f"‚ùå Failed: {text[:50]}...")
            return False

        file_size = output_file.stat().st_size / 1024
        print(f"‚úÖ Success: {output_file} ({file_size:.1f} KB)")
        return True

    except Exception as e:
        print(f"‚ùå Error: {e}")
        return False
    finally:
        # Always cleanup. An exception raised inside ``finally`` would replace
        # the pending return value and propagate to the caller, so cleanup
        # failures are reported here instead of being allowed to escape.
        try:
            provider.cleanup()
        except Exception as cleanup_error:
            print(f"Warning: provider cleanup failed: {cleanup_error}")

# Inputs for the error-handling demo: (text, output path) pairs
test_cases = [
    ("Text b√¨nh th∆∞·ªùng", "examples_output/error_test1.wav"),
    ("", "examples_output/error_test2.wav"),  # nothing to synthesize
    ("Text r·∫•t d√†i " * 100, "examples_output/error_test3.wav")  # the phrase repeated 100 times
]

print("üß™ Testing error handling:")
for text, output_path in test_cases:
    # safe_synthesis returns a bool and handles provider errors itself
    result = safe_synthesis(provider, text, Path(output_path), reference_audio)
    if result:
        print("   Result: ‚úÖ")
    else:
        print("   Result: ‚ùå")

## 8. Cleanup

Release the provider resources and, optionally, remove the generated files:

In [None]:
# Clean up provider resources
# Each provider is cleaned up independently: if one cleanup raises (for
# example because safe_synthesis() already cleaned that provider up), the
# other provider must still get its cleanup() call.
cleanup_failures = []
for provider_label, provider_instance in (
    ("provider", provider),
    ("advanced_provider", advanced_provider),
):
    try:
        provider_instance.cleanup()
    except Exception as cleanup_error:
        cleanup_failures.append(f"{provider_label}: {cleanup_error}")

if cleanup_failures:
    for failure in cleanup_failures:
        print(f"Warning: cleanup failed for {failure}")
else:
    print("‚úÖ Provider cleanup completed")

# Optional: Clean up generated files
import shutil

if output_dir.exists():
    # Uncomment to clean up
    # shutil.rmtree(output_dir)
    print(f"üìÅ Generated files in: {output_dir}")

print("\nüéâ MiniMax examples completed!")