# Hmong Text-to-Speech Batch Processing

Notebook này chuyển hàng loạt văn bản tiếng Hmong trong file Excel thành giọng nói (file WAV).

## Các bước thực hiện:
1. Cài đặt thư viện cần thiết
2. Upload file Excel với 2 cột: `file_name` và `transcript`
3. Chạy script để tạo file WAV
4. Download kết quả

## Bước 1: Cài đặt Dependencies

In [None]:
# Install the required libraries (the -q flag keeps pip's output quiet).
!pip install -q pandas openpyxl TTS torch torchaudio librosa soundfile scipy

## Bước 2: Import Libraries và Cấu hình Logging

In [None]:
import os
import sys
import pandas as pd
from pathlib import Path
import logging

# Set up root logging: INFO and above, timestamped, written to stdout
# (instead of the default stderr stream).
logging.basicConfig(
    handlers=[logging.StreamHandler(stream=sys.stdout)],
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

print("✓ Libraries imported successfully!")

## Bước 3: Định nghĩa Hmong TTS Processor Class

In [None]:
class KaggleHmongTTS:
    """Batch text-to-speech processor for an Excel sheet of Hmong transcripts.

    Reads a spreadsheet with ``file_name`` and ``transcript`` columns and writes
    one WAV file per non-empty transcript into ``output_dir``, using the first
    Coqui TTS model from ``MODELS_TO_TRY`` that loads successfully.

    NOTE(review): none of the candidate model names is Hmong-specific (one is
    multilingual, two are English), so the audio is presumably rendered with
    another language's voice -- confirm this is acceptable for the use case.
    """

    # Columns the input spreadsheet must provide.
    REQUIRED_COLUMNS = ('file_name', 'transcript')

    # Candidate models in order of preference; the first one that loads is used.
    MODELS_TO_TRY = (
        "tts_models/multilingual/multi-dataset/your_tts",
        "tts_models/en/ljspeech/tacotron2-DDC",
        "tts_models/en/ljspeech/fast_pitch",
    )

    def __init__(self, output_dir='/kaggle/working/audio_output', tts_suffix='_tts',
                 speaker=None, language=None):
        """Prepare the output directory and pick the compute device.

        Args:
            output_dir: Directory that receives the generated WAV files; it is
                created (including parents) if it does not exist.
            tts_suffix: Text appended to each input file's base name to build
                the output file name (``<base><tts_suffix>.wav``).
            speaker: Optional speaker name to use when the loaded model is
                multi-speaker. Defaults to the model's first listed speaker.
            language: Optional language code to use when the loaded model is
                multilingual. Defaults to the model's first listed language.
        """
        self.output_dir = Path(output_dir)
        self.tts_suffix = tts_suffix
        self.speaker = speaker
        self.language = language
        self.tts_model = None  # loaded lazily by initialize_tts_model()

        # Create output directory
        self.output_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Output directory: {self.output_dir}")

        # Use the GPU when PyTorch is installed and reports CUDA; otherwise CPU.
        try:
            import torch
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
            logger.info(f"Using device: {self.device}")
        except ImportError:
            self.device = "cpu"
            logger.info("PyTorch not available, using CPU")

    def initialize_tts_model(self):
        """Load the first model from ``MODELS_TO_TRY`` that can be constructed.

        Does nothing if a model is already loaded, so repeated calls to
        ``process_excel`` reuse the same model.

        Raises:
            RuntimeError: If none of the candidate models could be loaded.
            Exception: Any other initialization error (for example the TTS
                package being unavailable) is logged and re-raised.
        """
        if self.tts_model is not None:
            return

        try:
            from TTS.api import TTS

            logger.info("Loading TTS model...")

            for model_name in self.MODELS_TO_TRY:
                try:
                    logger.info(f"Attempting to load: {model_name}")
                    self.tts_model = TTS(
                        model_name=model_name,
                        progress_bar=True,
                        gpu=(self.device == "cuda"),
                    )
                    logger.info(f"✓ Successfully loaded: {model_name}")
                    break
                except Exception as e:
                    # A failure here only disqualifies this candidate.
                    logger.warning(f"Could not load {model_name}: {e}")

            if self.tts_model is None:
                raise RuntimeError("Failed to load any TTS model")

        except Exception as e:
            logger.error(f"Error initializing TTS: {e}")
            raise

    def _synthesis_kwargs(self):
        """Return the extra ``tts_to_file`` arguments the loaded model needs.

        Multi-speaker / multilingual models reject calls that do not name a
        speaker / language, so one is supplied: the value given to the
        constructor if any, otherwise the first entry the model lists.
        """
        model = self.tts_model
        kwargs = {}

        if getattr(model, "is_multi_speaker", False):
            speaker = self.speaker or next(iter(getattr(model, "speakers", None) or []), None)
            if speaker is not None:
                kwargs["speaker"] = speaker

        if getattr(model, "is_multi_lingual", False):
            language = self.language or next(iter(getattr(model, "languages", None) or []), None)
            if language is not None:
                kwargs["language"] = language

        return kwargs

    def _build_output_path(self, file_name):
        """Map an input file name to its WAV path directly inside ``output_dir``.

        Only the final path component is used, so a ``file_name`` that contains
        directories cannot point at a missing sub-folder or outside
        ``output_dir``. The last extension is dropped before the suffix is added.
        """
        stem = Path(file_name).stem
        return self.output_dir / f"{stem}{self.tts_suffix}.wav"

    def process_excel(self, excel_path):
        """Synthesize one WAV file per row of the given Excel file.

        Rows whose transcript is missing or blank are skipped. A failure on one
        row is logged and counted, and processing continues with the next row.
        A summary is printed at the end.

        Args:
            excel_path: Path of the spreadsheet; it must contain the columns
                ``file_name`` and ``transcript``.

        Raises:
            ValueError: If a required column is missing.
            RuntimeError: If no TTS model could be loaded.
        """
        # Read Excel file
        logger.info(f"Reading Excel file: {excel_path}")
        df = pd.read_excel(excel_path)

        # Validate columns
        if any(column not in df.columns for column in self.REQUIRED_COLUMNS):
            raise ValueError("Excel must have 'file_name' and 'transcript' columns")

        total = len(df)
        logger.info(f"Loaded {total} rows")
        logger.info(f"Columns: {list(df.columns)}")

        # Display first few rows
        print("\nFirst 5 rows:")
        print(df.head())

        # Initialize TTS; the per-model arguments are the same for every row.
        self.initialize_tts_model()
        synthesis_kwargs = self._synthesis_kwargs()
        if synthesis_kwargs:
            logger.info(f"Synthesis options: {synthesis_kwargs}")

        success_count = 0
        failed_count = 0
        skipped_count = 0

        # Count rows by position: the DataFrame index label is not guaranteed
        # to be a 0-based integer.
        for position, (_, row) in enumerate(df.iterrows(), start=1):
            try:
                raw_name = row['file_name']
                raw_text = row['transcript']
                # Empty cells arrive as NaN; test for that before str(), which
                # would otherwise turn them into the text "nan".
                file_name = '' if pd.isna(raw_name) else str(raw_name).strip()
                transcript = '' if pd.isna(raw_text) else str(raw_text).strip()

                if not transcript:
                    logger.warning(f"Skipping empty transcript for: {file_name or f'row {position}'}")
                    skipped_count += 1
                    continue

                if not file_name:
                    # Give unnamed rows distinct names so they do not overwrite
                    # one another.
                    file_name = f"row_{position}"
                    logger.warning(f"Missing file_name at row {position}; using: {file_name}")

                output_path = self._build_output_path(file_name)

                logger.info(f"Processing [{position}/{total}]: {file_name}")
                logger.info(f"  Text: {transcript[:80]}...")

                # Generate speech
                self.tts_model.tts_to_file(
                    text=transcript,
                    file_path=str(output_path),
                    **synthesis_kwargs,
                )

                logger.info(f"  ✓ Saved: {output_path.name}")
                success_count += 1

            except Exception as e:
                # Deliberately broad: one bad row must not abort the batch.
                logger.error(f"  ✗ Failed: {e}")
                failed_count += 1

        # Print summary
        self.print_summary(total, success_count, failed_count, skipped_count)

    def print_summary(self, total, success, failed, skipped=0):
        """Print the batch counters and list WAV files found in ``output_dir``.

        Args:
            total: Number of rows in the input sheet.
            success: Rows synthesized successfully.
            failed: Rows that raised an error.
            skipped: Rows skipped because their transcript was empty.
        """
        print("\n" + "="*70)
        print("PROCESSING SUMMARY")
        print("="*70)
        print(f"Total entries:          {total}")
        print(f"Successfully processed: {success}")
        print(f"Failed:                 {failed}")
        print(f"Skipped (empty):        {skipped}")
        if total > 0:
            print(f"Success rate:           {success/total*100:.1f}%")
        print(f"Output directory:       {self.output_dir}")
        print("="*70)

        # Every *.wav in the directory is listed, including files that were
        # already there before this run.
        wav_files = sorted(self.output_dir.glob("*.wav"))
        print(f"\nGenerated {len(wav_files)} WAV files:")
        for wav_file in wav_files[:10]:  # Show first 10
            print(f"  - {wav_file.name} ({wav_file.stat().st_size / 1024:.1f} KB)")
        if len(wav_files) > 10:
            print(f"  ... and {len(wav_files) - 10} more files")

print("✓ KaggleHmongTTS class defined successfully!")

## Bước 4: Cấu hình và Chạy Processing

**LƯU Ý**: Thay đổi đường dẫn `excel_path` theo file của bạn!

In [None]:
# Configuration
# CHANGE THIS PATH so it points at your own Excel file!
excel_path = '/kaggle/input/your-dataset/your-file.xlsx'
# Directory that receives the generated WAV files (created if missing).
output_dir = '/kaggle/working/hmong_audio_output'
# Appended to each input file's base name: <base><tts_suffix>.wav
tts_suffix = '_tts'

# Create the processor
processor = KaggleHmongTTS(
    output_dir=output_dir,
    tts_suffix=tts_suffix
)

# Process the Excel file: loads a TTS model, then writes one WAV per transcript
processor.process_excel(excel_path)

## Bước 5: Kiểm tra Kết quả

In [None]:
# Liệt kê tất cả file WAV đã tạo
output_path = Path('/kaggle/working/hmong_audio_output')
wav_files = sorted(output_path.glob('*.wav'))

print(f"Total WAV files: {len(wav_files)}")
print("\nFiles:")
for f in wav_files:
    size_kb = f.stat().st_size / 1024
    print(f"  {f.name}: {size_kb:.1f} KB")

## Bước 6: (Optional) Nghe thử một file audio

In [None]:
from IPython.display import Audio, display

# Preview the first generated file, if the listing above found any.
if not wav_files:
    print("No WAV files found!")
else:
    first_file = wav_files[0]
    print(f"Playing: {first_file.name}")
    display(Audio(str(first_file)))

## Hoàn thành!

Các file WAV đã được tạo trong thư mục `/kaggle/working/hmong_audio_output/`

Bạn có thể download chúng từ Kaggle output.