In [None]:
# @title Step 1 (Final Revised): Install Dependencies
# This version upgrades Gradio to solve the Pydantic dependency conflict.
!pip install -q TTS==0.22.0
!pip install -q langdetect==1.0.9
# This is the key change to fix the Pydantic error:
!pip install -q gradio==4.29.0
!pip install -q numpy==1.23.5
!pip install -q scipy==1.10.1

print("✅ All libraries installed successfully!")
print("🔴 IMPORTANT: Please restart the session now before continuing.")

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/73.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.5/73.5 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.7/3.7 MB[0m [31m74.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.6/101.6 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.3/15.3 MB[0m [31m118.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00

In [None]:
# @title Step 2: Load TTS Model
import os
import torch
from TTS.api import TTS
import gradio as gr
from langdetect import detect

# Check for GPU availability and set the device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"🚀 Using device: {device.upper()}")

# Define the model name
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"

# Load the TTS model to the specified device.
# `tts` is a module-level name that the synthesis cell calls later on.
print("Downloading and loading the XTTS-v2 model... (This may take a moment)")
try:
    tts = TTS(model_name).to(device)
except Exception as e:
    print(f"Error loading TTS model: {e}")
    print("Please ensure you have accepted the model's terms on Hugging Face if required.")
    # Re-raise instead of swallowing: if loading failed, `tts` is undefined and
    # every later cell would break with an unrelated-looking NameError.
    raise
else:
    print("✅ TTS model loaded successfully!")

ModuleNotFoundError: No module named 'numpy.char'

In [None]:
# @title Step 3 (Revised): Upload Local Voice Samples
from google.colab import files
import os

# Tell the user what to select before the upload prompt appears.
print("📤 Please select the 5 audio voice samples from your local storage.")
print("Wait for all files to reach 100% before proceeding.")

# Folder that will hold a copy of every uploaded sample (created if missing).
upload_dir = 'uploaded_voices'
os.makedirs(upload_dir, exist_ok=True)

# Run the upload; the result is iterated below as {filename: file bytes}.
uploaded = files.upload()

# Build the mapping: voice name (filename without extension) -> saved file path.
speaker_wav_paths = {}
for filename, content in uploaded.items():
    voice_name = os.path.splitext(filename)[0]
    local_path = os.path.join(upload_dir, filename)

    # Persist the uploaded bytes under the target directory.
    with open(local_path, 'wb') as f:
        f.write(content)

    # A later file with the same stem replaces the earlier entry.
    speaker_wav_paths[voice_name] = local_path

# Summarise what is now available to the rest of the notebook.
if not speaker_wav_paths:
    print("\n⚠️ No files were uploaded.")
else:
    print("\n✅ Files uploaded successfully!")
    print("The following voices are now available:")
    for name in speaker_wav_paths:
        print(f"- {name}")

In [None]:
# @title Step 4: Run the TTS Application with Gradio UI
import uuid

# Define the core synthesis function
def generate_speech(text, voice_choice, custom_voice_upload):
    """
    Synthesizes speech from text using a selected or cloned voice.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only input is rejected.
        voice_choice: Either the literal "Clone from uploaded voice" or a key of
            the module-level ``speaker_wav_paths`` dict.
        custom_voice_upload: Reference clip to clone (passed straight through as
            ``speaker_wav``), or ``None`` when nothing was uploaded.

    Returns:
        A ``(audio_path, status)`` tuple. ``audio_path`` is the path of the
        generated WAV file, or ``None`` when nothing was synthesized; ``status``
        is a human-readable message describing what happened.
    """
    # `not text` also covers None, which has no .strip().
    if not text or not text.strip():
        return None, "⚠️ Please enter some text to synthesize."

    # --- 1. Determine the Speaker ---
    if voice_choice == "Clone from uploaded voice":
        if custom_voice_upload is not None:
            speaker_wav = custom_voice_upload
            status = "🎤 Using cloned voice from uploaded file."
        elif speaker_wav_paths:
            # Fallback to a default voice if clone is selected but no file is uploaded
            default_voice_key = next(iter(speaker_wav_paths))
            speaker_wav = speaker_wav_paths[default_voice_key]
            status = f"⚠️ No custom voice uploaded. Falling back to default: {default_voice_key}."
        else:
            # Nothing to clone from and nothing to fall back to.
            return None, "⚠️ No custom voice uploaded and no pre-defined voices are available."
    elif voice_choice in speaker_wav_paths:
        speaker_wav = speaker_wav_paths[voice_choice]
        status = f"🗣️ Using pre-defined voice: {voice_choice}."
    else:
        # Unknown (or missing) selection: report it instead of raising KeyError.
        return None, f"⚠️ Unknown voice: {voice_choice!r}. Please select one of the available voices."

    # --- 2. Detect Language ---
    try:
        detected_lang = detect(text)
        status += f"\n🔍 Detected Language: {detected_lang.upper()}"
    except Exception as e:
        detected_lang = "en"  # Fallback to English
        status += f"\n⚠️ Language detection failed, falling back to English. Error: {e}"

    # --- 3. Synthesize Speech ---
    # A random UUID in the filename keeps successive outputs from overwriting each other.
    output_filename = f"output_{uuid.uuid4()}.wav"
    # NOTE(review): "/content" is hard-coded; presumably the Colab working directory -- confirm.
    output_path = os.path.join("/content", output_filename)

    try:
        print("Synthesizing... Please wait.")
        # The magic happens here!
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_wav,
            language=detected_lang,
            file_path=output_path
        )
        print(f"Synthesis complete! Audio saved to {output_path}")
        return output_path, status
    except Exception as e:
        # Report the failure through the status output rather than raising.
        error_message = f"❌ An error occurred during synthesis: {e}"
        print(error_message)
        return None, error_message


# --- Gradio Interface Definition ---
# The dropdown offers the clone option plus one entry per uploaded voice sample.
clone_choice = "Clone from uploaded voice"
voice_choices = [clone_choice] + list(speaker_wav_paths.keys())

# Preselect "Hindi Female" when such a sample exists; otherwise use the first
# uploaded voice, or the clone option when nothing was uploaded. This keeps the
# default inside `voice_choices` whatever the uploaded files were called.
if "Hindi Female" in speaker_wav_paths:
    default_voice = "Hindi Female"
else:
    default_voice = next(iter(speaker_wav_paths), clone_choice)

# Example rows: keep the intended voice when it is available, else use the default,
# so clicking an example never selects a voice that generate_speech cannot find.
example_rows = [
    [sample_text, preferred if preferred in speaker_wav_paths else default_voice, None]
    for sample_text, preferred in (
        ("नमस्ते, आपकी आवाज़ बहुत स्पष्ट है।", "Hindi Female"),
        ("The quick brown fox jumps over the lazy dog.", "Marathi Male"),
        ("மின்னணுவியலில் ஒரு புதிய தொடக்கத்தை உருவாக்குவோம்.", "Tamil Female"),
    )
]

with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky")) as demo:
    gr.Markdown(
        """
        # 🎼 Offline Multilingual TTS with Voice Cloning
        Enter text, choose a voice, or upload your own voice clip to synthesize speech.
        """
    )

    with gr.Row():
        # Left column: inputs.
        with gr.Column(scale=2):
            text_input = gr.Textbox(
                label="Input Text",
                placeholder="Enter text here... (e.g., नमस्ते, दुनिया! or வணக்கம், உலகம்!)",
                lines=4
            )
            voice_dropdown = gr.Dropdown(
                label="Select a Voice",
                choices=voice_choices,
                value=default_voice
            )
            voice_upload = gr.Audio(
                label="Optional: Upload a 3-10 second voice clip (.wav) to clone",
                type="filepath",
            )
            generate_button = gr.Button("▶️ Generate Speech", variant="primary")

        # Right column: outputs.
        with gr.Column(scale=1):
            status_output = gr.Textbox(label="Status", interactive=False, lines=2)
            audio_output = gr.Audio(label="Synthesized Speech")

    # generate_speech returns (audio_path, status), matching the order of `outputs`.
    generate_button.click(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown, voice_upload],
        outputs=[audio_output, status_output],
        api_name="synthesize"
    )

    gr.Examples(
        examples=example_rows,
        inputs=[text_input, voice_dropdown, voice_upload],
        outputs=[audio_output, status_output],
        fn=generate_speech,
        cache_examples=False,
    )

# Launch the Gradio app
# The `share=True` argument creates a public link for easy access.
# NOTE(review): anyone who has the public link can reach this app while it runs.
demo.launch(debug=True, share=True)