# Fine-Tuning Parler TTS with Your Nepali Voice

To fine-tune the Parler TTS model with your voice, you'll need to:
1. Record your voice samples
2. Prepare a dataset
3. Fine-tune the model

Here's a complete pipeline:

In [2]:
## Step 1: Install Dependencies

# Install required packages
!pip install git+https://github.com/huggingface/parler-tts.git
!pip install soundfile pandas tqdm matplotlib librosa
!pip install gradio # for recording interface


Collecting git+https://github.com/huggingface/parler-tts.git
  Cloning https://github.com/huggingface/parler-tts.git to /private/var/folders/cd/vmj840453cz603ygrc6my4300000gn/T/pip-req-build-zeftw7uz
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/parler-tts.git /private/var/folders/cd/vmj840453cz603ygrc6my4300000gn/T/pip-req-build-zeftw7uz
  Resolved https://github.com/huggingface/parler-tts.git to commit d108732cd57788ec86bc857d99a6cabd66663d68
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Collecting descript-audiotools@ git+https://github.com/descriptinc/audiotools (from parler_tts==0.2.2)
  Cloning https://github.com/descriptinc/audiotools to /private/var/folders/cd/vmj840453cz603ygrc6my4300000gn/T/pip-install-bwv9b5sf/descript-audiotools_5fc7e36798f04e8b8b32d27ed9fcf0d1
  Running command git clone --filter=blob:none 

In [None]:
## Step 2: Create Recording Interface


Loaded 1483 sentences from file


In [None]:
# Create Gradio interface

import os
import gradio as gr
import pandas as pd
import shutil
import traceback

# Make sure the output directory for recordings exists before anything is saved.
# exist_ok=True makes this a no-op when the directory is already present; no
# explicit permission mode is passed, so the platform default applies.
os.makedirs("voice_data", exist_ok=True)

# Load Nepali sentences from text file
def load_sentences_from_file(file_path="nepali_sentences.txt"):
    """Load the prompt sentences to be read aloud, one per non-blank line.

    Args:
        file_path: Path to a UTF-8 text file holding one sentence per line.

    Returns:
        A list of whitespace-stripped, non-empty sentences. If the file
        cannot be read (missing, a directory, no permission, undecodable)
        or holds no sentences, a warning is printed and two built-in
        default Nepali sentences are returned instead.
    """
    try:
        # "utf-8-sig" reads plain UTF-8 too, but also drops a leading
        # byte-order mark; str.strip() does not remove U+FEFF, so with
        # plain "utf-8" the mark would end up inside the first sentence.
        with open(file_path, "r", encoding="utf-8-sig") as f:
            stripped_lines = (line.strip() for line in f)
            sentences = [line for line in stripped_lines if line]
        if not sentences:
            raise ValueError("No sentences found in the file")
        return sentences
    except (OSError, ValueError) as e:
        # OSError covers FileNotFoundError as well as the other ways the
        # read can fail; ValueError covers the empty-file case raised above
        # and UnicodeDecodeError (a ValueError subclass).
        print(f"Warning: {e}. Using default sentences.")
        return [
            "नमस्ते, मेरो नाम जीवन हो।",
            "म नेपाली भाषामा बोल्छु।"
        ]

# Read the prompt sentences once when this cell runs.
# load_sentences_from_file() returns its built-in default sentences when the
# file is missing or contains no sentences, so the count printed below may
# refer to those defaults rather than to the file.
nepali_sentences = load_sentences_from_file()
print(f"Loaded {len(nepali_sentences)} sentences from file")

# Index of the next sample to record; record_sample() reads and increments it.
# NOTE(review): this is reset to 0 every time the cell runs, so file numbering
# restarts at sample_000 and earlier recordings with the same names get
# overwritten.
current_idx = 0

def _prompt_state(message):
    """Bundle a status message with the current counter and the sentence to show next."""
    # The sentence list is cycled with modulo so the index may exceed its length.
    return message, current_idx, nepali_sentences[current_idx % len(nepali_sentences)]


def record_sample(audio_path, text):
    """Save one recording plus its transcript under voice_data/ and advance the prompt.

    Writes three things: a copy of the audio as ``sample_NNN.wav``, the
    transcript as ``sample_NNN.txt``, and a row in ``metadata.csv``.

    Args:
        audio_path: Path of the recorded or uploaded audio file, or None
            when nothing has been recorded yet.
        text: Transcript to store alongside the audio.

    Returns:
        A ``(status message, current_idx, sentence)`` tuple matching the
        three UI outputs. ``current_idx`` is advanced only after every
        write succeeded; on any failure the same index and sentence are
        returned so the sample can be retried.
    """
    global current_idx

    if audio_path is None:
        return _prompt_state("Please record audio first.")

    try:
        # Debug information
        print(f"Received audio path: {audio_path}")
        if not os.path.exists(audio_path):
            return _prompt_state(f"Error: Audio file {audio_path} doesn't exist")

        # Create output directory if it doesn't exist
        os.makedirs("voice_data", exist_ok=True)

        sample_name = f"sample_{current_idx:03d}"

        # The file is copied as-is under a .wav name; no format conversion
        # is performed here.
        output_filename = f"voice_data/{sample_name}.wav"
        shutil.copy(audio_path, output_filename)
        print(f"Copied audio to {output_filename}")

        # Save the text
        with open(f"voice_data/{sample_name}.txt", "w", encoding="utf-8") as f:
            f.write(text)

        # Update metadata
        metadata_file = "voice_data/metadata.csv"
        new_row = {"file": f"{sample_name}.wav", "text": text}

        if os.path.exists(metadata_file):
            metadata = pd.read_csv(metadata_file)
            # The copy above overwrites any earlier recording with this
            # name (the index restarts at 0 with every new session), so
            # drop its stale row too; otherwise the CSV would list the
            # same file twice with possibly different transcripts.
            if "file" in metadata.columns:
                metadata = metadata[metadata["file"] != new_row["file"]]
            metadata = pd.concat([metadata, pd.DataFrame([new_row])], ignore_index=True)
        else:
            metadata = pd.DataFrame([new_row])

        metadata.to_csv(metadata_file, index=False)

        # Advance only now, so a failed save leaves the index untouched.
        saved_idx = current_idx
        current_idx += 1

        return _prompt_state(f"✅ Saved sample {saved_idx}")
    except Exception as e:
        # UI boundary: report the failure in the status box instead of
        # crashing the event handler, and log the full traceback.
        error_details = traceback.format_exc()
        print(f"ERROR: {error_details}")
        return _prompt_state(f"❌ Error saving recording: {str(e)}")

# Build the recording UI: a prompt textbox, microphone/upload inputs, a save
# button with status and counter on the left, and static tips on the right.
# Components are created inside nested context managers, so their creation
# order here is what defines the layout.
with gr.Blocks() as demo:
    gr.Markdown("# Record Your Nepali Voice Samples")
    gr.Markdown("Read the sentence below and record your voice. Try to speak clearly and naturally.")
    
    with gr.Row():
        with gr.Column(scale=3):
            # Starts on the first sentence; the event handlers below replace
            # its value with the sentence returned by record_sample().
            text_to_read = gr.Textbox(
                value=nepali_sentences[0], 
                label="Sentence to Read",
                lines=3
            )
            
            # Two alternative audio inputs, both handing a file path to the
            # handler (type="filepath"): live microphone capture or an upload.
            with gr.Tabs():
                with gr.TabItem("Record (Microphone)"):
                    audio_recorder = gr.Audio(
                        sources=["microphone"], 
                        type="filepath",
                        label="Record Your Voice"
                    )
                with gr.TabItem("Upload Audio"):
                    audio_upload = gr.Audio(
                        type="filepath",
                        label="Upload Audio File"
                    )
            
            record_btn = gr.Button("Save Recording", variant="primary")
            status = gr.Textbox(value="Ready to record", label="Status")
            # Receives current_idx from the handlers after each save attempt.
            count = gr.Number(value=0, label="Samples Recorded")
            
            # Static, collapsible troubleshooting help; debug_info is not
            # referenced by any handler below.
            debug_info = gr.HTML("""
            <details>
                <summary>Microphone Troubleshooting</summary>
                <div id='mic-debug'>
                    <p>If microphone is not working:</p>
                    <ol>
                        <li>Check browser permissions</li>
                        <li>Try refreshing the page</li>
                        <li>Try using the Upload tab instead</li>
                    </ol>
                </div>
            </details>
            """)
        
        with gr.Column(scale=2):
            gr.Markdown("""
            ### Recording Tips:
            1. Use a quiet environment
            2. Speak clearly at a consistent pace
            3. Position yourself about 20cm from the microphone
            4. Maintain consistent tone and volume
            5. Record at least 50 samples for good results
            
            ### Progress:
            - Use the number counter to track your progress
            - Aim for 50-100 recordings for best results
            """)
    
    # Shared handler for both the save button and the upload component.
    def handle_audio(audio, text):
        """Save the sample via record_sample(), or report that no audio was given.

        Returns the same (status, index, sentence) triple in both branches so
        it can feed the [status, count, text_to_read] outputs.
        """
        if audio is not None:
            return record_sample(audio, text)
        else:
            return "No audio detected", current_idx, nepali_sentences[current_idx % len(nepali_sentences)]
    
    # The save button reads only the microphone recorder, not the upload
    # component; uploads are handled by the change event further down.
    record_btn.click(
        handle_audio, 
        inputs=[audio_recorder, text_to_read], 
        outputs=[status, count, text_to_read]
    ).then(
        lambda: None,  # Reset audio recorder by sending None as its new value
        None,
        [audio_recorder]
    )
    
    # Uploads are saved from the upload component's change event rather than
    # the button. handle_audio's None branch covers the case where the event
    # delivers no audio.
    audio_upload.change(
        handle_audio,
        inputs=[audio_upload, text_to_read],
        outputs=[status, count, text_to_read]
    )

# Launch with share=True for easier testing.
# NOTE(review): besides the local URL this also prints a public gradio.live
# URL (see the cell output below); drop share=True if the recorder should
# stay local-only.
demo.launch(share=True)

Loaded 1483 sentences from file
* Running on local URL:  http://127.0.0.1:7867
* Running on public URL: https://5331edc9940d31a1b9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Received audio path: /private/var/folders/cd/vmj840453cz603ygrc6my4300000gn/T/gradio/90fbbb12d4a5a88575b97dd74d25aad25f2265e9170bc63a4cfb3b3ab43b2008/audio.wav
Copied audio to voice_data/sample_000.wav
Received audio path: /private/var/folders/cd/vmj840453cz603ygrc6my4300000gn/T/gradio/e5ef0fc122c66d42c428c778b77016217ce0b18e17018120128fa13690465cd2/audio.wav
Copied audio to voice_data/sample_001.wav
Received audio path: /private/var/folders/cd/vmj840453cz603ygrc6my4300000gn/T/gradio/984ffd29b4a85358276146e2bf1c872c75b2b4830bb9f01b2c4d32c1c9d1d4a8/audio.wav
Copied audio to voice_data/sample_002.wav
Received audio path: /private/var/folders/cd/vmj840453cz603ygrc6my4300000gn/T/gradio/f8d44c2bb91a52702c6a71f6f894d11081a405542d563625e85dbdd66f6ad380/audio.wav
Copied audio to voice_data/sample_003.wav
Received audio path: /private/var/folders/cd/vmj840453cz603ygrc6my4300000gn/T/gradio/a27ee060612d7e82fb03c6de7f4b6521c57d35f37c6f26d5f42b490603ac4309/audio.wav
Copied audio to voice_data/sampl