In [None]:
# F5-TTS Voice Cloning - Simplified Error-Free Approach
# Run each cell step by step in Google Colab

# ================================
# CELL 1: Environment Setup
# ================================
print("🚀 Starting F5-TTS Voice Cloning Setup...")

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Install essential packages
import subprocess
import sys
import os

def run_command(cmd):
    """Run ``cmd`` through the shell and report whether it succeeded.

    The command's output is captured rather than streamed. When the exit
    status is non-zero, the captured stderr is printed with an ``Error:``
    prefix.

    Args:
        cmd: Command line passed to the shell as a single string.

    Returns:
        bool: True when the command exited with status 0, False otherwise.
    """
    completed = subprocess.run(cmd, shell=True, capture_output=True, text=True)
    succeeded = completed.returncode == 0
    if not succeeded:
        print(f"Error: {completed.stderr}")
    return succeeded

# Install dependencies
print("Installing core packages...")

# Every command goes through run_command, which uses shell=True, so anything
# the shell treats specially (such as `>`) must be quoted.
install_commands = [
    # Official PyTorch wheel index for CUDA 11.8; torch, torchaudio and
    # torchvision are pinned together so the three stay mutually compatible.
    "pip install torch==2.1.0 torchaudio==2.1.0 torchvision==0.16.0 --index-url https://download.pytorch.org/whl/cu118",
    # Quoted on purpose: unquoted, the shell parses `>=4.35.0` as a redirection
    # of stdout into a file named "=4.35.0" and pip sees no version constraint.
    'pip install "transformers>=4.35.0"',
    "pip install librosa soundfile pydub numpy scipy",
    "pip install accelerate safetensors",
    "pip install ffmpeg-python",  # Added ffmpeg-python for pydub compatibility
]

# Run every command even if an earlier one fails, then report the failures together
failed_commands = [cmd for cmd in install_commands if not run_command(cmd)]

if failed_commands:
    print("❌ Environment setup finished with errors. Failed commands:")
    for cmd in failed_commands:
        print(f"   {cmd}")
else:
    print("✅ Environment setup complete!")

🚀 Starting F5-TTS Voice Cloning Setup...
Mounted at /content/drive
Installing core packages...
✅ Environment setup complete!


In [None]:
# NOTE: despite its name, this is the *reference* (input) recording of the voice
# to clone; later cells pass it to F5-TTS as the reference audio file.
output_path = "/content/drive/MyDrive/voice cloning/reference_audio.wav"

In [None]:
print("\n📦 Setting up F5-TTS...")

os.chdir('/content')

# Remove existing directory if it exists
if os.path.exists('F5-TTS'):
    run_command('rm -rf F5-TTS')

# Clone repository
print("Cloning F5-TTS repository...")
if not run_command('git clone https://github.com/SWivid/F5-TTS.git'):
    print("❌ Failed to clone repository")
    # Stop here: without the checkout the chdir and install below cannot work,
    # and failing on them would hide the real cause.
    raise RuntimeError("Could not clone https://github.com/SWivid/F5-TTS.git")
print("✅ Repository cloned successfully!")

# Install F5-TTS
os.chdir('/content/F5-TTS')
# The editable install is the only install step: the repository ships no
# requirements.txt, so `pip install -r requirements.txt` can only fail.
if run_command('pip install -e .'):
    print("✅ F5-TTS installation complete!")
else:
    print("❌ F5-TTS installation failed")


📦 Setting up F5-TTS...
Cloning F5-TTS repository...
✅ Repository cloned successfully!
Error: ERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirements.txt'

✅ F5-TTS installation complete!


In [None]:
print("\n📝 Preparing text data...")

# Transcript of the reference recording
reference_text = "You just tell me what works, date, time, and your preferred location. We'll arrange everything for you. No pressure. Seriously. I'm just here to help you"

# Sentence to synthesise in the cloned voice
target_text = "Hey Welcome to Nexi, I'm your personal assistant to help you with all your jaguar land rover queries. We have a wide range of car from SUV to sports, tell me what should i help you with."

print(f"Reference text: {reference_text}")
print(f"\nTarget text: {target_text}")
print("✅ Texts prepared!")


📝 Preparing text data...
Reference text: You just tell me what works, date, time, and your preferred location. We'll arrange everything for you. No pressure. Seriously. I'm just here to help you

Target text: Hey Welcome to Nexi, I'm your personal assistant to help you with all your jaguar land rover queries. We have a wide range of car from SUV to sports, tell me what should i help you with.
✅ Texts prepared!


In [None]:
# FastAPI Fix - Run this to kill previous server and start fresh

import os
import signal
import subprocess
import time

print("🔧 Fixing port conflict...")

# Method 1: Kill processes on port 5000
print("1️⃣ Killing processes on port 5000...")
try:
    # `lsof -ti:5000` prints the PIDs of processes using port 5000, one per line
    result = subprocess.run(['lsof', '-ti:5000'], capture_output=True, text=True)
except OSError:
    # Raised (as FileNotFoundError) when the lsof binary is not installed
    print("   lsof command not available, trying alternative...")
else:
    pids = result.stdout.split()
    if pids:
        for pid in pids:
            try:
                os.kill(int(pid), signal.SIGTERM)
                print(f"   Killed process {pid}")
            except (ValueError, ProcessLookupError, PermissionError) as kill_error:
                # Non-numeric output, process already gone, or not ours to signal:
                # report it and carry on with the remaining PIDs.
                print(f"   Could not kill process {pid}: {kill_error}")
    else:
        print("   No processes found on port 5000")

# Method 2: Alternative kill method
try:
    subprocess.run(['pkill', '-f', 'uvicorn'], capture_output=True)
    subprocess.run(['pkill', '-f', 'flask'], capture_output=True)
    print("   Killed uvicorn and flask processes")
except OSError as pkill_error:
    # pkill binary missing; nothing else to try here
    print(f"   pkill not available: {pkill_error}")

# Wait a moment
time.sleep(2)

# Method 3: Use a different port
print("2️⃣ Starting FastAPI on port 8000 instead...")

# Install FastAPI and dependencies
!pip install fastapi uvicorn python-multipart -q

from fastapi import FastAPI, HTTPException, BackgroundTasks
from fastapi.responses import FileResponse, JSONResponse
from pydantic import BaseModel
import uvicorn
import threading
import time
import os
from typing import Optional

# Global variable to store the target text from API
api_target_text = None
api_ready = False

# Pydantic models for request/response

# Request body accepted by POST /generate_voice
class VoiceRequest(BaseModel):
    target_text: str

# Response body returned by POST /generate_voice
class VoiceResponse(BaseModel):
    status: str
    message: str
    target_text: str  # the handler echoes back the text it stored

# Response body returned by GET /health
class HealthResponse(BaseModel):
    status: str
    message: str
    timestamp: float  # the handler fills this with time.time()
    service: str

# Response body returned by GET /status
class StatusResponse(BaseModel):
    api_ready: bool
    target_text_received: bool
    audio_generated: bool  # True when one of the generated .wav files exists on disk
    current_target_text: Optional[str]  # None until a text has been posted

# Create FastAPI app
app = FastAPI(
    title="F5-TTS Voice Cloning API",
    description="API for voice cloning using F5-TTS",
    version="1.0.0"
)

@app.get("/health", response_model=HealthResponse)
async def health_check():
    """Simple health check endpoint to test if API is running"""
    # Static payload apart from the timestamp, which is taken at request time
    health_payload = HealthResponse(
        status="healthy",
        message="Voice Cloning API is running successfully!",
        timestamp=time.time(),
        service="F5-TTS Voice Cloning API",
    )
    return health_payload

@app.post("/generate_voice", response_model=VoiceResponse)
async def generate_voice_api(request: VoiceRequest):
    """Receive target text for voice generation"""
    global api_target_text, api_ready

    try:
        # Publish the text and the ready flag through the notebook-level globals
        api_target_text, api_ready = request.target_text, True

        print(f"✅ Received target text via API: {api_target_text}")

        acknowledgement = VoiceResponse(
            status="success",
            message="Target text received. Voice generation will start.",
            target_text=api_target_text,
        )
        return acknowledgement

    except Exception as error:
        # Any unexpected failure is surfaced to the client as HTTP 500
        raise HTTPException(status_code=500, detail=str(error))

@app.get("/get_audio")
async def get_generated_audio():
    """Endpoint to download the generated audio file"""
    # Candidates in order of preference: the re-saved "fixed" file first,
    # then the raw generation output.
    audio_files = [
        "/content/generated_voice_fixed.wav",
        "/content/generated_voice.wav"
    ]

    try:
        audio_file = next((path for path in audio_files if os.path.exists(path)), None)
        if audio_file is not None:
            return FileResponse(
                audio_file,
                media_type="audio/wav",
                filename="generated_voice.wav"
            )
    except Exception as e:
        # Unexpected failure while locating or preparing the file
        raise HTTPException(status_code=500, detail=str(e))

    # Raised outside the try block on purpose: HTTPException is itself an
    # Exception, so raising it inside would be caught above and reach the
    # client as a 500 instead of a 404.
    raise HTTPException(status_code=404, detail="No generated audio file found")

@app.get("/status", response_model=StatusResponse)
async def get_status():
    """Check if API is ready and if audio is generated"""
    global api_ready, api_target_text

    # Audio counts as generated as soon as either known output file is on disk
    known_outputs = (
        "/content/generated_voice_fixed.wav",
        "/content/generated_voice.wav",
    )
    audio_exists = False
    for candidate in known_outputs:
        if os.path.exists(candidate):
            audio_exists = True
            break

    text_received = api_target_text is not None

    return StatusResponse(
        api_ready=api_ready,
        target_text_received=text_received,
        audio_generated=audio_exists,
        current_target_text=api_target_text,
    )

@app.get("/")
async def root():
    """Root endpoint with API information"""
    # Map of endpoint name -> path, for clients discovering the API
    endpoint_index = {
        "health": "/health",
        "generate_voice": "/generate_voice",
        "get_audio": "/get_audio",
        "status": "/status",
        "docs": "/docs",
    }
    api_info = {
        "message": "F5-TTS Voice Cloning API",
        "version": "1.0.0",
        "endpoints": endpoint_index,
    }
    return api_info

import socket  # local import: only needed for the readiness probe below

# Stop any existing FastAPI threads
print("3️⃣ Cleaning up existing threads...")
# A server started by an earlier run of this cell lives in a thread of this
# kernel, so pkill cannot reach it. Ask it to shut down so port 8000 is free.
_previous_server = globals().get("fastapi_server")
_previous_thread = globals().get("fastapi_thread")
if _previous_server is not None:
    _previous_server.should_exit = True
    if _previous_thread is not None and _previous_thread.is_alive():
        _previous_thread.join(timeout=10)

# Keeping the Server object (rather than calling uvicorn.run) is what makes the
# shutdown above possible on the next run of this cell.
fastapi_server = uvicorn.Server(
    uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="warning")
)

# Function to run FastAPI app in background on port 8000
def run_fastapi_app():
    """Serve `app` on 0.0.0.0:8000; blocks until the server is asked to exit."""
    fastapi_server.run()


def _wait_for_port(host, port, timeout=10.0, interval=0.25):
    """Return True once a TCP connection to host:port succeeds, False on timeout."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with socket.create_connection((host, port), timeout=interval):
                return True
        except OSError:
            time.sleep(interval)
    return False


# Start FastAPI app in a separate thread
print("4️⃣ Starting FastAPI on port 8000...")
fastapi_thread = threading.Thread(target=run_fastapi_app, daemon=True)
fastapi_thread.start()

# Wait for FastAPI to start: poll the port instead of sleeping a fixed time,
# and require the new thread to be alive so a bind failure is not reported
# as success.
if _wait_for_port("127.0.0.1", 8000) and fastapi_thread.is_alive():
    print("🚀 FastAPI Server Started Successfully!")
else:
    print("❌ FastAPI server did not start on port 8000")

print("📡 NEW API URLs (Port 8000):")
print("  GET  http://localhost:8000/health - Health check")
print("  POST http://localhost:8000/generate_voice - Send target text")
print("  GET  http://localhost:8000/get_audio - Download audio")
print("  GET  http://localhost:8000/status - Check status")
print("  GET  http://localhost:8000/docs - Interactive docs")

print("\n🧪 JSON Example for POST /generate_voice:")
print('''{
  "target_text": "Your text message here"
}''')

# Internal health check on new port
print("\n🔧 Running health check on port 8000...")
try:
    import requests
    response = requests.get("http://localhost:8000/health", timeout=5)
    if response.status_code == 200:
        print("✅ FastAPI health check passed on port 8000!")
        print(f"   Response: {response.json()}")
    else:
        print(f"⚠️ Health check failed: {response.status_code}")
except Exception as e:
    print(f"⚠️ Health check error: {e}")

print("\n⏳ Waiting for target text via API...")
print("🌐 Visit http://localhost:8000/docs for interactive testing!")
print("📍 Use PORT 8000 in Postman, not 5000!")

🔧 Fixing port conflict...
1️⃣ Killing processes on port 5000...
   No processes found on port 5000
   Killed uvicorn and flask processes
2️⃣ Starting FastAPI on port 8000 instead...
3️⃣ Cleaning up existing threads...
4️⃣ Starting FastAPI on port 8000...
🚀 FastAPI Server Started Successfully!
📡 NEW API URLs (Port 8000):
  GET  http://localhost:8000/health - Health check
  POST http://localhost:8000/generate_voice - Send target text
  GET  http://localhost:8000/get_audio - Download audio
  GET  http://localhost:8000/status - Check status
  GET  http://localhost:8000/docs - Interactive docs

🧪 JSON Example for POST /generate_voice:
{
  "target_text": "Your text message here"
}

🔧 Running health check on port 8000...
✅ FastAPI health check passed on port 8000!
   Response: {'status': 'healthy', 'message': 'Voice Cloning API is running successfully!', 'timestamp': 1750221448.9792147, 'service': 'F5-TTS Voice Cloning API'}

⏳ Waiting for target text via API...
🌐 Visit http://localhost:8000/

In [None]:
# FastAPI Test on Port 8000 - Run this in a NEW CELL

import requests
import json
import time

print("🧪 Testing FastAPI on Port 8000...")

BASE_URL = "http://localhost:8000"

# Test 1: Health Check
print("\n1️⃣ Health Check Test:")
try:
    response = requests.get(f'{BASE_URL}/health', timeout=5)
    print(f"Status Code: {response.status_code}")
    print(f"Response: {json.dumps(response.json(), indent=2)}")
    if response.status_code == 200:
        print("✅ Health check passed!")
    else:
        print("❌ Health check failed!")
except Exception as e:
    print(f"❌ Health check error: {e}")
    print("🔧 Make sure your FastAPI is running on port 8000")
    # Abort only this cell. Calling exit() in a notebook asks the kernel itself
    # to shut down, which would also kill the in-process API server thread.
    raise SystemExit("FastAPI server is not reachable; skipping remaining tests")

# Test 2: Send Target Text
print("\n2️⃣ Sending Target Text:")
test_text = "Hello from FastAPI on port 8000! This is my test message for voice cloning."

try:
    payload = {"target_text": test_text}
    response = requests.post(f'{BASE_URL}/generate_voice',
                           json=payload,
                           timeout=10)

    print(f"Status Code: {response.status_code}")
    print(f"Response: {json.dumps(response.json(), indent=2)}")

    if response.status_code == 200:
        print("✅ Text sent successfully!")
    else:
        print("❌ Failed to send text!")

except Exception as e:
    print(f"❌ Send text error: {e}")

# Test 3: Check Status
print("\n3️⃣ Status Check:")
try:
    response = requests.get(f'{BASE_URL}/status', timeout=5)
    print(f"Status Code: {response.status_code}")
    status_data = response.json()
    print(f"Status: {json.dumps(status_data, indent=2)}")

    if status_data.get('target_text_received'):
        print("✅ Target text was received!")
        print(f"📝 Received text: {status_data.get('current_target_text')}")
    else:
        print("❌ Target text not received!")

except Exception as e:
    print(f"❌ Status check error: {e}")

print("\n🎯 FastAPI Test Complete!")
print(f"💡 API Documentation: {BASE_URL}/docs")
print("📍 Use PORT 8000 for all your API calls!")

# Show Postman examples
print("\n📮 Postman Testing Examples:")
print("="*40)
print("Health Check:")
print(f"  GET {BASE_URL}/health")
print("\nSend Text:")
print(f"  POST {BASE_URL}/generate_voice")
print("  Headers: Content-Type: application/json")
print("  Body: {\"target_text\": \"Your message here\"}")
print("\nCheck Status:")
print(f"  GET {BASE_URL}/status")

🧪 Testing FastAPI on Port 8000...

1️⃣ Health Check Test:
Status Code: 200
Response: {
  "status": "healthy",
  "message": "Voice Cloning API is running successfully!",
  "timestamp": 1750221454.2774675,
  "service": "F5-TTS Voice Cloning API"
}
✅ Health check passed!

2️⃣ Sending Target Text:
✅ Received target text via API: Hello from FastAPI on port 8000! This is my test message for voice cloning.
Status Code: 200
Response: {
  "status": "success",
  "message": "Target text received. Voice generation will start.",
  "target_text": "Hello from FastAPI on port 8000! This is my test message for voice cloning."
}
✅ Text sent successfully!

3️⃣ Status Check:
Status Code: 200
Status: {
  "api_ready": true,
  "target_text_received": true,
  "audio_generated": false,
  "current_target_text": "Hello from FastAPI on port 8000! This is my test message for voice cloning."
}
✅ Target text was received!
📝 Received text: Hello from FastAPI on port 8000! This is my test message for voice cloning.

🎯

In [None]:
# API Test Script - Run this in a separate cell to test your API

import requests
import json
import time

# Test configuration
# The notebook's API server (FastAPI) listens on port 8000; nothing serves port 5000.
API_BASE_URL = "http://localhost:8000"  # Change this if running on different host/port
TEST_TEXT = "Hello, this is a test message from my chatbot API. Can you hear me clearly?"

print("🧪 Starting API Test...")
print("=" * 50)

# Test 1: Check if API server is running
print("\n1️⃣ Testing API Status...")
try:
    response = requests.get(f"{API_BASE_URL}/status", timeout=5)
    if response.status_code == 200:
        status_data = response.json()
        print("✅ API Server is running!")
        print(f"   Status: {json.dumps(status_data, indent=2)}")
    else:
        print(f"❌ API Status check failed: {response.status_code}")
except requests.exceptions.RequestException as e:
    print(f"❌ Cannot connect to API server: {e}")
    print("   Make sure the FastAPI server cell has been run and is listening on port 8000")
    # Abort only this cell. Calling exit() in a notebook asks the kernel itself
    # to shut down, which would also kill the in-process API server thread.
    raise SystemExit("API server is not reachable; skipping remaining tests")

# Test 2: Send target text via POST
print("\n2️⃣ Sending target text via POST...")
try:
    payload = {
        "target_text": TEST_TEXT
    }

    headers = {
        "Content-Type": "application/json"
    }

    print(f"   Sending: {TEST_TEXT}")

    response = requests.post(
        f"{API_BASE_URL}/generate_voice",
        json=payload,
        headers=headers,
        timeout=10
    )

    if response.status_code == 200:
        result = response.json()
        print("✅ POST request successful!")
        print(f"   Response: {json.dumps(result, indent=2)}")
    else:
        print(f"❌ POST request failed: {response.status_code}")
        print(f"   Error: {response.text}")

except requests.exceptions.RequestException as e:
    print(f"❌ POST request failed: {e}")

# Test 3: Check status after sending text
print("\n3️⃣ Checking status after sending text...")
try:
    time.sleep(1)  # Wait a moment
    response = requests.get(f"{API_BASE_URL}/status", timeout=5)

    if response.status_code == 200:
        status_data = response.json()
        print("✅ Status check successful!")
        print(f"   Updated Status: {json.dumps(status_data, indent=2)}")

        # Verify the text was received
        if status_data.get('target_text_received') and status_data.get('current_target_text') == TEST_TEXT:
            print("✅ Target text was successfully received and stored!")
        else:
            print("❌ Target text was not properly received")

    else:
        print(f"❌ Status check failed: {response.status_code}")

except requests.exceptions.RequestException as e:
    print(f"❌ Status check failed: {e}")

# Test 4: Test error handling (missing target_text)
print("\n4️⃣ Testing error handling...")
try:
    # Send request without target_text
    response = requests.post(
        f"{API_BASE_URL}/generate_voice",
        json={},  # Empty payload
        headers={"Content-Type": "application/json"},
        timeout=10
    )

    # FastAPI rejects a body that fails model validation with 422;
    # 400 is still accepted for servers that validate by hand.
    if response.status_code in (400, 422):
        error_data = response.json()
        print("✅ Error handling works correctly!")
        print(f"   Expected {response.status_code} error: {error_data}")
    else:
        print(f"⚠️ Unexpected response for empty payload: {response.status_code}")

except requests.exceptions.RequestException as e:
    print(f"❌ Error handling test failed: {e}")

print("\n" + "=" * 50)
print("🎯 Test Summary:")
print("✅ If all tests passed, your API is working correctly!")
print("✅ Your voice cloning notebook should now use the text you sent")
print("\n💡 Next steps:")
print("1. Check your voice generation cell output")
print("2. The target_text variable should now contain your test message")
print("3. Voice generation should proceed with your API text")

# Alternative test using curl command (for reference)
print("\n🔧 Alternative test using curl command:")
print("You can also test manually using this curl command:")
print(f"""
curl -X POST {API_BASE_URL}/generate_voice \\
  -H 'Content-Type: application/json' \\
  -d '{{"target_text": "Your test message here"}}'
""")

print("\nOr check status with:")
print(f"curl -X GET {API_BASE_URL}/status")

INFO:werkzeug:127.0.0.1 - - [17/Jun/2025 11:34:56] "GET /status HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [17/Jun/2025 11:34:56] "POST /generate_voice HTTP/1.1" 200 -


🧪 Starting API Test...

1️⃣ Testing API Status...
✅ API Server is running!
   Status: {
  "api_ready": false,
  "audio_generated": false,
  "current_target_text": null,
  "target_text_received": false
}

2️⃣ Sending target text via POST...
   Sending: Hello, this is a test message from my chatbot API. Can you hear me clearly?
✅ Received target text via API: Hello, this is a test message from my chatbot API. Can you hear me clearly?
✅ POST request successful!
   Response: {
  "message": "Target text received. Voice generation will start.",
  "status": "success",
  "target_text": "Hello, this is a test message from my chatbot API. Can you hear me clearly?"
}

3️⃣ Checking status after sending text...


INFO:werkzeug:127.0.0.1 - - [17/Jun/2025 11:34:57] "GET /status HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [17/Jun/2025 11:34:57] "[31m[1mPOST /generate_voice HTTP/1.1[0m" 400 -


✅ Status check successful!
   Updated Status: {
  "api_ready": true,
  "audio_generated": false,
  "current_target_text": "Hello, this is a test message from my chatbot API. Can you hear me clearly?",
  "target_text_received": true
}
✅ Target text was successfully received and stored!

4️⃣ Testing error handling...
✅ Error handling works correctly!
   Expected 400 error: {'error': 'target_text parameter is required'}

🎯 Test Summary:
✅ If all tests passed, your API is working correctly!
✅ Your voice cloning notebook should now use the text you sent

💡 Next steps:
1. Check your voice generation cell output
2. The target_text variable should now contain your test message
3. Voice generation should proceed with your API text

🔧 Alternative test using curl command:
You can also test manually using this curl command:

curl -X POST http://localhost:5000/generate_voice \
  -H 'Content-Type: application/json' \
  -d '{"target_text": "Your test message here"}'


Or check status with:
curl -X GE

In [None]:
print("\n🎤 Generating cloned voice...")

import torch
import torchaudio
import numpy as np
import sys
import os
import subprocess

sys.path.append('/content/F5-TTS/src')

# Set device first (fix for device not defined error)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Fix the tensor/tuple issue by using direct inference approach
try:
    # Import necessary modules (remove problematic import)
    from f5_tts.model import DiT, UNetT
    from f5_tts.infer.utils_infer import (
        load_checkpoint,
        preprocess_ref_audio_text,
        infer_process,
        load_vocoder
        # Removed remove_silence as it's causing import error
    )
    import torchaudio.transforms as T

    print("✅ F5-TTS modules imported successfully!")

    # Load model configuration
    model_cfg = dict(
        dim=1024,
        depth=22,
        heads=16,
        ff_mult=2,
        text_dim=512,
        conv_layers=4
    )

    # Initialize model
    print("Loading F5-TTS model...")
    model = DiT(**model_cfg)

    # Load checkpoint with proper device handling
    ckpt_path = "hf://SWivid/F5-TTS/F5TTS_Base/model_1200000.safetensors"
    model, _, _ = load_checkpoint(model, ckpt_path, device=str(device))

    print("✅ Model loaded successfully!")

    # Process reference audio and text
    print("Processing reference audio...")
    ref_audio, ref_text = preprocess_ref_audio_text(
        output_path,
        reference_text,
        device=device
    )

    print("Generating speech... This will take several minutes...")

    # Generate audio using infer_process
    final_wave, final_sample_rate, spectrogram = infer_process(
        ref_audio=ref_audio,
        ref_text=ref_text,
        gen_text=target_text,
        model=model,
        cross_fade_duration=0.15,
        speed=1.0,
        device=device
    )

    # Fix the tuple/tensor issue by ensuring proper tensor format
    if isinstance(final_wave, tuple):
        final_wave = final_wave[0]  # Take first element if it's a tuple

    # Ensure tensor is in correct format and shape
    if len(final_wave.shape) == 1:
        final_wave = final_wave.unsqueeze(0)  # Add channel dimension if needed
    elif len(final_wave.shape) == 3:
        final_wave = final_wave.squeeze(0)  # Remove batch dimension if present

    # Make sure we have the right shape: [channels, samples]
    if final_wave.shape[0] > final_wave.shape[1]:
        final_wave = final_wave.transpose(0, 1)

    # Save the generated audio
    output_file = "/content/generated_voice.wav"
    torchaudio.save(output_file, final_wave.cpu(), final_sample_rate)

    print(f"✅ Voice generation complete!")
    print(f"Generated file: {output_file}")

    # Create a simple silence removal function instead of using the problematic import
    def simple_remove_silence(audio, sample_rate, threshold=0.01):
        """Trim leading and trailing samples that are not louder than ``threshold``.

        Args:
            audio: Waveform; anything exposing ``.numpy()`` is converted first.
                For multi-dimensional input only the first channel is kept.
            sample_rate: Accepted but not used by the computation.
            threshold: Absolute amplitude a sample must exceed to count as sound.

        Returns:
            A tensor holding the span from the first to the last loud sample,
            or the whole (first-channel) signal when nothing exceeds the threshold.
        """
        samples = audio.numpy() if hasattr(audio, 'numpy') else audio
        if len(samples.shape) > 1:
            samples = samples[0]  # keep the first channel only

        loud = np.abs(samples) > threshold
        if not np.any(loud):
            return torch.tensor(samples)

        # argmax on a boolean array yields the index of the first True
        first_loud = np.argmax(loud)
        trailing_quiet = np.argmax(loud[::-1])
        last_loud = len(samples) - trailing_quiet - 1
        return torch.tensor(samples[first_loud:last_loud + 1])

    # Create a clean version without silence
    try:
        clean_wave = simple_remove_silence(final_wave.squeeze().cpu(), final_sample_rate)
        clean_output = "/content/generated_voice_clean.wav"
        torchaudio.save(clean_output, clean_wave.unsqueeze(0), final_sample_rate)
        print(f"✅ Clean version saved: {clean_output}")
    except Exception as clean_error:
        print(f"Note: Clean version creation skipped - {clean_error}")

except Exception as e:
    print(f"❌ Direct inference failed: {e}")
    print("Trying gradio interface approach...")

    # Alternative using gradio interface approach (with device fix)
    try:
        # Set environment variable to fix hash seed issue
        os.environ['PYTHONHASHSEED'] = '0'

        # Try using the gradio app approach
        from f5_tts.infer.infer_gradio import infer, load_model

        print("Loading model via gradio interface...")
        model, vocoder = load_model("F5-TTS", "F5TTS_Base", str(device))

        print("Generating audio via gradio interface...")
        result = infer(
            ref_audio_input=output_path,
            ref_text_input=reference_text,
            gen_text_input=target_text,
            model_obj=model,
            vocoder_obj=vocoder,
            cross_fade_duration=0.15,
            speed=1.0
        )

        if result:
            print("✅ Voice generated using gradio interface!")
        else:
            print("❌ Gradio interface approach failed")

    except Exception as e2:
        print(f"❌ Gradio approach failed: {e2}")

        # Final fallback - simple file-based approach with fixed tensor handling
        print("Trying simple file-based generation...")
        try:
            # Create a simple script file to avoid CLI issues
            script_content = f'''
import torch
import torchaudio
import sys
import numpy as np
sys.path.append('/content/F5-TTS')

from f5_tts.api import F5TTS

# Initialize F5TTS
f5tts = F5TTS()

# Generate audio
audio = f5tts.infer(
    ref_file="{output_path}",
    ref_text="{reference_text}",
    gen_text="{target_text}",
    cross_fade_duration=0.15,
    speed=1.0
)

# Handle tuple/tensor issue properly
if isinstance(audio, tuple):
    audio = audio[0]

print(f"Audio type: {{type(audio)}}")
if hasattr(audio, 'shape'):
    print(f"Audio shape: {{audio.shape}}")

# Convert numpy array to torch tensor first
if isinstance(audio, np.ndarray):
    print("Converting numpy array to torch tensor...")
    audio = torch.from_numpy(audio.copy())

# Now handle tensor shape issues
if hasattr(audio, 'shape'):
    # Ensure correct tensor format
    if len(audio.shape) == 1:
        audio = audio.unsqueeze(0)  # Add channel dimension
    elif len(audio.shape) == 3:
        audio = audio.squeeze(0)   # Remove batch dimension

    # Make sure channels are first dimension
    if audio.shape[0] > audio.shape[1]:
        audio = audio.transpose(0, 1)

    print(f"Final audio shape: {{audio.shape}}")

# Save audio with proper error handling
try:
    torchaudio.save("/content/generated_voice.wav", audio, 24000)
    print("✅ Audio saved successfully to /content/generated_voice.wav!")
except Exception as save_error:
    print(f"Save error: {{save_error}}")
    # Try alternative save method
    try:
        import soundfile as sf
        if hasattr(audio, 'numpy'):
            audio_np = audio.numpy()
        else:
            audio_np = audio

        if len(audio_np.shape) > 1:
            audio_np = audio_np[0]  # Take first channel

        sf.write("/content/generated_voice.wav", audio_np, 24000)
        print("✅ Audio saved using soundfile!")
    except Exception as sf_error:
        print(f"Soundfile save error: {{sf_error}}")
        # Final fallback - save as numpy
        if isinstance(audio, torch.Tensor):
            audio_np = audio.numpy()
        else:
            audio_np = audio

        np.save("/content/generated_voice.npy", audio_np)
        print("Audio saved as numpy array to /content/generated_voice.npy")
'''

            # Write and execute script
            with open('/content/generate_voice.py', 'w') as f:
                f.write(script_content)

            # Run the script
            result = subprocess.run([sys.executable, '/content/generate_voice.py'],
                                  capture_output=True, text=True,
                                  env={**os.environ, 'PYTHONHASHSEED': '0'})

            if result.returncode == 0:
                print("✅ Voice generated using script approach!")
                print("Output:", result.stdout)
            else:
                print(f"Script approach error: {result.stderr}")
                print("Script output:", result.stdout)

        except Exception as e3:
            print(f"❌ All approaches failed. Error: {e3}")
            print("Please try running the notebook again or check GPU memory.")

# Additional fallback - try using direct torchaudio operations
print("\nTrying direct audio generation fallback...")
try:
    # Candidate files to look for; the first one that loads is re-saved
    # through torchaudio under a new name.
    potential_files = ["/content/generated_voice.wav", "/tmp/generated_audio.wav"]

    for file_path in potential_files:
        if not os.path.exists(file_path):
            continue

        print(f"Found audio file: {file_path}")
        try:
            # Load and resave with proper format
            waveform, sample_rate = torchaudio.load(file_path)
            print(f"Loaded audio shape: {waveform.shape}, sample_rate: {sample_rate}")

            # Promote a 1-D waveform to two dimensions before saving
            if waveform.ndim == 1:
                waveform = waveform.unsqueeze(0)

            # Save with corrected format
            output_path_fixed = "/content/generated_voice_fixed.wav"
            torchaudio.save(output_path_fixed, waveform, sample_rate)
            print(f"✅ Audio successfully saved to: {output_path_fixed}")
            break

        except Exception as load_error:
            # Report the failure and fall through to the next candidate
            print(f"Error processing {file_path}: {load_error}")

except Exception as fallback_error:
    print(f"Fallback approach failed: {fallback_error}")

print("\n🎵 Voice cloning process completed. Check the output files!")


🎤 Generating cloned voice...
Using device: cuda
❌ Direct inference failed: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject
Trying gradio interface approach...
❌ Gradio approach failed: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject
Trying simple file-based generation...
✅ Voice generated using script approach!
Output: Download Vocos from huggingface charactr/vocos-mel-24khz

vocab :  /content/F5-TTS/src/f5_tts/infer/examples/vocab.txt
token :  custom
model :  /root/.cache/huggingface/hub/models--SWivid--F5-TTS/snapshots/84e5a410d9cead4de2f847e7c9369a6440bdfaca/F5TTS_v1_Base/model_1250000.safetensors 

Converting audio...
Using custom reference text...

ref_text   You just tell me what works, date, time, and your preferred location. We'll arrange everything for you. No pressure. Seriously. I'm just here to help you. 
gen_text 0 Hey Welcome to Nexi, I'm your per

In [None]:
# Cell 6: Pickling the model
import pickle

# Pickle the model only when an earlier cell left a usable `model` behind
if 'model' not in locals() or model is None:
    print("⚠️ Model was not successfully loaded or defined in the previous cell. Skipping pickling.")
else:
    try:
        # Relative path: the file is written to the current working directory
        with open('model_pickle.pkl', 'wb') as pickle_file:
            pickle.dump(model, pickle_file)
        print("✅ Model successfully pickled!")
    except Exception as pickle_error:
        print(f"❌ Error pickling model: {pickle_error}")

✅ Model successfully pickled!
