In [1]:
from functools import lru_cache

import gradio as gr
import numpy as np
import torch
import torchaudio
import torchaudio.transforms as transforms
from fastapi import FastAPI
from transformers import pipeline



In [2]:

# Voice cloning: resample the reference clip, then run the text-to-speech pipeline.
@lru_cache(maxsize=1)
def _load_tts_pipeline():
    """Build the text-to-speech pipeline once and reuse it on later calls.

    Constructing the pipeline loads the model, so doing it per request is
    wasteful; the cache keeps a single instance alive for the kernel session.
    """
    # NOTE(review): confirm that "tortoise-tts" resolves to a real model id for
    # `transformers.pipeline`; it is kept unchanged from the original code.
    return pipeline("text-to-speech", model="tortoise-tts")


def generate_voice(text, reference_audio, target_sample_rate=22050):
    """Synthesize `text` using `reference_audio` as the voice reference.

    Args:
        text: The sentence(s) to speak. Must be a non-blank string.
        reference_audio: Anything `torchaudio.load` accepts (for example a
            path to an audio file).
        target_sample_rate: Sample rate, in Hz, the reference clip is
            resampled to before it is handed to the model. Defaults to 22050,
            the value that was previously hard-coded.

    Returns:
        Whatever the text-to-speech pipeline returns for this input.

    Raises:
        ValueError: If `text` is blank or `reference_audio` is missing.
    """
    # Fail fast with a readable message instead of an opaque error from deep
    # inside torchaudio / the pipeline.
    if not isinstance(text, str) or not text.strip():
        raise ValueError("text must be a non-empty string")
    if reference_audio is None:
        raise ValueError("reference_audio is required")

    # Process reference audio: load it and bring it to the target sample rate.
    waveform, sample_rate = torchaudio.load(reference_audio)
    resampler = transforms.Resample(orig_freq=sample_rate, new_freq=target_sample_rate)
    waveform = resampler(waveform)

    # Reuse the cached pipeline rather than re-loading the model on every call.
    cloned_voice = _load_tts_pipeline()
    # NOTE(review): verify that the pipeline accepts a `speaker` keyword in
    # this form; the call is kept exactly as the original wrote it.
    output_audio = cloned_voice(text, speaker=waveform.numpy())
    return output_audio



In [3]:
# Gradio UI
def voice_cloning_interface(text, reference_audio):
    """Entry point shared by the UI and the API: delegate to `generate_voice`.

    Both arguments are forwarded unchanged and the result of
    `generate_voice` is returned as-is.
    """
    return generate_voice(text, reference_audio)



In [4]:
# FastAPI deployment
def _to_jsonable(value):
    """Recursively turn array-like values into plain Python containers.

    Dicts and lists/tuples are walked element by element; any object exposing
    a callable `tolist()` (for example NumPy arrays or torch tensors) is
    replaced by the result of that call. Everything else is returned unchanged.
    """
    if isinstance(value, dict):
        return {key: _to_jsonable(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_to_jsonable(item) for item in value]
    to_list = getattr(value, "tolist", None)
    if callable(to_list):
        return to_list()
    return value


app = FastAPI()


@app.get("/generate")
def generate(text: str, reference_audio: str):
    """HTTP endpoint: synthesize `text` with the voice from `reference_audio`.

    Args:
        text: Query parameter with the text to speak.
        reference_audio: Query parameter forwarded to
            `voice_cloning_interface` as the reference clip.

    Returns:
        A dict with a single "audio" key holding the synthesis result, with
        array-like values converted to lists so the response can be
        JSON-encoded.
    """
    # NOTE(review): `reference_audio` comes straight from the query string and
    # is passed on unchanged. If it is treated as a file path downstream,
    # restrict it to an allow-listed directory before exposing this publicly.
    result = voice_cloning_interface(text, reference_audio)
    # Raw arrays/tensors are not JSON-encodable; convert them first.
    return {"audio": _to_jsonable(result)}



In [5]:
# Launch UI
# Build the web UI: a text box and a file upload feed the cloning function,
# and the result is rendered through an audio output component.
demo = gr.Interface(
    fn=voice_cloning_interface,
    inputs=["text", "file"],
    outputs="audio",
)

# Start the Gradio server for this interface.
demo.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2561657cc35db0089e.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


