# Qwen3-TTS Voice Cloning API (Colab)

Run on a free T4 GPU to serve Qwen3-TTS as an API for the voice app.

**First time**: Run cells 1-5 to create and save your voice clone prompt to Google Drive.

**Each session**: Run cells 1-4, then cell 6 to start the API server.

**Sharing**: Send this notebook link — each person uses their own reference audio + Google Drive.

In [None]:
# Cell 1: Install dependencies
# qwen-tts provides the model loaded in Cell 4; pyngrok, fastapi, uvicorn and
# soundfile are used by the API server in Cell 6. -q keeps pip's output short.
!pip install -q qwen-tts pyngrok fastapi uvicorn soundfile

In [None]:
# Cell 2: Check GPU
import torch
# Report which GPU (if any) the runtime has, then refuse to continue without one.
has_gpu = torch.cuda.is_available()
gpu_name = torch.cuda.get_device_name(0) if has_gpu else 'NONE'
print(f"GPU: {gpu_name}")

if has_gpu:
    props = torch.cuda.get_device_properties(0)
    # The attribute is normally `total_memory`; fall back to `total_mem`
    # (and finally 0) when it is missing or falsy.
    vram = getattr(props, 'total_memory', None)
    if not vram:
        vram = getattr(props, 'total_mem', 0)
    print(f"VRAM: {vram / 1e9:.1f} GB")

assert has_gpu, "GPU required! Go to Runtime > Change runtime type > T4 GPU"

In [None]:
# Cell 3: (Optional) Install flash-attn — only works on Ampere+ GPUs (A100, L4, etc.)
# T4 GPUs do NOT support flash-attn. Skip this cell on T4.
# On a supported GPU, uncomment the next line to install it:
# !pip install -q flash-attn --no-build-isolation
print("Skipping flash-attn (not supported on T4). Using default attention.")

In [None]:
# Cell 4: Load model
import torch
from qwen_tts import Qwen3TTSModel

# Load the base Qwen3-TTS checkpoint onto the first CUDA device in bfloat16.
# `model` is the notebook-wide handle used by Cells 5 and 6.
load_options = {
    "device_map": "cuda:0",
    "dtype": torch.bfloat16,
}
model = Qwen3TTSModel.from_pretrained("Qwen/Qwen3-TTS-12Hz-1.7B-Base", **load_options)
print("Model loaded!")

In [None]:
# Cell 5: Extract & save voice clone prompt (RUN ONCE)
# Upload your reference audio when prompted, then it saves to Google Drive.
# You also need to provide the transcript of what's said in the reference audio.

from google.colab import drive, files
import os
import pickle

# Mount Drive and make sure the folder that will hold the saved prompt exists.
drive.mount('/content/drive')
os.makedirs('/content/drive/MyDrive/qwen3-tts', exist_ok=True)

print("Upload your reference audio file (WAV/MP3, 10-30 seconds of clear speech):")
uploaded = files.upload()
# Fail with a clear message instead of an IndexError when nothing was uploaded
# (e.g. the upload dialog was cancelled).
if not uploaded:
    raise RuntimeError(
        "No file was uploaded. Re-run this cell and choose a reference audio file."
    )
# Use the first uploaded file as the reference audio.
ref_path = next(iter(uploaded))
print(f"Using: {ref_path}")

# Strip stray whitespace from the typed transcript and require it to be
# non-empty, because the prompt below is built with x_vector_only_mode=False.
ref_text = input("Enter the transcript of the reference audio: ").strip()
if not ref_text:
    raise ValueError(
        "The transcript is empty. Enter what is said in the reference audio, "
        "or set x_vector_only_mode=True below to clone without a transcript."
    )

# Create voice clone prompt (includes x-vector + acoustic info)
# Set x_vector_only_mode=True if you don't want to provide ref_text (lower quality)
voice_clone_prompt = model.create_voice_clone_prompt(
    ref_audio=ref_path,
    ref_text=ref_text,
    x_vector_only_mode=False,
)

# Persist the prompt to Drive so later sessions (Cell 6) can load it without
# re-uploading the audio.
save_path = '/content/drive/MyDrive/qwen3-tts/voice_clone_prompt.pkl'
with open(save_path, 'wb') as f:
    pickle.dump(voice_clone_prompt, f)
print(f"Voice clone prompt saved to {save_path}")
print("You won't need to run this cell again unless you want a different voice.")

In [None]:
# Cell 6: Start TTS API server
# Run this each session after cells 1-4.
# SETUP: Add your ngrok token as a Colab Secret named NGROK_TOKEN
#   Click the 🔑 (Secrets) icon in the left sidebar → Add new secret → Name: NGROK_TOKEN

from google.colab import drive, userdata
import torch
import os
import io
import pickle
import soundfile as sf
from fastapi import FastAPI, HTTPException
from fastapi.responses import Response
from pydantic import BaseModel
from pyngrok import ngrok
import uvicorn
import threading

# Restore the voice clone prompt that Cell 5 pickled to Google Drive.
prompt_path = '/content/drive/MyDrive/qwen3-tts/voice_clone_prompt.pkl'
drive.mount('/content/drive', force_remount=False)
assert os.path.exists(prompt_path), f"Voice clone prompt not found at {prompt_path}. Run Cell 5 first!"
# NOTE: pickle.load executes whatever the file contains; only load a prompt
# file that you created yourself.
with open(prompt_path, 'rb') as prompt_file:
    voice_clone_prompt = pickle.load(prompt_file)
print("Loaded voice clone prompt from Drive")

# FastAPI application that the route handlers below register on.
app = FastAPI()

class TTSRequest(BaseModel):
    """JSON request body accepted by the POST /tts endpoint."""

    # Text to synthesize; the endpoint rejects empty or whitespace-only text.
    text: str
    # Passed through to model.generate_voice_clone(language=...); defaults to English.
    language: str = "English"

# Sync handlers run in FastAPI's worker threadpool, so this lock keeps the
# single GPU model limited to one generation at a time.
_generate_lock = threading.Lock()


@app.post("/tts")
def tts(req: TTSRequest):
    """Synthesize ``req.text`` in the cloned voice and return it as WAV audio.

    Declared as a plain ``def`` rather than ``async def``: generation is a long
    blocking call, and running it inside a coroutine would stall the event loop
    so that no other request (including ``/health``) could be served meanwhile.

    Args:
        req: Request body with the text to speak and the language name.

    Returns:
        A ``Response`` whose body is 16-bit PCM WAV data (``audio/wav``).

    Raises:
        HTTPException: 400 if the text is empty or whitespace-only; 500 with
            the underlying error message if generation or encoding fails.
    """
    if not req.text.strip():
        raise HTTPException(400, "Empty text")
    try:
        with _generate_lock, torch.no_grad():
            wavs, sr = model.generate_voice_clone(
                text=req.text,
                language=req.language,
                voice_clone_prompt=voice_clone_prompt,
            )
        # Encode the first generated waveform as WAV bytes in memory.
        buf = io.BytesIO()
        sf.write(buf, wavs[0], sr, format='WAV', subtype='PCM_16')
    except Exception as e:
        # Boundary handler: report the failure to the client, keeping the cause chained.
        raise HTTPException(500, str(e)) from e
    return Response(content=buf.getvalue(), media_type="audio/wav")

@app.get("/health")
async def health():
    """Liveness probe: always answers with a fixed "ok" status payload."""
    payload = {"status": "ok"}
    return payload

import time

# Set ngrok auth token from Colab Secrets
ngrok.set_auth_token(userdata.get('NGROK_TOKEN'))

# Run server in background thread. It is started BEFORE the tunnel is opened so
# the public URL never points at a port nothing is listening on yet.
server = uvicorn.Server(
    uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="info")
)
server_thread = threading.Thread(target=server.run, daemon=True)
server_thread.start()

# Wait (bounded) for uvicorn to finish starting; if its thread dies first,
# startup failed, e.g. because port 8000 is still held by an earlier run.
startup_deadline = time.monotonic() + 30
while (
    not server.started
    and server_thread.is_alive()
    and time.monotonic() < startup_deadline
):
    time.sleep(0.1)
if not server.started:
    raise RuntimeError(
        "The API server did not start on port 8000. If a previous run of this "
        "cell is still active, restart the runtime and try again."
    )

# ngrok.connect() returns a tunnel object; its .public_url attribute is the
# plain URL string that users need to copy.
tunnel = ngrok.connect(8000)
public_url = tunnel.public_url
print(f"\n{'='*60}")
print(f"  TTS API is live!")
print(f"  Public URL: {public_url}")
print(f"  Paste this URL into the voice app's Qwen3 URL field.")
print(f"{'='*60}\n")

print("Server running. Keep this cell running!")
# Keep cell alive; when it is interrupted, shut the server and tunnel down so
# that the cell can be re-run without a port or tunnel conflict.
try:
    while True:
        time.sleep(60)
except KeyboardInterrupt:
    print("Stopping server and closing ngrok tunnel...")
finally:
    server.should_exit = True
    server_thread.join(timeout=10)
    ngrok.disconnect(public_url)