# VieNeu-TTS Google Colab Backend

This notebook sets up a remote TTS backend for VieNeu-TTS using Google Colab's free GPU.

**⚠️ Keep this notebook running** - Closing it will disconnect your TTS server.

**Runtime**: GPU (T4) recommended | CPU also supported

---

## Optional: Mount Google Drive (for model caching)

Uncomment and run this cell to cache models on Google Drive (saves re-download time on restarts).

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')
# import os
# os.environ['HF_HOME'] = '/content/drive/MyDrive/huggingface_cache'

## 1. Install System Dependencies

In [None]:
# Refresh the apt package index quietly, then install the espeak-ng binary.
# NOTE(review): espeak-ng is presumably the backend for the `phonemizer`
# package installed in the next step - confirm.
!apt-get update -qq
!apt-get install -y espeak-ng

## 2. Install Python Dependencies

In [None]:
!pip install -q fastapi uvicorn pyngrok requests pydantic
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q transformers accelerate phonemizer librosa soundfile pyyaml

## 3. Clone VieNeu-TTS Repository

In [None]:
!git clone https://github.com/pnnbao-ump/Vina-TTS.git
%cd Vina-TTS

## 4. Configure and Start FastAPI Server

This cell:
- Creates a FastAPI server for TTS synthesis
- Loads the model ({{ backbone_repo }}, {{ codec_repo }})
- Starts ngrok tunnel for remote access
- Displays connection URL and auth token

In [None]:
import os
import secrets
import base64
from fastapi import FastAPI, HTTPException, Depends, status
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from pydantic import BaseModel
from pyngrok import ngrok
import uvicorn
import nest_asyncio
import torch

# Patch asyncio so an event loop can be started while another one is already
# running: the notebook kernel runs its own loop, and uvicorn.run() at the end
# of this cell needs to start one as well.
nest_asyncio.apply()

# Authentication token (copy this to your admin UI).
# The {{ ... }} values in this cell are template placeholders - presumably
# substituted by whatever renders this notebook before it is run.
AUTH_TOKEN = "{{ auth_token }}"

# Model configuration, passed to VieNeuTTS(...) in load_model().
BACKBONE_REPO = "{{ backbone_repo }}"
CODEC_REPO = "{{ codec_repo }}"
DEVICE = "{{ device }}"

# Initialize FastAPI
app = FastAPI(title="VieNeu-TTS Colab Backend")
# Bearer-token scheme consumed by verify_token() through Depends(security).
security = HTTPBearer()

# Global TTS model; stays None until the startup hook load_model() assigns it.
tts_model = None

def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Validate the bearer token sent with a request.

    Intended to be used as a FastAPI dependency on protected endpoints.

    Args:
        credentials: Bearer credentials extracted by the ``security`` scheme.

    Returns:
        The presented token string when it matches ``AUTH_TOKEN``.

    Raises:
        HTTPException: 401 when the presented token does not match.
    """
    # Compare in constant time: a plain ``!=`` stops at the first differing
    # character, which leaks timing information about the secret. Comparing
    # bytes also keeps compare_digest from raising on non-ASCII input.
    presented = credentials.credentials.encode("utf-8")
    expected = AUTH_TOKEN.encode("utf-8")
    if not secrets.compare_digest(presented, expected):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid authentication token",
            # Tells the client which authentication scheme is expected.
            headers={"WWW-Authenticate": "Bearer"},
        )
    return credentials.credentials

class TTSRequest(BaseModel):
    """Request body for ``POST /tts/synthesize``.

    Every field is forwarded unchanged, under the same name, as a keyword
    argument to ``tts_model.tts(...)`` by the ``synthesize`` endpoint.
    """

    # Text to synthesize.
    text: str
    # NOTE(review): presumably a path to a reference audio file on this
    # server's filesystem, not the client's - confirm how callers provide it.
    voice_sample_path: str
    # Presumably the transcript of the reference sample - TODO confirm.
    voice_transcript: str
    # Speed value handed to the model; defaults to 1.0. No range is enforced
    # by this schema.
    speed: float = 1.0
    # Watermark flag handed to the model; enabled by default.
    watermark: bool = True

@app.on_event("startup")
async def load_model():
    """Load the VieNeu-TTS model once, when the server starts.

    The loaded model is stored in the module-level ``tts_model`` so the
    endpoints can use it. Progress is reported with ``print`` so it shows up
    in the notebook cell output.

    NOTE: ``on_event`` is deprecated in recent FastAPI releases in favour of
    lifespan handlers; it is kept here because switching would require
    changing how ``app`` is constructed.
    """
    global tts_model

    cuda_available = torch.cuda.is_available()

    # The codec already falls back to CPU when CUDA is missing; apply the same
    # rule to the backbone so a "cuda" setting on a CPU-only runtime degrades
    # to CPU instead of failing during startup. Any other device string is
    # passed through untouched.
    backbone_device = DEVICE
    if DEVICE.startswith("cuda") and not cuda_available:
        print(f"WARNING: device '{DEVICE}' requested but CUDA is not available; using CPU.")
        backbone_device = "cpu"

    print("\n" + "="*60)
    print("🚀 Loading VieNeu-TTS Model...")
    print(f"   Backbone: {BACKBONE_REPO}")
    print(f"   Codec: {CODEC_REPO}")
    print(f"   Device: {backbone_device}")
    print("="*60 + "\n")

    # Function-local import, as in the original layout; the module is
    # presumably provided by the repository cloned in the previous step.
    from vieneu_tts import VieNeuTTS

    tts_model = VieNeuTTS(
        backbone_repo=BACKBONE_REPO,
        backbone_device=backbone_device,
        codec_repo=CODEC_REPO,
        codec_device="cuda" if cuda_available else "cpu",
    )
    print("✅ Model loaded successfully!\n")

@app.post("/tts/synthesize")
async def synthesize(request: TTSRequest, token: str = Depends(verify_token)):
    """Synthesize speech for ``request`` and return it as base64-encoded WAV.

    Args:
        request: Synthesis parameters; each field is forwarded to
            ``tts_model.tts(...)`` under the same name.
        token: Validated bearer token. Unused in the body; declaring it makes
            FastAPI run ``verify_token`` before this handler.

    Returns:
        A dict with ``audio_base64`` (WAV bytes, base64 text),
        ``sample_rate`` and ``duration_ms``.

    Raises:
        HTTPException: 503 when the model has not been loaded yet, 400 when
            the text is blank or the speed is not a positive number, 500 when
            synthesis or audio encoding fails.
    """
    if tts_model is None:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Model not loaded"
        )

    # Reject unusable input up front. These checks must stay outside the try
    # block below, which converts every exception (HTTPException included)
    # into a 500.
    if not request.text.strip():
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Text must not be empty"
        )
    # Written as ``not (x > 0)`` so that NaN is rejected as well.
    if not (request.speed > 0):
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Speed must be greater than 0"
        )

    try:
        # NOTE(review): this call is synchronous inside an async handler, so
        # the event loop (and therefore /health) is blocked while it runs.
        audio_array, sample_rate = tts_model.tts(
            text=request.text,
            voice_sample_path=request.voice_sample_path,
            voice_transcript=request.voice_transcript,
            speed=request.speed,
            watermark=request.watermark
        )

        import soundfile as sf
        import io

        # Encode to WAV in memory, then to base64 so it fits in a JSON body.
        buffer = io.BytesIO()
        sf.write(buffer, audio_array, sample_rate, format='WAV')
        audio_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

        return {
            "audio_base64": audio_base64,
            "sample_rate": sample_rate,
            "duration_ms": int(len(audio_array) / sample_rate * 1000)
        }
    except Exception as e:
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"TTS synthesis failed: {str(e)}"
        ) from e

@app.get("/health")
async def health_check(token: str = Depends(verify_token)):
    """Report server status, model readiness and GPU usage.

    Requires a valid bearer token (enforced through ``verify_token``).

    Returns:
        A dict with ``status``, ``model_loaded``, ``gpu_memory_used_gb`` and
        ``gpu_available``.
    """
    cuda_ok = torch.cuda.is_available()
    # Memory currently allocated by torch, in GiB; 0.0 when there is no GPU.
    allocated_gb = torch.cuda.memory_allocated() / 1024**3 if cuda_ok else 0.0
    model_ready = tts_model is not None

    report = {
        "status": "ok",
        "model_loaded": model_ready,
        "gpu_memory_used_gb": allocated_gb,
        "gpu_available": cuda_ok,
    }
    return report

# Port shared by the ngrok tunnel and the uvicorn server.
PORT = 8000

# Start ngrok tunnel.
# The token comes from the NGROK_AUTHTOKEN environment variable, or from the
# placeholder below once it has been replaced with a real token. The untouched
# placeholder is never registered: doing so would only make the tunnel fail
# with an authentication error.
NGROK_TOKEN = os.environ.get("NGROK_AUTHTOKEN", "YOUR_NGROK_TOKEN")
if NGROK_TOKEN and NGROK_TOKEN != "YOUR_NGROK_TOKEN":
    ngrok.set_auth_token(NGROK_TOKEN)
public_url = ngrok.connect(PORT)

# ngrok.connect() returns a tunnel object in current pyngrok releases; print
# its bare URL so it can be pasted as-is. Fall back to the value itself for
# versions that returned a plain string.
endpoint_url = getattr(public_url, "public_url", public_url)

# NOTE: this banner is printed before uvicorn starts, i.e. before the startup
# hook has loaded the model; /health reports when the model is actually ready.
print("\n" + "="*60)
print("🎉 VieNeu-TTS Colab Backend is READY!")
print("="*60)
print(f"\n📍 Endpoint URL: {endpoint_url}")
print(f"🔑 Auth Token: {AUTH_TOKEN}")
print("\n📋 Copy the above URL and Token to your Admin UI")
print("\n⚠️  Keep this cell running - Don't stop execution!")
print("="*60 + "\n")

# Start server (blocks until the cell is interrupted).
uvicorn.run(app, host="0.0.0.0", port=PORT)