# 🎙️ Kokoro TTS API Server v5

This notebook runs Kokoro TTS as an API for your NeuralTTS app.

**How to use:**
1. Go to Runtime → Change runtime type → Select **T4 GPU**
2. Run all cells (Ctrl+F9)
3. Copy the ngrok URL printed at the end
4. Paste it in your Cloud TTS page

**v5 Features:**
- Uses Kokoro's native text processing (same as local-tts)
- Real-time progress from internal chunk generation
- Model cached in Google Drive

## 1️⃣ Install Dependencies

In [None]:
!pip install -q kokoro>=0.9.4 flask flask-cors pyngrok soundfile numpy torch

## 2️⃣ Mount Google Drive & Setup Cache

In [None]:
from google.colab import drive
import os
import shutil

# Mount Google Drive so the cache directory below lives on Drive rather than
# on the Colab VM's local disk.
drive.mount('/content/drive')

# Cache directory in Google Drive
CACHE_DIR = '/content/drive/MyDrive/kokoro_cache'
os.makedirs(CACHE_DIR, exist_ok=True)

# Point the Hugging Face cache locations at the Drive folder. This happens
# here, before the kokoro import in the next cell.
os.environ['HF_HOME'] = CACHE_DIR
os.environ['TRANSFORMERS_CACHE'] = CACHE_DIR
os.environ['HF_HUB_CACHE'] = os.path.join(CACHE_DIR, 'hub')

# makedirs() above guarantees the directory exists, so the only special case
# worth reporting is an empty listing.
cached_entries = os.listdir(CACHE_DIR)
print(f"‚úÖ Cache directory: {CACHE_DIR}")
print(f"   Existing files: {cached_entries or 'Empty'}")

## 3️⃣ Load Kokoro Model (Uses cache if available)

In [None]:
from kokoro import KPipeline
import torch

print("üîÑ Loading Kokoro model...")
print(f"   GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")

# Initialize pipeline - will use cache from Google Drive
pipeline = KPipeline(lang_code='a')  # 'a' = American English, 'b' = British

print("‚úÖ Model loaded successfully!")

## 4️⃣ Setup ngrok (Get your free token from ngrok.com)

In [None]:
# @title Enter your ngrok authtoken (get free at https://ngrok.com)
NGROK_TOKEN = ""  # @param {type:"string"}

from pyngrok import ngrok

# Register the authtoken with pyngrok when one was entered in the form above;
# otherwise only tell the user where to obtain one.
if not NGROK_TOKEN:
    print("‚ö†Ô∏è No ngrok token - using free tier (limited)")
    print("   Get a free token at: https://dashboard.ngrok.com/get-started/your-authtoken")
else:
    ngrok.set_auth_token(NGROK_TOKEN)
    print("‚úÖ ngrok token set!")

## 5️⃣ Start API Server (v5 - Native Kokoro Processing)

In [None]:
from flask import Flask, request, send_file, jsonify, Response
from flask_cors import CORS
from pyngrok import ngrok
import soundfile as sf
import numpy as np
import io
import torch
import json
import base64

# Flask application object; the route decorators below register on it.
app = Flask(__name__)
# Enable CORS on the app with flask-cors defaults
# (presumably so the browser-based Cloud TTS page can call this API - confirm).
CORS(app)

# Voice catalogue returned by /api/voices, keyed by Kokoro voice id.
# Every id has the form "<lang><gender>_<name>", so the metadata is derived
# from the id itself: first letter -> language, second letter -> gender,
# remainder -> display name.
VOICES = {
    voice_id: {
        "name": voice_id[3:].capitalize(),
        "gender": {"f": "Female", "m": "Male"}[voice_id[1]],
        "lang": {"a": "en-us", "b": "en-gb"}[voice_id[0]],
    }
    for voice_id in (
        # American English, female
        "af_bella",
        "af_nicole",
        "af_sarah",
        "af_sky",
        "af_heart",
        "af_alloy",
        "af_aoede",
        "af_jessica",
        "af_kore",
        # American English, male
        "am_adam",
        "am_michael",
        "am_echo",
        "am_eric",
        "am_fenrir",
        "am_liam",
        "am_onyx",
        "am_puck",
        # British English, female
        "bf_emma",
        "bf_isabella",
        # British English, male
        "bm_george",
        "bm_lewis",
    )
}

@app.route('/health', methods=['GET'])
def health():
    """Return a JSON status report: fixed status/model/version fields plus the
    CUDA device name, or "CPU" when CUDA is unavailable."""
    if torch.cuda.is_available():
        device_name = torch.cuda.get_device_name(0)
    else:
        device_name = "CPU"

    status = {
        "status": "ok",
        "model": "kokoro",
        "version": "v5",
        "gpu": device_name,
    }
    return jsonify(status)

@app.route('/api/voices', methods=['GET'])
def get_voices():
    """Return the VOICES catalogue as a JSON response."""
    response = jsonify(VOICES)
    return response

def _parse_tts_request(payload):
    """Validate the decoded JSON body of a /api/tts request.

    Args:
        payload: The decoded request body (expected to be a dict).

    Returns:
        A ``(text, voice, speed, stream)`` tuple. Missing keys fall back to
        'Hello world', 'af_bella', 1.0 and False; speed is clamped to
        the range [0.5, 2.0].

    Raises:
        ValueError: If the body is not a JSON object, ``text`` is not a
            string, or ``speed`` cannot be converted to a float.
    """
    if not isinstance(payload, dict):
        raise ValueError("Request body must be a JSON object")

    text = payload.get('text', 'Hello world')
    if not isinstance(text, str):
        raise ValueError("'text' must be a string")

    voice = payload.get('voice', 'af_bella')

    try:
        speed = float(payload.get('speed', 1.0))
    except (TypeError, ValueError):
        raise ValueError("'speed' must be a number") from None
    speed = max(0.5, min(2.0, speed))

    return text, voice, speed, payload.get('stream', False)


def _encode_wav(audio_chunks, sample_rate=24000):
    """Concatenate audio chunk tensors and encode them as an in-memory WAV.

    Args:
        audio_chunks: Non-empty list of audio tensors yielded by the pipeline.
        sample_rate: Sample rate written to the WAV header.

    Returns:
        A ``(buffer, duration)`` tuple: a BytesIO rewound to position 0 and
        the audio length in seconds.
    """
    # detach()/cpu() are no-ops for plain CPU tensors; they only guard against
    # chunks that are still attached to a graph or left on the GPU.
    final_audio = torch.cat(audio_chunks).detach().cpu().numpy()

    buffer = io.BytesIO()
    sf.write(buffer, final_audio, sample_rate, format='WAV')
    buffer.seek(0)
    return buffer, len(final_audio) / sample_rate


@app.route('/api/tts', methods=['POST'])
def generate_tts():
    """Generate TTS using Kokoro's native chunking with progress streaming.

    Expects a JSON object with optional keys ``text``, ``voice``, ``speed``
    and ``stream``.

    Returns:
        * 400 with ``{"error": ...}`` when the body is malformed.
        * ``stream`` falsy: the WAV file as an attachment, or 500 with
          ``{"error": ...}`` when generation fails or yields no audio.
        * ``stream`` truthy: a ``text/event-stream`` response carrying
          ``progress`` events, then one ``complete`` event (base64 WAV) or
          one ``error`` event.
    """
    # force=True parses the body regardless of Content-Type; silent=True turns
    # parse failures into None so they are reported as a JSON 400 below.
    try:
        text, voice, speed, stream_progress = _parse_tts_request(
            request.get_json(force=True, silent=True)
        )
    except ValueError as e:
        return jsonify({"error": str(e)}), 400

    if not stream_progress:
        # Non-streaming mode - simple blob response
        try:
            print(f"üéôÔ∏è Generating: voice={voice}, speed={speed}, chars={len(text)}")

            # Use Kokoro's native generator - NO manual text splitting!
            generator = pipeline(text, voice=voice, speed=speed)
            audio_chunks = []
            for chunk_number, (_gs, _ps, audio) in enumerate(generator, start=1):
                audio_chunks.append(audio)
                print(f"   Chunk {chunk_number} generated")

            # Report empty output explicitly (same message as streaming mode)
            # instead of letting torch.cat fail on an empty list.
            if not audio_chunks:
                return jsonify({"error": "No audio generated"}), 500

            buffer, duration = _encode_wav(audio_chunks)

            print(f"‚úÖ Generated {duration:.2f}s of audio")
            return send_file(buffer, mimetype='audio/wav', as_attachment=True, download_name='tts_output.wav')

        except Exception as e:
            print(f"‚ùå Error: {e}")
            return jsonify({"error": str(e)}), 500

    # Streaming mode with progress from Kokoro's native chunks
    def generate_with_progress():
        try:
            print(f"üéôÔ∏è [Stream] Generating: voice={voice}, speed={speed}, chars={len(text)}")

            # The real chunk count is unknown until the generator is exhausted,
            # so start from an estimate (~100 chars per chunk) and raise the
            # total whenever the actual count exceeds it.
            estimated_chunks = max(1, len(text) // 100)

            yield f"data: {json.dumps({'type': 'progress', 'current': 0, 'total': estimated_chunks, 'percent': 0, 'status': 'Starting...'})}\n\n"

            # Generate using Kokoro's native pipeline (handles text normalization properly)
            generator = pipeline(text, voice=voice, speed=speed)
            audio_chunks = []
            chunk_count = 0

            for chunk_count, (_gs, _ps, audio) in enumerate(generator, start=1):
                audio_chunks.append(audio)

                # Cap at 95% so only the 'complete' event signals the end.
                total = max(estimated_chunks, chunk_count)
                percent = min(95, int((chunk_count / total) * 100))

                progress_data = {
                    'type': 'progress',
                    'current': chunk_count,
                    'total': total,
                    'percent': percent,
                    'status': f'Processing chunk {chunk_count}...'
                }
                yield f"data: {json.dumps(progress_data)}\n\n"
                print(f"   Chunk {chunk_count} ({percent}%)")

            if audio_chunks:
                buffer, duration = _encode_wav(audio_chunks)
                audio_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

                print(f"‚úÖ Generated {duration:.2f}s of audio ({chunk_count} native chunks)")

                yield f"data: {json.dumps({'type': 'complete', 'audio': audio_b64, 'duration': duration, 'chunks': chunk_count})}\n\n"
            else:
                yield f"data: {json.dumps({'type': 'error', 'message': 'No audio generated'})}\n\n"

        except Exception as e:
            # Headers are already sent once streaming starts, so failures are
            # reported as an in-band 'error' event rather than an HTTP status.
            print(f"‚ùå Error: {e}")
            yield f"data: {json.dumps({'type': 'error', 'message': str(e)})}\n\n"

    return Response(
        generate_with_progress(),
        mimetype='text/event-stream',
        headers={
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'X-Accel-Buffering': 'no'
        }
    )

# Port shared by the ngrok tunnel and the Flask server below.
PORT = 5000

# Start ngrok tunnel
tunnel = ngrok.connect(PORT)
# Current pyngrok returns an NgrokTunnel object whose string form is not a
# bare URL; take its public_url attribute so the printed value can be pasted
# as-is. The getattr fallback keeps a plain-string return value working.
public_url = getattr(tunnel, "public_url", tunnel)

banner = "=" * 60
print("\n" + banner)
print("üöÄ API SERVER IS RUNNING! (v5 - Native Kokoro)")
print(banner)
print("\nüìã Copy this URL to your Cloud TTS page:\n")
print(f"   {public_url}")
print("\n" + banner)
print("\n‚ÑπÔ∏è  v5 uses Kokoro's native text processing")
print("   Same audio quality as local-tts!")
print("\n‚è∞ Keep this notebook running while using Cloud TTS")
print("   Free tier: ~1-2 hours of GPU time per day\n")

# Blocks this cell while the server is running.
app.run(port=PORT)