# 🎙️ Whisper AI Transcription Backend Server

## Features:
- ✅ Supports files up to 1GB
- ✅ Auto keep-alive (prints a heartbeat every 5 minutes while the server cell is running)
- ✅ Multiple language support
- ✅ Fast transcription with OpenAI Whisper
- ✅ Ngrok integration for public access

## Instructions:
1. Run all cells in order (Runtime → Run all)
2. Get your ngrok URL from the output
3. Copy the URL to your web app settings
4. Start transcribing!

**⚠️ Important:** Keep this Colab tab open while using the app!


## 📦 Step 1: Install Required Packages


In [None]:
%%capture
# Install OpenAI Whisper and dependencies
# -q keeps pip quiet. flask / flask-cors back the HTTP API defined in Step 3,
# and pyngrok opens the public tunnel in Step 4.
!pip install -q openai-whisper
!pip install -q flask flask-cors
!pip install -q pyngrok
# NOTE(review): ffmpeg-python is not imported by any cell visible in this
# notebook — confirm it is still needed.
!pip install -q ffmpeg-python

# Install ffmpeg
# Presumably the ffmpeg binary is what Whisper uses to decode uploaded media — confirm.
!apt-get -qq install -y ffmpeg

# NOTE(review): the %%capture magic at the top of this cell appears to capture
# this print as well, so the success message is likely never displayed, and it
# runs regardless of whether the installs above succeeded — confirm.
print('✅ All packages installed successfully!')


## 🔧 Step 2: Setup Ngrok (Optional)

Get a free ngrok auth token from: https://dashboard.ngrok.com/signup

**Note:** You can skip this and use free temporary URLs.


In [None]:
# Optional: paste your ngrok auth token between the quotes for stable URLs.
# Leaving it empty skips authentication entirely.
NGROK_AUTH_TOKEN = ""  # Get from https://dashboard.ngrok.com/get-started/your-authtoken

if not NGROK_AUTH_TOKEN:
    # Nothing to register; just tell the user which mode is in effect.
    print("ℹ️ No ngrok token provided. Using free temporary URLs.")
else:
    # pyngrok is imported only when a token actually has to be registered.
    from pyngrok import ngrok
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
    print("✅ Ngrok authenticated!")


## 🚀 Step 3: Create Flask Server with Whisper AI


In [None]:
import os
import time
import whisper
from flask import Flask, request, jsonify
from flask_cors import CORS
from werkzeug.utils import secure_filename
import threading
from datetime import datetime

# Create Flask app
app = Flask(__name__)
CORS(app)  # Enable CORS for all routes

# Configuration
UPLOAD_FOLDER = '/content/uploads'
MAX_FILE_SIZE = 1024 * 1024 * 1024  # 1GB
# Actually enforce the advertised upload limit: without this setting Flask
# accepts request bodies of any size and MAX_FILE_SIZE is never consulted.
# The limit applies to the whole request body (file plus form fields).
app.config['MAX_CONTENT_LENGTH'] = MAX_FILE_SIZE
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Load Whisper model once at startup so every request reuses the same instance.
print("🔄 Loading Whisper model... This may take a minute.")
model = whisper.load_model("base")  # Options: tiny, base, small, medium, large
print("✅ Whisper model loaded successfully!")

# Activity bookkeeping shared by the route handlers and the monitor thread:
# request_count counts /transcribe calls, last_activity is the time.time()
# of the most recent /health or /transcribe request.
request_count = 0
last_activity = time.time()

@app.route('/health', methods=['GET'])
def health_check():
    """Report that the server is up and record the call as activity.

    Returns:
        A ``(json, 200)`` pair whose JSON body carries the status, a
        message, the model name, the current timestamp in ISO format and
        the number of transcription requests handled so far.
    """
    global last_activity

    # A health probe counts as activity for the idle monitor.
    last_activity = time.time()

    payload = {
        'status': 'healthy',
        'message': 'Whisper AI Server is running',
        'model': 'base',
        'timestamp': datetime.now().isoformat(),
        'requests_processed': request_count,
    }
    return jsonify(payload), 200

@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """Transcribe an uploaded audio/video file with the loaded Whisper model.

    Expects a multipart/form-data POST with:
      * ``file``     - the media file to transcribe (required).
      * ``language`` - a language code handed to Whisper, or ``'auto'``
        (the default) to let Whisper detect the language.

    Returns:
        A ``(json, status)`` pair. On success (200) the JSON contains
        ``success``, ``transcript``, ``language``, ``processing_time`` and
        ``filename``. A missing file or an empty filename yields 400; any
        other failure yields 500 with the error message in ``error``.
    """
    global request_count, last_activity
    import uuid  # local import: only this handler needs it

    request_count += 1
    last_activity = time.time()

    start_time = time.time()
    filepath = None  # set once the upload has a path on disk; drives cleanup

    try:
        # Check if file is present
        if 'file' not in request.files:
            return jsonify({'success': False, 'error': 'No file provided'}), 400

        file = request.files['file']

        if file.filename == '':
            return jsonify({'success': False, 'error': 'Empty filename'}), 400

        # Get parameters
        language = request.form.get('language', 'auto')
        if language == 'auto':
            language = None  # Whisper will auto-detect

        # secure_filename() can return an empty string when nothing safe is
        # left of the client-supplied name; fall back to a fixed name so the
        # save path never collapses to the upload directory itself.
        filename = secure_filename(file.filename) or 'upload'

        # Store under a unique name: two requests uploading files with the
        # same name must not overwrite or delete each other's file. The
        # sanitized name stays at the end so the file extension is preserved.
        filepath = os.path.join(UPLOAD_FOLDER, f"{uuid.uuid4().hex}_{filename}")
        file.save(filepath)

        print(f"📝 Transcribing: {filename} (Language: {language or 'auto-detect'})")

        # Transcribe with Whisper
        result = model.transcribe(filepath, language=language, fp16=False, verbose=False)

        processing_time = round(time.time() - start_time, 2)

        print(f"✅ Transcription complete in {processing_time}s")

        return jsonify({
            'success': True,
            'transcript': result['text'].strip(),
            'language': result.get('language', 'unknown'),
            'processing_time': f"{processing_time}s",
            'filename': filename
        }), 200

    except Exception as e:
        # Top-level boundary for the request: report the failure as JSON.
        print(f"❌ Error: {str(e)}")
        return jsonify({'success': False, 'error': str(e)}), 500

    finally:
        # Best-effort removal of the upload on every exit path (success or
        # failure). The file may not exist if saving itself failed.
        if filepath is not None:
            try:
                os.remove(filepath)
            except OSError:
                pass

@app.route('/', methods=['GET'])
def index():
    """Describe the service: name, version, available endpoints and status.

    Returns:
        A ``(json, 200)`` pair with a static description of the server.
    """
    endpoints = {
        'health': '/health',
        'transcribe': '/transcribe (POST)',
    }
    description = {
        'message': 'Whisper AI Transcription Server',
        'version': '2.0',
        'endpoints': endpoints,
        'status': 'online',
    }
    return jsonify(description), 200

# Printed once this cell has finished defining the app and its routes;
# the server itself is not started here.
print("✅ Flask server configured successfully!")


## 🌐 Step 4: Start Server with Ngrok & Keep-Alive

**⚠️ IMPORTANT: Do NOT stop this cell! Keep it running.**


In [None]:
from pyngrok import ngrok
import threading
import time

# Keep-alive function
def keep_alive():
    """Print a heartbeat line every five minutes, forever.

    The loop only sleeps and prints a timestamped message; it does not send
    any request to the server. Intended to run in a daemon thread, since it
    never returns.
    """
    interval_seconds = 300  # 5 minutes between heartbeats
    while True:
        try:
            time.sleep(interval_seconds)
            stamp = datetime.now().strftime('%H:%M:%S')
            print(f"💓 Keep-alive ping at {stamp}")
        except Exception as e:
            # Report and carry on so a single failure does not end the loop.
            print(f"Keep-alive error: {e}")

# Activity monitor
def monitor_activity():
    """Print an activity summary every ten minutes, forever.

    Each report shows how many minutes have passed since ``last_activity``
    and the current ``request_count``, labelled ACTIVE (under 10 minutes
    idle), IDLE (under 30 minutes) or INACTIVE (30 minutes or more).
    Intended to run in a daemon thread, since it never returns.
    """
    while True:
        time.sleep(600)  # Check every 10 minutes

        # Both globals are only read here, so no ``global`` statement is needed.
        minutes_idle = (time.time() - last_activity) / 60

        if minutes_idle >= 30:
            label = "🔴 INACTIVE"
        elif minutes_idle >= 10:
            label = "🟡 IDLE"
        else:
            label = "🟢 ACTIVE"

        print(f"{label} - Last activity: {minutes_idle:.1f} minutes ago | Requests: {request_count}")

# Start keep-alive thread (daemon: it must not keep the process alive on its own)
keep_alive_thread = threading.Thread(target=keep_alive, daemon=True)
keep_alive_thread.start()

# Start monitor thread (daemon, same reason)
monitor_thread = threading.Thread(target=monitor_activity, daemon=True)
monitor_thread.start()

# Start ngrok tunnel to the port Flask listens on below
print("\n🌐 Starting ngrok tunnel...")
# ngrok.connect() returns an NgrokTunnel object, not a string; the name
# `public_url` is kept for compatibility, and the actual address is read
# from its `.public_url` attribute when printed.
public_url = ngrok.connect(5000, bind_tls=True)
print("\n" + "="*70)
print("🎉 SERVER IS RUNNING!")
print("="*70)
# Print only the bare URL so it can be copied directly; formatting the tunnel
# object itself would print its repr (URL plus the local forwarding target).
print(f"\n📡 Your Public URL: {public_url.public_url}")
print("\n⚙️ COPY THIS URL TO YOUR WEB APP SETTINGS!")
print("\n" + "="*70)
print("\n📊 Server Status:")
print("   • Model: Whisper Base")
print("   • Max File Size: 1GB")
print("   • Keep-Alive: ✅ Enabled")
print("   • CORS: ✅ Enabled")
print("\n💡 Tips:")
print("   • Keep this cell running (don't stop execution)")
print("   • Server will auto-ping every 5 minutes")
print("   • Keep Colab tab open in browser")
print("\n⚠️ Note: Colab sessions timeout after ~12 hours of inactivity")
print("="*70 + "\n")

# Run Flask app. This call blocks, which is why the cell must stay running.
# The reloader is disabled so the app is not re-executed in a child process.
app.run(host='0.0.0.0', port=5000, debug=False, use_reloader=False)
