In [None]:
# ✅ Colab Cell 1: Install Dependencies and Configure Ngrok

# Install required Python packages
!pip install flask flask-cors unsloth gtts --quiet

# Fix a debugpy_repr issue that can occur in Colab with some libraries
import google.colab._debugpy_repr as dbg
dbg.get_shape = lambda obj: None

# Download and unzip Ngrok executable
!wget -q -O ngrok.zip https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.zip
!unzip -o ngrok.zip > /dev/null
!chmod +x ngrok

# --- IMPORTANT: Replace YOUR_NGROK_AUTHTOKEN_HERE with your actual Ngrok authtoken ---
# Get your authtoken from https://dashboard.ngrok.com/get-started/your-authtoken
NGROK_AUTH_TOKEN = 'YOUR_TOKEN' # <--- REPLACE THIS LINE
!./ngrok config add-authtoken {NGROK_AUTH_TOKEN}

print("Dependencies installed and Ngrok configured.")

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read by this notebook
# (the model-loading cell's comment expects the fine-tuned model to be saved in Drive).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
from flask import Flask, request, jsonify, send_file
from flask_cors import CORS
import threading, time, requests, torch
import re
from unsloth import FastLanguageModel
from peft import PeftModel
from gtts import gTTS
from io import BytesIO

# Confirmation that the imports above completed. The Drive mount mentioned in the message
# is performed by the drive.mount(...) call in the preceding cell, not by this one.
print("Google Drive mounted and libraries imported.")

In [None]:
# ✅ Colab Cell 3: Load Your Fine-Tuned AI Model

# IMPORTANT: Adjust this path to where your fine-tuned model is saved in Google Drive
model_path = ""

# Fail fast on an unset path, before the (slow) base-model load is attempted.
if not model_path.strip():
    raise ValueError(
        "model_path is empty. Set it to the location of your fine-tuned model before running this cell."
    )

print(f"Loading model from: {model_path}")
try:
    # Load the 4-bit base model and its tokenizer, then attach the fine-tuned adapter on top.
    base_model, tokenizer = FastLanguageModel.from_pretrained(
        model_name = "unsloth/Llama-3.2-3B-Instruct-unsloth-bnb-4bit",
        max_seq_length = 2048,
        dtype = torch.float16, # Use torch.bfloat16 if your GPU supports it (e.g., A100, H100)
        load_in_4bit = True,
    )
    model = PeftModel.from_pretrained(base_model, model_path)
    model.eval()  # inference mode
    print("Model loaded successfully!")
except Exception as e:
    print(f"Error loading model: {e}")
    print("Please ensure your model path is correct and Google Drive is mounted.")
    # Re-raise instead of calling exit(): the exception stops this cell (and a "Run all")
    # with a full traceback, without asking the kernel itself to shut down.
    raise

In [None]:
# ✅ Colab Cell 4: Initialize Flask App and Define Chat Endpoint

# Flask application object; CORS(app) enables cross-origin requests for it.
app = Flask(__name__)
CORS(app)

# Running conversation transcript, seeded with the system prompt for the main therapy bot.
# The /chat endpoint appends every user and assistant turn to this single module-level list.
chat_history = [
    dict(
        role="system",
        content="""You are 'Aura', a supportive and empathetic AI companion. Your purpose is to provide a safe, non-judgmental space for users to discuss their feelings and to offer practical, evidence-based coping mechanisms and techniques (like mindfulness, breathing exercises, or grounding).

Your core directives are:
1.  **Provide Support & Coping Mechanisms:** Focus on validating feelings and suggesting helpful strategies.
2.  **Do Not Diagnose:** You are not a medical professional. Never diagnose conditions. If asked, gently state this limitation and suggest consulting a qualified professional.
3.  **Stay On Topic:** Your expertise is limited to mental and emotional support. If a user asks about unrelated topics (e.g., coding, homework, general trivia), politely decline and state your purpose is to be a supportive listener.
4.  **CRITICAL SAFETY PROTOCOL:** If a user expresses clear intent, a plan, or is actively attempting self-harm or harm to others, you must immediately stop the conversation and respond with ONLY the following text: "It sounds like you are in crisis, and your safety is the most important thing. It's vital to speak with someone who can provide immediate help. Please contact a suicide prevention hotline. In India, you can reach Vandrevala Foundation at 9999666555 or Aasra at +91-9820466726. Please reach out to them now."
""",
    )
]

# Token budget for one turn. 2048 mirrors the max_seq_length the model is loaded with;
# the prompt must leave room for the tokens that will be generated.
MAX_NEW_TOKENS = 150
MAX_PROMPT_TOKENS = 2048 - MAX_NEW_TOKENS

# chat_history is a single shared list and Flask's development server serves requests on
# multiple threads by default, so each conversation turn is made atomic with this lock.
_chat_lock = threading.Lock()


def _encode_history(messages):
    """Render `messages` with the tokenizer's chat template and tokenize the result for the GPU."""
    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    # The rendered template text is expected to already contain the special tokens, so the
    # tokenizer must not add them a second time (that would duplicate the BOS token).
    return tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to("cuda")


@app.route('/chat', methods=['POST'])
def chat():
    """Handle one chat turn.

    Request body (JSON object):
        message (str): the user's text; required, must be non-blank.
        response_type (str, optional): 'audio' for an MP3 rendering of the reply,
            anything else (default 'text') for a JSON reply.

    Returns:
        200 with {'reply': <text>}, or an audio/mpeg stream when audio was requested;
        400 with {'reply': ...} when no usable message was supplied;
        500 with {'error': ..., 'text_response': <text>} when speech synthesis fails.

    The shared chat_history is only updated once a reply has been generated, so a failed
    generation never leaves a dangling user turn behind.
    """
    # silent=True yields None (instead of raising) for a missing or malformed JSON body.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    raw_message = payload.get('message', '')
    user_input = raw_message.strip() if isinstance(raw_message, str) else ''
    if not user_input:
        return jsonify({'reply': '⚠️ Please enter a message'}), 400

    response_type = str(payload.get('response_type') or 'text').lower()

    with _chat_lock:
        # Work on a copy; the shared history is replaced only after generation succeeds.
        candidate = chat_history + [{"role": "user", "content": user_input}]
        inputs = _encode_history(candidate)

        # Over budget: drop the oldest user/assistant exchange (indices 1-2, right after
        # the system prompt) until the prompt fits or only one earlier exchange is left.
        while inputs.input_ids.shape[1] > MAX_PROMPT_TOKENS and len(candidate) > 3:
            del candidate[1:3]
            inputs = _encode_history(candidate)

        input_len = inputs.input_ids.shape[1]

        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=MAX_NEW_TOKENS,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.eos_token_id
            )

        # generate() returns prompt + continuation; keep only the newly generated part.
        generated_tokens = output[0][input_len:]
        reply = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

        candidate.append({"role": "assistant", "content": reply})
        chat_history[:] = candidate

    if response_type == 'audio':
        try:
            # Synthesize the reply into an in-memory MP3 and stream it back.
            tts = gTTS(text=reply, lang='en', slow=False)
            audio_stream = BytesIO()
            tts.write_to_fp(audio_stream)
            audio_stream.seek(0)
            return send_file(audio_stream, mimetype="audio/mpeg")
        except Exception as e:
            print(f"Error generating TTS: {e}")
            return jsonify({"error": f"Could not generate audio: {e}", "text_response": reply}), 500
    else:
        return jsonify({'reply': reply})

print("Flask app defined.")

In [None]:
# ✅ Colab Cell 5: Launch Flask in Background and Start Ngrok Tunnel

def run_flask():
    """Serve the Flask app on every interface, port 5000; blocks until the server stops."""
    print("Starting Flask app on port 5000...")
    # Debug mode and the reloader are both switched off.
    server_options = dict(host='0.0.0.0', port=5000, debug=False, use_reloader=False)
    app.run(**server_options)

# Flask gets its own thread so this cell can go on to launch Ngrok.
# daemon=True lets the main program exit even while the server thread is still running.
flask_thread = threading.Thread(target=run_flask, daemon=True)
flask_thread.start()
print("Flask app thread started.")

# Short pause so Flask has a moment to start up before the tunnel is opened.
time.sleep(5)
print("Starting Ngrok tunnel...")
# Launch ngrok in the background (trailing '&'); get_ipython().system_raw is for Colab.
get_ipython().system_raw('./ngrok http 5000 &')

# Function to retrieve public Ngrok URL
def get_ngrok_url(retries=30, delay=2, timeout=5):
    """Poll the local Ngrok API until an HTTPS tunnel URL is reported.

    Args:
        retries: maximum number of polling attempts.
        delay: seconds to wait between attempts.
        timeout: seconds to wait for each HTTP request, so one stalled
            request cannot block the cell indefinitely.

    Returns:
        The public HTTPS URL of the tunnel, or None if none appeared within
        `retries` attempts.
    """
    for attempt in range(1, retries + 1):
        try:
            response = requests.get('http://localhost:4040/api/tunnels', timeout=timeout)
            tunnels = response.json()['tunnels']
            # Find the HTTPS public URL; entries lacking a 'proto' field are skipped.
            public_url = next(
                (t['public_url'] for t in tunnels if t.get('proto') == 'https'),
                None,
            )
            if public_url:
                return public_url
            print(f"Ngrok URL not yet available or not HTTPS, retrying... ({attempt}/{retries})")
        except requests.exceptions.ConnectionError:
            print(f"Could not connect to Ngrok API, retrying... ({attempt}/{retries})")
        except Exception as e:
            print(f"An error occurred while getting Ngrok URL: {e}, retrying... ({attempt}/{retries})")
        # Wait before the next attempt; there is nothing to wait for after the last one.
        if attempt < retries:
            time.sleep(delay)
    return None

# Resolve the tunnel's public URL and print copy-paste usage examples for the /chat endpoint.
public_url = get_ngrok_url()
if public_url:
    print(f"\n🧠 AI Therapist is live! Access it at: {public_url}/chat")
    print("\nTo chat with your bot and get a TEXT response, use `curl` or a simple Python script:")
    # The curl payload sits inside a single-quoted shell argument, so the example message must
    # not contain an apostrophe: it would end the quote early, and \' is not a valid JSON escape.
    print(f"Example `curl` command (TEXT):\ncurl -X POST -H \"Content-Type: application/json\" -d '{{\"message\": \"I am feeling really anxious today.\", \"response_type\": \"text\"}}' {public_url}/chat")
    print(f"\nExample Python request (TEXT):\nimport requests\nurl = \"{public_url}/chat\"\nheaders = {{'Content-Type': 'application/json'}}\ndata = {{'message': 'I\\'m having a tough time with stress.', 'response_type': 'text'}}\nresponse = requests.post(url, headers=headers, json=data)\nprint(response.json())")
    print("\nTo chat with your bot and get an AUDIO response, use `curl` or a simple Python script (saves to file):")
    print(f"Example `curl` command (AUDIO):\ncurl -X POST -H \"Content-Type: application/json\" -d '{{\"message\": \"I am feeling really anxious today.\", \"response_type\": \"audio\"}}' {public_url}/chat --output bot_response.mp3")
    print(f"\nExample Python request (AUDIO, saves to file):\nimport requests\nurl = \"{public_url}/chat\"\nheaders = {{'Content-Type': 'application/json'}}\ndata = {{'message': 'I\\'m having a tough time with stress.', 'response_type': 'audio'}}\nresponse = requests.post(url, headers=headers, json=data)\nif response.headers.get('Content-Type') == 'audio/mpeg':\n    with open('bot_audio_response.mp3', 'wb') as f:\n        f.write(response.content)\n    print('Audio response saved to bot_audio_response.mp3')\nelse:\n    print('Non-audio response received:', response.text)")

else:
    print("❌ Ngrok failed to start or retrieve public URL.")
    print("Please check your Ngrok authtoken and ensure the Flask app started correctly.")

print("\nServer setup complete. Keep this cell running to maintain the server and Ngrok tunnel.")

In [None]:
# ✅ Colab Cell 6: Keep Session Alive and Clean Up

# Idle in one-second naps so the notebook session, the Flask thread and the tunnel stay up.
# Interrupting this cell (e.g., the stop button in Colab) raises KeyboardInterrupt, which
# triggers the cleanup below.
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    print("\nServer stopped. Cleaning up ngrok tunnel.")
    # Kill every process whose command line matches "ngrok".
    get_ipython().system('pkill -f ngrok')
    # List whatever is still using port 5000 (this only reports; it does not kill anything).
    get_ipython().system('sudo lsof -i :5000')
    print("Ngrok tunnel terminated.")

In [None]:
# Manual cleanup: kill any process still using TCP port 5000 (the port the Flask app binds),
# so the server can be started again without an "address already in use" failure.
!fuser -k 5000/tcp
