In [None]:
# 1. Install necessary libraries
!pip install flask flask-cors pyngrok sentence-transformers -q

import json
import re
from flask import Flask, request, jsonify
from flask_cors import CORS
from pyngrok import ngrok
from sentence_transformers import SentenceTransformer, util

# 2. Load and Clean the Data
# The data file is not strict JSON (it carries extra [source] tags), so the
# Q&A pairs are extracted with regular expressions instead of json.load().
data_path = '/content/Mazhava_data.txt' # Make sure this matches your uploaded file name

# Body of a JSON-style string: ordinary characters or backslash escapes, so an
# escaped quote (\") inside the text does not terminate the match early.
_JSON_STRING_BODY = r'"((?:[^"\\\n]|\\.)*)"'
_PROMPT_RE = re.compile(r'"prompt"\s*:\s*' + _JSON_STRING_BODY)
_COMPLETION_RE = re.compile(r'"completion"\s*:\s*' + _JSON_STRING_BODY)


def _decode_json_string(raw):
    """Decode JSON escapes (\\n, \\", \\uXXXX) in *raw*; return *raw* if it is not valid JSON."""
    try:
        return json.loads('"' + raw + '"')
    except ValueError:
        # Best effort: keep the text exactly as it appears in the file.
        return raw


def parse_qa_pairs(text):
    """Extract the question/answer pairs from the raw contents of the data file.

    Args:
        text: Full text of the data file, containing ``"prompt": "..."`` and
            ``"completion": "..."`` entries.

    Returns:
        A ``(prompts, responses)`` tuple of two equal-length lists of strings.
        The i-th prompt found is paired with the i-th completion found.
    """
    prompt_matches = _PROMPT_RE.findall(text)
    completion_matches = _COMPLETION_RE.findall(text)

    if len(prompt_matches) != len(completion_matches):
        # zip() below stops at the shorter list; say so instead of hiding it,
        # because a missing entry can also shift every later pairing.
        print(
            f"Warning: found {len(prompt_matches)} prompts but "
            f"{len(completion_matches)} completions; unpaired entries are ignored."
        )

    pairs = list(zip(prompt_matches, completion_matches))
    return (
        [_decode_json_string(p) for p, _ in pairs],
        [_decode_json_string(c) for _, c in pairs],
    )


prompts = []
responses = []

try:
    with open(data_path, 'r', encoding='utf-8') as f:
        content = f.read()
except (OSError, UnicodeDecodeError) as e:
    # Keep going with empty lists so the rest of the notebook still runs.
    print(f"❌ Error loading file: {e}")
else:
    prompts, responses = parse_qa_pairs(content)
    print(f"✅ Successfully loaded {len(prompts)} Q&A pairs about Mazhavar Nadu.")

# 3. Load the AI Model (Sentence Transformer)
# The model converts text into embedding vectors so that two pieces of text
# can be compared by meaning rather than by exact wording.
print("⏳ Loading AI Model... (this takes a minute)")
model = SentenceTransformer('all-MiniLM-L6-v2')
# Encode every stored prompt once at startup; chat() compares each incoming
# message against these precomputed embeddings.
prompt_embeddings = model.encode(prompts, convert_to_tensor=True)
print("✅ AI Model Ready!")

# 4. Setup the Flask Server
# `app` is the Flask application object that the route decorators below
# register their handlers on.
app = Flask(__name__)
# Enable CORS so the browser-based React front end can call this API from a
# different origin.
# NOTE(review): CORS(app) is called with no options, so flask-cors' defaults
# apply (believed to allow every origin) — confirm, and restrict to the React
# app's origin before exposing this publicly.
CORS(app) # Allow React to talk to this server

@app.route('/chat', methods=['POST'])
def chat():
    """Answer a chat message with the closest pre-written response.

    Expects a JSON request body of the form ``{"message": "<user text>"}``.

    Returns:
        A JSON response with a ``reply`` string. When a stored answer is
        returned, the response also carries ``score``, the cosine similarity
        between the message and the best-matching stored prompt.
    """
    fallback_reply = "I am sorry, I don't have information on that specific topic about Mazhavar Nadu yet."

    # silent=True yields None instead of raising when the body is missing or
    # is not valid JSON, so a malformed request gets a normal reply.
    payload = request.get_json(silent=True)
    user_input = payload.get('message') if isinstance(payload, dict) else None

    # Only non-blank strings can be embedded.
    if not isinstance(user_input, str) or not user_input.strip():
        return jsonify({"reply": "Please say something!"})

    # No data was loaded: there is nothing to compare against, and argmax on
    # an empty score matrix would fail.
    if not prompts:
        return jsonify({"reply": fallback_reply})

    # AI Logic: Find the most similar question in your database
    user_embedding = model.encode(user_input, convert_to_tensor=True)

    # Calculate similarity scores (one row, one column per stored prompt)
    cosine_scores = util.cos_sim(user_embedding, prompt_embeddings)

    # Find the top match
    best_match_index = int(cosine_scores.argmax())
    best_score = cosine_scores[0][best_match_index].item()

    # Threshold: If similarity is too low, the bot doesn't know the answer
    if best_score < 0.3:
        return jsonify({"reply": fallback_reply})

    # Return the pre-written answer
    best_answer = responses[best_match_index]

    # Strip a leading "Bot: " marker only; the same text appearing later in
    # the answer is part of the content and is left alone.
    bot_prefix = "Bot: "
    if best_answer.startswith(bot_prefix):
        clean_answer = best_answer[len(bot_prefix):]
    else:
        clean_answer = best_answer

    return jsonify({
        "reply": clean_answer,
        "score": best_score # Useful for debugging
    })

@app.route('/', methods=['GET'])
def home():
    """Health-check endpoint: report that the backend is up."""
    status_text = "Mazhava AI Backend is Running!"
    return status_text

# 5. Expose to the Internet using Ngrok
# IMPORTANT: You need an Ngrok Auth Token (Free) from https://dashboard.ngrok.com/get-started/your-authtoken
# SECURITY: never paste the token into this file — anyone who can read the
# notebook could use it. Provide it through the NGROK_AUTH_TOKEN environment
# variable instead, and revoke any token that was ever saved in source.
import os

NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "").strip()
if not NGROK_AUTH_TOKEN:
    raise RuntimeError(
        "NGROK_AUTH_TOKEN is not set. Export it as an environment variable "
        "before running this cell."
    )
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# The tunnel and the Flask server must use the same local port.
PORT = 5000

public_url = ngrok.connect(PORT).public_url
print(f"🚀 YOUR PUBLIC URL: {public_url}")
print("Copy the URL above and paste it into your React App")

# Blocks here and serves requests until the cell is interrupted.
app.run(port=PORT)

✅ Successfully loaded 250 Q&A pairs about Mazhavar Nadu.
⏳ Loading AI Model... (this takes a minute)
✅ AI Model Ready!
🚀 YOUR PUBLIC URL: https://isabell-soapless-rhoda.ngrok-free.dev
Copy the URL above and paste it into your React App
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:Press CTRL+C to quit
INFO:werkzeug:127.0.0.1 - - [07/Dec/2025 15:06:18] "OPTIONS /chat HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [07/Dec/2025 15:06:18] "POST /chat HTTP/1.1" 200 -
