In [None]:
#cloud-id="be3462376d494497ac6a0785c37e27b5"
#colab-key="aC1ZcGZKZ0JMbE9ZUkY5a0RkaTc6Q1gtSTVVMTQ4N0xDTGRlRUk4TXFBQQ=="

In [1]:
print("⏳ Step 1: Performing a clean installation of all dependencies...")

# First, uninstall any existing conflicting versions to ensure a clean state
!pip uninstall -y sentence-transformers transformers huggingface-hub

# Now, install the latest stable versions which are compatible with each other
!pip install -q "sentence-transformers" "transformers" "huggingface-hub"
!pip install -q flask pyngrok elasticsearch==7.17.9

print("✅ Dependencies installed cleanly.")

⏳ Step 1: Performing a clean installation of all dependencies...
Found existing installation: sentence-transformers 2.7.0
Uninstalling sentence-transformers-2.7.0:
  Successfully uninstalled sentence-transformers-2.7.0
Found existing installation: transformers 4.31.0
Uninstalling transformers-4.31.0:
  Successfully uninstalled transformers-4.31.0
Found existing installation: huggingface-hub 0.23.0
Uninstalling huggingface-hub-0.23.0:
  Successfully uninstalled huggingface-hub-0.23.0
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m470.2/470.2 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m101.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m558.8/558.8 kB[0m [31m35.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m79.5 MB/s[0m eta [36m0:00:00[0m
[?25h✅ Dependencies installed cl

In [2]:
import os
from getpass import getpass

import torch
from elasticsearch import Elasticsearch
from pyngrok import ngrok
from transformers import AutoTokenizer, AutoModel

print("⏳ Step 2: Connecting to services...")


def _read_setting(name, secret=True):
    """Return the configuration value ``name`` without hardcoding it here.

    The environment variable ``name`` is used when it is set and non-empty;
    otherwise the user is prompted for it.

    Args:
        name: Environment variable name, also used in the prompt text.
        secret: When true the prompt uses ``getpass`` so the typed value is
            not echoed into the cell output; otherwise ``input`` is used.

    Returns:
        The value as a string with surrounding whitespace removed.
    """
    value = os.environ.get(name)
    if not value:
        prompt = f"Enter {name}: "
        value = getpass(prompt) if secret else input(prompt)
    return value.strip()


# Credentials are resolved at runtime so that no key or token is ever saved
# inside the notebook file (or its outputs) when it is shared or committed.
ENDPOINT_URL = _read_setting("ELASTIC_ENDPOINT_URL", secret=False)
API_KEY = _read_setting("ELASTIC_API_KEY")
NGROK_AUTHTOKEN = _read_setting("NGROK_AUTHTOKEN")


# --- Configure ngrok ---
# Use pyngrok's own API instead of building a shell command: the token is not
# passed through a shell string and a failure raises instead of being ignored.
ngrok.set_auth_token(NGROK_AUTHTOKEN)


# --- Connect to your Elastic Cloud deployment ---
try:
    # Use the Endpoint URL and API Key to connect
    es_client = Elasticsearch(
        hosts=[ENDPOINT_URL],
        api_key=API_KEY
    )
    # Test the connection by getting cluster info
    es_client.info()
    print("✅ Successfully connected to Elastic Cloud.")

except Exception as e:
    print(f"❌ Error connecting to Elastic Cloud: {e}")
    print("Please double-check your Endpoint URL and API Key.")
    # Re-raise so "Run All" stops here; the later cells need a working
    # `es_client` and would otherwise fail with a less helpful error.
    raise

⏳ Step 2: Connecting to services...
✅ Successfully connected to Elastic Cloud.


In [3]:
from transformers import AutoTokenizer, AutoModel
import torch

print("\n⏳ Step 3: Loading NLP model and indexing data...")

# Load tokenizer and model from Hugging Face Hub.
# The checkpoint id is spelled once so the tokenizer and the model can never
# be loaded from two different checkpoints by accident.
MODEL_NAME = 'sentence-transformers/all-mpnet-base-v2'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)

# Collapse per-token embeddings into one vector per input via a masked mean
def mean_pooling(model_output, attention_mask):
    """Average the token embeddings, counting only positions the mask keeps.

    Args:
        model_output: Indexable model result whose element ``0`` holds the
            token embeddings (assumed to be shaped batch x tokens x features —
            TODO confirm against the model in use).
        attention_mask: Tensor with one entry per token; it is broadcast along
            a new trailing dimension to the shape of the token embeddings.

    Returns:
        Tensor obtained by summing the masked embeddings over dimension 1 and
        dividing by the summed mask.
    """
    hidden_states = model_output[0]
    mask = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    masked_total = (hidden_states * mask).sum(dim=1)
    # The lower bound keeps the division defined when a row's mask is all zeros.
    kept_count = mask.sum(dim=1).clamp(min=1e-9)
    return masked_total / kept_count

# Text -> embedding entry point used for both indexing and querying
def encode(text):
    """Turn ``text`` into a plain Python list of floats.

    The text is tokenized (with padding and truncation enabled) into PyTorch
    tensors, passed through the module-level ``model`` with gradient tracking
    disabled, and mean-pooled with ``mean_pooling``.

    Args:
        text: Input handed straight to the module-level ``tokenizer``
            (presumably a single string — only the first pooled row is
            returned).

    Returns:
        The first row of the pooled embeddings as a list.
    """
    batch = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        outputs = model(**batch)
    pooled = mean_pooling(outputs, batch['attention_mask'])
    return pooled[0].tolist()

print("✅ NLP model loaded.")
print("   - Indexing data into Elasticsearch...")

# Sample Q&A data
documents = [
    {"question": "What is the capital of France?", "answer": "The capital of France is Paris."},
    {"question": "How does a car engine work?", "answer": "An engine converts fuel into mechanical energy through combustion."},
    {"question": "What is solar power?", "answer": "Solar power is energy from the sun that is converted into thermal or electrical energy."},
    {"question": "Who wrote 'Hamlet'?", "answer": "'Hamlet' was written by William Shakespeare."},
    {"question": "How can I fix my vehicle?", "answer": "Automobile repair depends on the specific issue, from changing a tire to servicing the engine."}
]

index_name = "qa_index"
# `question_vector` stores the embedding of the question text; `dims` must
# equal the length of the list returned by encode().
index_mapping = {
    "properties": {
        "question_vector": {"type": "dense_vector", "dims": 768},
        "question": {"type": "text"},
        "answer": {"type": "text"}
    }
}

# Create the index.
# Any existing index of the same name is dropped first so that re-running
# this cell does not accumulate duplicate copies of the sample documents.
if es_client.indices.exists(index=index_name):
    es_client.indices.delete(index=index_name)
es_client.indices.create(index=index_name, mappings=index_mapping)

# Index each document.
# `document=` replaces the deprecated `body=` keyword, which made the client
# emit a DeprecationWarning on every call.
for doc in documents:
    question_vector = encode(doc["question"])
    es_client.index(index=index_name, document={
        "question": doc["question"],
        "answer": doc["answer"],
        "question_vector": question_vector
    })

print("✅ Data indexed successfully.")


⏳ Step 3: Loading NLP model and indexing data...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


✅ NLP model loaded.
   - Indexing data into Elasticsearch...


  es_client.index(index=index_name, body={


✅ Data indexed successfully.


In [5]:
from flask import Flask, request, render_template_string
from pyngrok import ngrok

print("\n⏳ Step 4: Creating and starting the Flask web application...")
# The Flask application object: the route defined further down registers
# itself on it, and the last statement of this cell serves it.
app = Flask(__name__)

# --- Search Function ---
def search_semantic(query):
    """Rank the indexed questions by similarity to ``query``.

    The query text is embedded with ``encode`` and sent to Elasticsearch as a
    ``script_score`` query over all documents of ``index_name``. The script
    adds 1.0 to the cosine similarity between the query vector and each
    document's ``question_vector``.

    Args:
        query: Text to search for.

    Returns:
        A list with one dict per hit, holding the hit's ``score`` (the raw
        script score, i.e. cosine similarity + 1.0), ``question`` and
        ``answer``.
    """
    similarity_script = {
        "source": "cosineSimilarity(params.query_vector, 'question_vector') + 1.0",
        "params": {"query_vector": encode(query)},
    }
    es_query = {
        "script_score": {
            "query": {"match_all": {}},
            "script": similarity_script,
        }
    }
    response = es_client.search(index=index_name, query=es_query)

    matches = []
    for hit in response["hits"]["hits"]:
        source = hit["_source"]
        matches.append({
            "score": hit["_score"],
            "question": source["question"],
            "answer": source["answer"],
        })
    return matches

# --- HTML Template ---
# Jinja template for the single page of the app. It reads two variables:
#   * `query`   - echoed back into the search box.
#   * `results` - iterable of items exposing `question`, `answer` and `score`.
# The displayed similarity is `score - 1`, formatted to four decimals; this
# undoes the `+ 1.0` that the search script adds to the cosine similarity.
# When `results` is empty/None but `query` is set, "No results found." is shown.
HTML_TEMPLATE = """
<!DOCTYPE html><html lang="en"><head><title>Semantic Q&A Search</title>
<style>
    body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif; margin: 40px; background-color: #f4f4f9; color: #333; }
    .container { max-width: 800px; margin: 0 auto; background-color: #fff; padding: 30px; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }
    h1 { color: #2c3e50; text-align: center; } form { display: flex; margin-bottom: 30px; }
    input[type="text"] { flex-grow: 1; padding: 12px; border: 2px solid #ddd; border-radius: 6px 0 0 6px; font-size: 16px; }
    input[type="submit"] { padding: 12px 25px; border: none; background-color: #3498db; color: white; border-radius: 0 6px 6px 0; font-size: 16px; font-weight: bold; cursor: pointer; }
    .result { border-bottom: 1px solid #eee; padding: 15px 0; } .result:last-child { border-bottom: none; }
    .result h3 { margin: 0 0 5px 0; color: #2980b9; } .result p { margin: 0; }
    .result .score { font-size: 12px; color: #7f8c8d; font-weight: bold; }
</style></head>
<body><div class="container">
    <h1>Semantic Q&A Search (Cloud Ver.)</h1>
    <form method="post"><input type="text" name="query" placeholder="Ask a question..." value="{{ query }}"><input type="submit" value="Search"></form>
    {% if results %}{% for result in results %}<div class="result">
        <h3>{{ result.question }}</h3><p>{{ result.answer }}</p>
        <p class="score">Similarity Score: {{ "%.4f"|format(result.score - 1) }}</p>
    </div>{% endfor %}{% elif query %}<p class="no-results">No results found.</p>{% endif %}
</div></body></html>
"""

# --- Main Route ---
@app.route('/', methods=['GET', 'POST'])
def home():
    """Serve the search page and, on a form submission, its results.

    A GET request renders the page with an empty query and no results. A POST
    request reads the ``query`` form field; a non-empty value is passed to
    ``search_semantic`` and the hits are rendered, while an empty value is
    treated like a GET.

    Returns:
        The rendered ``HTML_TEMPLATE``.
    """
    submitted = request.method == 'POST'
    query = request.form.get('query', '') if submitted else ''
    results = search_semantic(query) if query else None
    return render_template_string(HTML_TEMPLATE, results=results, query=query)

print("✅ Flask application created.")

# --- Start ngrok tunnel and run app ---
# One constant for both the tunnel target and the Flask server, so the two
# can never point at different ports.
PORT = 5000

tunnel = ngrok.connect(PORT)
# ngrok.connect() returns a tunnel object; its `public_url` attribute is the
# address to open. Formatting the object itself printed its repr
# ('NgrokTunnel: "https://..." -> "http://localhost:5000"') instead of a
# clean, clickable URL.
public_url = tunnel.public_url
print(f"\n\n🚀🚀🚀 Your App is LIVE! Open this URL in your browser: {public_url} 🚀🚀🚀")
try:
    # Blocks until the server is stopped (e.g. by interrupting the cell).
    app.run(port=PORT)
finally:
    # Close the tunnel on shutdown so re-running this cell does not leave
    # stale tunnels open.
    ngrok.disconnect(public_url)


⏳ Step 4: Creating and starting the Flask web application...
✅ Flask application created.


🚀🚀🚀 Your App is LIVE! Open this URL in your browser: NgrokTunnel: "https://e0418bf4d594.ngrok-free.app" -> "http://localhost:5000" 🚀🚀🚀
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [05/Aug/2025 22:17:50] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [05/Aug/2025 22:17:51] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [05/Aug/2025 22:17:57] "POST / HTTP/1.1" 200 -
