In [8]:
# Install Python packages for document manipulation (python-docx), machine learning (faiss-cpu, sentence-transformers),
# and web app creation (fastapi, uvicorn), along with a tool for local server tunneling (pyngrok)
!pip install python-docx faiss-cpu sentence-transformers fastapi uvicorn pyngrok

# Update the list of software packages available on the system to ensure you get the latest versions
!apt-get update

# Install development tools needed for compiling Python extensions and handling compressed files
!apt-get install -y python3-dev zlib1g-dev


Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:4 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists... Done
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading package lists... Done
Building dependency tree... Done
Reading

In [9]:
import os
import re
import faiss
import torch
import docx
import numpy as np
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
from typing import Dict, List
from starlette.responses import JSONResponse
import uvicorn
import nest_asyncio
from google.colab import drive, userdata,files
import json
from pyngrok import ngrok

In [10]:
drive.mount('/content/drive')
nest_asyncio.apply()
# Define storage folder
storage_folder = "/content/drive/MyDrive/lifesciences/extracted_data"
os.makedirs(storage_folder, exist_ok=True)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
# Extracts structured data from .docx files, organizing sections and table content into JSON format.

def extract_data_from_docx(docx_file):
    data = []
    doc = docx.Document(docx_file)
    current_section = None

    # Process paragraphs and tables together
    elements = doc.paragraphs + doc.tables
    elements.sort(key=lambda el: el._element.getparent().index(el._element))

    for el in elements:
        if isinstance(el, docx.text.paragraph.Paragraph):
            text = el.text.strip()
            # Detect section headers using regex
            if re.match(r'^\d+(\.\d+)*\s+[A-Za-z ]+', text):
                if current_section:
                    data.append(current_section)
                current_section = {"section_name": text, "table_data": []}
        elif isinstance(el, docx.table.Table):
            table_content = []
            header_row = True  # Flag to skip the header row

            for row in el.rows:
                row_text = [cell.text.strip() for cell in row.cells if cell.text.strip()]
                if header_row:
                    header_row = False  # Skip the first row (header row)
                    continue
                if row_text and not re.match(r'^\d+$', row_text[0]):  # Ignore serial number columns
                    table_content.append(row_text)  # Store row-wise table data

            # Assign table data to the latest detected section
            if current_section and table_content:
                current_section["table_data"].extend(table_content)

    # Ensure the last section is added
    if current_section:
        data.append(current_section)

    return data



# Function to process files
def process_uploaded_file(file_name):
    if file_name.endswith('.docx'):
        extracted_data = extract_data_from_docx(file_name)
        return extracted_data
    else:
        return "Unsupported file type."



filename = "/content/drive/My Drive/lifesciences/training_documents/ProtonGlow_URS.docx"
if not os.path.exists(filename):
  print(f"File not found: {filename}")

print(f"\nProcessing file: {filename}")
extracted_data = process_uploaded_file(filename)

if isinstance(extracted_data, list) and extracted_data:
    print("\nExtracted Data:")
    print(json.dumps(extracted_data, indent=4))  # Beautify JSON output

    # Save the extracted JSON data to the specified path
    output_path = "/content/drive/MyDrive/lifesciences/extracted_data/json/extracted_data.json"
    with open(output_path, "w") as json_file:
        json.dump(extracted_data, json_file, indent=4)
    print(f"\nExtracted data saved to: {output_path}")
else:
    print("No valid data found or unsupported file type.")



Processing file: /content/drive/My Drive/lifesciences/training_documents/ProtonGlow_URS.docx

Extracted Data:
[
    {
        "section_name": "1 General Requirements",
        "table_data": [
            [
                "1.1",
                "The SCADA system must be compatible with existing hardware and software to ensure seamless integration and operation."
            ],
            [
                "1.2",
                "The system should be scalable to accommodate future expansion and increased capacity requirements without significant redesign."
            ],
            [
                "1.3",
                "System reliability is crucial; hence, it must support high uptime with redundant components to prevent service interruption."
            ],
            [
                "1.4",
                "The SCADA must ensure secure and straightforward user accessibility, employing robust authentication mechanisms."
            ],
            [
                "1.5",
      

In [12]:

# Define the output folder
output_folder = "/content/drive/MyDrive/lifesciences/extracted_data/text"
os.makedirs(output_folder, exist_ok=True)

# Create all_sections.txt with section names (without numbers)
all_sections_path = os.path.join(output_folder, "all_sections.txt")
with open(all_sections_path, "w") as section_file:
    for section in extracted_data:
        section_name = re.sub(r'^\d+(\.\d+)*\s+', '', section["section_name"])  # Remove leading numbers
        section_file.write(section_name + "\n")
print(f"Section names saved to: {all_sections_path}")

# Create individual text files for each section's table data
for section in extracted_data:
    section_name = re.sub(r'^\d+(\.\d+)*\s+', '', section["section_name"])  # Clean section name
    section_filename = os.path.join(output_folder, f"{section_name}.txt")
    if section.get("table_data"):
        with open(section_filename, "w") as table_file:
            for row in section["table_data"]:
                table_file.write(f"{row[0]}: {row[1]}\n")  # Format as "1.1: Description"

    print(f"Table data saved to: {section_filename}")

Section names saved to: /content/drive/MyDrive/lifesciences/extracted_data/text/all_sections.txt
Table data saved to: /content/drive/MyDrive/lifesciences/extracted_data/text/General Requirements.txt
Table data saved to: /content/drive/MyDrive/lifesciences/extracted_data/text/Utilites.txt
Table data saved to: /content/drive/MyDrive/lifesciences/extracted_data/text/Facility Requirements.txt
Table data saved to: /content/drive/MyDrive/lifesciences/extracted_data/text/Safety Requirements.txt
Table data saved to: /content/drive/MyDrive/lifesciences/extracted_data/text/Environmental Requirements.txt
Table data saved to: /content/drive/MyDrive/lifesciences/extracted_data/text/Calibration Requirements.txt
Table data saved to: /content/drive/MyDrive/lifesciences/extracted_data/text/Documentation.txt
Table data saved to: /content/drive/MyDrive/lifesciences/extracted_data/text/Training Requirements.txt
Table data saved to: /content/drive/MyDrive/lifesciences/extracted_data/text/Vendor and Warrant

In [13]:
# This code generates sentence embeddings for each text file in the specified folder and saves them as .npy files.
# It uses a pre-trained SentenceTransformer model to encode the text and stores the embeddings in a separate folder.

import os
import numpy as np
from sentence_transformers import SentenceTransformer

# Load model
model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-mpnet-base-v2")

# Define paths
text_folder = "/content/drive/MyDrive/lifesciences/extracted_data/text"
embedding_folder = "/content/drive/MyDrive/lifesciences/extracted_data/embeddings"

# Ensure the embedding folder exists
os.makedirs(embedding_folder, exist_ok=True)

def generate_embeddings(documents):
    print("Generating embeddings...")
    embeddings = model.encode(documents, normalize_embeddings=True, show_progress_bar=True)
    print(f"Generated embeddings for {len(documents)} documents.")
    return embeddings

# Iterate over text files
for file_name in os.listdir(text_folder):
    if file_name.endswith(".txt"):
        text_file_path = os.path.join(text_folder, file_name)
        embedding_file_path = os.path.join(embedding_folder, file_name.replace(".txt", ".npy"))

        # Read text file line by line
        with open(text_file_path, "r", encoding="utf-8") as f:
            lines = f.readlines()

        # Generate embedding for each line
        if lines:
            embeddings = generate_embeddings([line.strip() for line in lines if line.strip()])  # Strip empty lines
            # Save embeddings as .npy file
            np.save(embedding_file_path, embeddings)
            print(f"Saved: {embedding_file_path}")
        else:
            print(f"Skipping empty file: {file_name}")


Generating embeddings...


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Generated embeddings for 67 documents.
Saved: /content/drive/MyDrive/lifesciences/extracted_data/embeddings/all_sections.npy
Skipping empty file: General_Requirements.txt
Skipping empty file: Utilites.txt
Skipping empty file: Facility_Requirements.txt
Skipping empty file: Safety_Requirements.txt
Skipping empty file: Environmental_Requirements.txt
Skipping empty file: Calibration_Requirements.txt
Skipping empty file: Documentation.txt
Skipping empty file: Training_Requirements.txt
Skipping empty file: Vendor_and_Warranty_Specifications.txt
Generating embeddings...


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Generated embeddings for 100 documents.
Saved: /content/drive/MyDrive/lifesciences/extracted_data/embeddings/Automation.npy
Skipping empty file: Platform_and_Networking_Requirements.txt
Skipping empty file: Hardware_Requirements.txt
Skipping empty file: Software_Requirements.txt
Skipping empty file: System_Performance_Requirements.txt
Skipping empty file: Historical_Data_Trending.txt
Skipping empty file: Alarm_and_Events.txt
Skipping empty file: Operational_Requirements.txt
Skipping empty file: User_Roles_and_Access_Requirements.txt
Skipping empty file: Password_Rules.txt
Skipping empty file: Time_Synchronization.txt
Skipping empty file: Security_Requirements.txt
Skipping empty file: Anti_Virus_and_Patching.txt
Skipping empty file: Electronic_Signatures_(21_CFR_11).txt
Skipping empty file: Electronic_Records_(21_CFR_11)_-_Audit_Trail.txt
Skipping empty file: Electronic_Records_(21_CFR_11)_-_Data_Retention.txt
Skipping empty file: Electronic_Records_(21_CFR_11)_-_Backup_and_Disaster_Rec

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Generated embeddings for 88 documents.
Saved: /content/drive/MyDrive/lifesciences/extracted_data/embeddings/Process_Control.npy
Skipping empty file: Temperature_Control.txt
Skipping empty file: Pressure_Control.txt
Skipping empty file: pH_Control.txt
Generating embeddings...


Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Generated embeddings for 43 documents.
Saved: /content/drive/MyDrive/lifesciences/extracted_data/embeddings/Vessels.npy
Skipping empty file: Design_and_Dimensional_Requirements.txt
Skipping empty file: Material_of_Construction.txt
Skipping empty file: Construction_Compliance.txt
Skipping empty file: Temperature_and_Pressure_Controls.txt
Skipping empty file: Agitation_and_Mixing_Requirements.txt
Skipping empty file: Sealing_and_Closure_Mechanisms.txt
Skipping empty file: Safety_Features_and_Protocols.txt
Skipping empty file: Instrumentation_and_Monitoring.txt
Skipping empty file: Cleaning_and_Maintenance.txt
Skipping empty file: Regulatory_and_Compliance_Standards.txt
Skipping empty file: Documentation_and_Record-Keeping.txt
Skipping empty file: Surface_Finish_Requirements.txt
Skipping empty file: Structural_Integrity.txt
Generating embeddings...


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Generated embeddings for 105 documents.
Saved: /content/drive/MyDrive/lifesciences/extracted_data/embeddings/Machine_Safety.npy
Skipping empty file: Emergency_Stop_Systems.txt
Skipping empty file: Safety_Doors_and_Guards.txt
Skipping empty file: Light_Gates_and_Safety_Sensors.txt
Skipping empty file: Safety_Control_Systems.txt
Skipping empty file: Operator_Training_and_Signage.txt
Skipping empty file: Regulatory_Compliance.txt
Generating embeddings...


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Generated embeddings for 126 documents.
Saved: /content/drive/MyDrive/lifesciences/extracted_data/embeddings/Process_Safety.npy
Skipping empty file: Reactor_Design_and_Safety_Features.txt
Skipping empty file: Chemical_Hazard_Management.txt
Skipping empty file: Process_Monitoring_and_Control.txt
Skipping empty file: Safety_in_Chemical_Reactions.txt
Skipping empty file: Compliance_and_Standards.txt
Generating embeddings...


Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Generated embeddings for 91 documents.
Saved: /content/drive/MyDrive/lifesciences/extracted_data/embeddings/General Requirements.npy
Skipping empty file: Facility Requirements.txt
Skipping empty file: Safety Requirements.txt
Skipping empty file: Environmental Requirements.txt
Skipping empty file: Calibration Requirements.txt
Skipping empty file: Training Requirements.txt
Skipping empty file: Vendor and Warranty Specifications.txt
Skipping empty file: Platform and Networking Requirements.txt
Skipping empty file: Hardware Requirements.txt
Skipping empty file: Software Requirements.txt
Skipping empty file: System Performance Requirements.txt
Skipping empty file: Historical Data Trending.txt
Skipping empty file: Alarm and Events.txt
Skipping empty file: Operational Requirements.txt
Skipping empty file: User Roles and Access Requirements.txt
Skipping empty file: Password Rules.txt
Skipping empty file: Time Synchronization.txt
Skipping empty file: Security Requirements.txt
Skipping empty fil

Batches:   0%|          | 0/3 [00:00<?, ?it/s]

Generated embeddings for 88 documents.
Saved: /content/drive/MyDrive/lifesciences/extracted_data/embeddings/Process Control.npy
Skipping empty file: Temperature Control.txt
Skipping empty file: Pressure Control.txt
Skipping empty file: pH Control.txt
Skipping empty file: Design and Dimensional Requirements.txt
Skipping empty file: Material of Construction.txt
Skipping empty file: Construction Compliance.txt
Skipping empty file: Temperature and Pressure Controls.txt
Skipping empty file: Agitation and Mixing Requirements.txt
Skipping empty file: Sealing and Closure Mechanisms.txt
Skipping empty file: Safety Features and Protocols.txt
Skipping empty file: Instrumentation and Monitoring.txt
Skipping empty file: Cleaning and Maintenance.txt
Skipping empty file: Regulatory and Compliance Standards.txt
Skipping empty file: Documentation and Record-Keeping.txt
Skipping empty file: Surface Finish Requirements.txt
Skipping empty file: Structural Integrity.txt
Generating embeddings...


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Generated embeddings for 105 documents.
Saved: /content/drive/MyDrive/lifesciences/extracted_data/embeddings/Machine Safety.npy
Skipping empty file: Emergency Stop Systems.txt
Skipping empty file: Safety Doors and Guards.txt
Skipping empty file: Light Gates and Safety Sensors.txt
Skipping empty file: Safety Control Systems.txt
Skipping empty file: Operator Training and Signage.txt
Skipping empty file: Regulatory Compliance.txt
Generating embeddings...


Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Generated embeddings for 126 documents.
Saved: /content/drive/MyDrive/lifesciences/extracted_data/embeddings/Process Safety.npy
Skipping empty file: Reactor Design and Safety Features.txt
Skipping empty file: Chemical Hazard Management.txt
Skipping empty file: Process Monitoring and Control.txt
Skipping empty file: Safety in Chemical Reactions.txt
Skipping empty file: Compliance and Standards.txt


In [14]:
# This code generates FAISS indices from pre-existing sentence embeddings and saves them as .faiss files.
# It loads each embedding, normalizes it for cosine similarity, and creates an index using FAISS's inner product approach.

def build_faiss_index(embeddings):
    print(embeddings.shape)
    print(embeddings)
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatIP(dimension)  # Inner Product for cosine similarity
    faiss.normalize_L2(embeddings)  # Normalize embeddings for cosine similarity
    index.add(embeddings)
    print(f"FAISS index built with {index.ntotal} vectors.")
    return index

# Define paths
embedding_folder = "/content/drive/MyDrive/lifesciences/extracted_data/embeddings"
index_folder = "/content/drive/MyDrive/lifesciences/extracted_data/indices"

# Ensure the index folder exists
os.makedirs(index_folder, exist_ok=True)

# Iterate over embedding files
for file_name in os.listdir(embedding_folder):
    if file_name.endswith(".npy"):
        embedding_file_path = os.path.join(embedding_folder, file_name)
        index_file_path = os.path.join(index_folder, file_name.replace(".npy", ".faiss"))

        # Load embedding
        embeddings = np.load(embedding_file_path)
        index = build_faiss_index(embeddings)

        # Save the FAISS index
        faiss.write_index(index, index_file_path)
        print(f"Saved index: {index_file_path}")


(67, 768)
[[ 0.01158482  0.00627963 -0.00232812 ...  0.00890424 -0.06893508
  -0.0078697 ]
 [-0.02814495 -0.00369864 -0.01223859 ...  0.01970144  0.00069355
  -0.04721618]
 [-0.03634675  0.0563109  -0.00440414 ... -0.00384795 -0.06290505
  -0.00643051]
 ...
 [-0.07224856 -0.01723876 -0.00183229 ...  0.03353939 -0.05956593
  -0.00745108]
 [-0.04296942 -0.04180515 -0.00335171 ...  0.0101522  -0.02170281
  -0.03594915]
 [-0.0074975   0.03984377 -0.0040993  ...  0.03248137  0.00873847
  -0.00735179]]
FAISS index built with 67 vectors.
Saved index: /content/drive/MyDrive/lifesciences/extracted_data/indices/all_sections.faiss
(100, 768)
[[-0.01329423 -0.03994859 -0.00340861 ... -0.0012974  -0.05363689
  -0.02402186]
 [-0.04070505 -0.01480285 -0.00294345 ...  0.01307529 -0.07201272
  -0.04572273]
 [-0.05249444 -0.06674421 -0.00351468 ...  0.03062927 -0.06661747
  -0.01391998]
 ...
 [-0.05485249 -0.03054994 -0.00257907 ...  0.0140585  -0.02423945
  -0.01958125]
 [-0.04757617 -0.02740052 -0.001

In [None]:
# This FastAPI application serves a query endpoint for searching relevant sections from a FAISS index.
# It uses a pre-trained sentence-transformers model to generate embeddings for user queries and searches
# both a general section index and section-specific indices to return relevant section data.
# The application is exposed via Ngrok for remote access.


# Define the FastAPI app
app = FastAPI()

# Paths for FAISS indices and section files
index_folder = "/content/drive/MyDrive/lifesciences/extracted_data/indices"
all_sections_index_path = os.path.join(index_folder, "all_sections.faiss")
section_file_path = "/content/drive/MyDrive/lifesciences/extracted_data/text/all_sections.txt"
section_folder_path="/content/drive/MyDrive/lifesciences/extracted_data/text/"

# Load the sentence-transformers model for query embeddings
try:
    model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
    print("✅ Sentence Transformer model loaded successfully.")
except Exception as e:
    print(f"❌ Error loading the sentence transformer model: {e}")
    raise RuntimeError("Model loading failed")

# Load the all_sections.faiss index
try:
    if os.path.exists(all_sections_index_path):
        all_sections_index = faiss.read_index(all_sections_index_path)
        print(f"✅ Loaded FAISS index from: {all_sections_index_path}")
        # Check the number of vectors and the dimensions
        print("Number of vectors:", all_sections_index.ntotal)
        print("Dimensions of each vector:", all_sections_index.d)

    else:
        print(f"❌ FAISS index file not found: {all_sections_index_path}")
        raise FileNotFoundError("FAISS index file missing")
except Exception as e:
    print(f"❌ Error loading FAISS index: {e}")
    raise RuntimeError("Failed to load FAISS index")

# Load section names from the text file
with open(section_file_path, "r", encoding="utf-8") as f:
    section_names = [line.strip() for line in f.readlines()]

# Define the structure of the input data (query)
class QueryInput(BaseModel):
    inputs: str

def encode_query(query):
    print(f"Encoding query: {query}")
    return model.encode([query], normalize_embeddings=True).astype('float32')

# Function to search for the most relevant section from the all_sections.faiss index
def search_index(query: str, index: faiss.Index, k, original_text_list):
    try:
        print(f"🔎 Generating embedding for the query: {query}")
        query_embedding =  encode_query(query)
        print(f"🔍 Searching FAISS index for the query...")
        distances, indices = index.search(query_embedding, k)  # Find the closest section (top 1 result)
        print(f"Distances: {distances}")
        print(f"Indices: {indices}")

        for idx, dist in zip(indices[0], distances[0]):
            print(f"Doc Index: {idx}, Similarity: {dist}")
            print(f"Document: {original_text_list[idx]}")
            print("-" * 50)

        results = [original_text_list[i] for i, d in zip(indices[0], distances[0])]
        print(f"Found {len(results)} matching documents.")
        return results if results else ["No relevant documents found."]
    except Exception as e:
        print(f"❌ Error during search: {e}")
        raise HTTPException(status_code=500, detail="Error during FAISS search")

# Function to get the relevant section file
def get_section_file(section_name: str):
    try:
        section_file_path = os.path.join(index_folder, f"{section_name}.faiss")
        print(f"📂 Looking for section file: {section_file_path}")

        if os.path.exists(section_file_path):
            print(f"✅ Section file found: {section_file_path}")
            return faiss.read_index(section_file_path)
        else:
            print(f"❌ Section file not found: {section_file_path}")
            return None
    except Exception as e:
        print(f"❌ Error accessing section file: {e}")
        raise HTTPException(status_code=500, detail="Error accessing section file")

# FastAPI Endpoints
@app.on_event("startup")
async def startup_event():
    ngrok.set_auth_token(userdata.get('ngrok_auth_token'))  # Replace with actual token if needed
    public_url = ngrok.connect(8000)
    print(f"API available at: {public_url}")

@app.post("/query")
async def query_section(query: QueryInput):
    try:
        print(f"📥 Received query: {query.inputs}")

        # Step 1: Search for the relevant section in the FAISS index
        section_name = search_index(query.inputs, all_sections_index, 1, section_names)
        print(f"✅ section_name {section_name}")
        # Step 2: Load the relevant section's FAISS index
        section_index_file = get_section_file(section_name[0])

        if not section_index_file:
            print(f"❌ Section '{section_name}' not found.")
            raise HTTPException(status_code=404, detail="Section not found.")


      # Load section names from the text file
        with open(os.path.join(section_folder_path, f"{section_name[0]}.txt"), "r", encoding="utf-8") as f:
            section_content = [line.strip() for line in f.readlines()]

        results = search_index(query.inputs, section_index_file, 10, section_content)
        print(f"✅ result {results}")

        # Step 5: Return the relevant information from the section index
        print(f"✅ Returning results: Section {section_name}, {results}")
        combined_results = "\n".join(results)
        response = [{"generated_text": combined_results}]
        return response

    except Exception as e:
        print(f"❌ Error handling query: {e}")
        response = [{"generated_text": "Sorry, no training data available for this query"}]
        return response

# Run the FastAPI server with Uvicorn and expose it via Ngrok
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)


ERROR:asyncio:Task exception was never retrieved
future: <Task finished name='Task-13' coro=<Server.serve() done, defined at /usr/local/lib/python3.11/dist-packages/uvicorn/server.py:68> exception=KeyboardInterrupt()>
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/main.py", line 579, in run
    server.run()
  File "/usr/local/lib/python3.11/dist-packages/uvicorn/server.py", line 66, in run
    return asyncio.run(self.serve(sockets=sockets))
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 30, in run
    return loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 92, in run_until_complete
    self._run_once()
  File "/usr/local/lib/python3.11/dist-packages/nest_asyncio.py", line 133, in _run_once
    handle._run()
  File "/usr/lib/python3.11/asyncio/events.py", line 84, in _run
    s

✅ Sentence Transformer model loaded successfully.
✅ Loaded FAISS index from: /content/drive/MyDrive/lifesciences/extracted_data/indices/all_sections.faiss
Number of vectors: 67
Dimensions of each vector: 768


INFO:     Started server process [19635]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


API available at: NgrokTunnel: "https://3770-35-197-82-22.ngrok-free.app" -> "http://localhost:8000"
📥 Received query: section : abcd
🔎 Generating embedding for the query: section : abcd
Encoding query: section : abcd
🔍 Searching FAISS index for the query...
Distances: [[0.38790077]]
Indices: [[6]]
Doc Index: 6, Similarity: 0.38790076971054077
Document: Documentation
--------------------------------------------------
Found 1 matching documents.
✅ section_name ['Documentation']
📂 Looking for section file: /content/drive/MyDrive/lifesciences/extracted_data/indices/Documentation.faiss
❌ Section file not found: /content/drive/MyDrive/lifesciences/extracted_data/indices/Documentation.faiss
❌ Section '['Documentation']' not found.
❌ Error handling query: 404: Section not found.
INFO:     95.223.75.30:0 - "POST /query HTTP/1.1" 200 OK
📥 Received query: Tell me some thing about Process Control
🔎 Generating embedding for the query: Tell me some thing about Process Control
Encoding query: Tell m