In [1]:
import os

# Run the notebook from the project root: a later cell adds the working
# directory to sys.path so that `src.config` can be imported.
# The default path has no drive letter, so Windows resolves it against the
# current drive. Set AVIATION_CHATBOT_ROOT to use a checkout that lives
# elsewhere instead of editing this cell.
PROJECT_ROOT = os.environ.get(
    "AVIATION_CHATBOT_ROOT", r"\Visual Studio practice\aviation-chatbot"
)
os.chdir(PROJECT_ROOT)
print(os.getcwd())

d:\Visual Studio practice\aviation-chatbot


In [2]:
from pathlib import Path
import json
import faiss
import numpy as np
import sys
import os

# Make the current working directory importable so `src.config` resolves.
# This relies on the working directory already being the project root.
if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())

from src.config import CHUNKS_PATH, FAISS_INDEX_PATH

# Load the chunk records (JSON) that retrieval results are looked up in.
with open(CHUNKS_PATH, "r", encoding="utf-8") as f:
    chunks = json.load(f)

print("Chunks loaded:", len(chunks))

# Load the FAISS index; the path is passed through str() before being handed
# to faiss.read_index.
index = faiss.read_index(str(FAISS_INDEX_PATH))
print("FAISS index loaded, size:", index.ntotal)

# Retrieval uses the ids returned by the index as positions into `chunks`,
# so a stale or mismatched pair of files would return the wrong text (or
# raise IndexError) later on. Fail here, where the cause is obvious.
assert index.ntotal == len(chunks), (
    f"FAISS index has {index.ntotal} vectors but {len(chunks)} chunks were "
    "loaded; rebuild the index so both files come from the same run"
)


Chunks loaded: 4434
FAISS index loaded, size: 4434


In [3]:
from sentence_transformers import SentenceTransformer
from src.config import EMBEDDING_MODEL_NAME

# Sentence-embedding model named by EMBEDDING_MODEL_NAME in src.config; the
# retrieval function uses it to turn a query into a vector for the FAISS search.
# NOTE(review): presumably this has to be the same model the index was built
# with — confirm against the indexing code.
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
def retrieve_chunks(query, top_k=5):
    """Return the chunks whose embeddings are nearest to ``query``.

    Args:
        query: Text to search for. It is embedded with the notebook-level
            ``embedding_model``.
        top_k: Maximum number of neighbours to request from the FAISS
            ``index``.

    Returns:
        A list of entries from ``chunks`` in the order the index returned
        them. The list is shorter than ``top_k`` when the index yields fewer
        hits.
    """
    query_vectors = np.asarray(embedding_model.encode([query]), dtype="float32")
    _distances, neighbour_ids = index.search(query_vectors, top_k)

    # FAISS pads the id row with -1 when it finds fewer than top_k neighbours.
    # Indexing chunks[-1] would silently return the last chunk, so skip those.
    return [chunks[int(i)] for i in neighbour_ids[0] if i >= 0]


In [5]:
# Retrieval-only check: no LLM is involved in this cell.
# Alternative query to try:
#   "What are the responsibilities of airport apron control?"
query = "What is SCADA and how it works?"

retrieved = retrieve_chunks(query, top_k=3)

# Print where each hit came from, followed by at most 300 characters of it.
separator = "-" * 80
for hit in retrieved:
    print(f"[{hit['document_name']} | Page {hit['page_number']}]")
    print(hit["text"][:300])
    print(separator)


[scada_manual.pdf | Page 15]
Unit 2:
What Is SCADA?
--------------------------------------------------------------------------------
[scada_manual.pdf | Page 185]
Appendix B:
Glossary of
SCADA Terminology
--------------------------------------------------------------------------------
[scada_manual.pdf | Page 8]
Since the first edition was published, several software products have been
developed that are called "SCADA" packages. It is important for the
reader of this book to be aware that while these software products may be
incorporated as parts of a SCAD A system, the definition of SCADA used
in this book
--------------------------------------------------------------------------------


In [6]:
def answer_from_context(query, retrieved_chunks):
    """Format the question and the retrieved text as one plain string.

    No language model is called. The ``"text"`` field of every chunk is
    joined with blank lines and placed under a ``Relevant Information:``
    heading that follows the question.

    Args:
        query: The question text.
        retrieved_chunks: Iterable of mappings that each have a ``"text"`` key.

    Returns:
        The formatted string.
    """
    passages = (chunk["text"] for chunk in retrieved_chunks)
    context = "\n\n".join(passages)
    return f"Question: {query}\n\nRelevant Information:\n{context}"


In [7]:
# Baseline without an LLM: the question followed by the raw retrieved text.
print(answer_from_context(query, retrieved))


Question: What is SCADA and how it works?

Relevant Information:
Unit 2:
What Is SCADA?

Appendix B:
Glossary of
SCADA Terminology

Since the first edition was published, several software products have been
developed that are called "SCADA" packages. It is important for the
reader of this book to be aware that while these software products may be
incorporated as parts of a SCAD A system, the definition of SCADA used
in this book is much broader than the definition used by the
manufacturers of these software packages.


In [8]:
from langchain_community.llms import Ollama
from src.config import LLM_MODEL_NAME

# LLM client used by generate_answer; the model name comes from src.config.
# NOTE(review): temperature=0.1 is presumably chosen to keep answers close to
# the supplied context — confirm.
# NOTE(review): the saved cell output echoes this call the way a Python warning
# does; likely a deprecation notice for this Ollama class — check the full
# warning text before upgrading LangChain.
llm = Ollama(
    model=LLM_MODEL_NAME,
    temperature=0.1
)


  llm = Ollama(


In [9]:
# SYSTEM_PROMPT is interpolated at the top of the prompt built in generate_answer.
from src.config import SYSTEM_PROMPT
# Confirmation message only; the prompt text itself is not printed.
print("System prompt loaded from config")


In [10]:
def generate_answer(query, retrieved_chunks):
    """Ask the LLM to answer ``query`` from the retrieved chunks.

    Each chunk is rendered as a ``(Source: <document>, Page <n>)`` line
    followed by its text. The rendered chunks are joined with blank lines and
    placed between ``SYSTEM_PROMPT`` and the question in a single prompt.

    Args:
        query: The question text.
        retrieved_chunks: Iterable of mappings with ``"document_name"``,
            ``"page_number"`` and ``"text"`` keys.

    Returns:
        Whatever ``llm.invoke`` returns for the assembled prompt.
    """
    cited_passages = []
    for chunk in retrieved_chunks:
        cited_passages.append(
            f"(Source: {chunk['document_name']}, Page {chunk['page_number']})\n{chunk['text']}"
        )
    context = "\n\n".join(cited_passages)

    prompt = f"""
{SYSTEM_PROMPT}

Context:
{context}

Question:
{query}

Answer:
"""

    return llm.invoke(prompt)


In [11]:
def retrieve_chunks(query, top_k=3):
    """Return the chunks whose embeddings are nearest to ``query``.

    This redefinition replaces the ``retrieve_chunks`` defined earlier in the
    notebook. The only signature difference is the default ``top_k``, which
    is 3 here and 5 in the earlier definition.

    Args:
        query: Text to search for. It is embedded with the notebook-level
            ``embedding_model``.
        top_k: Maximum number of neighbours to request from the FAISS
            ``index``.

    Returns:
        A list of entries from ``chunks`` in the order the index returned
        them. The list is shorter than ``top_k`` when the index yields fewer
        hits.
    """
    query_vectors = np.asarray(embedding_model.encode([query]), dtype="float32")
    _distances, neighbour_ids = index.search(query_vectors, top_k)

    # FAISS pads the id row with -1 when it finds fewer than top_k neighbours.
    # Indexing chunks[-1] would silently return the last chunk, so skip those.
    return [chunks[int(i)] for i in neighbour_ids[0] if i >= 0]


In [12]:
# Sanity check that the kernel is still running from the project root.
# (`os` was already imported in an earlier cell; re-importing it is harmless.)
import os
print(os.getcwd())

d:\Visual Studio practice\aviation-chatbot


In [13]:
# End-to-end check: retrieve the top 3 chunks, then have the LLM answer from them.
# Alternative queries kept for manual testing (uncomment one to replace `query`):
# query = "What are the responsibilities of apron control?"
# query = "What are aviation safety regulations"
# query = "who is Dhoni?"
query = "What is SCADA and how it works?"
retrieved = retrieve_chunks(query, top_k=3)

answer = generate_answer(query, retrieved)
print(answer)


According to the provided context (Source: scada_manual.pdf, Page 15), Unit 2 defines SCADA as:

"SCADA stands for Supervisory Control And Data Acquisition. It is a system that monitors and controls industrial processes from a remote location."

As for how it works, there isn't a specific explanation in this section of the manual. However, we can infer that SCADA systems typically involve sensors or data acquisition devices that collect data from various sources, which is then transmitted to a central control center where operators can monitor and control the process remotely.

For more detailed information on SCADA terminology, you may refer to Appendix B: Glossary of SCADA Terminology (Source: scada_manual.pdf, Page 185).
