In [6]:
from pathlib import Path
import os

def find_project_root(start: Path, marker: str = "src") -> Path:
    """Return the nearest directory at or above ``start`` that contains ``marker``.

    The previous logic (``Path().resolve().parent`` followed by ``os.chdir``)
    moved one directory higher every time the cell was re-run, because the
    working directory had already been changed by the earlier run. Searching
    for a marker directory gives the same answer no matter how many times the
    cell is executed.

    Args:
        start: Directory to begin the upward search from.
        marker: Name of a sub-directory that identifies the project root.
            Defaults to ``"src"``, the package this notebook imports from.

    Returns:
        The first directory (``start`` itself or one of its ancestors) that
        contains a ``marker`` sub-directory. If none is found, the parent of
        ``start`` is returned, matching the original one-level-up behaviour.
    """
    start = Path(start).resolve()
    for candidate in (start, *start.parents):
        if (candidate / marker).is_dir():
            return candidate
    # No marker anywhere above: keep the original "one level up" fallback.
    return start.parent


# Move to project root if needed (safe to re-run: the result is stable)
PROJECT_ROOT = find_project_root(Path.cwd())
os.chdir(PROJECT_ROOT)

print("Project root:", PROJECT_ROOT)


Project root: D:\Visual Studio practice


In [7]:
import sys
import json
from pathlib import Path

# Put the project root on sys.path (once) so the `src` package can be imported.
root_entry = str(PROJECT_ROOT)
if root_entry not in sys.path:
    sys.path.append(root_entry)

from src.config import CHUNKS_PATH

# Read the chunk file as UTF-8 text and parse it as JSON.
chunks = json.loads(Path(CHUNKS_PATH).read_text(encoding="utf-8"))

print("Total chunks loaded:", len(chunks))


Total chunks loaded: 5011


In [8]:
# Collect the "text" field of every chunk, preserving chunk order.
texts = [chunk["text"] for chunk in chunks]

# Show the first 300 characters of the first chunk as a quick sanity check.
preview = texts[0][:300]
print("Sample text preview:")
print(preview)


Sample text preview:
Airport Operations
About the Authors
Norman J. Ashford was Professor of Transport Planning at the Loughborough University
of Technology, England, from 1972 to 1997. He holds bachelor’s, master’s, and doctoral
degrees in civil engineering. Dr. Ashford worked as a civil engineer in Canada and taught
a


In [9]:
from sentence_transformers import SentenceTransformer
from src.config import EMBEDDING_MODEL_NAME

# Instantiate the sentence-transformers model named in the project config.
embedding_model = SentenceTransformer(model_name_or_path=EMBEDDING_MODEL_NAME)

print("Embedding model loaded:", EMBEDDING_MODEL_NAME)




Embedding model loaded: all-MiniLM-L6-v2


In [10]:
from src.config import EMBEDDING_BATCH_SIZE

# Encode every chunk text in batches of the configured size, with a progress bar.
encode_options = {
    "batch_size": EMBEDDING_BATCH_SIZE,
    "show_progress_bar": True,
}
embeddings = embedding_model.encode(texts, **encode_options)

print("Embeddings generated.")
print("Embeddings shape:", embeddings.shape)


Batches: 100%|██████████| 157/157 [01:25<00:00,  1.84it/s]


Embeddings generated.
Embeddings shape: (5011, 384)


In [11]:
import faiss
import numpy as np

# FAISS consumes float32, C-contiguous matrices. np.ascontiguousarray performs
# the conversion in one step and hands back the input untouched when it already
# has that layout, instead of copying twice via np.array(...).astype(...).
vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
if vectors.ndim != 2 or vectors.shape[0] == 0:
    raise ValueError(
        f"Expected a non-empty 2-D embedding matrix, got shape {vectors.shape}"
    )

dimension = vectors.shape[1]
index = faiss.IndexFlatL2(dimension)  # exact (brute-force) L2 search
index.add(vectors)

# Every embedding row must have become exactly one index entry.
assert index.ntotal == vectors.shape[0], "FAISS index size does not match embeddings"

print("FAISS index size:", index.ntotal)


FAISS index size: 5011


In [12]:
from src.config import FAISS_INDEX_PATH

# Hand the configured location to FAISS as a plain string and persist the index.
index_file = str(FAISS_INDEX_PATH)
faiss.write_index(index, index_file)

print("FAISS index saved to:", FAISS_INDEX_PATH)


FAISS index saved to: D:\Visual Studio practice\aviation-chatbot\data\faiss_index.bin
