In [1]:
import json
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# -------------------------------
# 1. Load Chunks
# -------------------------------
# Parse the chunks JSON file (UTF-8) into `chunks`.
with open("../data/rag_chunks/chunks.json", encoding="utf-8") as f:
    chunks = json.loads(f.read())

# Report how many chunks were loaded.
print(f"📄 Loaded {len(chunks)} chunks")

📄 Loaded 810 chunks


In [3]:
# -------------------------------
# 2. Load Embedding Model
# -------------------------------
# Sentence-embedding model; the same instance encodes the chunks and,
# in search(), the query text.
model = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2"
)

# Vector width reported by the model; the FAISS index is created with it.
embedding_dim = model.get_sentence_embedding_dimension()

In [4]:
# -------------------------------
# 3. Create Embeddings
# -------------------------------
# Encode every chunk text in a single call so the model can batch the work,
# instead of invoking model.encode() once per chunk.
chunk_texts = [ch["text"] for ch in chunks]
embeddings = np.ascontiguousarray(
    model.encode(chunk_texts, convert_to_numpy=True), dtype="float32"
)

# One row per chunk is required: search() uses FAISS row ids as positions
# in `chunks`, and the index is created with `embedding_dim` columns.
assert embeddings.shape == (len(chunks), embedding_dim), (
    f"unexpected embeddings shape {embeddings.shape}"
)

In [5]:
# -------------------------------
# 4. Build FAISS Index
# -------------------------------
# Flat L2-distance index sized to the model's embedding dimension.
index = faiss.IndexFlatL2(embedding_dim)
index.add(embeddings)

# search() treats FAISS ids as positions in `chunks`, so the two must line up.
# This catches stale kernel state (e.g. chunks reloaded without re-embedding).
assert index.ntotal == len(chunks), (
    f"index holds {index.ntotal} vectors but there are {len(chunks)} chunks"
)

print(f"✅ FAISS index built with {index.ntotal} vectors")

✅ FAISS index built with 810 vectors


In [6]:
# -------------------------------
# 5. Save Index + Metadata
# -------------------------------
# Persist the FAISS index to disk.
faiss.write_index(index, "../data/rag_chunks/faiss_index.idx")

# Write the chunk list next to it as pretty-printed JSON, keeping
# non-ASCII characters as-is rather than escaping them.
with open("../data/rag_chunks/metadata.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(chunks, ensure_ascii=False, indent=2))

print("💾 Saved FAISS index + metadata")

💾 Saved FAISS index + metadata


In [9]:
def search(query, top_k=5):
    """Return the chunks whose embeddings are nearest to ``query``.

    The query is embedded with the notebook-level ``model`` and looked up in
    the notebook-level FAISS ``index``; the ids FAISS returns are used as
    positions into ``chunks``.

    Args:
        query: Text to search for.
        top_k: Maximum number of chunks to return. A value of 0 or less
            yields an empty list without calling the model or FAISS.

    Returns:
        A list of at most ``top_k`` entries from ``chunks``, in the order
        FAISS returned them.
    """
    # Asking for no results is answered directly; FAISS rejects k <= 0.
    if top_k <= 0:
        return []

    # Encode the query as a one-row float32 matrix for FAISS.
    query_vec = model.encode([query]).astype("float32")

    # Only the neighbour ids are needed; the distances are discarded.
    _, neighbour_ids = index.search(query_vec, top_k)

    # An id of -1 marks an empty slot (fewer than top_k neighbours found).
    return [chunks[i] for i in neighbour_ids[0] if i != -1]

# 🔍 Example query: retrieve the default number of chunks and print each text
example_query = "What was the revenue from operations in FY25?"
results = search(example_query)
for hit in results:
    print("-", hit["text"])


- Revenue from operations in Q4 FY25 was 64,479. (Source: financial_statement_fixed_2024) Revenue from operations in Q3 FY24 was 63,973. (Source: financial_statement_fixed_2024) Revenue from operations in Q4 FY24 was 61,237. (Source: financial_statement_fixed_2024) Revenue from operations in FY25 was 2,55,324. (Source: financial_statement_fixed_2024)
- Revenue from operations in Q4 FY24 was 51,488. (Source: financial_statement_fixed_2024) Revenue from operations in FY25 was 2,14,853. (Source: financial_statement_fixed_2024) Revenue from operations in FY24 was 2,02,359. (Source: financial_statement_fixed_2024) Other income in Q4 FY25 was 1,922. (Source: financial_statement_fixed_2024)
- March 31, December 31, March in FY25 was March. (Source: financial_statement_fixed_2024) March 31, December 31, March in FY24 was 31,. (Source: financial_statement_fixed_2024) Revenue from operations in Q4 FY25 was 54,136. (Source: financial_statement_fixed_2024) Revenue from operations in Q3 FY24 was 53