In [1]:
!pip install faiss-gpu-cu11 PyPDF2

Collecting faiss-gpu-cu11
  Downloading faiss_gpu_cu11-1.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting nvidia-cuda-runtime-cu11>=11.8.89 (from faiss-gpu-cu11)
  Downloading nvidia_cuda_runtime_cu11-11.8.89-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cublas-cu11>=11.11.3.6 (from faiss-gpu-cu11)
  Downloading nvidia_cublas_cu11-11.11.3.6-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Downloading faiss_gpu_cu11-1.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (48.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.3/48.3 MB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m21.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading nvidia_cublas_cu11-11.11.3.6-py3-none-ma

In [2]:
from pathlib import Path
import json
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import os
from typing import List, Dict

In [3]:
import PyPDF2
def extract_text_pdf(pdf_path: str) -> str:
        text = ""
        with open(pdf_path, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            for page in reader.pages:
                page_text = page.extract_text() or ""
                text += page_text

        if len(text.strip()) > 30:
            return text

In [4]:
DOCS_FOLDER = Path("/content/documents")
DOCS_FOLDER.mkdir(exist_ok=True)

CHUNKS_JSON = Path("/content/chunks.json")
FAISS_INDEX_PATH = Path("/content/faiss.index")

# Models (≤1B)
EMBED_MODEL = "BAAI/bge-small-en-v1.5"
LLM_MODEL = "Qwen/Qwen1.5-0.5B-Chat"

In [5]:
from google.colab import userdata
HF_TOKEN = userdata.get("HF_TOKEN")

In [7]:
def extract_text_pdf(pdf_path: str) -> str:
    text = ""
    with open(pdf_path, "rb") as f:
        reader = PyPDF2.PdfReader(f)
        for page in reader.pages:
            page_text = page.extract_text() or ""
            text += page_text

    if len(text.strip()) > 30:
        return text

In [8]:
def load_pdfs(folder: Path) -> List[Dict]:
    docs = []
    for pdf in folder.glob("*.pdf"):
        content = extract_text_pdf(str(pdf))
        docs.append({"content": content, "metadata": {"file": pdf.name}})
    return docs

In [9]:
def chunk_text(text: str, chunk_size=250, overlap=75):
    words = text.split()
    chunks = []
    start = 0
    while start < len(words):
        end = start + chunk_size
        chunk = " ".join(words[start:end])
        chunks.append(chunk)
        start += chunk_size - overlap
    return chunks

In [10]:
def prepare_documents(folder: Path):
    raw = load_pdfs(folder)
    documents = []
    for doc in raw:
        for chunk in chunk_text(doc["content"]):
            documents.append({"content": chunk, "metadata": doc["metadata"]})
    return documents

In [11]:
docs = prepare_documents(DOCS_FOLDER)
print("Total text chunks:", len(docs))

with CHUNKS_JSON.open("w") as f:
    json.dump(docs, f, indent=2)

Total text chunks: 11


In [12]:
embed_model = SentenceTransformer(EMBED_MODEL)

def embed(texts):
    return embed_model.encode(
        texts, convert_to_numpy=True, normalize_embeddings=True
    )

def build_faiss(docs):
    texts = [d["content"] for d in docs]
    emb = embed(texts)
    dim = emb.shape[1]

    index = faiss.IndexFlatIP(dim)  # cosine similarity
    index.add(emb)
    return index, emb


index, emb = build_faiss(docs)
faiss.write_index(index, str(FAISS_INDEX_PATH))

print("FAISS index ready!")


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

FAISS index ready!


In [13]:
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    LLM_MODEL,
    token=HF_TOKEN,
    device_map="auto"
)

llm = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
    temperature=0.1
)

print("LLM ready!")

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.24G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/206 [00:00<?, ?B/s]

Device set to use cuda:0


LLM ready!


In [21]:
class RAG:
    def __init__(self, docs, index):
        self.docs = docs
        self.index = index
        self.embed = embed_model
        self.llm = llm

    def retrieve(self, query, k=3):
        q = self.embed.encode(
            [query], convert_to_numpy=True, normalize_embeddings=True
        )
        scores, idx = self.index.search(q, k)
        return [self.docs[i]["content"] for i in idx[0]]

    def answer(self, question, k=3):
        chunks = self.retrieve(question, k)
        context = "\n\n".join(chunks)

        prompt = f"""
              Use ONLY the context below to answer the question. If the context does not contain the answer, respond exactly with: Information not found.

              Rewrite, clarify, or answer strictly using the provided context. Do not use outside knowledge or make assumptions.

              Context:
              {context}

              Question:
              {question}

              Answer (follow all rules):

              If the answer is fully supported by the context, provide it clearly and concisely.
              If the context is ambiguous or partially relevant, explain that the answer cannot be determined and reply with: Information not found.
              If the context does not support any answer, reply with: Information not found.

              Answer:
              """

        output = self.llm(prompt)[0]["generated_text"]
        return output


rag = RAG(docs, index)
print("RAG system is ready!")


RAG system is ready!


In [22]:
question = "What is package pricing?"
print("Question:", question)

answer = rag.answer(question)
print("\nAnswer:\n", answer)


Question: What is package pricing?

Answer:
 
Use ONLY the context below to answer the question. If the context does not contain the answer, respond exactly with: Information not found.

Rewrite, clarify, or answer strictly using the provided context. Do not use outside knowledge or make assumptions.

# Context:
# # INDECIMAL — Package Comparison & Specification Wallets (Internal Reference) Version: 1.0 Audience: Sales, Estimation, AI Assistant Knowledge Base Last Updated: 2025 -12-21 ## 1) Package Pricing (Indicative / Per Sqft) These are shown as per -sqft package rates (inclusive of GST) on the public comparison page: - Essential: ₹1,851 /sqft (incl. GST) - Premier (Most Popular): ₹1,995 /sqft (incl. GST) - Infinia: ₹2,250 /sqft (incl. GST) - Pinnacle: ₹2,450 /sqft (incl. GST) ## 2) Structure Specifications (Highlights) ### Steel (Fe 550 / Fe 550D) - Essential: Sunvik, Kamadhenu, or equivalent up to ₹68,000/MT - Premier: JSW or Jindal Neo up to ₹74,000/MT - Infinia: JSW or Jindal Pa

In [18]:
print(rag.answer("Explain the pricing plans."))
# rag.answer("What are the materials used?")
# rag.answer("Summarize the document.")
# rag.answer("Does it include granite flooring?")



Use ONLY the context below to answer.

If answer not present, say: "Information not found."

Context:
-delivery support: long -term warranty/maintenance commitments. - Transparency: 100% transparent pricing and process. - Timelines: fixed project timelines, with penalties for delays. - Quality assurance: branded materials and on -site quality checks. - Updates: real -time project tracking dashboard and visibility. ## 5) Customer Journey (“How We Work”) The publicly described journey is structured in stages so customers know what happens next: 1) Raise a request - Customer shares plot details and vision; team connects to start. 2) Meet our experts - Consultation with architects and construction specialists. 3) Get home financing - Guidance through steps from documentation to disbursal (where applicable). 4) Design your custom home - Collaborative architectural design tailored to the family’s needs. 5) Receive plans - Detailed design + cost plans with transparent pricing. 6) Book with u