In [1]:
%pip install -q langchain langchain-community transformers sentence-transformers faiss-cpu requests
!pip install pypdf

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m41.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m39.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pypdf
  Downloading pypdf-6.0.0-py3-none-any.whl.metadata (7.1 kB)
Downloading pypdf-6.0.0-py3-none-any.whl (310 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.5/310.5 kB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-6.0.0


In [3]:
import requests
import tempfile
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import pipeline

# Sources
text_sources = {
    'I.P.C': 'https://github.com/SaiSudheerKankanala/SAIbot/raw/main/ipc.pdf',
    'Constitution': 'https://github.com/SaiSudheerKankanala/SAIbot/raw/main/indian%20constitution.pdf',
    'Garuda': 'https://github.com/SaiSudheerKankanala/SAIbot/raw/main/GarudaPurana.pdf',
    'Bhagavad Gita': 'https://github.com/SaiSudheerKankanala/SAIbot/raw/main/Bhagavad-gita_As_It_Is.pdf',
    'Quran': 'https://github.com/SaiSudheerKankanala/SAIbot/raw/main/quran-allah.pdf'
}

# Function to download and load PDF
def load_pdf(url, source_name):
    response = requests.get(url)
    response.raise_for_status()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(response.content)
        tmp_path = tmp.name
    loader = PyPDFLoader(tmp_path)
    docs = loader.load()
    for d in docs:
        d.metadata["source"] = source_name
    return docs

# Load all documents
all_docs = {name: load_pdf(url, name) for name, url in text_sources.items()}

# Overlapping chunking
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=400,   # smaller for better focus
    chunk_overlap=50
)

# Embeddings + VectorDB
vector_dbs = {}
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

for name, docs in all_docs.items():
    chunks = text_splitter.split_documents(docs)
    if chunks:
        vector_dbs[name] = FAISS.from_documents(chunks, embedding=embedder)

# Local answer generator (FLAN-T5)
generator = pipeline("text2text-generation", model="google/flan-t5-base")

def generate_answer(question, context):
    prompt = f"""
    You are a helpful assistant.
    Answer the question ONLY if it is clearly answered in the given context.
    If the context is unrelated or unclear, respond with exactly: "Not mentioned in this source."

    Question: {question}
    Context: {context}

    Answer:
    """
    return generator(prompt, max_new_tokens=200, clean_up_tokenization_spaces=True)[0]['generated_text']

# Main QA function
def answer_question(question, k=2):
    final_output = []
    for source_name in text_sources.keys():
        if source_name in vector_dbs:
            docs = vector_dbs[source_name].similarity_search(question, k=k)
            if docs:
                context = "\n\n".join([doc.page_content for doc in docs])
                answer = generate_answer(question, context).strip()
                # Only include if real answer
                if answer and answer != "Not mentioned in this source.":
                    final_output.append(f"According to {source_name.capitalize()}: {answer}")
    return "\n\n".join(final_output) if final_output else "No relevant answer found in any source."

# Chat loop
if __name__ == "__main__":
    while True:
        question = input("You: ")
        if question.lower() in ["exit", "quit", "bye"]:
            print("Bot: Goodbye!")
            break
        print(answer_question(question))


Device set to use cuda:0


You: Explain the section related to murder in the IPC.
According to I.p.c: 1[108A. Abetment in India of offences outside India.—A person abets an offence within the (b) A instigates B to murder D. B in pursuance of the instigation stabs D. D recov ers from the wound. A is guilty of instigating B to commit murder.

According to Garuda: 91. If a woman dies in the fulness of pre gnancy, her womb should be cut open, and the child drawn out and placed on the ground, and she alone be burned. 92-93. If a child dies on the bank of the Ganges, it should simply be thrown into the Ganges; if in another place, it should be buried in the ground, up to twenty-seven months old.

According to Bhagavad gita: The Vedic injunction is ma himsyat sarva bhutani: never commit violence to anyone. Nor does understanding that the living entity is not killed encourage animal slaughter. Killing the body of anyone without authority is abominable for the great sin he has committed. Therefore, the king's punishment 