In [1]:
%pip install -q langchain langchain-community transformers sentence-transformers faiss-cpu requests
!pip install pypdf

Note: you may need to restart the kernel to use updated packages.


In [None]:


import requests
import tempfile
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import pipeline
from tqdm.notebook import tqdm
import time
import sys

# ---------------------------
# Sources
# ---------------------------
text_sources = {
    'I.P.C': 'https://github.com/SaiSudheerKankanala/SAIbot/raw/main/ipc.pdf',
    'Constitution': 'https://github.com/SaiSudheerKankanala/SAIbot/raw/main/indian%20constitution.pdf',
    'Garuda': 'https://github.com/SaiSudheerKankanala/SAIbot/raw/main/GarudaPurana.pdf',
    'Bhagavad Gita': 'https://github.com/SaiSudheerKankanala/SAIbot/raw/main/Bhagavad-gita_As_It_Is.pdf',
    'Quran': 'https://github.com/SaiSudheerKankanala/SAIbot/raw/main/quran-allah.pdf'
}

# ---------------------------
# Download + Load PDFs
# ---------------------------
def load_pdf(url, source_name):
    response = requests.get(url)
    response.raise_for_status()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(response.content)
        tmp_path = tmp.name
    loader = PyPDFLoader(tmp_path)
    docs = loader.load()
    for d in docs:
        d.metadata["source"] = source_name
    return docs

print("📚 Loading documents...")
all_docs = {}
for name, url in tqdm(text_sources.items()):
    all_docs[name] = load_pdf(url, name)

# ---------------------------
# Split documents into chunks
# ---------------------------
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=400,
    chunk_overlap=50
)

# ---------------------------
# Build FAISS vector DBs
# ---------------------------
vector_dbs = {}
embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

print("🔎 Creating embeddings...")
for name, docs in tqdm(all_docs.items()):
    chunks = text_splitter.split_documents(docs)
    if chunks:
        vector_dbs[name] = FAISS.from_documents(chunks, embedding=embedder)

# ---------------------------
# Local LLM (FLAN-T5)
# ---------------------------
print("🤖 Loading model...")
generator = pipeline("text2text-generation", model="google/flan-t5-base")

def generate_answer(question, context):
    prompt = f"""
    You are a helpful assistant.
    Answer the question ONLY if it is clearly answered in the given context.
    If the context is unrelated or unclear, respond with exactly: "Not mentioned in this source."

    Question: {question}
    Context: {context}

    Answer:
    """
    return generator(prompt, max_new_tokens=200, clean_up_tokenization_spaces=True)[0]['generated_text']

# ---------------------------
# QA Function
# ---------------------------
def answer_question(question, k=2):
    final_output = []
    for source_name in text_sources.keys():
        if source_name in vector_dbs:
            docs = vector_dbs[source_name].similarity_search(question, k=k)
            if docs:
                context = "\n\n".join([doc.page_content for doc in docs])
                answer = generate_answer(question, context).strip()
                if answer and answer != "Not mentioned in this source.":
                    final_output.append(f"According to {source_name.capitalize()}: {answer}")
    return "\n\n".join(final_output) if final_output else "No relevant answer found in any source."

# ---------------------------
# Chat Helpers
# ---------------------------
def chat(question: str):
    """Ask a question and get an answer (for notebooks)."""
    return answer_question(question)

def interactive_chat():
    """Interactive loop (for terminal)."""
    print("Bot: Hello! Ask me anything. Type 'exit' to quit.")
    while True:
        question = input("You: ")
        if question.lower() in ["exit", "quit", "bye"]:
            print("Bot: Goodbye!")
            break
        print("Bot:", answer_question(question))

# Detect environment (Notebook vs Script)
def is_notebook():
    try:
        shell = get_ipython().__class__.__name__
        if shell == "ZMQInteractiveShell":
            return True   # Jupyter notebook
        elif shell == "TerminalInteractiveShell":
            return False  # IPython terminal
        else:
            return False
    except NameError:
        return False      # Standard Python script

# ---------------------------
# Entry Point
# ---------------------------
if __name__ == "__main__":
    if is_notebook():
        print("👉 Running in Notebook mode. Use chat('your question').")
    else:
        interactive_chat()


📚 Loading documents...


  0%|          | 0/5 [00:00<?, ?it/s]

  embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


🔎 Creating embeddings...


  0%|          | 0/5 [00:00<?, ?it/s]

🤖 Loading model...


Device set to use mps:0


👉 Running in Notebook mode. Use chat('your question').


In [3]:
chat("What is Dharma in Bhagavad Gita?")


"According to Bhagavad gita: Discharge of One's Prescribed Duty in Krsna Consciousness"