In [None]:
# !pip install langchain-text-splitters langchain-community langchain-openai langchain-faiss sentence-transformers

In [None]:
# %pip install faiss-cpu
# Using the CPU build for now; the GPU version was giving me a headache.

In [None]:
# !nvcc -V

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Tue_Feb_27_16:28:36_Pacific_Standard_Time_2024
Cuda compilation tools, release 12.4, V12.4.99
Build cuda_12.4.r12.4/compiler.33961263_0


In [None]:
# !pip install torch torchvision --index-url https://download.pytorch.org/whl/cu124

In [None]:
# %pip install pypdf transformers accelerate huggingface_hub 

In [None]:
# Show the installed PyTorch build and whether a CUDA device is visible.
import torch

print(torch.__version__)
print(torch.cuda.is_available())

2.6.0+cu124
True


In [12]:
import warnings

import torch
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from transformers import pipeline

In [13]:
# 1. Load document
# Read the source PDF (path is relative to the notebook's working directory)
# into `documents`, which the splitting cell consumes next.
loader = PyPDFLoader("Dataset/Discoveries-and-Origin.pdf")
documents = loader.load()

In [None]:
# Split the loaded documents into overlapping chunks
splitter = RecursiveCharacterTextSplitter(
    chunk_size=480,
    chunk_overlap=50,
)
docs = splitter.split_documents(documents)

In [15]:
# Embed every chunk and index the vectors in an in-memory FAISS store
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
db = FAISS.from_documents(docs, embeddings)

In [16]:
# retriever init
# A top-level assignment in a notebook cell already binds the module-level
# name, so no explicit globals() write is needed for later cells to see it.
retriever = db.as_retriever(search_kwargs={"k": 4})

In [17]:
# small LLM + RAG helper (flan-t5-small)
# Use the first CUDA device when one is visible; otherwise fall back to CPU
# (device=-1) so the notebook still runs on machines without a GPU.
hf_pipe = pipeline(
    "text2text-generation",
    model="google/flan-t5-small",
    max_length=256,
    device=0 if torch.cuda.is_available() else -1,
)

def smolLLM(prompt: str) -> str:
    """Run ``prompt`` through ``hf_pipe`` with sampling disabled and return the text.

    Args:
        prompt: Full prompt string handed to the pipeline.

    Returns:
        The ``generated_text`` (or, failing that, ``summary_text``) of the
        first result. An empty generation is returned as ``""``. If the
        pipeline output has an unexpected shape, its ``str()`` form is
        returned instead.
    """
    out = hf_pipe(prompt, do_sample=False)
    # Transformers returns different shapes across versions
    if isinstance(out, list) and len(out) > 0 and isinstance(out[0], dict):
        first = out[0]
        for key in ("generated_text", "summary_text"):
            text = first.get(key)
            # Compare against None rather than truthiness: an empty string is a
            # legitimate model output and must not fall through to the dict repr.
            if text is not None:
                return text
        return str(first)
    return str(out)

Device set to use cuda:0


In [18]:
# Tolerant text extraction for heterogeneous retrieval results
def extract_text(item):
    """Return the text carried by a retrieval result.

    Handles three shapes, checked in this order:
      * an object exposing ``page_content``;
      * a non-empty list/tuple whose first element exposes ``page_content``
        (e.g. a ``(doc, score)`` pair);
      * a dict containing a ``"page_content"`` key.

    Anything else, or any exception raised while probing, falls back to
    ``str(item)``.
    """
    try:
        if hasattr(item, "page_content"):
            return item.page_content

        is_sequence = isinstance(item, (list, tuple))
        if is_sequence and len(item) > 0:
            head = item[0]
            if hasattr(head, "page_content"):
                return head.page_content

        if isinstance(item, dict) and "page_content" in item:
            return item["page_content"]
    except Exception:
        # Best-effort by design: fall through to the generic string form.
        pass
    return str(item)

# RAG answer function
def rag_answer(query: str, k: int = 4) -> str:
    """Answer ``query`` by retrieving chunks and prompting ``smolLLM`` with them.

    Retrieval first goes through the notebook-level ``retriever`` (``invoke``
    when available, otherwise ``get_relevant_documents``). If no retriever
    exists or the lookup raises, it falls back to ``db.similarity_search``.
    Failures on either path are reported with ``warnings.warn`` rather than
    being hidden.

    Args:
        query: Natural-language question.
        k: Maximum number of retrieved chunks placed in the prompt. The
            retriever is built with its own result count, so ``k`` acts as a
            cap on that path; it cannot return more than the retriever yields.

    Returns:
        The model's answer, or ``"No documents retrieved, cannot answer."``
        when retrieval produced nothing.

    NOTE(review): the notebook's own output shows a prompt of 557 tokens
    against a 512-token limit for this model, so a smaller ``k`` may be
    needed to keep the whole prompt in range.
    """
    docs = None

    # Primary path: the retriever, if one has been created in the notebook.
    active_retriever = globals().get("retriever")
    if active_retriever is not None:
        try:
            if hasattr(active_retriever, "invoke"):
                docs = active_retriever.invoke(query)
            elif hasattr(active_retriever, "get_relevant_documents"):
                docs = active_retriever.get_relevant_documents(query)
        except Exception as exc:
            # Stay best-effort, but surface the failure before falling back.
            warnings.warn(
                f"retriever lookup failed, falling back to db: {exc!r}",
                RuntimeWarning,
                stacklevel=2,
            )
            docs = None

    # Fallback path: query the vector store directly.
    if docs is None:
        store = globals().get("db")
        if store is not None and hasattr(store, "similarity_search"):
            try:
                docs = store.similarity_search(query, k=k)
            except Exception as exc:
                warnings.warn(
                    f"db.similarity_search failed: {exc!r}",
                    RuntimeWarning,
                    stacklevel=2,
                )
                docs = None

    # The retriever path does not take k, so enforce the limit here.
    if docs:
        docs = list(docs)[: max(k, 0)]

    # Build context
    if not docs:
        return "No documents retrieved, cannot answer."

    context = "\n---\n".join(extract_text(d) for d in docs)

    # Prompt + call smolLLM
    prompt = (
        "Use the following context to answer the question. "
        "If the answer is not in the context, say *I don't know*.\n\n"
        f"Context:\n{context}\n\n"
        f"Question: {query}\nAnswer concisely:"
    )
    return smolLLM(prompt)

In [19]:
answer = rag_answer("Who invented the fire engine?")
print(f"Answer: {answer}")

Token indices sequence length is longer than the specified maximum sequence length for this model (557 > 512). Running this sequence through the model will result in indexing errors


Answer: John Hautsch


In [20]:
answer = rag_answer("Summarise Indigo section")
print(f"Answer: {answer}")

Answer: (iii)


In [21]:
answer = rag_answer("Who prohibited use of ribbon-loom and when?")
# print() already inserts a separator space, so the label must not end in one;
# otherwise the output reads "Answer:  ..." with a double space.
print("Answer:", answer)

Answer:  the States-General, as early as the 11th of August 1623, if they did not totally prohibit the use of the ribbon-loom


In [23]:
answer = rag_answer("Where is West Indies located?")
# print() already inserts a separator space, so the label must not end in one;
# otherwise the output reads "Answer:  ..." with a double space.
print("Answer:", answer)

Answer:  East Indies
