In [None]:
import hashlib
import os
import time

import langchain
from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma


In [16]:
# No API key needed for Ollama (runs locally)


In [17]:
from langchain_ollama import ChatOllama

# Chat model used by the non-streaming chain: phi3 mini (128k context window)
# served by the local Ollama instance.
llm = ChatOllama(temperature=0.7, model="phi3:mini-128k")


In [18]:
# Two-message chat prompt used by every chain in this notebook:
#   - system: instructs the model to answer only from the supplied context
#   - user:   carries the {context} and {question} placeholders
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", """You are a helpful RAG (Retrieval-Augmented Generation) assistant. Answer the user's question using only the information provided in the context below. If the answer cannot be found in the context, politely indicate that you do not have enough information.
Always cite facts, avoid speculation, and do not use any knowledge beyond what is provided in the context."""),
        ("user", """Context:
{context}

Question: {question}

Answer based on the context above:"""),
    ]
)

### (1) Load PDF data

You can either:
- Load a single PDF: Use `PyPDFLoader("path/to/file.pdf")`
- Load multiple PDFs from a directory: Use `DirectoryLoader`

In [19]:
# Alternative (disabled): read one specific PDF instead of a directory.
# loader = PyPDFLoader("path/to/your/file.pdf")
# data = loader.load()

# Active path: pick up every PDF under data/ (including sub-folders) and
# parse each one with PyPDFLoader.
loader = DirectoryLoader("data/", loader_cls=PyPDFLoader, glob="**/*.pdf")
data = loader.load()
print(f"Loaded {len(data)} pages from PDF(s)")
len(data)

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)


Loaded 18 pages from PDF(s)


18

### (2) Split data to create chunks

In [20]:
# Splitter configured with chunk_size=1000 and chunk_overlap=500.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=500, chunk_size=1000)

# `data` already holds Document objects, so split_documents is used
# (split_text would need plain strings instead).
docs = text_splitter.split_documents(data)

In [21]:
# Number of chunks produced by the splitter.
len(docs)

99

In [22]:
# Inspect the first chunk: its metadata and its text.
docs[0]

Document(metadata={'producer': 'macOS Version 15.6.1 (Build 24G90) Quartz PDFContext', 'creator': 'PyPDF', 'creationdate': "D:20250918101353Z00'00'", 'moddate': "D:20250918101353Z00'00'", 'source': 'data\\Class notes, Session 10.pdf', 'total_pages': 7, 'page': 0, 'page_label': '1'}, page_content='Class notes: Session 10 | Introduction to Philosophy HS 221 Thought Experiments on Perception and Reality One of the central discussions in Session 11 revolved around thought experiments that question the relationship between the world and human perception. The “falling tree in a forest” problem is the most famous of these. If a tree falls in a dense forest and no one is around to hear it, does it make a sound? Physically, we can define sound as vibrations traveling through a medium—air pressure waves that ripple outward. But sound as an experience requires an organism with an auditory system and a brain to interpret those vibrations. Without a subject, there are only waves, not sound. This di

### (3) Create embeddings for these chunks and save them to ChromaDB

ChromaDB will store the embeddings persistently on disk.

In [None]:
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Give every chunk a deterministic id built from its source, page, position
# and text. Without explicit ids the store assigns a new random id to each
# chunk on every run, so re-running this cell against the same
# ./chroma_db directory would add a second copy of every chunk and retrieval
# would start returning duplicates. With stable ids an unchanged chunk is
# written under the id it already has instead of being added again.
chunk_ids = [
    hashlib.sha256(
        "|".join(
            (
                str(chunk.metadata.get("source", "")),
                str(chunk.metadata.get("page", "")),
                str(position),
                chunk.page_content,
            )
        ).encode("utf-8")
    ).hexdigest()
    for position, chunk in enumerate(docs)
]

# Create ChromaDB vector store with persistent storage
vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    ids=chunk_ids,
    persist_directory="./chroma_db",  # Directory to persist the database
    collection_name="pdf_collection",  # Name for this collection
)

print(f"✅ Created ChromaDB with {len(docs)} document chunks")


In [None]:
# Informational cell only: nothing is saved here.
# The vector store is created with persist_directory="./chroma_db", so there
# is no separate pickle/save step in this notebook.
print("✅ ChromaDB is automatically persisted to ./chroma_db directory")


In [None]:
# Re-open the collection stored under ./chroma_db when that path is present;
# otherwise tell the user to build it first. Relies on `embeddings` being
# defined already.
import os

if not os.path.exists("./chroma_db"):
    print("⚠️ No existing ChromaDB found. Please run the cell above to create it.")
else:
    vectorstore = Chroma(
        collection_name="pdf_collection",
        embedding_function=embeddings,
        persist_directory="./chroma_db",
    )
    print("✅ Loaded existing ChromaDB from disk")


### 🔍 Test Similarity Search

Let's see which chunks are being retrieved with our similarity threshold!

In [None]:
# Test the similarity search to see what chunks are retrieved
test_query = "what is the document about?"

# Use relevance scores (normalised so that higher means more similar) and pass
# the same k / score_threshold as the retriever. The plain
# similarity_search_with_score call applies no threshold and, for Chroma,
# returns a distance (lower means more similar), which would make the
# ">= 0.7" summary and the "Similarity Score" label below incorrect.
results_with_scores = vectorstore.similarity_search_with_relevance_scores(
    test_query,
    k=3,                  # Get at most the top 3 chunks
    score_threshold=0.7,  # Drop chunks whose relevance score is below 0.7
)

print(f"🔍 Query: '{test_query}'\n")
print(f"📊 Found {len(results_with_scores)} chunks with similarity >= 0.7\n")
print("="*80)

for i, (doc, score) in enumerate(results_with_scores, 1):
    print(f"\n📄 Chunk {i} | Similarity Score: {score:.4f}")
    print(f"Content Preview: {doc.page_content[:200]}...")
    print(f"Source: {doc.metadata.get('source', 'Unknown')}")
    print("-"*80)


### (4) Retrieve similar embeddings for a given question and call LLM to retrieve final answer

In [26]:
from langchain.chains import RetrievalQA

# Retriever settings:
#   k               -> return at most 3 chunks per query
#   score_threshold -> keep only chunks whose similarity score is >= 0.7
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 3, "score_threshold": 0.7},
    search_type="similarity_score_threshold",
)

# "stuff" chain: every retrieved chunk is placed into the custom prompt.
chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    chain_type_kwargs={"prompt": prompt_template},
    chain_type="stuff",
)

print("✅ RAG chain created with similarity search (k=3, threshold=0.7)")
chain


RetrievalQA(verbose=False, combine_documents_chain=StuffDocumentsChain(verbose=False, llm_chain=LLMChain(verbose=False, prompt=ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template="You are a helpful RAG (Retrieval-Augmented Generation) assistant. Answer the user's question using only the information provided in the context below. If the answer cannot be found in the context, politely indicate that you do not have enough information.\nAlways cite facts, avoid speculation, and do not use any knowledge beyond what is provided in the context."), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Context:\n{context}\n\nQuestion: {question}\n\nAnswer based on the context above:'), additional_kwargs={})]), llm=ChatO

In [27]:
def gen(query):
    """Answer `query` with the notebook-level RAG `chain` and return the text.

    Args:
        query: The user's question as a string.

    Returns:
        The value stored under the "result" key of the chain's output.
    """
    # Turn LangChain's global debug flag off before running the chain.
    langchain.debug = False
    # `invoke` replaces calling the chain object directly, which is deprecated.
    return chain.invoke({"query": query})["result"]

### 📺 Streaming Functions

You now have **three ways** to get answers from your RAG bot:

1. **`gen(query)`** - Standard (waits for complete response)
   - Returns the full answer after generation is complete
   - Good for simple use cases

2. **`gen_stream(query)`** - Basic streaming
   - Prints text as it's generated using built-in callback
   - Uses `StreamingStdOutCallbackHandler`

3. **`gen_stream_advanced(query)`** - Advanced streaming ⭐ Recommended
   - Custom callback for better Jupyter notebook integration
   - Shows "🤖 Answer:" prefix for better formatting
   - Most interactive experience!

In [28]:
# Streaming version - prints text as it's generated
def gen_stream(query):
    """Answer `query` while printing the response as the model generates it.

    A new retriever, model and chain are built on every call; the model is
    given a StreamingStdOutCallbackHandler so generated tokens are written to
    standard output.

    Args:
        query: The user's question as a string.

    Returns:
        The complete answer text (the chain's "result" value).
    """
    from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

    # Same retrieval settings as the non-streaming chain.
    retriever = vectorstore.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"k": 3, "score_threshold": 0.7},
    )

    # NOTE(review): the recorded output of the test cell shows every token
    # printed twice. The cause is not visible in this notebook -- confirm
    # whether the callback fires twice with the installed langchain-ollama.
    streaming_llm = ChatOllama(
        model="phi3:mini-128k",
        temperature=0.7,
        streaming=True,
        callbacks=[StreamingStdOutCallbackHandler()],
    )

    streaming_chain = RetrievalQA.from_chain_type(
        llm=streaming_llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt_template},
    )

    print("Answer: ", end="", flush=True)
    # `invoke` replaces calling the chain object directly, which is deprecated.
    result = streaming_chain.invoke({"query": query})["result"]
    print("\n")  # Add newline after streaming
    return result


In [29]:
# Test streaming - watch the text appear word by word!
# The call is the cell's last expression, so the returned answer string is
# also displayed after the streamed text.
gen_stream("what is the document about ?")

Answer: TheThe document document appears to appears to be a be a detailed summary detailed summary or trans or transcriptcript from from Session  Session 77,, titled " titled "IntroductionIntroduction to to Philosophy Philosophy H HSS  222211,"," of of an under an undergradgraduateuate course course at at a a technical inst technical instituteitute.. The The session covers session covers various various philosoph philosophicalical concepts concepts related related to to per perceptionception,, reality reality,, and and existence existence by by expl exploring theoring the distin distinctionsctions between between syntax syntax ( (structstructuralural naming naming)) and and ont ontologyology ( (actualactual existence). existence). It It discusses discusses classical classical deb debates onates on Form Formss versus versus empirical empirical realities realities in Pl in Platoato's's philosophy and modern philosophy and modern logical consist logical consistencyency as as discussed dis

'The document appears to be a detailed summary or transcript from Session 7, titled "Introduction to Philosophy HS 221," of an undergraduate course at a technical institute. The session covers various philosophical concepts related to perception, reality, and existence by exploring the distinctions between syntax (structural naming) and ontology (actual existence). It discusses classical debates on Forms versus empirical realities in Plato\'s philosophy and modern logical consistency as discussed by W.V.O. Quine. The conversation also touches upon scriptures, interpreting them critically rather than accepting passages at face value to avoid moral rationalizations that can justify wrongdoing under the guise of righteousness. Furthermore, it examines hermeneutics—the interpretation theory emphasizing historical context and biases in understanding texts like religious scriptures. The session concludes with a discussion on philosophical interpretations using examples from Christianity (Bib

In [30]:
# Advanced streaming with custom callback (better for notebooks)
import sys
from langchain.callbacks.base import BaseCallbackHandler

class StreamCallback(BaseCallbackHandler):
    """Callback handler that prints each generated token as it arrives.

    Intended for streaming model output into a Jupyter notebook cell.
    """

    def on_llm_new_token(self, token: str, **kwargs) -> None:
        """Print `token` immediately, without a trailing newline.

        Args:
            token: The newly generated piece of text.
            **kwargs: Extra callback arguments; accepted and ignored.
        """
        # flush=True so the token shows up right away instead of being buffered.
        print(token, end="", flush=True)

def gen_stream_advanced(query):
    """Answer `query`, streaming tokens through the custom StreamCallback.

    A new retriever, model and chain are built on every call. The output is
    prefixed with "🤖 Answer: " and followed by a blank line.

    Args:
        query: The user's question as a string.

    Returns:
        The complete answer text (the chain's "result" value).
    """
    # Same retrieval settings as the non-streaming chain.
    retriever = vectorstore.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"k": 3, "score_threshold": 0.7},
    )

    # Model instance whose generated tokens are forwarded to StreamCallback.
    streaming_llm = ChatOllama(
        model="phi3:mini-128k",
        temperature=0.7,
        streaming=True,
        callbacks=[StreamCallback()],
    )

    streaming_chain = RetrievalQA.from_chain_type(
        llm=streaming_llm,
        chain_type="stuff",
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt_template},
    )

    print("🤖 Answer: ", end="", flush=True)
    # `invoke` replaces calling the chain object directly, which is deprecated.
    result = streaming_chain.invoke({"query": query})["result"]
    print("\n")
    return result


In [31]:
# Test advanced streaming - better output formatting
# The call is the cell's last expression, so the returned answer string is
# also displayed after the streamed text.
gen_stream_advanced("what is the document about?")

🤖 Answer: TheThe document document appears to appears to be be from from Session Session 7 7 | | Introduction Introduction to to Philosoph Philosophyy H HSS  222121,, where where it it opens opens with with a a found foundational distinctionational distinction between between syntax syntax and ont and ontologyology.. The The session session also also touches touches upon upon various various philosoph philosophicalical them themeses such as such as script scriptureure interpretation within interpretation within cultural bi cultural biasesases, the, the relationship between relationship between per perception andception and reality reality in in understanding understanding dign dignityity,, religion religion''s connections connection to to existence through existence through logical logical f fallaallaciescies like like cheating cheating for r for righighteousteousnessness,, and and historical historical debates debates on on interpre interpretingting texts texts without bias without bi

"The document appears to be from Session 7 | Introduction to Philosophy HS 221, where it opens with a foundational distinction between syntax and ontology. The session also touches upon various philosophical themes such as scripture interpretation within cultural biases, the relationship between perception and reality in understanding dignity, religion's connection to existence through logical fallacies like cheating for righteousness, and historical debates on interpreting texts without bias. Additionally, it briefly mentions the ontological argument for God’s existence as a philosophical concept discussed during this session. The aim seems to be introducing students to fundamental concepts in philosophy related to language (syntax), reality (ontology), morality, religion, and interpretation of scriptures."

In [32]:
# Non-streaming call: the full answer is printed once generation has finished.
print(gen("what is the document about?"))

The document appears to be a collection of session notes from an Introduction to Philosophy course, specifically Session 7 in Week 2 (HSTS 221), titled "Syntax to Ontology." The session discusses fundamental philosophical concepts such as syntax and ontology. Syntax refers to the structural or linguistic aspect of naming without establishing real existence, while ontology deals with questions about being and what exists in reality. This distinction is related to classical debates between Plato's Forms and contemporary philosophy that emphasizes logical criteria for existence.

The session also touches on how language can obscure meaning through examples like the word "ghost" or saying, “God exists.” The instructor encourages critical thinking in interpreting scriptures rather than blind reliance due to their potential misinterpretation and historical context-dependence as noted by philosophers Paul Ricoeur and Hans-Georg Gadamer.

Moreover, the discussion extends into ethical considera