In [1]:
pip install -r requirement.txt

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from dotenv import load_dotenv
load_dotenv()
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")

In [None]:
from langchain.document_loaders import PyPDFLoader

# Load the PDF
loader = PyPDFLoader("docs/rag_chunking_example.pdf")
documents = loader.load()


In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the documents into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(documents)


In [5]:
print(chunks)

[Document(metadata={'source': 'docs/rag_chunking_example.pdf', 'page': 0}, page_content='RAG Chunking Example Document\nThis is an example document designed for testing chunking methods in Retrieval-Augmented\nGeneration (RAG). The document contains multiple paragraphs of text that can be split into\nmanageable chunks. Each chunk represents a coherent section of information that can be used for\nsemantic search and retrieval tasks. By creating this document, we aim to provide a realistic dataset\nfor learning and practicing chunking strategies.'), Document(metadata={'source': 'docs/rag_chunking_example.pdf', 'page': 0}, page_content='for learning and practicing chunking strategies.\nChunking is a crucial step in many AI-driven pipelines. Effective chunking ensures that the text is\ndivided into sections that are neither too long nor too short. This helps optimize both retrieval\naccuracy and processing speed. This document is structured to facilitate experimentation with\nvarious chunk

In [6]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Initialize embeddings model (using SentenceTransformers)
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Create Chroma database
vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory="./chroma_store")


  from .autonotebook import tqdm as notebook_tqdm


In [7]:
retriever = vectorstore.as_retriever()

In [8]:
from langchain_ollama import OllamaLLM  # Correct import

# Initialize the Ollama LLM
llm = OllamaLLM(model="llama3", base_url="http://127.0.0.1:11434")  # Replace with your model and base URL

In [9]:
from langchain.chains import RetrievalQA

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)

In [10]:
# Ask a question
query = "What is the document about?"
result = qa_chain.invoke({"query": query})  # Correct way to call the chain

# Access the outputs
print(result["result"])  # The generated response
print(result["source_documents"])  # The retrieved documents


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


I don't know.
[Document(metadata={'page': 6, 'source': 'docs/rag_chunking_example.pdf'}, page_content='segmentation.\nThis is an example document designed for testing chunking methods in Retrieval-Augmented\nGeneration (RAG). The document contains multiple paragraphs of text that can be split into\nPage 7'), Document(metadata={'page': 6, 'source': 'docs/rag_chunking_example.pdf'}, page_content='segmentation.\nThis is an example document designed for testing chunking methods in Retrieval-Augmented\nGeneration (RAG). The document contains multiple paragraphs of text that can be split into\nPage 7'), Document(metadata={'page': 6, 'source': 'docs/rag_chunking_example.pdf'}, page_content='segmentation.\nThis is an example document designed for testing chunking methods in Retrieval-Augmented\nGeneration (RAG). The document contains multiple paragraphs of text that can be split into\nPage 7'), Document(metadata={'page': 6, 'source': 'docs/rag_chunking_example.pdf'}, page_content='segmentation

In [11]:
# Ask a question
query = "What is the document containing?"
result = qa_chain.invoke({"query": query})  # Correct way to call the chain

# Access the outputs
print(result["result"])  # The generated response
print(result["source_documents"])  # The retrieved documents


According to the context, the document containing is "an example document designed for testing chunking methods in Retrieval-Augmented Generation (RAG)". It also mentions that this document contains multiple paragraphs of text that can be split into segments.
[Document(metadata={'page': 6, 'source': 'docs/rag_chunking_example.pdf'}, page_content='segmentation.\nThis is an example document designed for testing chunking methods in Retrieval-Augmented\nGeneration (RAG). The document contains multiple paragraphs of text that can be split into\nPage 7'), Document(metadata={'page': 6, 'source': 'docs/rag_chunking_example.pdf'}, page_content='segmentation.\nThis is an example document designed for testing chunking methods in Retrieval-Augmented\nGeneration (RAG). The document contains multiple paragraphs of text that can be split into\nPage 7'), Document(metadata={'page': 6, 'source': 'docs/rag_chunking_example.pdf'}, page_content='segmentation.\nThis is an example document designed for testi

In [12]:
# Ask a question
query = "What is chunking?"
result = qa_chain.invoke({"query": query})  # Correct way to call the chain

# Access the outputs
print(result["result"])  # The generated response
print(result["source_documents"])  # The retrieved documents


I don't know. The provided context only mentions "chunking techniques" without defining what chunking actually is. It seems to be a general term used in the text, but no further explanation or definition is given.
[Document(metadata={'page': 13, 'source': 'docs/rag_chunking_example.pdf'}, page_content='various chunking techniques, including overlapping windows, recursive splitting, and semantic\nsegmentation.\nPage 14'), Document(metadata={'page': 13, 'source': 'docs/rag_chunking_example.pdf'}, page_content='various chunking techniques, including overlapping windows, recursive splitting, and semantic\nsegmentation.\nPage 14'), Document(metadata={'page': 13, 'source': 'docs/rag_chunking_example.pdf'}, page_content='various chunking techniques, including overlapping windows, recursive splitting, and semantic\nsegmentation.\nPage 14'), Document(metadata={'page': 13, 'source': 'docs/rag_chunking_example.pdf'}, page_content='various chunking techniques, including overlapping windows, recurs

## Custom Context

In [25]:
from langchain.chains import RetrievalQA

query = "What is chunking?"

# Retrieve top-k documents
retrieved_docs = retriever.get_relevant_documents(query=query)  # Modify your query as needed
top_k = 5  # Set the number of top documents you want to retrieve
retrieved_docs = retrieved_docs[:top_k]  # Retrieve only the top-k documents

# Combine the content of the documents into a single context
context = "\n\n".join([doc.page_content for doc in retrieved_docs])

# Feed the combined context to the LLM
prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"

# Generate the response using Ollama
response = llm.invoke(prompt)

# Print the response and the source documents
print("Answer:", response)
print("Source Documents:", [doc.metadata for doc in retrieved_docs])


Answer: According to the context, Chunking refers to various techniques used to divide or segment text into smaller units called "chunks". The specific techniques mentioned are:

1. Overlapping Windows
2. Recursive Splitting
3. Semantic Segmentation

These techniques aim to group words or phrases together based on their meaning, structure, or other characteristics to create meaningful and coherent chunks of text.
Source Documents: [{'page': 13, 'source': 'docs/rag_chunking_example.pdf'}, {'page': 13, 'source': 'docs/rag_chunking_example.pdf'}, {'page': 13, 'source': 'docs/rag_chunking_example.pdf'}]


In [24]:
from langchain.chains import RetrievalQA

# Retrieve top-k documents
retrieved_docs = retriever.get_relevant_documents("What is the document about?")  # Modify your query as needed
top_k = 5  # Set the number of top documents you want to retrieve
retrieved_docs = retrieved_docs[:top_k]  # Retrieve only the top-k documents

# Combine the content of the documents into a single context
context = "\n\n".join([doc.page_content for doc in retrieved_docs])

# Feed the combined context to the LLM
query = "What is the document about?"
prompt = f"Context:\n{context} /n/nQuestion: {query}\nAnswer:"

# Generate the response using Ollama
response = llm.invoke(prompt)

# Print the response and the source documents
print("Answer:", response)
print("Source Documents:", [doc.metadata for doc in retrieved_docs])


Answer: The document is not actually about anything, as it appears to be a test case for chunking methods in Retrieval-Augmented Generation (RAG) - it's just a repeating template of text designed to evaluate segmentation algorithms!
Source Documents: [{'page': 6, 'source': 'docs/rag_chunking_example.pdf'}, {'page': 6, 'source': 'docs/rag_chunking_example.pdf'}, {'page': 6, 'source': 'docs/rag_chunking_example.pdf'}]


## Custom Context with .invoke()

In [34]:
from langchain.chains import RetrievalQA

query = "What is chunking?"

# Retrieve top-k documents using the invoke method
retrieved_docs = retriever.invoke(query)  # Modify this to use the invoke method
top_k = 3  # Set the number of top documents you want to retrieve

# Ensure that the retrieved_docs is a list and slice to top_k
if isinstance(retrieved_docs, dict):
    retrieved_docs = retrieved_docs.get('documents', [])

retrieved_docs = retrieved_docs[:top_k]  # Retrieve only the top-k documents

# Combine the content of the documents into a single context
context = "\n\n".join([doc.page_content for doc in retrieved_docs])

# Feed the combined context to the LLM
prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"

# Generate the response using Ollama
response = llm.invoke(prompt)

# Print the response and the source documents
print("Answer:", response)
print("Source Documents:", [doc.metadata for doc in retrieved_docs])


Answer: Chunking refers to a technique used in natural language processing (NLP) to break down text into smaller units or "chunks" that are easier to process and analyze. These chunks can be phrases, sentences, clauses, or even individual words. Chunking is often used for tasks such as named entity recognition, part-of-speech tagging, sentiment analysis, and topic modeling.

The three chunking techniques mentioned in the context (overlapping windows, recursive splitting, and semantic segmentation) are different approaches to chunking text into meaningful units:

1. Overlapping windows: This technique involves dividing text into overlapping segments of a fixed size.
2. Recursive splitting: This approach recursively breaks down text into smaller chunks until a desired level of granularity is reached.
3. Semantic segmentation: This technique involves identifying meaningful units in text based on semantic or linguistic features, such as part-of-speech tags, named entities, or topic modelin

## Automatic RetrievalQA Chain

In [28]:
retriever.search_kwargs["k"] = 3  # Set top-k for retrieval

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)

# Ask a question
query = "What is the document about?"
result = qa_chain.invoke({"query": query})

# Access outputs
print("Answer:", result["result"])
print("Source Documents:", result["source_documents"])


Answer: I don't know.
Source Documents: [Document(metadata={'page': 6, 'source': 'docs/rag_chunking_example.pdf'}, page_content='segmentation.\nThis is an example document designed for testing chunking methods in Retrieval-Augmented\nGeneration (RAG). The document contains multiple paragraphs of text that can be split into\nPage 7'), Document(metadata={'page': 6, 'source': 'docs/rag_chunking_example.pdf'}, page_content='segmentation.\nThis is an example document designed for testing chunking methods in Retrieval-Augmented\nGeneration (RAG). The document contains multiple paragraphs of text that can be split into\nPage 7'), Document(metadata={'page': 6, 'source': 'docs/rag_chunking_example.pdf'}, page_content='segmentation.\nThis is an example document designed for testing chunking methods in Retrieval-Augmented\nGeneration (RAG). The document contains multiple paragraphs of text that can be split into\nPage 7')]
