In [1]:
pip install -r requirement.txt

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from dotenv import load_dotenv
load_dotenv()
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")

In [3]:
from langchain.document_loaders import PyPDFLoader

# If you want to load all PDFs from a directory:
directory_path = "docs/"
pdf_files = [os.path.join(directory_path, file) for file in os.listdir(directory_path) if file.endswith(".pdf")]

# Load documents from multiple PDFs
documents = []
for pdf_file in pdf_files:
    loader = PyPDFLoader(pdf_file)
    documents.extend(loader.load())

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the documents into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(documents)


In [5]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Initialize embeddings model (using SentenceTransformers)
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Create Chroma database
vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory="./chroma_store")
print(f"Chroma database created with {len(chunks)} chunks!")

Chroma database created with 68 chunks!


In [6]:
retriever = vectorstore.as_retriever()

In [7]:
from langchain_ollama import OllamaLLM  # Correct import

# Initialize the Ollama LLM
llm = OllamaLLM(model="llama3", base_url="http://127.0.0.1:11434")  # Replace with your model and base URL

In [8]:
from langchain.chains import RetrievalQA

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True
)

## Custom Context

In [None]:
from langchain.chains import RetrievalQA

query = "What is chunking?"

# Retrieve top-k documents using the invoke method
retrieved_docs = retriever.invoke(query)  # Modify this to use the invoke method
top_k = 5  # Set the number of top documents you want to retrieve

# Ensure that the retrieved_docs is a list and slice to top_k
if isinstance(retrieved_docs, dict):
    retrieved_docs = retrieved_docs.get('documents', [])

retrieved_docs = retrieved_docs[:top_k]  # Retrieve only the top-k documents

# Combine the content of the documents into a single context
context = "\n\n".join([doc.page_content for doc in retrieved_docs])

# Feed the combined context to the LLM
prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"

# Generate the response using Ollama
response = llm.invoke(prompt)

# Print the response and the source documents
print("Answer:", response)
print("Source Documents:", [doc.metadata for doc in retrieved_docs])


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Failed to multipart ingest runs: langsmith.utils.LangSmithAuthError: Authentication failed for https://api.smith.langchain.com/runs/multipart. HTTPError('401 Client Error: Unauthorized for url: https://api.smith.langchain.com/runs/multipart', '{"detail":"Invalid token"}')trace=005d6a84-7335-47e4-9b79-8c0d33ddcfe9,id=005d6a84-7335-47e4-9b79-8c0d33ddcfe9; trace=b4094e62-1f45-4cf4-bf37-350a0612faae,id=b4094e62-1f45-4cf4-bf37-350a0612faae


Answer: According to the context, Chunking refers to the process of breaking down large documents or text data into smaller, manageable pieces (or chunks). Each chunk is designed to be semantically coherent and easily retrievable.
Source Documents: [{'page': 0, 'source': 'docs/RAG_Chunking_Tutorial.pdf'}, {'page': 13, 'source': 'docs/rag_chunking_example.pdf'}, {'page': 4, 'source': 'docs/rag_chunking_example.pdf'}, {'page': 10, 'source': 'docs/rag_chunking_example.pdf'}]


In [10]:
from langchain.chains import RetrievalQA

query = "What is the advantages or effect when using chunking?"

# Retrieve top-k documents using the invoke method
retrieved_docs = retriever.invoke(query)  # Modify this to use the invoke method
top_k = 5  # Set the number of top documents you want to retrieve

# Ensure that the retrieved_docs is a list and slice to top_k
if isinstance(retrieved_docs, dict):
    retrieved_docs = retrieved_docs.get('documents', [])

retrieved_docs = retrieved_docs[:top_k]  # Retrieve only the top-k documents

# Combine the content of the documents into a single context
context = "\n\n".join([doc.page_content for doc in retrieved_docs])

# Feed the combined context to the LLM
prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"

# Generate the response using Ollama
response = llm.invoke(prompt)

# Print the response and the source documents
print("Answer:", response)
print("Source Documents:", [doc.metadata for doc in retrieved_docs])


Failed to multipart ingest runs: langsmith.utils.LangSmithAuthError: Authentication failed for https://api.smith.langchain.com/runs/multipart. HTTPError('401 Client Error: Unauthorized for url: https://api.smith.langchain.com/runs/multipart', '{"detail":"Invalid token"}')trace=33e0d8b2-b5ca-4ecf-b3d0-6008c287ff9b,id=33e0d8b2-b5ca-4ecf-b3d0-6008c287ff9b; trace=b4094e62-1f45-4cf4-bf37-350a0612faae,id=b4094e62-1f45-4cf4-bf37-350a0612faae
Failed to multipart ingest runs: langsmith.utils.LangSmithAuthError: Authentication failed for https://api.smith.langchain.com/runs/multipart. HTTPError('401 Client Error: Unauthorized for url: https://api.smith.langchain.com/runs/multipart', '{"detail":"Invalid token"}')trace=aa845c02-b946-4c6a-a248-4bb8ae578bdc,id=aa845c02-b946-4c6a-a248-4bb8ae578bdc; trace=33e0d8b2-b5ca-4ecf-b3d0-6008c287ff9b,id=33e0d8b2-b5ca-4ecf-b3d0-6008c287ff9b
Failed to multipart ingest runs: langsmith.utils.LangSmithAuthError: Authentication failed for https://api.smith.langchain

Answer: According to the provided context, effective chunking ensures that:

* The text is divided into sections that are neither too long nor too short.
* This helps optimize both retrieval and learning processes.

In other words, the advantages of using chunking include:

* Optimized retrieval: Chunking allows for efficient retrieval of specific information from large documents or datasets.
* Optimized learning: By dividing text into manageable pieces (chunks), learners can focus on specific sections of content, making it easier to learn and retain new information.
Source Documents: [{'page': 9, 'source': 'docs/rag_chunking_example.pdf'}, {'page': 3, 'source': 'docs/rag_chunking_example.pdf'}, {'page': 0, 'source': 'docs/RAG_Chunking_Tutorial.pdf'}, {'page': 4, 'source': 'docs/rag_chunking_example.pdf'}]


In [11]:
from langchain.chains import RetrievalQA

query = "What is RAG?"

# Retrieve top-k documents using the invoke method
retrieved_docs = retriever.invoke(query)  # Modify this to use the invoke method
top_k = 5  # Set the number of top documents you want to retrieve

# Ensure that the retrieved_docs is a list and slice to top_k
if isinstance(retrieved_docs, dict):
    retrieved_docs = retrieved_docs.get('documents', [])

retrieved_docs = retrieved_docs[:top_k]  # Retrieve only the top-k documents

# Combine the content of the documents into a single context
context = "\n\n".join([doc.page_content for doc in retrieved_docs])

# Feed the combined context to the LLM
prompt = f"Context:\n{context}\n\nQuestion: {query}\nAnswer:"

# Generate the response using Ollama
response = llm.invoke(prompt)

# Print the response and the source documents
print("Answer:", response)
print("Source Documents:", [doc.metadata for doc in retrieved_docs])


Answer: Retrieval-Augmented Generation (RAG) is a framework that combines pre-trained language models with external knowledge bases to enhance the accuracy and relevance of generated text.
Source Documents: [{'page': 4, 'source': 'docs/rag_chunking_example.pdf'}, {'page': 10, 'source': 'docs/rag_chunking_example.pdf'}, {'page': 0, 'source': 'docs/RAG_Chunking_Tutorial.pdf'}, {'page': 1, 'source': 'docs/RAG_Chunking_Tutorial.pdf'}]


## Including Prompt Engineering

In [12]:
from langchain.chains import RetrievalQA

query = "What is RAG?"

# Retrieve top-k documents using the invoke method
retrieved_docs = retriever.invoke(query)  # Modify this to use the invoke method
top_k = 5  # Set the number of top documents you want to retrieve

# Ensure that the retrieved_docs is a list and slice to top_k
if isinstance(retrieved_docs, dict):
    retrieved_docs = retrieved_docs.get('documents', [])

retrieved_docs = retrieved_docs[:top_k]  # Retrieve only the top-k documents

# Combine the content of the documents into a single context
context = "\n\n".join([doc.page_content for doc in retrieved_docs])

# Feed the combined context to the LLM
prompt = f"""
You are an expert AI assistant. Use the provided context to answer the question in a clear, concise, and professional manner. 
If the context is insufficient, respond with "The context provided does not contain enough information to answer this question."

### Context:
{context}

### Instructions:
1. Summarize key details from the context where relevant.
2. Answer the question in a way that is easy to understand for a general audience.
3. Where applicable, provide examples or additional clarifications to make the response more insightful.

### Question:
{query}

### Answer:
"""

# Generate the response using Ollama
response = llm.invoke(prompt)

# Print the response and the source documents
print("Answer:", response)
print("Source Documents:", [doc.metadata for doc in retrieved_docs])


Answer: Based on the provided context, I'd be happy to help!

RAG stands for Retrieval-Augmented Generation. It's a framework that combines pre-trained language models with external knowledge bases to enhance the accuracy and relevance of generated text. In other words, instead of relying solely on the model's internal knowledge, RAG retrieves relevant chunks of information from external sources, such as document collections, to assist in generation.

To put it simply, RAG is a way to improve the quality of AI-generated text by bringing in additional information and insights from outside sources. This allows for more accurate and relevant responses to user queries.

I hope that helps clarify things! Let me know if you have any further questions or if there's anything else I can help with.
Source Documents: [{'page': 4, 'source': 'docs/rag_chunking_example.pdf'}, {'page': 10, 'source': 'docs/rag_chunking_example.pdf'}, {'page': 0, 'source': 'docs/RAG_Chunking_Tutorial.pdf'}, {'page': 1,

In [13]:
from langchain.chains import RetrievalQA

query = "What is RAG?"

# Retrieve top-k documents using the invoke method
retrieved_docs = retriever.invoke(query)  # Modify this to use the invoke method
top_k = 5  # Set the number of top documents you want to retrieve

# Ensure that the retrieved_docs is a list and slice to top_k
if isinstance(retrieved_docs, dict):
    retrieved_docs = retrieved_docs.get('documents', [])

retrieved_docs = retrieved_docs[:top_k]  # Retrieve only the top-k documents

# Combine the content of the documents into a single context
context = "\n\n".join([doc.page_content for doc in retrieved_docs])

# Advanced prompt with retrieval and Llama 3's knowledge
prompt = f"""
You are an expert AI assistant. Use the provided context and your own knowledge to answer the question in a clear, concise, and professional manner. 

### Instructions:
1. First, prioritize using the context to provide the answer.
2. If additional information is needed, supplement your response with your own knowledge.
3. Always provide the sources for any information retrieved from the context.
4. If the context does not answer the question and you rely solely on your own knowledge, clearly state that no external sources were used.

### Context:
{context}

### Question:
{query}

### Answer:
"""

# Generate the response using Ollama
response = llm.invoke(prompt)

# Print response and source information
print("Answer:", response)
print("Source Documents:")
for doc in retrieved_docs:
    print(f"- Page Content: {doc.page_content[:200]}...")  # Truncated for readability
    print(f"  Metadata: {doc.metadata}")


Answer: Based on the provided context, RAG (Retrieval-Augmented Generation) is a framework that combines pre-trained language models with external knowledge bases to enhance the accuracy and relevance of generated text. This framework retrieves relevant chunks of information from external sources, such as document collections, to assist in generation.

Sources: Context provided

Note: No additional information was required beyond what was provided in the context.
Source Documents:
- Page Content: RAG Chunking Example Document
accuracy and processing speed. This document is structured to facilitate experimentation with
various chunking techniques, including overlapping windows, recursive splitt...
  Metadata: {'page': 4, 'source': 'docs/rag_chunking_example.pdf'}
- Page Content: RAG Chunking Example Document
accuracy and processing speed. This document is structured to facilitate experimentation with
various chunking techniques, including overlapping windows, recursive splitt...
  Metad