Install the third-party libraries for this project.

In [None]:
!pip install langchain langchain-core langchain-classic langchain_community langchain_text_splitters langchain_openai langgraph langsmith pydantic pypdf chromadb

Import the necessary modules from the third-party libraries for this project.

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv, find_dotenv
import os

# Load configuration from .env file; override=True lets values in the file
# replace variables that are already set in the environment.
loaded_env = load_dotenv(find_dotenv(), override=True)

# Report a missing key now instead of as an authentication error on the first API call.
if not os.getenv("OPENAI_API_KEY"):
    print("Warning: OPENAI_API_KEY is not set. Add it to your .env file or environment before calling OpenAI.")

Test to ensure the LLM is working.

In [None]:
# Smoke test: build the chat model and send it a single plain-text question.
llm = ChatOpenAI(
    model_name="gpt-4o-mini",
    temperature=0.5,
)
response = llm.invoke("Who is the president of America?")
print(response.content)

Context: This is external information provided to the LLM. It could be a document, a database record, a webpage, or any other source of facts. The LLM doesn't inherently "know" everything, so providing relevant context allows it to answer questions that are specific to that information.

Instruction: These are the directives given to the LLM on how to process the context and answer the question. Instructions guide the model's behavior, such as "summarize the following text," "answer based only on the provided context," "compare and contrast," or "extract specific entities."

Question: This is the specific query or task the user wants the LLM to address. The LLM uses the provided context and instructions to formulate an answer to this question.

LangChain provides a structured way to call the LLM.

In [None]:
from langchain_core.messages import HumanMessage, SystemMessage

# The system message sets the assistant's behaviour; the human message is the user's turn.
system_message = SystemMessage(content="You are a friendly and helpful AI assistant who responds concisely.")
human_message = HumanMessage(content="Hello, how are you today?")
messages = [system_message, human_message]

# Send the whole conversation to the model and show the reply text.
response = llm.invoke(messages)
print(response.content)

Retrieval-Augmented Generation (RAG) is an AI framework designed to enhance the accuracy and reliability of Large Language Models (LLMs) by grounding their responses in external, relevant information. Instead of relying solely on the LLM's pre-trained knowledge, RAG introduces a retrieval step where relevant documents or data (the context) are first fetched from a knowledge base. This context, along with specific directives (instructions) and the user's question, is then fed to the LLM. This process helps LLMs generate more factual, current, and contextually appropriate answers, significantly reducing the likelihood of 'hallucinations' and improving overall performance, especially for domain-specific or rapidly changing information.

Create two functions: the first splits the PDF content into chunks, and the second loads those chunks into the vector store.

In [6]:
def load_and_split_pdf(pdf_path, chunk_size=1000, chunk_overlap=200):
    """
    Read a PDF with LangChain and cut its text into overlapping chunks.

    Args:
        pdf_path (str): Path to the PDF file
        chunk_size (int): Maximum length of a chunk, measured with len() (default: 1000)
        chunk_overlap (int): Overlap between chunks (default: 200)

    Returns:
        list: Document chunks produced by the text splitter
    """
    # Separators are listed from coarsest to finest: blank line, newline,
    # space, and finally the empty string.
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
        "separators": ["\n\n", "\n", " ", ""],
    }

    # Load the PDF first, then build the splitter and apply it to the loaded documents.
    pdf_documents = PyPDFLoader(pdf_path).load()
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(pdf_documents)


def load_chunks_to_chroma(chunks, collection_name="pdf_documents", persist_directory="./chroma_db"):
    """
    Embed document chunks with OpenAI embeddings and store them in a Chroma collection.

    Args:
        chunks (list): List of document chunks
        collection_name (str): Name of the Chroma collection
        persist_directory (str): Directory to save the Chroma database

    Returns:
        Chroma: Chroma vector store instance
    """
    # No API key is passed explicitly; OpenAIEmbeddings is expected to read it
    # from the environment (OPENAI_API_KEY).
    store_options = dict(
        embedding=OpenAIEmbeddings(),
        collection_name=collection_name,
        persist_directory=persist_directory,
    )

    # Build the vector store directly from the chunks and hand it back.
    return Chroma.from_documents(documents=chunks, **store_options)

Read the PDF file and split it into text chunks. Then load the chunks into the vector store.

In [None]:
import chromadb

pdf_file = "example.pdf"  # Replace with your PDF file path
vector_store = None  # Remains None when loading fails or yields no chunks

# Delete the specific collection
vector_db_directory = "./chroma_db"
collection_name = "pdf_documents"

# Remove the collection left by an earlier run before loading again
# (presumably so that re-running this cell does not accumulate duplicate chunks).
if os.path.exists(vector_db_directory):
    client = chromadb.PersistentClient(path=vector_db_directory)
    try:
        client.delete_collection(name=collection_name)
        print(f"Deleted collection '{collection_name}'")
    except Exception as e:
        # Best effort: a failed delete is reported but does not stop the load.
        print(f"Collection might not exist: {e}")

try:
    # Load and split the PDF
    text_chunks = load_and_split_pdf(pdf_file)

    print(f"Successfully loaded and split PDF into {len(text_chunks)} chunks")

    if text_chunks:
        # Display all chunks
        print("\n" + "="*80)
        for i, chunk in enumerate(text_chunks, 1):
            print(f"\n--- CHUNK {i}/{len(text_chunks)} ---")
            print(f"Page: {chunk.metadata.get('page', 'N/A')}")
            print(f"Source: {chunk.metadata.get('source', 'N/A')}")
            print(f"Content Length: {len(chunk.page_content)} characters")
            print(f"\nContent:\n{chunk.page_content}")
            print("="*80)

        # Load chunks into ChromaDB
        print("\n\nLoading chunks into ChromaDB vector store...")
        vector_store = load_chunks_to_chroma(
            chunks=text_chunks,
            collection_name=collection_name,
            persist_directory=vector_db_directory
        )
        print(f"Successfully loaded {len(text_chunks)} chunks into ChromaDB!")
    else:
        # Make the empty case explicit instead of silently leaving vector_store as None.
        print(f"Warning: no text chunks were produced from '{pdf_file}'; nothing was loaded into ChromaDB.")

except FileNotFoundError:
    print(f"Error: PDF file '{pdf_file}' not found")
except Exception as e:
    print(f"Error: {e}")

Now, we can query the vector database using natural language.

In [None]:
# Example: ask the vector store for the three chunks closest to the query.
query = "Where should I visit in China?"  # Replace with your query
results = vector_store.similarity_search(query, k=3)

print(f"\nTop {len(results)} similar documents:")
# One three-line report per hit, numbered from 1.
for rank, hit in enumerate(results, start=1):
    print(
        f"\n--- Result {rank} ---",
        f"Content: {hit.page_content}",
        f"Metadata: {hit.metadata}",
        sep="\n",
    )

After retrieving from the vector database, we can pass the retrieved text to the LLM as context so that it can formulate and phrase the answer.

In [None]:
from langchain_core.prompts import PromptTemplate

# Prompt with two placeholders: the retrieved context and the user's question.
prompt_template = """
You are an AI assistant helping answer questions based on the given context.

Context:
{context}

Question:
{question}
"""

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template
)

# Use every retrieved chunk as context rather than only the top hit, separated
# by blank lines. This also avoids an IndexError when the search returned nothing
# (the context is then an empty string).
input_for_prompt = {
    "context": "\n\n".join(doc.page_content for doc in results),
    "question": query
}

# Pipe the filled-in prompt into the chat model.
chain = prompt | llm

result = chain.invoke(input_for_prompt)

print(result.content)

A vector database is highly efficient at finding data points (like text chunks) that are numerically close to each other in an embedding space, which often correlates with semantic similarity. However, it does not comprehend or interpret the actual meaning of the text content itself. Its function is to identify patterns of similarity based on numerical representations, not to reason about or understand the information in a human-like cognitive sense. That deeper semantic understanding and reasoning is typically the role of a large language model (LLM) that processes the retrieved content.

In the code below, we use an LLM to post-process the records retrieved from the vector database, keeping only the content relevant to the question (referred to here as reranking).

In [None]:
from langchain_openai import ChatOpenAI
from langchain_classic.retrievers import ContextualCompressionRetriever
from langchain_classic.retrievers.document_compressors import LLMChainExtractor
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

# Base retriever: the three nearest chunks from the vector store.
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

# Note: this rebinds `llm` with a higher temperature than the earlier cells used.
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0.9)

prompt_template = """You are an AI assistant helping answer questions based on the given context.
Use only the information provided in the context to answer the question.

Context:
{context}

Question:
{question}

Answer:"""

prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

# LLM-backed compressor: LLMChainExtractor asks the model to extract the parts
# of each retrieved document that are relevant to the query.
compressor = LLMChainExtractor.from_llm(llm)

# Wrap the base retriever so its results pass through the compressor.
rerank_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)


def format_docs(docs):
    """Join the page content of the given documents, separated by blank lines."""
    return "\n\n".join([document.page_content for document in docs])


# The incoming question is passed through unchanged as "question", and is also
# sent through the compression retriever to build "context".
chain_inputs = {
    "context": rerank_retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

answer = rag_chain.invoke("What is the place of interest for France?")
print(answer)

Your turn to play with the rerank retriever.

In [None]:
# Rebuild both retrievers here so that k can be changed without re-running the previous cell.
retriever = vector_store.as_retriever(search_kwargs={"k": 3})
rerank_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

query = "What should I eat in France?"

# Call the compression retriever on its own to inspect what it returns.
retrieved_docs = rerank_retriever.invoke(query)

print(f"\nRetrieved and re-ranked documents ({len(retrieved_docs)}):\n")
divider = "\n" + "="*50 + "\n"
for position, document in enumerate(retrieved_docs, start=1):
    print(f"--- Document {position} ---")
    print(f"Content: {document.page_content}")
    print(f"Metadata: {document.metadata}")
    print(divider)
    print("\n" + "="*50 + "\n")