## Advanced Retrieval Techniques

Notebook walking through evaluating Parent Document Retrieval and Contextual Compression (Rerank)

In [None]:
import os
from getpass import getpass
from datetime import datetime
os.environ['OPENAI_API_KEY'] = getpass("Enter your OpenAI API key: ")
os.environ["COHERE_API_KEY"] = getpass("Enter your Cohere API Key: ")
os.environ["LANGSMITH_API_KEY"] = getpass("Enter your LangSmith API Key:")
import os
os.environ["LANGCHAIN_TRACING_V2"] = "false" 

def enable_tracing_for_evaluation():
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_PROJECT"] = f"RAG_Evaluation_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

def disable_tracing():
    os.environ["LANGCHAIN_TRACING_V2"] = "false"

### Data preparation and retrieval setup for contextual compression

In [93]:
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders import PyMuPDFLoader

path = "data/"
loader = DirectoryLoader(path, glob="*.pdf", loader_cls=PyMuPDFLoader)
docs = loader.load()

In [94]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=750, chunk_overlap=150)
#trying smaller chunks after baseline evaluation
split_documents = text_splitter.split_documents(docs)
len(split_documents)


2483

In [95]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

In [96]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams

client = QdrantClient(":memory:")

client.create_collection(
    collection_name="Contextual_Compression_Loan_Data",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)

vectorstore = QdrantVectorStore(
    client=client,
    collection_name="Contextual_Compression_Loan_Data",
    embedding=embeddings,
)

vectorstore.add_documents(split_documents)

['dfcac70cd3934facbe8eb05ecbf127bf',
 '8f64d3b7e70b49eb890a6355a3ccc290',
 '6d5f5706cef74784aabff421263fcf72',
 '2611b60442af4bb5aadd1f06079c4ef7',
 '12ee4e352f4a49cf8804135df3834dc6',
 '22725d497d9246398bbbd2dd513abb72',
 '77b52e01a7604465b1cd2c509a87038b',
 'd3212ad159fb49dd8b59983df2685f42',
 '62021972fab6412f803ebae49f31ce4d',
 'd179c9845a3046098c62efc59d441db0',
 '938c45928d7146658295195914dcf288',
 '16668321a9434145b6518c118791bebf',
 '183f5b395b724e31874fc8c1b033be91',
 'c4daff477462433caa829ea3c0fe5a61',
 '84bfc1e168a24c0a9234b8c6df6ffbb1',
 '1ec585a5239c45938e949baf6a9ed955',
 '342be892a2fb4049a842ce4798349e24',
 '45b0c0e581534073b718f6a0099d72e3',
 'ee056801cc764cd09facb498eb9cd8a0',
 '81499d4d49f84fe8b4ab982cb5e43fcf',
 '367a81dc0c6a4e88960eec476f80c50a',
 '180173d2c00140828f180e2031cf30da',
 'a65393bdd8ea4f4898f0af3e3c033738',
 '8ccffa6e8f0d42618a37a2569bda05f2',
 'dddf7a5eec0c452a94ca841169f44aa9',
 'c6da496c73724a99a508f2f909703908',
 '30fd720228dd44ce8c2cae861438f332',
 

In [97]:
retriever = vectorstore.as_retriever(search_kwargs={"k" : 15})
#trying more documents after baseline evaluation

In [98]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank

def retrieve_contextual(state):
    compressor = CohereRerank(model="rerank-v3.5", top_n=10)
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=compressor, 
        base_retriever=retriever, 
        search_kwargs={"k": 5}
    )
    retrieved_docs = compression_retriever.invoke(state["question"])
    
    return {"context": retrieved_docs}

### Data preparation and retrieval setup for parent document

In [99]:
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from qdrant_client import QdrantClient, models

parent_docs = docs
child_splitter = RecursiveCharacterTextSplitter(chunk_size=750)

In [100]:
from langchain_qdrant import QdrantVectorStore

client = QdrantClient(location=":memory:")

client.create_collection(
    collection_name="Parent_Document_Loan_Data",
    vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE)
)

parent_document_vectorstore = QdrantVectorStore(
    collection_name="Parent_Document_Loan_Data", embedding=OpenAIEmbeddings(model="text-embedding-3-small"), client=client
)

In [101]:
store = InMemoryStore()

parent_document_retriever = ParentDocumentRetriever(
    vectorstore = parent_document_vectorstore,
    docstore=store,
    child_splitter=child_splitter,
)

In [102]:
parent_document_retriever.add_documents(parent_docs, ids=None)


In [103]:
def retrieve_parent_documents(state):
    try:
        retrieved_docs = parent_document_retriever.invoke(state["question"])
        return {"context": retrieved_docs}
    except Exception as e:
        print(f"Error in retrieve_parent_documents: {e}")
        return {"context": []}

In [104]:
from langchain.prompts import ChatPromptTemplate

RAG_PROMPT = """\
You are a helpful assistant who answers questions based ONLY on the provided context. 
IMPORTANT: You must NOT use any external knowledge. If the context doesn't contain the answer, say "I don't have enough information in the provided documents to answer this question."

Context:
{context}

Question:
{question}

Answer (use ONLY information from the context above):
"""
#trying more strict prompt after baseline evaluation

rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)

In [105]:
from langchain_openai import ChatOpenAI

openai_chat_model = ChatOpenAI(model="gpt-4.1-nano")

In [106]:
def generate(state):
  docs_content = "\n\n".join(doc.page_content for doc in state["context"])
  messages = rag_prompt.format_messages(question=state["question"], context=docs_content)
  response = openai_chat_model.invoke(messages)
  return {"response" : response.content}

In [107]:
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
from langchain_core.documents import Document

class State(TypedDict):
  question: str
  context: List[Document]
  response: str

In [108]:
contextual_compression_graph_builder = StateGraph(State).add_sequence([retrieve_contextual, generate])
contextual_compression_graph_builder.add_edge(START, "retrieve_contextual")
contextual_compression_graph = contextual_compression_graph_builder.compile()

In [109]:
parent_document_graph_builder = StateGraph(State).add_sequence([retrieve_parent_documents, generate])
parent_document_graph_builder.add_edge(START, "retrieve_parent_documents")
parent_document_graph = parent_document_graph_builder.compile()

In [None]:
from langchain_community.tools.tavily_search import TavilySearchResults

tavily_tool = TavilySearchResults(max_results=5)
#This tool will get the latest news/updates on the bill and the student loan repayment plan

In [111]:
from backend.main import timeline_tool, complete_form_tool, tool_comparison_tool

tool_belt = [
    tavily_tool,
    timeline_tool,
    complete_form_tool,
    tool_comparison_tool,
]

In [112]:
from langchain_openai import ChatOpenAI

model = ChatOpenAI(model="gpt-4.1", temperature=0)

In [113]:
model = model.bind_tools(tool_belt)

In [114]:
from typing import TypedDict, Annotated
from langgraph.graph.message import add_messages
import operator
from langchain_core.messages import HumanMessage, BaseMessage
from langchain_core.documents import Document

class AgentState(TypedDict):
    messages: Annotated[list, add_messages]
    context: List[Document]

In [115]:
from langgraph.prebuilt import ToolNode

def call_model(state):
    messages = state["messages"]
    response = model.invoke(messages)
    return {
        "messages" : [response],
        "context" : state.get("context", [])
        }


tool_node = ToolNode(tool_belt)

In [116]:
from langgraph.graph import StateGraph, END

uncompiled_graph = StateGraph(AgentState)

uncompiled_graph.add_node("agent", call_model)
uncompiled_graph.add_node("action", tool_node)

<langgraph.graph.state.StateGraph at 0x30ff550d0>

In [117]:
uncompiled_graph.set_entry_point("agent")

<langgraph.graph.state.StateGraph at 0x30ff550d0>

In [118]:
def should_continue(state):
  last_message = state["messages"][-1]

  if last_message.tool_calls:
    return "action"

  return END

uncompiled_graph.add_conditional_edges(
    "agent",
    should_continue
)

<langgraph.graph.state.StateGraph at 0x30ff550d0>

In [121]:
uncompiled_graph.add_edge("action", "agent")

Adding an edge to a graph that has already been compiled. This will not be reflected in the compiled graph.


<langgraph.graph.state.StateGraph at 0x30ff550d0>

In [122]:
compiled_graph = uncompiled_graph.compile()

### Evaluation

In [123]:
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
generator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-mini"))
generator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

In [35]:
from ragas.testset import TestsetGenerator

generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings)
import time

# Add delays between operations
print("Starting generation...")
dataset = generator.generate_with_langchain_docs(docs, testset_size=5)
time.sleep(10)  # Wait 10 seconds
print("Generation complete!")

# Enable tracing only for evaluation
enable_tracing_for_evaluation()
# Run your evaluation
# Then disable tracing
disable_tracing()


Starting generation...


Applying HeadlinesExtractor:   0%|          | 0/452 [00:00<?, ?it/s]

Applying HeadlineSplitter:   0%|          | 0/480 [00:00<?, ?it/s]

unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to ap

Applying SummaryExtractor:   0%|          | 0/864 [00:00<?, ?it/s]

Property 'summary' already exists in node '475946'. Skipping!
Property 'summary' already exists in node '54e73b'. Skipping!
Property 'summary' already exists in node '50bd02'. Skipping!
Property 'summary' already exists in node 'e7377c'. Skipping!
Property 'summary' already exists in node 'cce2b5'. Skipping!
Property 'summary' already exists in node '4ffefb'. Skipping!
Property 'summary' already exists in node 'a87fe9'. Skipping!
Property 'summary' already exists in node '7f37dc'. Skipping!
Property 'summary' already exists in node '291a33'. Skipping!
Property 'summary' already exists in node '2e9538'. Skipping!
Property 'summary' already exists in node '48656a'. Skipping!
Property 'summary' already exists in node 'a3222b'. Skipping!
Property 'summary' already exists in node 'bc60f2'. Skipping!
Property 'summary' already exists in node '531bc1'. Skipping!
Property 'summary' already exists in node '50a393'. Skipping!
Property 'summary' already exists in node 'b18b6b'. Skipping!
Property

Applying CustomNodeFilter:   0%|          | 0/83 [00:00<?, ?it/s]

Node 3d085e2b-707e-4541-9dec-74f5f5a92ad0 does not have a summary. Skipping filtering.
Node fe7f780f-aea9-40a3-9039-81d65b4a3018 does not have a summary. Skipping filtering.


Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   0%|          | 0/1010 [00:00<?, ?it/s]

unable to apply transformation: node.property('summary') must be a string, found '<class 'NoneType'>'
Property 'summary_embedding' already exists in node '54e73b'. Skipping!
Property 'summary_embedding' already exists in node '50bd02'. Skipping!
Property 'summary_embedding' already exists in node 'cce2b5'. Skipping!
Property 'summary_embedding' already exists in node '475946'. Skipping!
Property 'summary_embedding' already exists in node '291a33'. Skipping!
Property 'summary_embedding' already exists in node '7f37dc'. Skipping!
Property 'summary_embedding' already exists in node 'a87fe9'. Skipping!
Property 'summary_embedding' already exists in node '4ffefb'. Skipping!
Property 'summary_embedding' already exists in node '10198c'. Skipping!
Property 'summary_embedding' already exists in node 'bc60f2'. Skipping!
Property 'summary_embedding' already exists in node 'e7377c'. Skipping!
Property 'summary_embedding' already exists in node 'b18b6b'. Skipping!
Property 'summary_embedding' alrea

Applying [CosineSimilarityBuilder, OverlapScoreBuilder]:   0%|          | 0/2 [00:00<?, ?it/s]

unable to apply transformation: Node 4fd798d9-22bc-43ee-a9f5-0647526c38df has no summary_embedding


Generating personas:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Scenarios:   0%|          | 0/2 [00:00<?, ?it/s]

Generating Samples:   0%|          | 0/6 [00:00<?, ?it/s]

Generation complete!


AttributeError: module 'datetime' has no attribute 'now'

In [36]:
dataset.to_pandas()

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,Could you explain the exceptions to the minimu...,[2. Nonstandard terms that are substantially e...,Chapter 7 discusses exceptions to the minimum ...,single_hop_specifc_query_synthesizer
1,What is the minmum loan period if a student is...,[Minimum Loan Period: Standard Term and SE9W N...,For credit-hour programs with standard terms s...,single_hop_specifc_query_synthesizer
2,how summer 2 work with SAY loan limits?,[Summer terms are generally not considered to ...,If the summer term is split into modules like ...,single_hop_specifc_query_synthesizer
3,What are the key deadlines related to the IRS'...,[<1-hop>\n\nProvision: In addition to paid tax...,The IRS is barred from issuing any additional ...,multi_hop_specific_query_synthesizer
4,Can you explane how the special depreciation a...,[<1-hop>\n\nProvision: This provision makes se...,The special depreciation allowance for qualifi...,multi_hop_specific_query_synthesizer
5,How do the rules for abbreviated loan periods ...,[<1-hop>\n\nRules for Abbreviated Loan Periods...,"According to Chapter 1, the rules for abbrevia...",multi_hop_specific_query_synthesizer


In [132]:
for test_row in dataset:
  response = contextual_compression_graph.invoke({"question" : test_row.eval_sample.user_input})
  test_row.eval_sample.response = response["response"]
  test_row.eval_sample.retrieved_contexts = [context.page_content for context in response["context"]]

In [134]:
from ragas import EvaluationDataset
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import Faithfulness, ResponseRelevancy, ContextEntityRecall, ContextPrecision
from ragas import evaluate, RunConfig

evaluation_dataset = EvaluationDataset.from_pandas(dataset.to_pandas())

evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-mini", max_tokens=8192))

custom_run_config = RunConfig(timeout=500)

result = evaluate(
    dataset=evaluation_dataset,
    metrics=[ContextPrecision(), Faithfulness(), ResponseRelevancy(), ContextEntityRecall()],
    llm=evaluator_llm,
    run_config=custom_run_config
)
result

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

{'context_precision': 0.8454, 'faithfulness': 0.9815, 'answer_relevancy': 0.6391, 'context_entity_recall': 0.5591}

In [135]:
for test_row in dataset:
  response = parent_document_graph.invoke({"question" : test_row.eval_sample.user_input})
  test_row.eval_sample.response = response["response"]
  test_row.eval_sample.retrieved_contexts = [context.page_content for context in response["context"]]

In [136]:
from ragas import EvaluationDataset
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import Faithfulness, ResponseRelevancy, ContextEntityRecall, ContextPrecision
from ragas import evaluate, RunConfig

parent_evaluation_dataset = EvaluationDataset.from_pandas(dataset.to_pandas())

parent_evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-mini", max_tokens=8192))

custom_run_config = RunConfig(timeout=500)

result = evaluate(
    dataset=parent_evaluation_dataset,
    metrics=[ContextPrecision(), Faithfulness(), ResponseRelevancy(), ContextEntityRecall()],
    llm=parent_evaluator_llm,
    run_config=custom_run_config
)
result

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

{'context_precision': 0.9861, 'faithfulness': 0.7325, 'answer_relevancy': 0.6335, 'context_entity_recall': 0.4750}