In [1]:
!uv add langchain langchain-core langchain-community langchain-text-splitters langsmith

[2mResolved [1m314 packages[0m [2min 13ms[0m[0m
[2mAudited [1m193 packages[0m [2min 606ms[0m[0m


In [2]:
!uv add pypdf beautifulsoup4 selenium requests lxml jq tiktoken sentence-transformers langchain-huggingface faiss-cpu chromadb "langchain[google-genai]"

[2mResolved [1m314 packages[0m [2min 1ms[0m[0m
[2mAudited [1m193 packages[0m [2min 16ms[0m[0m


# Data Loading

In [3]:
from langchain_community.document_loaders import PyPDFLoader

# 1. Create the loader
loader = PyPDFLoader("../data/raw/Database System Concepts 7th Ed.pdf")

# 2. Load and split (by page)
pages = loader.load() # .load() returns a list of Documents

# 3. Check the results
print(f"Loaded {len(pages)} pages (Documents).")

# Preview the content of page 0 (the first page)
print(f"\n--- Page 1000 Content (Partial) ---")
print(pages[999].page_content[:500])

# Metadata for page 0 (source and page number)
# Notice how the metadata is automatically populated!
print(f"\n--- Page 1000 Metadata ---")
print(pages[999].metadata)

  from .autonotebook import tqdm as notebook_tqdm


Loaded 2195 pages (Documents).

--- Page 1000 Content (Partial) ---
Page 652
When we use a B+-tree for ﬁle organization, space
utilization is particularly important, since the space
occupied by the records is likely to be much more than the
space occupied by keys and pointers. We can improve the
utilization of space in a B+-tree by involving more sibling
nodes in redistribution during splits and merges. The
technique is applicable to both leaf nodes and nonleaf
nodes, and it works as follows:
During insertion, if a node is full, the system attempts to
redistribu

--- Page 1000 Metadata ---
{'producer': 'calibre 6.28.1', 'creator': 'calibre 6.28.1', 'creationdate': '2024-02-19T20:36:37+00:00', 'author': 'SILBERSCHATZ;', 'moddate': '2024-02-19T20:36:37+00:00', 'title': 'ISE EBook Online for Database System Concepts', 'source': '../data/raw/Database System Concepts 7th Ed.pdf', 'total_pages': 2195, 'page': 999, 'page_label': '1000'}


# Data Splitting

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# 1. Create the splitter (200 chars, 50 char overlap)
# It will automatically use the separators ["\n\n", "\n", " ", ""]
recursive_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50,
    length_function=len
)

# 2. Split the documents
recursive_chunks = recursive_splitter.split_documents(pages)

print(f"--- Split into {len(recursive_chunks)} chunks ---")

# 3. Check the results (compare them to the CharacterTextSplitter)
print("\n--- Chunk 1 ---")
print(recursive_chunks[0].page_content)

print("\n--- Chunk 2 ---")
print(recursive_chunks[1].page_content)

print("\n--- Chunk 3 ---")
print(recursive_chunks[2].page_content)

print("\n--- Chunk 4 ---")
print(recursive_chunks[3].page_content)

--- Split into 19706 chunks ---

--- Chunk 1 ---
D A T A B A S E 
SYSTEM CONCEPTS
SEVENTH EDITION
Abraham Silberschatz
Yale University
Henry F. Korth
Lehigh University
S. Sudarshan
Indian Institute of Technology, Bombay

--- Chunk 2 ---
Page ii
 
DATABASE SYSTEM CONCEPTS
Published by McGraw-Hill Education, 2 Penn Plaza, New York,
NY 10121. Copyright © 2020 by McGraw-Hill Education. All

--- Chunk 3 ---
rights reserved. Printed in the United States of America. No
part of this publication may be reproduced or distributed in
any form or by any means, or stored in a database or

--- Chunk 4 ---
retrieval system, without the prior written consent of
McGraw-Hill Education, including, but not limited to, in any
network or other electronic storage or transmission, or


# Embedding

In [5]:
import torch
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# the parameters for this model can be chosen from https://sbert.net/docs/package_reference/sentence_transformer/SentenceTransformer.html#sentence_transformers.SentenceTransformer

# If the backend supports cuda, we use it
if torch.cuda.is_available():
  model_kwargs = {"device": "cuda"}
else:
  model_kwargs = {"device": "cpu"}

embeddings = HuggingFaceEmbeddings(
    model_name="distiluse-base-multilingual-cased-v1",
    model_kwargs=model_kwargs
    )

# Vector Store & Retriever

In [None]:
from langchain_community.vectorstores import FAISS

# This will take a few minutes without GPU
# However, you can finish it in just a few seconds with GPU!
myvectorstore = FAISS.from_documents(recursive_chunks, embeddings)

# You can store the index
myvectorstore.save_local("myfaissidx")
# You can see the file now

In [9]:
# You can load the stored index.
# You need to specify the embedding model by passing an embedding object parameter
new_vector_store = FAISS.load_local(
    "myfaissidx", embeddings, allow_dangerous_deserialization=True
)

# we're doing it without any parameter, but you can set `k` and `filter`, etc.
docs = new_vector_store.similarity_search("Bloom filter", 10)
print(docs)

[Document(id='6d12f061-162b-405d-a9f8-3d4454741072', metadata={'producer': 'calibre 6.28.1', 'creator': 'calibre 6.28.1', 'creationdate': '2024-02-19T20:36:37+00:00', 'author': 'SILBERSCHATZ;', 'moddate': '2024-02-19T20:36:37+00:00', 'title': 'ISE EBook Online for Database System Concepts', 'source': '../data/raw/Database System Concepts 7th Ed.pdf', 'total_pages': 2195, 'page': 1830, 'page_label': '1831'}, page_content='dynamic hashing techniques.\n24.1 Bloom Filter'), Document(id='60ce35ba-1c56-475b-9186-3b7f9f47632e', metadata={'producer': 'calibre 6.28.1', 'creator': 'calibre 6.28.1', 'creationdate': '2024-02-19T20:36:37+00:00', 'author': 'SILBERSCHATZ;', 'moddate': '2024-02-19T20:36:37+00:00', 'title': 'ISE EBook Online for Database System Concepts', 'source': '../data/raw/Database System Concepts 7th Ed.pdf', 'total_pages': 2195, 'page': 1088, 'page_label': '1089'}, page_content='ﬁgure.'), Document(id='6acb542b-c39b-4a27-873a-6df5a0d08c2f', metadata={'producer': 'calibre 6.28.1',

In [10]:
for i in range(10):
    print(docs[i].page_content)

dynamic hashing techniques.
24.1 Bloom Filter
ﬁgure.
24.2.4.2 Lookup Operations Using Bloom Filters
Lookup operations in stepped-merge index have to
separately search each of the trees. Thus, compared to the
to blockchains.4
only slightly worse than on a regular B+-tree.
The Bloom ﬁlter check thus works very well for point
lookups, allowing a signiﬁcant fraction of the trees to be
and the system load is light, trees across all levels could
potentially get merged into a single tree at some level r.
24.2.4.2 Lookup Operations Using Bloom Filters
TOPICS
Chapter 24 Advanced Indexing Techniques
24.1 Bloom Filter 1175
24.2 Log-Structured Merge Tree and Variants 1176
skipped, as long as suﬃcient memory is available to store
all the Bloom ﬁlters in memory. With I key values in the
index, approximately 10I bits of memory will be required. To
reduce the main memory overhead, some of the Bloom
ﬁlters may be stored on ﬂash storage.
Note that for range lookups, the Bloom ﬁlter optimization
cannot b

In [12]:
myretriever = myvectorstore.as_retriever(search_type="mmr")

In [13]:
myretriever.invoke("Bloom filter")

[Document(id='6d12f061-162b-405d-a9f8-3d4454741072', metadata={'producer': 'calibre 6.28.1', 'creator': 'calibre 6.28.1', 'creationdate': '2024-02-19T20:36:37+00:00', 'author': 'SILBERSCHATZ;', 'moddate': '2024-02-19T20:36:37+00:00', 'title': 'ISE EBook Online for Database System Concepts', 'source': '../data/raw/Database System Concepts 7th Ed.pdf', 'total_pages': 2195, 'page': 1830, 'page_label': '1831'}, page_content='dynamic hashing techniques.\n24.1 Bloom Filter'),
 Document(id='4cc35d03-e530-40bb-a697-730f9d0bad8d', metadata={'producer': 'calibre 6.28.1', 'creator': 'calibre 6.28.1', 'creationdate': '2024-02-19T20:36:37+00:00', 'author': 'SILBERSCHATZ;', 'moddate': '2024-02-19T20:36:37+00:00', 'title': 'ISE EBook Online for Database System Concepts', 'source': '../data/raw/Database System Concepts 7th Ed.pdf', 'total_pages': 2195, 'page': 1841, 'page_label': '1842'}, page_content='only slightly worse than on a regular B+-tree.\nThe Bloom ﬁlter check thus works very well for point

# LLM Integration & Building a RAG Agent

In [None]:
import os
from langchain.chat_models import init_chat_model

os.environ["GOOGLE_API_KEY"] = "YOUR API KEY" # enter your api key

model = init_chat_model("google_genai:gemini-2.5-flash") # you may change to another model if you wish

In [15]:
from langchain.agents.middleware import dynamic_prompt, ModelRequest
from langchain.agents import create_agent

@dynamic_prompt
def prompt_with_context(request: ModelRequest) -> str:
    """Inject context into state messages."""
    last_query = request.state["messages"][-1].text

    retrieved_docs = myvectorstore.similarity_search(last_query) # using the faiss vector store from our own dataset

    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)

    system_message = (
        "You are a helpful assistant. Use the following context in your response:"
        f"\n\n{docs_content}"
    )

    return system_message


agent = create_agent(model, tools=[], middleware=[prompt_with_context]) # using the Google Gemini model

In [16]:
agent.invoke({"messages": [{"role": "user", "content": "Explain about Bloom filter in detail."}]})

{'messages': [HumanMessage(content='Explain about Bloom filter in detail.', additional_kwargs={}, response_metadata={}, id='fefd74d1-f3b9-48b1-bfd0-fc1eae34c88a'),
  AIMessage(content='A Bloom filter is a **space-efficient probabilistic data structure** that is used to test whether an element is a member of a set. It\'s a clever way to check for set membership without storing the actual elements, allowing for significant memory savings, especially for very large sets.\n\nIt was invented by Burton Howard Bloom in 1970.\n\nHere\'s a detailed breakdown:\n\n### Core Concept\n\nImagine you have a very large list of items (e.g., all the URLs you\'ve ever visited, or all the potentially malicious IP addresses). You want to quickly check if a *new* item is in that list without storing the entire list in memory, which could be massive. A Bloom filter can tell you:\n1.  **Definitely not in the set.**\n2.  **Probably in the set** (with a small chance of error, called a "false positive").\n\n### H

In [17]:
result = agent.invoke({"messages": [{"role": "user", "content": "Explain about Bloom filter in detail."}]})
print(result["messages"][-1].pretty_print())


A Bloom filter is a **space-efficient probabilistic data structure** that is used to test whether an element is a member of a set. It can tell you with certainty that an element is *not* in the set, or that it *might* be in the set (with a certain probability of error).

Here's a detailed breakdown:

### 1. What is a Bloom Filter?

At its core, a Bloom filter is a simple array of bits (a **bitmap**), all initialized to 0. It uses multiple hash functions to add and check for the presence of elements.

*   **Probabilistic:** This is its defining characteristic. While it never produces "false negatives" (it will never say an element is *not* in the set if it actually is), it *can* produce "false positives" (it might say an element *is* in the set when it actually isn't).
*   **Space-Efficient:** It uses significantly less memory than storing the actual elements of the set.
*   **Time-Efficient:** Adding and checking for elements are very fast, constant-time operations (O(k), where k is t

In [19]:
query = "Explain about B+ trees in detail."

for step in agent.stream(
    {"messages": [{"role": "user", "content": query}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


Explain about B+ trees in detail.

A B+-tree is a self-balancing tree data structure widely used in database management systems and file systems for efficient storage and retrieval of data. It's a variation of the B-tree, optimized particularly for disk-based storage and operations like range queries.

Here's a detailed breakdown of B+-trees:

### Core Structure and Key Differences from B-trees

The fundamental difference between a B+-tree and a B-tree lies in how data is stored and retrieved, particularly in their non-leaf (internal) nodes versus leaf nodes.

1.  **Non-Leaf (Internal) Nodes:**
    *   These nodes store **only keys** and pointers to child nodes. They act purely as an index to guide the search down to the appropriate leaf node.
    *   The provided context mentions what might "appear in a nonleaf B-tree node, compared to B+-trees," implying that B-trees might store data pointers directly in internal nodes, whereas B+-trees strictly do not. This design choice in B+-tree

In [20]:
from langchain.agents.middleware import dynamic_prompt, ModelRequest
from langchain_core.prompts import ChatPromptTemplate

# We use 'myvectorstore' created in Section 3.
print(f"Vector store (myvectorstore) is ready: {myvectorstore is not None}")

@dynamic_prompt
def prompt_with_context_and_rewrite(request: ModelRequest) -> str:
    # 1. Extract the user's last query from request.state["messages"]
    last_query = request.state["messages"][-1].content
    print(f"\n--- [Middleware] Original Query: '{last_query}' ---")

    # 2. Request to rewrite the query
    rewrite_system_msg = """You are an expert query assistant. Your task is to rewrite the user's question into an optimized query for a vector database search. Your rewritten query will be used for similarity search.
    Only output the rewritten query."""

    # Make a template
    rewrite_template = ChatPromptTemplate(
        [
            ("system", rewrite_system_msg),
            ("human", "{user_input}")
        ]
    )

    # Fill in the template with the query content
    rewrite_prompt_value = rewrite_template.invoke(
        {
            "user_input": last_query,
        }
    )

    rewrite_response = model.invoke(rewrite_prompt_value.messages)

    rewritten_query = rewrite_response.content
    print(f"--- [Middleware] Rewritten Query: '{rewritten_query}' ---")

    # 3. Search for documents
    try:
      retrieved_docs = myvectorstore.similarity_search(rewritten_query, k=3) # Get top 3
    except Exception as e:
      print(f"Check your vector store: {e}")
      retrieved_docs = []

    # 4. Join the page_content of the retrieved docs into a single string
    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)

    print(f"--- [Middleware] Retrieved {len(retrieved_docs)} docs ---")

    # 5. Dynamically create the system prompt to be sent to the LLM
    system_message = (
        "You are a helpful assistant. Use the following context in your response:"
        "\n\n--- CONTEXT ---"
        f"\n{docs_content}"
        "\n--- END CONTEXT ---"
    )

    return system_message

agent = create_agent(model, tools=[], middleware=[prompt_with_context_and_rewrite])

Vector store (myvectorstore) is ready: True


In [21]:
query = "I'm working on my homework. Please explain me about the transaction. There are database system concepts in the vector database."

for step in agent.stream(
    {"messages": [{"role": "user", "content": query}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


I'm working on my homework. Please explain me about the transaction. There are database system concepts in the vector database.

--- [Middleware] Original Query: 'I'm working on my homework. Please explain me about the transaction. There are database system concepts in the vector database.' ---
--- [Middleware] Rewritten Query: 'Explain transactions, ACID properties, concurrency control, and recovery mechanisms within the context of database system concepts, specifically detailing their relevance and implementation in vector databases.' ---
--- [Middleware] Retrieved 3 docs ---

Based on the provided text, here's what we can gather about transactions in database systems:

*   **Core Properties:** Transactions are central to implementing the **atomicity** and **durability** properties within a database.
    *   **Atomicity:** This means that a transaction is treated as a single, indivisible unit of work. Either all of its operations are completed successfully, or none of them are.
    

# Evaluate the RAG Agent

In [None]:
import os
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_API_KEY"] = "YOUR API KEY" # enter your langsmith api key here

In [23]:
from langchain_core.documents import Document

# (1) Define a simple helper class
class RAGContextHolder:
    def __init__(self):
        # A variable to store the most recently retrieved docs
        self.last_retrieved_docs = []

    def set_docs(self, docs: list[Document]):
        """Called by the middleware to save the retrieved docs"""
        self.last_retrieved_docs = docs

    def get_docs(self) -> list[Document]:
        """Called by the evaluation function to get the saved docs"""
        return self.last_retrieved_docs

# (2) Create a "global" instance of this class
context_holder = RAGContextHolder()

print("--- Context Holder Ready ---")

--- Context Holder Ready ---


In [24]:
from langchain.agents.middleware import dynamic_prompt, ModelRequest
from langchain_core.prompts import ChatPromptTemplate

print(f"Vector store (myvectorstore) is ready: {myvectorstore is not None}")

@dynamic_prompt
def prompt_with_context_and_rewrite(request: ModelRequest) -> str:
    last_query = request.state["messages"][-1].content
    print(f"\n--- [Middleware] Original Query: '{last_query}' ---")

    rewrite_system_msg = """You are an expert query assistant. Your task is to rewrite the user's question into an optimized query for a vector database search. Your rewritten query will be used for similarity search.
    Only output the rewritten query."""

    rewrite_template = ChatPromptTemplate(
        [
            ("system", rewrite_system_msg),
            ("human", "{user_input}")
        ]
    )

    rewrite_prompt_value = rewrite_template.invoke(
        {
            "user_input": last_query,
        }
    )

    rewrite_response = model.invoke(rewrite_prompt_value.messages)

    rewritten_query = rewrite_response.content
    print(f"--- [Middleware] Rewritten Query: '{rewritten_query}' ---")

    try:
      retrieved_docs = myvectorstore.similarity_search(rewritten_query, k=3) # Get top 3
    except Exception as e:
      print(f"Check your vector store: {e}")
      retrieved_docs = []

    ############### NEW STEP - store the retrieved docs ##################
    context_holder.set_docs(retrieved_docs)
    print(f"--- [Middleware] Saved {len(retrieved_docs)} docs to Context Holder ---")
    ######################################################################

    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)

    print(f"--- [Middleware] Retrieved {len(retrieved_docs)} docs ---")

    system_message = (
        "You are a helpful assistant. Use the following context in your response:"
        "\n\n--- CONTEXT ---"
        f"\n{docs_content}"
        "\n--- END CONTEXT ---"
    )

    return system_message

agent = create_agent(model, tools=[], middleware=[prompt_with_context_and_rewrite])

Vector store (myvectorstore) is ready: True


In [25]:
def run_agent_for_evaluation(input_query: str) -> dict:
    """
    A wrapper function that LangSmith evaluation will call.
    inputs_dict must be in the format {"question": "..."}.
    """

    # 1. Run the agent
    # (This call internally triggers the 'prompt_with_context_and_rewrite_and_save' middleware)
    result = agent.invoke({"messages": [{"role": "user", "content": input_query}]})
    answer = result["messages"][-1].content

    # 2. Get the "hidden" retrieved docs
    retrieved_docs = context_holder.get_docs()

    # 3. Return in the format required by the evaluation tutorial
    return {
        "answer": answer,
        "documents": [d.page_content for d in retrieved_docs]
    }

# test
print("--- Wrapper Function Test ---")
test_output = run_agent_for_evaluation("Explain about Bloom filter in detail.")
print(f"Answer: {test_output['answer'][:50]}...")
print(f"Documents Count: {len(test_output['documents'])}")

--- Wrapper Function Test ---

--- [Middleware] Original Query: 'Explain about Bloom filter in detail.' ---
--- [Middleware] Rewritten Query: 'Detailed explanation of Bloom filter' ---
--- [Middleware] Saved 3 docs to Context Holder ---
--- [Middleware] Retrieved 3 docs ---
Answer: Based on the provided context, here's what we know...
Documents Count: 3


In [27]:
def target(inputs: dict) -> dict:
    return run_agent_for_evaluation(inputs["query"])

In [28]:
from typing_extensions import Annotated, TypedDict
from langchain.messages import HumanMessage, SystemMessage

# output schema for structured output
class RelevanceGrade(TypedDict):
    explanation: Annotated[str, ..., "Explain your reasoning for the score"]
    relevant: Annotated[
        int, ..., "Score from 1 to 5, where 5 is most relevant and 1 is least relevant"
    ]

# Grade prompt
relevance_instructions = """You are an impartial evaluator. Your task is to assess the relevance of a provided ANSWER to a given QUESTION using a 1-5 score.

You will be given a QUESTION and an ANSWER. Here is the grading criteria:
- **1 (Poor):** The ANSWER is completely off-topic, evasive, or does not address the QUESTION at all.
- **2 (Fair):** The ANSWER is tangentially related but does not directly answer the core of the QUESTION.
- **3 (Average):** The ANSWER partially addresses the QUESTION but misses key aspects or includes irrelevant information.
- **4 (Good):** The ANSWER directly addresses the QUESTION and is helpful, but could be slightly more complete or concise.
- **5 (Excellent):** The ANSWER directly, fully, and helpfully addresses the QUESTION's intent.

Explain your reasoning in a step-by-step manner. First, analyze the question's intent. Second, analyze the answer's content. Finally, provide your score from 1 to 5.
"""

# Grader LLM
relevance_llm = model.with_structured_output(
    RelevanceGrade, method="json_schema", strict=True
)

# Evaluator
def relevance(inputs: dict, outputs: dict) -> bool:
    messages = [
        SystemMessage(content=relevance_instructions),
        HumanMessage(content=f"QUESTION: {inputs['query']}\nANSWER: {outputs['answer']}")
    ]
    grade = relevance_llm.invoke(messages)
    return grade["relevant"]

In [29]:
# Grade output schema
class GroundedGrade(TypedDict):
    explanation: Annotated[str, ..., "Explain your reasoning for the score"]
    grounded: Annotated[
        int, ..., "Score from 1 to 5, where 5 is fully grounded and 1 is hallucinated"
    ]

# Grade prompt
grounded_instructions = """You are an impartial evaluator. Your task is to assess whether an ANSWER is "grounded in" a set of provided CONTEXTS using a 1-5 score.

You will be given a set of CONTEXTS and an ANSWER. Here are the grading criteria:
- **1 (Not Grounded):** The ANSWER contains significant information or claims that are NOT supported by the CONTEXTS (i.e., hallucination).
- **2 (Poorly Grounded):** The ANSWER contains some claims that are not supported, or significantly misrepresents the CONTEXTS.
- **3 (Partially Grounded):** The ANSWER is mostly supported by the CONTEXTS, but may contain minor claims or details not found in the CONTEXTS.
- **4 (Well Grounded):** The ANSWER is almost entirely supported by the CONTEXTS, with only very minor embellishments.
- **5 (Fully Grounded):** Every single claim in the ANSWER is explicitly supported by the provided CONTEXTS.

Explain your reasoning in a step-by-step manner. First, break down the ANSWER into individual claims. Second, for each claim, check if it is supported by the CONTEXTS. Finally, provide your score from 1 to 5.
"""

# Grader LLM
grounded_llm = model.with_structured_output(
    GroundedGrade, method="json_schema", strict=True
)

# Evaluator
def groundedness(inputs: dict, outputs: dict) -> bool:
# --- FIX ---
    # The 'run_agent_for_evaluation' wrapper returns a list of strings in the 'documents' key
    if not outputs["documents"]:
        # If no document was retrieved, any answer (other than "I don't know") is by definition ungrounded.
        return 1

    doc_string = "\n\n".join(outputs["documents"])

    answer_string = f"CONTEXTS: {doc_string}\n\nANSWER: {outputs['answer']}"

    messages = [
        SystemMessage(content=grounded_instructions),
        HumanMessage(content=answer_string)
    ]

    grade = grounded_llm.invoke(messages)
    return grade["grounded"]

In [30]:
# Grade output schema
class RetrievalRelevanceGrade(TypedDict):
    explanation: Annotated[str, ..., "Explain your reasoning for the score"]
    retrieval: Annotated[
        int,
        ...,
        "Score from 1 to 5, where 5 is highly relevant and 1 is not relevant",
    ]

# Grade prompt
retrieval_relevance_instructions = """You are an impartial evaluator. Your task is to assess the relevance of a set of retrieved CONTEXTS to a given QUESTION using a 1-5 score.

You will be given a QUESTION and a set of CONTEXTS. Here are the grading criteria:
- **1 (Poor):** ALL retrieved CONTEXTS are completely irrelevant to the QUESTION.
- **2 (Fair):** Most CONTEXTS are irrelevant, but one or two might be tangentially related.
- **3 (Average):** Some CONTEXTS are relevant to the QUESTION, but many are irrelevant or contain noise.
- **4. (Good):** Most CONTEXTS are relevant and helpful for answering the QUESTION.
- **5 (Excellent):** ALL retrieved CONTEXTS are highly relevant and crucial for answering the QUESTION.

Explain your reasoning in a step-by-step manner. First, analyze the QUESTION's intent. Second, examine each CONTEXT for its relevance. Finally, provide your score from 1 to 5 based on the overall relevance of the set.
"""

# Grader LLM
retrieval_relevance_llm = model.with_structured_output(RetrievalRelevanceGrade, method="json_schema", strict=True)

def retrieval_relevance(inputs: dict, outputs: dict) -> bool:
    """An evaluator for document relevance"""

    if not outputs["documents"]:
        return 1 # No contexts retrieved, so they cannot be relevant.

    doc_string = "\n\n".join(outputs["documents"])

    answer_string = f"CONTEXTS: {doc_string}\n\nQUESTION: {inputs['query']}"

    messages = [
        SystemMessage(content=retrieval_relevance_instructions),
        HumanMessage(content=answer_string)
    ]

    # Run evaluator
    grade = retrieval_relevance_llm.invoke(messages)
    return grade["retrieval"]

In [31]:
examples = [
    {
        "inputs": {"query": "Explain about Bloom filter in detail."},
    },
    {
        "inputs": {"query": "Explain about B+ trees in detail."},
    },
]

In [32]:
from langsmith import Client

client = Client()
dataset_name = "RAG evaluation_01"
dataset = client.create_dataset(dataset_name=dataset_name)
client.create_examples(
    dataset_id=dataset.id,
    examples=examples
)

{'example_ids': ['e1aff425-b616-410a-8828-7deed2d1fa4b',
  'f651a2f3-99d7-4b46-839f-f4ee486e525a'],
 'count': 2}

In [33]:
experiment_results = client.evaluate(
    target,
    data=dataset_name,
    evaluators=[groundedness, relevance, retrieval_relevance],
    experiment_prefix="rag-doc-relevance",
    metadata={"version": "none"},
)

View the evaluation results for experiment: 'rag-doc-relevance-3d9bf098' at:
https://smith.langchain.com/o/779968f0-7271-4507-b369-04e333f1ec3d/datasets/a8c4466f-90df-4c3c-8f5f-744be462bcfe/compare?selectedSessions=4be3fc6f-8bb2-4e52-831d-d3d2ffd22790




0it [00:00, ?it/s]


--- [Middleware] Original Query: 'Explain about Bloom filter in detail.' ---
--- [Middleware] Rewritten Query: 'Bloom filter: detailed explanation, working principles, use cases, advantages, disadvantages, and applications.' ---
--- [Middleware] Saved 3 docs to Context Holder ---
--- [Middleware] Retrieved 3 docs ---


1it [00:24, 24.59s/it]


--- [Middleware] Original Query: 'Explain about B+ trees in detail.' ---
--- [Middleware] Rewritten Query: 'B+ trees detailed explanation' ---
--- [Middleware] Saved 3 docs to Context Holder ---
--- [Middleware] Retrieved 3 docs ---


2it [01:10, 35.36s/it]


In [34]:
experiment_results.to_pandas()

Unnamed: 0,inputs.query,outputs.answer,outputs.documents,error,feedback.groundedness,feedback.relevance,feedback.retrieval_relevance,execution_time,example_id,id
0,Explain about Bloom filter in detail.,Based on the provided context:\n\nA Bloom filt...,"[Bloom ﬁlter, which uses bitmaps. Bloom ﬁlters...",,5,4,2,4.808055,e1aff425-b616-410a-8828-7deed2d1fa4b,019ad410-6f1f-7729-8fb0-9c5f17d7168d
1,Explain about B+ trees in detail.,"Based on the provided context, here's what we ...","[appear in a nonleaf B-tree node, compared to ...",,2,5,4,9.219572,f651a2f3-99d7-4b46-839f-f4ee486e525a,019ad410-cf2c-7199-aecf-3ee9e907c4b3
