# Step 3: Grade Document Relevance

This notebook tests the document grading node. After documents are retrieved, this step uses an LLM to double-check if each document is actually relevant to the user's question, filtering out any false positives from the vector search.

In [1]:
import os
from langchain_community.llms import Ollama
from langchain_core.documents import Document
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

In [2]:
# --- Configuration ---
# Ollama model tag used for the relevance grader.
MODEL_NAME = "llama3"

# temperature=0 is requested here, presumably so the yes/no grades are as
# repeatable as possible between runs -- confirm if sampling is ever desired.
llm = Ollama(
    temperature=0,
    model=MODEL_NAME,
)

  llm = Ollama(model=MODEL_NAME, temperature=0)


In [3]:
def _is_relevant_grade(raw_grade: str) -> bool:
    """Return True only when the first standalone verdict word in the reply is 'yes'."""
    import re  # local import keeps this cell self-contained

    # Drop an echoed "yes/no" (the prompt ends with "Grade (yes/no):") so it is
    # not mistaken for the verdict itself.
    normalized = raw_grade.lower().replace("yes/no", " ")
    # Word boundaries stop "yesterday" or "not" from counting as a verdict.
    verdict = re.search(r"\b(yes|no)\b", normalized)
    # No recognisable verdict is treated as "not relevant".
    return verdict is not None and verdict.group(1) == "yes"


def grade_documents(state: dict, llm) -> dict:
    """
    Grades the relevance of retrieved documents.
    This function is defined locally for experimentation.

    Each document is sent, together with the question, through a
    prompt -> llm -> string-parser chain that asks for a binary yes/no grade.
    Documents whose reply starts with a standalone "yes" verdict are kept.

    Args:
        state: Mapping with a 'question' entry and a 'documents' entry; each
            document must expose `page_content` and a dict-like `metadata`.
        llm: The model placed in the middle of the grading chain.

    Returns:
        A dict with a single key, "documents", holding the documents graded
        relevant, in their original order.

    Raises:
        KeyError: If 'question' or 'documents' is missing from `state`.

    Side effects:
        Prints one progress line per document to stdout.
    """
    print("---CHECKING DOCUMENT RELEVANCE---")
    question = state['question']
    documents = state['documents']

    prompt_template = """
    You are a grader assessing the relevance of a retrieved document to a user question about emissions.
    Give a binary score 'yes' or 'no'. 'yes' means the document is relevant, 'no' means it's not.

    Retrieved document:
    {document_content}

    User question: {question}

    Grade (yes/no):
    """
    prompt = PromptTemplate(template=prompt_template, input_variables=["question", "document_content"])
    grader_chain = prompt | llm | StrOutputParser()

    filtered_docs = []
    for d in documents:
        score = grader_chain.invoke({"question": question, "document_content": d.page_content})
        # A missing 'source' key must not crash the grader just for a log line.
        source = d.metadata.get("source", "<unknown source>")
        if _is_relevant_grade(score):
            print(f"Grade is 'yes' for document: {source}")
            filtered_docs.append(d)
        else:
            print(f"Grade is 'no' for document: {source}")

    return {"documents": filtered_docs}

In [4]:
# --- Mock State ---
# Stand-in for the graph state as it would look once the 'retrieve' node has finished.
question = "What are the record retention requirements for GHG reports?"

# On-topic sample: speaks directly to how long GHG records must be kept.
relevant_doc = Document(
    metadata={"source": "sample_ghg_reporting_rule.txt"},
    page_content="All records, including calibration data, input data for emission equations, and the final annual GHG report, must be retained for a period of at least three years from the date of submission.",
)

# Off-topic sample: unrelated trivia with no bearing on the question.
irrelevant_doc = Document(
    metadata={"source": "random_facts.txt"},
    page_content="The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France.",
)

# Same two keys the grading function reads: the question and the candidate documents.
initial_state = dict(
    question=question,
    documents=[relevant_doc, irrelevant_doc],
)

print("--- Initial Documents ---")
print(f"{len(initial_state['documents'])} documents to be graded.")

--- Initial Documents ---
2 documents to be graded.


In [5]:
# Grade the mocked state with the configured model; the function prints its own progress lines.
result = grade_documents(state=initial_state, llm=llm)

# Summarise how many documents survived the relevance filter, then list their sources.
print("\n--- Graded Documents ---")
print(f"{len(result['documents'])} documents were deemed relevant.")
for doc in result['documents']:
    print(f"Kept document from source: {doc.metadata['source']}")

---CHECKING DOCUMENT RELEVANCE---
Grade is 'yes' for document: sample_ghg_reporting_rule.txt
Grade is 'no' for document: random_facts.txt

--- Graded Documents ---
1 documents were deemed relevant.
Kept document from source: sample_ghg_reporting_rule.txt
