In [1]:
from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from pydantic import BaseModel, Field
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings

In [12]:
# Data model
class GradeState(BaseModel):
    """Structured-output schema holding the grader's relevance verdict.

    The class is handed to ``llm.with_structured_output`` so that the grading
    LLM answers with this single field rather than free text.
    """

    # Restricted to the two values the later cells compare against
    # (``== 'yes'`` / ``== 'no'``). A free-form ``str`` would also accept
    # variants such as "Yes", which match neither filter and would be
    # dropped silently; with ``Literal`` they surface as a validation error.
    binary_score: Literal["yes", "no"] = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )

In [13]:
def retrieval_agent(
    fetch_k: int,
    k: int,
    *,
    collection_name: str = "collection_01",
    persist_directory: str = "./db/chroma_db_02",
    base_url: str = "http://localhost:11434",
    embedding_model: str = "bge-m3:latest",
):
    """Build an MMR retriever on top of a persisted Chroma collection.

    Args:
        fetch_k: Forwarded as ``search_kwargs["fetch_k"]`` (per the LangChain
            retriever docs, the size of the candidate pool MMR selects from).
        k: Forwarded as ``search_kwargs["k"]`` (number of documents returned).
        collection_name: Name of the Chroma collection to open.
        persist_directory: Directory holding the persisted Chroma database.
        base_url: URL of the Ollama server that computes the embeddings.
        embedding_model: Ollama embedding model tag.

    Returns:
        The retriever created by ``Chroma.as_retriever`` with
        ``search_type="mmr"``.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``fetch_k`` is smaller
            than ``k``.
    """
    # Fail fast on settings that cannot yield ``k`` documents, before any
    # connection to Ollama or the on-disk store is attempted.
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")
    if fetch_k < k:
        raise ValueError(
            f"fetch_k ({fetch_k}) must be greater than or equal to k ({k})"
        )

    embed_model = OllamaEmbeddings(base_url=base_url, model=embedding_model)
    vectorstore = Chroma(
        collection_name=collection_name,
        persist_directory=persist_directory,
        embedding_function=embed_model,
    )
    return vectorstore.as_retriever(
        search_type="mmr",
        search_kwargs={"k": k, "fetch_k": fetch_k},
    )

# Build the shared retriever (k=3 results, fetch_k=10) and show its repr.
retriever = retrieval_agent(k=3, fetch_k=10)
retriever


VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000001DC02A78410>, search_type='mmr', search_kwargs={'k': 3, 'fetch_k': 10})

In [14]:
# Sample query for the retriever; the leading and trailing newlines are part
# of the string value.
question = (
    "\n"
    "according to lr rule, explain about ships with installed process plant for chemicals"
    "\n"
)
docs = retriever.invoke(question)
docs

[Document(metadata={'File Name': "Lloyd's Register Rules and Regulations for the Classification of Ships, July 2022", 'File Path': "/content/drive/MyDrive/Rules/LR/Lloyd's Register Rules and Regulations for the Classification of Ships, July 2022.pdf", 'First Division': 'Rules', 'Page': 1650, 'Second Division': 'LR'}, page_content="This page explains Lloyd's Register Rules and Regulations for the Classification of Ships, July 2022, that belongs to catogories of Rules and LR./nwith the process plant are to be submitted, including: • Arrangements for preventing the ingress of water into the ship or structure where process plant and equipment protrude through the weather deck. • Proposed emergency flooding procedures and their control. 3.2.6 Particulars of the proposed storage arrangements of hazardous and/or toxic substances, feedstocks and products in bulk, on the ship or structure, are to be submitted. 3.3 Process plant 3.3.1 A description of the expected method of operation of the proc

In [15]:
def retrieval_grader_agent(
    state,
    model_name: str = "deepseek-r1-distill-llama-70b",
    temperature: float = 0,
):
    """Build a chain that grades whether a retrieved document is relevant.

    Args:
        state: Structured-output schema passed to ``with_structured_output``
            (the notebook passes the ``GradeState`` model class). The
            parameter name is kept for backward compatibility.
        model_name: Groq model used for grading. The original code noted
            ``llama-3.3-70b-versatile`` as an alternative.
        temperature: Sampling temperature given to ``ChatGroq``.

    Returns:
        The runnable ``grade_prompt | structured_llm_grader``. It is invoked
        with a dict holding the keys ``"document"`` and ``"question"``.
    """
    llm = ChatGroq(temperature=temperature, model_name=model_name)
    structured_llm_grader = llm.with_structured_output(state)

    # System prompt. Written as explicit literals so the whitespace that is
    # part of the prompt text (including a trailing space on the first line)
    # is visible and cannot be stripped by an editor.
    system = (
        "You are a grader assessing relevance of a retrieved document to a user question. \n \n"
        "        If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n\n"
        "        It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n\n"
        "        Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."
    )
    grade_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system),
            ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
        ]
    )
    return grade_prompt | structured_llm_grader

# Create the grading chain with GradeState as its structured-output schema
# and show its repr.
retrieval_grader = retrieval_grader_agent(GradeState)
retrieval_grader

ChatPromptTemplate(input_variables=['document', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template="You are a grader assessing relevance of a retrieved document to a user question. \n \n        If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n\n        It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n\n        Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['document', 'question'], input_types={}, partial_variables={}, template='Retrieved document: \n\n {document} \n\n User question: {question}'), additional_kwargs={})])
| RunnableBinding(bound=ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x

In [16]:
# Grade each retrieved document against the question, one verdict per line.
for doc in docs:
    result = retrieval_grader.invoke(
        {
            "document": doc.page_content,
            "question": question,
        }
    )
    print(result.binary_score)

yes
yes
yes


In [17]:
# Keep only the documents whose grade is exactly 'yes'.
yes_result = list(
    filter(
        lambda candidate: retrieval_grader.invoke(
            {"document": candidate.page_content, "question": question}
        ).binary_score == 'yes',
        docs,
    )
)
yes_result


[Document(metadata={'File Name': "Lloyd's Register Rules and Regulations for the Classification of Ships, July 2022", 'File Path': "/content/drive/MyDrive/Rules/LR/Lloyd's Register Rules and Regulations for the Classification of Ships, July 2022.pdf", 'First Division': 'Rules', 'Page': 1650, 'Second Division': 'LR'}, page_content="This page explains Lloyd's Register Rules and Regulations for the Classification of Ships, July 2022, that belongs to catogories of Rules and LR./nwith the process plant are to be submitted, including: • Arrangements for preventing the ingress of water into the ship or structure where process plant and equipment protrude through the weather deck. • Proposed emergency flooding procedures and their control. 3.2.6 Particulars of the proposed storage arrangements of hazardous and/or toxic substances, feedstocks and products in bulk, on the ship or structure, are to be submitted. 3.3 Process plant 3.3.1 A description of the expected method of operation of the proc

In [18]:
# Keep only the documents whose grade is exactly 'no'.
no_result = list(
    filter(
        lambda candidate: retrieval_grader.invoke(
            {"document": candidate.page_content, "question": question}
        ).binary_score == 'no',
        docs,
    )
)
no_result

[]