In [1]:
from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from langchain_ollama.chat_models import ChatOllama
from pydantic import BaseModel, Field
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings

In [2]:
embed_model = OllamaEmbeddings(base_url="http://localhost:11434", model="bge-m3:latest")
print(embed_model)
vectorstore = Chroma(collection_name="collection_01", persist_directory="./db/chroma_db_02", embedding_function=embed_model)
# retriever = vectorstore.as_retriever()
retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={'k': 3, "fetch_k":5})
retriever

model='bge-m3:latest' base_url='http://localhost:11434' client_kwargs={}


VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x00000259EA5ABCE0>, search_type='mmr', search_kwargs={'k': 3, 'fetch_k': 5})

In [3]:
# Data model
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )

In [4]:
# llm = ChatGroq(temperature=0, model_name= "llama-3.2-11b-text-preview")
llm = ChatOllama(base_url="http://localhost:11434", model="llama3.2:latest", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeDocuments)
structured_llm_grader

RunnableBinding(bound=ChatOllama(model='llama3.2:latest', temperature=0.0, base_url='http://localhost:11434'), kwargs={'tools': [{'type': 'function', 'function': {'name': 'GradeDocuments', 'description': 'Binary score for relevance check on retrieved documents.', 'parameters': {'properties': {'binary_score': {'description': "Documents are relevant to the question, 'yes' or 'no'", 'type': 'string'}}, 'required': ['binary_score'], 'type': 'object'}}}], 'tool_choice': 'any'}, config={}, config_factories=[])
| PydanticToolsParser(first_tool_only=True, tools=[<class '__main__.GradeDocuments'>])

In [5]:
# Prompt
system = """You are a grader assessing relevance of a retrieved document to a user question. \n 
    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)
retrieval_grader = grade_prompt | structured_llm_grader
retrieval_grader


ChatPromptTemplate(input_variables=['document', 'question'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template="You are a grader assessing relevance of a retrieved document to a user question. \n \n    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n\n    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n\n    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['document', 'question'], input_types={}, partial_variables={}, template='Retrieved document: \n\n {document} \n\n User question: {question}'), additional_kwargs={})])
| RunnableBinding(bound=ChatOllama(model='llama3.2:latest', temperature=0.0, base_url='http://localhost:11434

In [6]:
%timeit
question = """
according to lr rule, explain about ships with installed process plant for chemicals
"""
docs = retriever.invoke(question)
docs

[Document(metadata={'File Name': "Lloyd's Register Rules and Regulations for the Classification of Ships, July 2022", 'File Path': "/content/drive/MyDrive/Rules/LR/Lloyd's Register Rules and Regulations for the Classification of Ships, July 2022.pdf", 'First Division': 'Rules', 'Page': 1650, 'Second Division': 'LR'}, page_content="This page explains Lloyd's Register Rules and Regulations for the Classification of Ships, July 2022, that belongs to catogories of Rules and LR./nwith the process plant are to be submitted, including: • Arrangements for preventing the ingress of water into the ship or structure where process plant and equipment protrude through the weather deck. • Proposed emergency flooding procedures and their control. 3.2.6 Particulars of the proposed storage arrangements of hazardous and/or toxic substances, feedstocks and products in bulk, on the ship or structure, are to be submitted. 3.3 Process plant 3.3.1 A description of the expected method of operation of the proc

In [7]:
doc_txt = docs[2].page_content
doc_txt

"This page explains Lloyd's Register Rules and Regulations for the Classification of Ships, July 2022, that belongs to catogories of Rules and LR./nor otherwise linked to the shore, sea bed or other stationary vessel or structure, will be eligible to be classed `100A1 (T) moored (oil, ammonia, etc.) processing (tanker, barge, etc.) for service at . . .', see also Pt 7, Ch 2, 1.4 Certification of process plant 1.4.5 . 2.1.3 Ships of Category 2, which have chemical plants installed and designed for operation while the ship is in harbour, will be eligible to be classed `100A(T) chemical process plant installed − for operation only when moored in harbour', see also Pt 7, Ch 2, 1.4 Certification of process plant 1.4.5 . 2.1.4 Specialised ships of Category 3 which have chemical plants designed to operate only while the ship is fully supported on the sea bed, will be eligible to be classed `A chemical process plant pontoon/platform − to be operated only when grounded on prepared foundations a

In [8]:
result = retrieval_grader.invoke({"question": question, "document": doc_txt})
result.binary_score

'yes'

In [9]:
grade_no_docs = [grade_no_doc for grade_no_doc in docs if retrieval_grader.invoke({"question": question, "document": doc_txt}).binary_score == 'no']
grade_no_docs

[]

In [10]:
grade_yes_docs = [grade_yes_doc for grade_yes_doc in docs if retrieval_grader.invoke({"question": question, "document": doc_txt}).binary_score == 'yes']
grade_yes_docs

[Document(metadata={'File Name': "Lloyd's Register Rules and Regulations for the Classification of Ships, July 2022", 'File Path': "/content/drive/MyDrive/Rules/LR/Lloyd's Register Rules and Regulations for the Classification of Ships, July 2022.pdf", 'First Division': 'Rules', 'Page': 1650, 'Second Division': 'LR'}, page_content="This page explains Lloyd's Register Rules and Regulations for the Classification of Ships, July 2022, that belongs to catogories of Rules and LR./nwith the process plant are to be submitted, including: • Arrangements for preventing the ingress of water into the ship or structure where process plant and equipment protrude through the weather deck. • Proposed emergency flooding procedures and their control. 3.2.6 Particulars of the proposed storage arrangements of hazardous and/or toxic substances, feedstocks and products in bulk, on the ship or structure, are to be submitted. 3.3 Process plant 3.3.1 A description of the expected method of operation of the proc