## RAG (LangChain)

In [8]:
import os
from dotenv import load_dotenv
load_dotenv()

from typing import Iterator

from langchain_core.document_loaders import BaseLoader
from langchain_core.documents import Document as LCDocument

from docling.document_converter import DocumentConverter

class DoclingPDFLoader(BaseLoader):

    def __init__(self, file_path: str | list[str]) -> None:
        self._file_paths = file_path if isinstance(file_path, list) else [file_path]
        self._converter = DocumentConverter()

    def lazy_load(self) -> Iterator[LCDocument]:
        for source in self._file_paths:
            dl_doc = self._converter.convert(source).document
            text = dl_doc.export_to_markdown()
            yield LCDocument(page_content=text)


from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Input file (a contract) and the Hugging Face model used to embed its chunks.
FILE_PATH = "docs/sample.pdf"
HF_EMBED_MODEL_ID = "BAAI/bge-small-en-v1.5"

# Convert the input file into LangChain documents via the Docling-based loader.
loader = DoclingPDFLoader(file_path=FILE_PATH)
docs = loader.load()

# Split the documents into overlapping chunks (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
splits = text_splitter.split_documents(docs)

# Embedding model applied to the chunks when they are indexed.
embeddings = HuggingFaceEmbeddings(model_name=HF_EMBED_MODEL_ID)

from tempfile import TemporaryDirectory  # NOTE(review): not used in the visible cells

from langchain_milvus import Milvus

# Milvus connection URI: read from the environment when set, otherwise
# fall back to "milvus_demo.db".
MILVUS_URI = os.getenv("MILVUS_URI", "milvus_demo.db")

# Index settings for local mode.
index_params = dict(
    index_type="IVF_FLAT",  # IVF_FLAT rather than HNSW for local mode
    metric_type="L2",  # L2 distance metric
    params=dict(nlist=1024),  # number of cluster units
)

# Embed the chunks and store them in Milvus with the index settings above.
# NOTE(review): drop_old=True presumably replaces any existing collection --
# confirm against the langchain_milvus documentation.
vectorstore = Milvus.from_documents(
    splits,
    embeddings,
    index_params=index_params,
    drop_old=True,
    connection_args=dict(uri=MILVUS_URI),
)



In [9]:
from langchain_huggingface import HuggingFaceEndpoint

# Hugging Face model that generates the answers, and the token used to call it.
HF_LLM_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"
HF_API_KEY = os.getenv("HF_API_KEY")  # None when the variable is not set

llm = HuggingFaceEndpoint(
    huggingfacehub_api_token=HF_API_KEY,
    repo_id=HF_LLM_MODEL_ID,
)

from typing import Iterable

from langchain_core.documents import Document as LCDocument
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs: Iterable[LCDocument]):
    """Combine the text of several documents into a single string.

    Args:
        docs: Documents whose ``page_content`` should be concatenated.

    Returns:
        The ``page_content`` values in iteration order, separated by one
        blank line; an empty string when ``docs`` yields nothing.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)


# Retriever over the vector store; it supplies the context for each question.
retriever = vectorstore.as_retriever()

# Prompt with two placeholders: {context} (retrieved text) and {question}.
prompt = PromptTemplate.from_template(
    "Context information is below.\n"
    "---------------------\n"
    "{context}\n"
    "---------------------\n"
    "Given the context information and not prior knowledge, answer the query.\n"
    "Query: {question}\n"
    "Answer:\n"
)

# The chain input is forwarded unchanged as the question and is also sent
# through the retriever, whose results are flattened by format_docs.
chain_inputs = {
    "question": RunnablePassthrough(),
    "context": retriever | format_docs,
}

# Fill the prompt, call the LLM, and parse the model output to a string.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()


In [10]:
# Ask the RAG chain about duties; as the cell's last expression, the returned
# answer string is displayed as the cell output.
rag_chain.invoke("Explain Duties?")

"Under this contract, the Consultant agrees to exercise special skill to accomplish certain results in a manner reasonably satisfactory to the Commission. These results are specified in Exhibit A: Scope of Services, which is incorporated by reference in this Agreement. The Consultant is responsible for providing the key personnel listed in the Agreement to perform the services specified. The Consultant also agrees to indemnify, defend, and hold harmless the Commission from any claims, demands, losses, damages, defense costs, or liability of any kind or nature that may arise out of the Consultant's negligence, recklessness, or willful misconduct under the terms of this Agreement. This includes any damage to the person(s), or property(ies) of the Consultant and third parties."

In [11]:
# Ask the RAG chain about insurance; as the cell's last expression, the returned
# answer string is displayed as the cell output.
rag_chain.invoke("What about insurance?")

"1. The consultant should have Comprehensive or Commercial General Liability Insurance coverage in the minimum amount of one million dollars ($1,000,000) combined single limit (CSL), including coverage for bodily injury, personal injury, broad form property damage, contractual liability, and cross-liability.\n2. Professional Liability Insurance is required in the minimum amount of one million dollars ($1,000,000) combined single limit, but only if both the consultant and the commission initial a specific clause.\n3. The consultant should ensure that any insurance policy required by this document includes language that the insurer's legal obligation to defend or indemnify the commission is not conditioned on the performance of any act(s) by the named insured. Such insurance policy should also name the commission as a named insured.\n4. The consultant is responsible for paying all deductibles and self-insured retentions (SIR) required to be paid under any insurance policy that may provid