## AI Agent to perform document compliance checks

In [None]:
from dotenv import load_dotenv
from pydantic import BaseModel, RootModel
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
# from langchain_community.chat_models import ChatOllama
from langchain.agents import create_tool_calling_agent, AgentExecutor
from google_doc_processing import doc_parse
import ipywidgets as widgets
from IPython.display import display
from typing import List
import json, os

# Load variables from a local .env file into the process environment
# (presumably the API keys read by the chat model client — confirm).
load_dotenv()

In [None]:
# Chat model that drives the compliance agent; change the id here to switch models.
ANTHROPIC_MODEL_ID = "claude-3-5-haiku-20241022"
llm = ChatAnthropic(model=ANTHROPIC_MODEL_ID)


# One verdict returned by the agent for a single compliance rule.
class ComplianceResult(BaseModel):
    rule: str  # the rule that was checked
    status: bool  # pass/fail outcome — presumably True means the rule passed; confirm
    document: str  # document the verdict refers to (the prompt asks for non-compliant documents)
    reason: str  # explanation, e.g. which field or condition caused a failure

# Root model wrapping a list of ComplianceResult entries; the parsed items are
# read back through the `.root` attribute when the response is printed.
class ComplianceResults(RootModel[List[ComplianceResult]]):
    pass


# Output parser bound to ComplianceResults: it supplies the format instructions
# embedded in the prompt and later parses the agent's reply into that model.
parser = PydanticOutputParser(pydantic_object=ComplianceResults)

In [None]:
# System instructions for the agent. The text is sent to the model verbatim, so
# its wording and whitespace are intentionally left exactly as they were.
# NOTE(review): "neccessary" is misspelled in the prompt text below.
system_instructions = """
            You are a compliance officer, you must check that the submitted documents follow the given instructions and the given fields match, within a document or across documents, according to the rules given to you.
            Check to ensure document field values within the parsed document json file match the rules set out by user query and use neccessary tools to check for the same. For each rule used for matching, state whether they pass or fail, list out documents that do not comply with the specified rule within user query and which field or condition being checked causes the fail.
            Wrap the output in this format and provide no other text\n{format_instructions}
            """

# Message order: system instructions, prior chat history, the user's query,
# then the agent's scratchpad.
prompt_messages = [
    ("system", system_instructions),
    ("placeholder", "{chat_history}"),
    ("human", "{query}"),
    ("placeholder", "{agent_scratchpad}"),
]

# Pre-fill {format_instructions} from the parser so callers only supply the query.
prompt = ChatPromptTemplate.from_messages(prompt_messages).partial(
    format_instructions=parser.get_format_instructions()
)

In [None]:
# No tools are registered yet.
# FUTURE: add tools to more accurately identify the type of document, or train
# a model to classify documents and extract different kinds of information.
tools = []

# Tool-calling agent assembled from the chat model, the prompt and the tool list.
agent = create_tool_calling_agent(llm=llm, tools=tools, prompt=prompt)

# Executor used to invoke the agent; verbose output is switched off.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=False)

## Upload and parse documents

In [None]:
# Upload widget limited to PDFs and common image formats; multiple files allowed.
ACCEPTED_EXTENSIONS = (".pdf", ".png", ".jpg", ".jpeg")
uploader = widgets.FileUpload(accept=",".join(ACCEPTED_EXTENSIONS), multiple=True)
display(uploader)

In [None]:
# Write each uploaded file to a temporary local copy and hand the copies to the
# document parser.
#
# Both lists are reset on every run so the clean-up and parsing cells further
# down never hit an undefined name (nothing uploaded) or silently reuse paths
# left over from an earlier upload.
uploaded_files = []
file_paths = []

if uploader.value:
    for file_info in uploader.value:
        # Keep only the final path component: a name containing directory
        # separators would otherwise break the write or place the temporary
        # copy outside the working directory.
        filename = os.path.basename(file_info['name'])
        content = file_info['content']
        temp_path = f"temp_{filename}"
        with open(temp_path, "wb") as f:  # FUTURE: add check for file existence
            f.write(content)
        uploaded_files.append(temp_path)
    # The parser's result is iterated later as paths of JSON files to load.
    file_paths = doc_parse.invoke({"files": uploaded_files})

    print(f"Parsed document files: {file_paths}")
else:
    print("No files uploaded.")

In [None]:
# Remove the temporary copies written for parsing and report the outcome for
# each one; a failure on one file does not stop the rest from being removed.
for path in uploaded_files:
    try:
        os.remove(path)
    except FileNotFoundError:
        print(f"File not found: {path}")
    except Exception as e:
        print(f"Error deleting {path}: {e}")
    else:
        print(f"Deleted: {path}")

In [None]:
# Read every parsed-document JSON file back into memory, in the order the
# parser returned the paths.
parsed_docs = []
for file_path in file_paths:
    with open(file_path, mode="r", encoding="utf-8") as handle:
        raw_json = handle.read()
    parsed_docs.append(json.loads(raw_json))

# One pretty-printed JSON string holding all documents; passed to the agent later.
doc_string = json.dumps(parsed_docs, indent=2)
# print(doc_string)  # uncomment to inspect the parsed documents

## Formulate query and extract response

In [None]:
# Ask for the rules, run the agent over the parsed documents and print one
# verdict per rule.
rules = input("Enter your compliance rules (e.g. due_date must be after invoice_date): ")
input_to_agent = {
    "Rules": f"Compliance Rules:\n{rules}",
    "Documents": doc_string,
}

# The prompt's "{query}" slot is filled with text. Passing the dict itself would
# insert its Python repr, in which every newline of the rules and of the
# pretty-printed JSON is shown as an escaped "\n". Build readable text instead.
query_text = (
    f"{input_to_agent['Rules']}\n\n"
    f"Documents:\n{input_to_agent['Documents']}"
)

raw_response = agent_executor.invoke({"query": query_text})

try:
    # Depending on the model integration, "output" is either a plain string or
    # a list of content blocks (dicts carrying a "text" entry). Accept both
    # rather than assuming the list form; blocks without text contribute nothing.
    output = raw_response.get("output")
    if isinstance(output, str):
        output_text = output
    else:
        output_text = "".join(
            part if isinstance(part, str) else part.get("text", "")
            for part in output
        )

    structured_response = parser.parse(output_text)
    for rule in structured_response.root:
        print(f"Rule     : {rule.rule}")
        print(f"Status   : {rule.status}")
        print(f"Document : {rule.document}")
        print(f"Reason   : {rule.reason}")
        print("-" * 40)
except Exception as e:
    # Notebook-level boundary: show the raw reply so a malformed answer can be
    # inspected instead of ending the cell with a traceback.
    print("Error parsing response", e, "Raw Response - ", raw_response)