In [1]:
from langchain_community.document_loaders.excel import UnstructuredExcelLoader
from langchain_community.document_loaders import AmazonTextractPDFLoader
from langchain_community.document_loaders import PDFPlumberLoader
from langchain.prompts import PromptTemplate
from langchain.llms.bedrock import Bedrock
from langchain.chains import LLMChain
import boto3
import botocore

# --- AWS clients -------------------------------------------------------------
# botocore settings used by the Bedrock runtime client below: 900-second
# read/connect timeouts and at most 3 attempts per request.
config = botocore.config.Config(
    retries=dict(max_attempts=3),
    connect_timeout=900,
    read_timeout=900,
)

bedrock_client = boto3.client(service_name="bedrock-runtime", region_name="us-east-1", config=config)

# NOTE(review): the Textract client targets us-west-2 while Bedrock targets
# us-east-1 — confirm the S3 bucket below is reachable from this region.
textract_client = boto3.client(service_name="textract", region_name="us-west-2")

# --- Lease PDF stored in S3, loaded through AmazonTextractPDFLoader ----------
textract_features = ["LAYOUT"]
file_path = "s3://my-s3-doc-loader/Fully executed- ADIC Lease 07-02-01.pdf"
loader = AmazonTextractPDFLoader(
    file_path,
    textract_features=textract_features,
    client=textract_client,
)
docs = loader.load()

# --- Clauses PDF on the local disk, loaded through PDFPlumberLoader ----------
# `file_path` and `loader` are rebound here; after this cell they refer to the
# clauses PDF, not the lease.
# NOTE(review): absolute, machine-specific Windows path — this cell will only
# run on a machine where this exact file exists.
file_path = r"C:\Users\Lenovo\Documents\Project-vs code\Amazon Transcribe\Bryckle\Clause_pdf.pdf"
loader = PDFPlumberLoader(file_path)
clauses = loader.load()



In [3]:
# Prompt instructing the model to compare the clauses document against the
# rental agreement. `{docs}` and `{clauses}` are filled with the plain text of
# the two documents (see `_documents_to_text` below).
template = """
    You are provided with two input documents: a rental agreement document{docs} and a separate clauses document{clauses}. Your task is to compare the clauses specified in the clauses document with those present in the rental agreement document. Follow these steps to accomplish the task:

       - Parse the text of both documents to extract individual clauses.
       - Match corresponding clauses between the rental agreement and clauses document.
       - Analyze each pair of matched clauses to identify similarities and differences.
       - Generate a structured output presenting the comparison results, including:
            - For matched clauses: indicate similarities or differences in language or content.
            - For clauses present in one document but not the other: notify that the document is silent on the specific question.
       - Ensure the tool handles unanswered questions gracefully, encouraging users to review both documents for any overlooked clauses.
       - The output should be presented in a clear and understandable format, facilitating easy interpretation by users.
       - Prioritize accuracy in the comparison process, minimizing errors in clause extraction and matching.
       - Provide good and accurate answers
       - if you make sure and verified that the answer for the clause(question) is not found on the given rental agreement print 'this document is silent on this'
       - Try to give the brief answers so that user can understand easily
       - provide answers in the format like 
                                        question:(mention in detail)
                                        answer:(provide detailed answers)
          and assign serial number for all
     


"""
qa_prompt = PromptTemplate(template=template, input_variables=["docs", "clauses"])

# Near-zero temperature keeps the comparison output as repeatable as possible.
llm = Bedrock(
    model_id="anthropic.claude-v2:1",
    client=bedrock_client,
    model_kwargs={"temperature": 1e-10, "max_tokens_to_sample": 40000},
)
llm_chain = LLMChain(prompt=qa_prompt, llm=llm, verbose=False)


def _documents_to_text(documents):
    """Return the concatenated text of a list of loaded documents.

    The loaders return a list of LangChain ``Document`` objects. Formatting
    that list directly into the prompt would insert its Python ``repr``
    (``[Document(page_content='...', metadata={...}), ...]``) with escaped
    newlines and metadata, so only each document's ``page_content`` is kept,
    separated by blank lines.
    """
    return "\n\n".join(document.page_content for document in documents)


# Pass plain text, not the raw Document lists, into the prompt variables.
result = llm_chain.run(
    docs=_documents_to_text(docs),
    clauses=_documents_to_text(clauses),
)
print(result)

 Here is the comparison of clauses between the clauses document and the rental agreement document:

1. Address of Premises:  
   Question: Describe the complete physical address of the premises leased with city, state, zipcode as per the document
   Answer: The address of the premises is 8560 Upland Drive, Parker, Colorado 80134

2. Rentable Area:
   Question: What is the square footage or number of seats/workstation leased? Explain the terms and conditions associated with how it is calculated.  
   Answer: The rentable area of the premises is approximately 148,204 square feet, consisting of the West Building (64,327 sf), East Building (64,327 sf) and Connector (19,550 sf). The final measurement will be determined as per Section 1.1 of the agreement after construction is complete.

3. Tenant:  
   Question: What is full and accurate legal name of the lessee's company that executed this lease?
   Answer: The tenant is Advanced Digital Information Corporation, a Washington corporation

4