In [10]:
from langchain_community.document_loaders.excel import UnstructuredExcelLoader
from langchain_community.document_loaders import AmazonTextractPDFLoader
from langchain_community.document_loaders import PDFPlumberLoader
from langchain.prompts import PromptTemplate
from langchain.llms.bedrock import Bedrock
from langchain.chains import LLMChain
import boto3
import botocore

# --- AWS clients -----------------------------------------------------------
# Shared botocore settings for the Bedrock runtime client: 900-second connect
# and read timeouts, at most 3 attempts per request.
config = botocore.config.Config(
    retries={"max_attempts": 3},
    connect_timeout=900,
    read_timeout=900,
)
bedrock_client = boto3.client("bedrock-runtime", region_name="us-east-1", config=config)
textract_client = boto3.client(service_name="textract", region_name="us-west-2")

# --- Rental agreement: PDF on S3, extracted through Amazon Textract --------
textract_features = ["LAYOUT"]
file_path = "s3://my-s3-doc-loader/Fully Executed 03.01.2022- Briarwood Lease .pdf"
loader = AmazonTextractPDFLoader(
    file_path,
    textract_features=textract_features,
    client=textract_client,
)
docs = loader.load()

# --- Clauses document: local PDF parsed with pdfplumber --------------------
# NOTE(review): `file_path` and `loader` are rebound here, so after this cell
# they refer to the clauses PDF, not the lease. The path is an absolute,
# machine-specific location; the notebook will not run elsewhere as written.
file_path = r"C:\Users\Lenovo\Documents\Project-vs code\Amazon Transcribe\Bryckle\Clause_pdf.pdf"
loader = PDFPlumberLoader(file_path)
clauses = loader.load()



In [None]:
# Flatten the clauses document into a single string for inspection.
# Pages are joined with a newline so that the last word of one page is not
# fused with the first word of the next (plain `+=` concatenation did that).
all_page_content = "\n".join(doc.page_content for doc in clauses)
print(all_page_content)

In [12]:
# Prompt for the clause comparison. Both documents are injected as plain text
# inside explicit tags placed after the instructions, so the model can tell
# where each document starts and ends and no placeholder is glued onto the
# preceding word.
template = """
    You are provided with two input documents: a rental agreement document (inside the <rental_agreement> tags below) and a separate clauses document (inside the <clauses> tags below). Your task is to compare the clauses specified in the clauses document with those present in the rental agreement document. Follow these steps to accomplish the task:

       - Parse the text of both documents to extract individual clauses.
       - Match corresponding clauses between the rental agreement and clauses document.
       - Analyze each pair of matched clauses to identify similarities and differences.
       - Generate a structured output presenting the comparison results, including:
            - For matched clauses: indicate similarities or differences in language or content.
            - For clauses present in one document but not the other: notify that the document is silent on the specific question.
       - Ensure the tool handles unanswered questions gracefully, encouraging users to review both documents for any overlooked clauses.
       - The output should be presented in a clear and understandable format, facilitating easy interpretation by users.
       - Prioritize accuracy in the comparison process, minimizing errors in clause extraction and matching.
       - Provide good and accurate answers

<rental_agreement>
{docs}
</rental_agreement>

<clauses>
{clauses}
</clauses>
"""
qa_prompt = PromptTemplate(template=template, input_variables=["docs", "clauses"])

# temperature is kept at the original near-zero value for (practically)
# deterministic output. max_tokens_to_sample is capped at 4096, the documented
# maximum for Claude text completions on Bedrock (the previous 40000 exceeded it).
llm = Bedrock(
    model_id="anthropic.claude-v2:1",
    client=bedrock_client,
    model_kwargs={"temperature": 1e-10, "max_tokens_to_sample": 4096},
)
llm_chain = LLMChain(prompt=qa_prompt, llm=llm, verbose=False)

# `docs` and `clauses` are lists of LangChain Document objects. Formatting the
# lists directly into the prompt would insert their Python repr (metadata,
# escaped newlines), so only the page text is extracted and passed on.
agreement_text = "\n\n".join(doc.page_content for doc in docs)
clauses_text = "\n\n".join(doc.page_content for doc in clauses)

result = llm_chain.run(docs=agreement_text, clauses=clauses_text)
print(result)

 Here is the structured output comparing the clauses specified in the clauses document with those present in the rental agreement document:

Address of Premises: 
Rental Agreement: The address of the premises is 10770 East Briarwood Avenue, Centennial, Colorado 80112
Clauses Document: Silent

Rentable Area:  
Rental Agreement: The rentable area is 74,736 square feet
Clauses Document: What is the square footage or number of seats/workstation leased? 

Tenant:
Rental Agreement: Quantum Corporation
Clauses Document: What is full and accurate legal name of the lessee's company that executed this lease?

Landlord:  
Rental Agreement: Briarwood Acquisition LLC
Clauses Document: What is full and accurate legal name of the lessor's company that executed this lease?

Lease Commencement Date:  
Rental Agreement: The lease commencement date is March 1, 2022
Clauses Document: What is the lease start date and describe the conditions based on which it is calculated in the agreement in a few sentence