# Contract Document Analyzer

## Install Dependencies

In [1]:
# Install the packages listed in requirements.txt via the %pip line magic.
# The captured output notes the kernel may need a restart afterwards.
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


## Setup Files

### Import API Key

In [2]:
import os
from dotenv import load_dotenv

# Load variables from a .env file into the process environment.
# NOTE(review): presumably this supplies the Google/Gemini API key used by the
# model cells below — confirm the expected variable name.
load_dotenv()

True

## Load Documents

In [3]:
from langchain.document_loaders import PyPDFLoader

# Collect every PDF in the contracts/ directory.
# - sorted(): os.listdir returns entries in arbitrary order, and later cells
#   index into the results by position (pdfs[0][0], chunks[1]), so the order
#   must be deterministic for the notebook to be reproducible.
# - lower(): match the extension case-insensitively so "*.PDF" files are not
#   silently skipped.
pdfs_source = sorted(
    os.path.join("contracts", f)
    for f in os.listdir("contracts")
    if f.lower().endswith(".pdf")
)

# One entry per contract: each entry is whatever PyPDFLoader.load() returns
# for that file (indexed below as pdfs[file][page]).
pdfs = [PyPDFLoader(source).load() for source in pdfs_source]

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 22 0 (offset 0)
Ignoring wrong pointing object 24 0 (offset 0)


In [4]:
# Sanity check: text and metadata of the first page of the first loaded PDF,
# printed one per line.
print(pdfs[0][0].page_content, pdfs[0][0].metadata, sep="\n")

  WEBSITE DEVELOPMENT CONTRACT FORM Client:   Contact:  Address: Phone:    E-Mail:   Services: [Name and briefly describe any additional services not in the form below]  Website Development Services (the “Service(s)”).      Service Fee:  $______________ per month, payable in advance, subject to the terms of Section 2.5 herein. Initial Service Term:  For subscription agreements – delete if not applicable [One] Year  Service Capacity: ___________________   [Note: include any limits on usage.]   Also, if additional fees will be required for overages, include details here or in fees section above]  Improvement or Optimization Services:  Company will use commercially reasonable efforts to provide Client the services described in the Statement of Work (“SOW”) attached as Exhibit A hereto (“Implementation Services”), and Client shall pay Company the Improvement Fee in accordance with the terms herein. Improvement Services Fee (one-time):  $____________   (Feel free to modify the form above to

## Preparing Data

### Chunking Documents

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split every page into chunks of at most ~1000 characters, overlapping by 200
# characters so text cut at a chunk boundary also appears in the next chunk.
#
# The splitter tries separators in the order given, so they must run from
# coarsest to finest: paragraph break, line break, sentence end, then word and
# character fallbacks. With "\n" listed before "\n\n" the paragraph separator
# can never match (the text has already been split on single newlines), and
# without the trailing " " / "" fallbacks a long run of text containing no
# newline or period cannot be reduced to chunk_size.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", " ", ""],
    chunk_size=1000,
    chunk_overlap=200,
)

# Flatten the per-PDF page lists into one list of chunk documents.
chunks = []
for pdf in pdfs:
    chunks.extend(text_splitter.split_documents(pdf))

In [6]:
# Spot-check one chunk: its text followed by the metadata carried over from
# the source page.
print(chunks[1].page_content, chunks[1].metadata, sep="\n")

. Improvement Services Fee (one-time):  $____________   (Feel free to modify the form above to match whatever you want to add into the contract below, remove this text)
{'source': 'contracts/MonthlySubscriptionContract.pdf', 'page': 0}


## Model

### Load Model and Embeddings

In [7]:
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.vectorstores import FAISS

# Chat model. temperature=0: we do not want creativity when answering from
# existing contract text.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0)

# Token limit noted for this model: 1 million TPM (tokens per minute).
max_tokens_per_min = 1e6

# Gemini embeddings; every document chunk is embedded and indexed in a FAISS
# vector store for similarity search.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vector_store = FAISS.from_documents(documents=chunks, embedding=embeddings)

  from .autonotebook import tqdm as notebook_tqdm


### Prompts

In [8]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System instruction for the query-rewriting step: turn the latest user
# message into a standalone question that the retriever can search with,
# without answering it.
contextualize_q_system_prompt = """
    Given a chat history and the latest user question
    which might reference context in the chat history,
    formulate a standalone question which can be understood
    without the chat history. Do NOT answer the question,
    just reformulate it if needed and otherwise return it as is.
"""

# Message layout: system instruction, then the prior turns, then the new input.
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
    ]
)

In [9]:
# System instruction for the answering step. {context} is filled with the
# retrieved contract chunks by the documents chain built from this prompt.
# (Fixed a stray double period after "concise" in the text sent to the model.)
qa_prompt_string = """
You are a contract review assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.
If you don't know the answer, say that you don't know. Keep the answer concise.
Never provide incorrect information. If it's a greeting respond nicely.

Context: {context}
"""

# Message layout: system instruction with context, prior turns, then the new
# user input.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_prompt_string),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

### Chains

In [10]:
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Retriever wrapped with the query-rewriting prompt, so follow-up questions
# can be reformulated from the chat history before searching the vector store.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=vector_store.as_retriever(),
    prompt=contextualize_q_prompt,
)

# Answering step: feeds the retrieved documents into qa_prompt's {context}.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Full pipeline: retrieve, then answer.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

### Running Inference

In [11]:
import langchain
from langchain_core.messages import AIMessage, HumanMessage

# Imported here so this cell stays self-contained.
from langchain.globals import set_debug

query = """
    Detect potential risks in the contracts.
    For every detected risk, give a concise explanation.
"""

# Enable LangChain's chain tracing through the supported setter rather than by
# assigning the legacy `langchain.debug` module attribute, which newer
# langchain-core releases no longer consult. The trace is very verbose; call
# set_debug(False) once the chain no longer needs inspecting.
set_debug(True)

# Start a fresh conversation, run the first turn, then record both sides of it
# so follow-up questions can refer back to this exchange.
chat_history = []
response = rag_chain.invoke({"input": query, "chat_history": chat_history})
chat_history.extend(
    [
        HumanMessage(content=query),
        AIMessage(content=response["answer"]),
    ]
)

[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain] Entering Chain run with input:
[0m{
  "input": "\n    Detect potential risks in the contracts.\n    For each risk give its source document name.\n",
  "chat_history": []
}
[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain > chain:RunnableAssign<context>] Entering Chain run with input:
[0m{
  "input": "\n    Detect potential risks in the contracts.\n    For each risk give its source document name.\n",
  "chat_history": []
}
[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain > chain:RunnableAssign<context> > chain:RunnableParallel<context>] Entering Chain run with input:
[0m{
  "input": "\n    Detect potential risks in the contracts.\n    For each risk give its source document name.\n",
  "chat_history": []
}
[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain > chain:RunnableAssign<context> > chain:RunnableParallel<context> > chain:retrieve_documents] Entering Chain run with input:
[0m{
  "input": "\n   

In [12]:
# Display the conversation so far: the user query followed by the model's answer.
chat_history

[HumanMessage(content='\n    Detect potential risks in the contracts.\n    For each risk give its source document name.\n', additional_kwargs={}, response_metadata={}),
 AIMessage(content="The provided text is a contract between a client and Meanbee Limited. Here are some potential risks identified:\n\n* **Risk:** The client may fail to comply with its obligations under the agreement, leading to potential losses for Meanbee. \n    * **Source:**  Section iii. of the contract, which states that Meanbee is not liable for losses caused by the client's failure to comply with its obligations.\n* **Risk:** The client may be subject to fines, penalties, or sanctions due to non-compliance with statutory or regulatory obligations, and Meanbee may not be liable for these consequences.\n    * **Source:** Section iv. of the contract, which states that Meanbee is not liable for fines, penalties, or sanctions imposed on the client due to non-compliance with statutory or regulatory obligations.\n* **R

In [13]:
# Follow-up turn: "It" is only meaningful given the previous exchange, so the
# accumulated chat_history is passed along with the new input.
query = "Explain It"
response = rag_chain.invoke({"input": query, "chat_history": chat_history})

# Record this turn (user message, then model reply). Note that re-running the
# cell appends the pair again.
chat_history += [
    HumanMessage(content=query),
    AIMessage(content=response["answer"]),
]

[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain > chain:RunnableAssign<context>] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain > chain:RunnableAssign<context> > chain:RunnableParallel<context>] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain > chain:RunnableAssign<context> > chain:RunnableParallel<context> > chain:retrieve_documents] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain > chain:RunnableAssign<context> > chain:RunnableParallel<context> > chain:retrieve_documents > chain:RunnableLambda] Entering Chain run with input:
[0m[inputs]
[36;1m[1;3m[chain/end][0m [1m[chain:retrieval_chain > chain:RunnableAssign<context> > chain:RunnableParallel<context> > chain:retrieve_documents > chain:RunnableLambda] [0ms]

In [14]:
# Display the conversation again, now that the follow-up turn has been appended.
chat_history

[HumanMessage(content='\n    Detect potential risks in the contracts.\n    For each risk give its source document name.\n', additional_kwargs={}, response_metadata={}),
 AIMessage(content="The provided text is a contract between a client and Meanbee Limited. Here are some potential risks identified:\n\n* **Risk:** The client may fail to comply with its obligations under the agreement, leading to potential losses for Meanbee. \n    * **Source:**  Section iii. of the contract, which states that Meanbee is not liable for losses caused by the client's failure to comply with its obligations.\n* **Risk:** The client may be subject to fines, penalties, or sanctions due to non-compliance with statutory or regulatory obligations, and Meanbee may not be liable for these consequences.\n    * **Source:** Section iv. of the contract, which states that Meanbee is not liable for fines, penalties, or sanctions imposed on the client due to non-compliance with statutory or regulatory obligations.\n* **R