# Contract Document Analyzer

## Install Dependencies

In [9]:
%pip install -r requirements.txt

I0000 00:00:1727680982.264017   24957 fork_posix.cc:77] Other threads are currently calling into gRPC, skipping fork() handlers


Note: you may need to restart the kernel to use updated packages.


## Setup Files

### Import API Key

In [10]:
import os
from dotenv import load_dotenv

# Load key=value pairs from a .env file into the process environment so that
# credentials are not hardcoded in the notebook.
# NOTE(review): presumably this supplies the Google API key consumed by the
# Gemini model/embeddings cells below — confirm the expected variable name.
load_dotenv()

True

## Load Documents

In [11]:
from langchain.document_loaders import PyPDFLoader

# Paths of every PDF inside the "contracts" directory.
# os.listdir() returns entries in arbitrary order, so the paths are sorted:
# later cells index into `pdfs` and `chunks` by position and should see the
# same document at the same index on every run and every machine.
# The extension check is case-insensitive so files named "*.PDF" are kept too.
pdfs_source = sorted(
    os.path.join("contracts", f)
    for f in os.listdir("contracts")
    if f.lower().endswith(".pdf")
)

# One entry per file; each entry is the list of Documents that
# PyPDFLoader.load() returns for that file (indexed per page below).
pdfs = [PyPDFLoader(source).load() for source in pdfs_source]

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 22 0 (offset 0)
Ignoring wrong pointing object 24 0 (offset 0)


In [12]:
# Sanity check: show the text and then the metadata of the first page
# of the first loaded PDF.
first_page = pdfs[0][0]
print(first_page.page_content)
print(first_page.metadata)

  WEBSITE DEVELOPMENT CONTRACT FORM Client:   Contact:  Address: Phone:    E-Mail:   Services: [Name and briefly describe any additional services not in the form below]  Website Development Services (the “Service(s)”).      Service Fee:  $______________ per month, payable in advance, subject to the terms of Section 2.5 herein. Initial Service Term:  For subscription agreements – delete if not applicable [One] Year  Service Capacity: ___________________   [Note: include any limits on usage.]   Also, if additional fees will be required for overages, include details here or in fees section above]  Improvement or Optimization Services:  Company will use commercially reasonable efforts to provide Client the services described in the Statement of Work (“SOW”) attached as Exhibit A hereto (“Implementation Services”), and Client shall pay Company the Improvement Fee in accordance with the terms herein. Improvement Services Fee (one-time):  $____________   (Feel free to modify the form above to

## Preparing Data

### Chunking Documents

In [13]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# RecursiveCharacterTextSplitter tries the separators in list order, so they
# must run from coarsest to finest: paragraph break, line break, sentence end,
# word boundary, and finally individual characters.
# Listing "\n" ahead of "\n\n" makes the paragraph separator effectively dead
# (any text containing "\n\n" also contains "\n", which is picked first), and
# without the trailing " " / "" fallbacks a long run of text that has no
# newline or period cannot be reduced to chunk_size.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", " ", ""],
    chunk_size=1000,    # maximum characters per chunk
    chunk_overlap=200,  # characters shared between neighbouring chunks
)

# Split each PDF's page Documents and flatten everything into one list of
# chunk Documents (the source/page metadata travels with each chunk).
chunks = []
for pdf in pdfs:
    chunks.extend(text_splitter.split_documents(pdf))

In [14]:
# Sanity check: show the text and then the metadata of the second chunk.
second_chunk = chunks[1]
print(second_chunk.page_content)
print(second_chunk.metadata)

. Improvement Services Fee (one-time):  $____________   (Feel free to modify the form above to match whatever you want to add into the contract below, remove this text)
{'source': 'contracts/MonthlySubscriptionContract.pdf', 'page': 0}


## Model

### Load Model and Embeddings

In [15]:
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.vectorstores import FAISS

# Rate limit noted for the model: 1 million tokens per minute (TPM).
max_tokens_per_min = 1e6

# Chat model. temperature=0 because the answers should stick to the existing
# contract text rather than be creative.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0)

# Gemini embedding model, used to embed every document chunk into the
# FAISS vector store that the retriever searches.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vector_store = FAISS.from_documents(documents=chunks, embedding=embeddings)

### Prompts

In [21]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Prompt to generate relevant search query for the retriever
# System instruction for the query-rewriting step: turn the latest user
# message into a standalone question that the retriever can search with.
contextualize_q_system_prompt = """
    Given a chat history and the latest user question
    which might reference context in the chat history,
    formulate a standalone question which can be understood
    without the chat history. Do NOT answer the question,
    just reformulate it if needed and otherwise return it as is.
"""

# Message order: system instruction, earlier turns, then the newest user input.
contextualize_q_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_q_messages)

In [22]:
# System instruction for the answering step; {context} is filled with the
# retrieved document chunks.
qa_prompt_string = """
You are a contract review assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question.
If you don't know the answer, say that you don't know. Keep the answer concise..
Never provide incorrect information. If it's a greeting respond nicely.

Context: {context}
"""

# Message order: system instruction (with context), earlier turns, then the
# newest user input.
qa_messages = [
    ("system", qa_prompt_string),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

### Chains

In [23]:
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Step 1: retriever that first rewrites the user input (using the chat
# history and contextualize_q_prompt) and then searches the vector store.
base_retriever = vector_store.as_retriever()
history_aware_retriever = create_history_aware_retriever(
    llm,
    base_retriever,
    contextualize_q_prompt,
)

# Step 2: chain that stuffs the retrieved documents into qa_prompt and asks the LLM.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

# Step 3: full RAG pipeline = retrieval followed by question answering.
rag_chain = create_retrieval_chain(
    history_aware_retriever,
    question_answer_chain,
)

### Running Inference

In [26]:
import langchain
from langchain_core.messages import AIMessage, HumanMessage

from langchain.globals import set_debug

query = "Give Risks and Liabilities within the contracts"

# Enable LangChain's verbose chain tracing through the supported setter.
# Assigning to the `langchain.debug` module attribute is the deprecated way of
# doing this; set_debug() is the documented API for toggling it.
# The trace is very long: call set_debug(False) once the chain behaves as expected.
set_debug(True)

# Start a fresh conversation, run the first turn, and record both sides of it
# so follow-up questions can refer back to this exchange.
chat_history = []
response = rag_chain.invoke({"input": query, "chat_history": chat_history})
chat_history.extend(
    [
        HumanMessage(content=query),
        AIMessage(content=response["answer"]),
    ]
)

[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain] Entering Chain run with input:
[0m{
  "input": "Summarize Maintenance Agreement",
  "chat_history": []
}
[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain > chain:RunnableAssign<context>] Entering Chain run with input:
[0m{
  "input": "Summarize Maintenance Agreement",
  "chat_history": []
}
[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain > chain:RunnableAssign<context> > chain:RunnableParallel<context>] Entering Chain run with input:
[0m{
  "input": "Summarize Maintenance Agreement",
  "chat_history": []
}
[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain > chain:RunnableAssign<context> > chain:RunnableParallel<context> > chain:retrieve_documents] Entering Chain run with input:
[0m{
  "input": "Summarize Maintenance Agreement",
  "chat_history": []
}
[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain > chain:RunnableAssign<context> > chain:RunnableParallel<context> > chain:retrieve_document

In [28]:
# Display the conversation recorded so far (the cell's last expression is rendered).
chat_history

[HumanMessage(content='Summarize Maintenance Agreement', additional_kwargs={}, response_metadata={}),
 AIMessage(content='This Maintenance Agreement is between the Client and Meanbee Limited for web development, design, support and consultancy services for a 6 month period. Meanbee agrees to devote a certain number of days per month to assignments determined by the Client. \n', additional_kwargs={}, response_metadata={})]

In [29]:
# Follow-up turn: "It" is only meaningful given the earlier exchange, so the
# accumulated chat_history is passed along with the new input.
query = "Explain It"
response = rag_chain.invoke({"input": query, "chat_history": chat_history})

# Build both messages first, then record the turn in one in-place update.
human_turn = HumanMessage(content=query)
ai_turn = AIMessage(content=response["answer"])
chat_history += [human_turn, ai_turn]

[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain > chain:RunnableAssign<context>] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain > chain:RunnableAssign<context> > chain:RunnableParallel<context>] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain > chain:RunnableAssign<context> > chain:RunnableParallel<context> > chain:retrieve_documents] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:retrieval_chain > chain:RunnableAssign<context> > chain:RunnableParallel<context> > chain:retrieve_documents > chain:RunnableLambda] Entering Chain run with input:
[0m[inputs]
[36;1m[1;3m[chain/end][0m [1m[chain:retrieval_chain > chain:RunnableAssign<context> > chain:RunnableParallel<context> > chain:retrieve_documents > chain:RunnableLambda] [0ms]

In [30]:
# Display the conversation again, now including the follow-up turn.
chat_history

[HumanMessage(content='Summarize Maintenance Agreement', additional_kwargs={}, response_metadata={}),
 AIMessage(content='This Maintenance Agreement is between the Client and Meanbee Limited for web development, design, support and consultancy services for a 6 month period. Meanbee agrees to devote a certain number of days per month to assignments determined by the Client. \n', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Explain It', additional_kwargs={}, response_metadata={}),
 AIMessage(content="This agreement outlines the terms of a service contract between a client and a web development company called Meanbee Limited. \n\nHere's a breakdown:\n\n* **Duration:** The agreement lasts for 6 months.\n* **Services:** Meanbee will provide web development, design, support, and consultancy services.\n* **Work Commitment:** Meanbee commits to dedicating a specific number of days each month to tasks assigned by the client.\n* **Communication:** The primary mode of commu