In [52]:
%pip install langchain
%pip install langchain-openai
%pip install python-dotenv
%pip install pypdf
%pip install PyPDF2
%pip install chromadb
%pip install streamlit

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Using cached pypdf2-3.0.1-py3-none-any.whl (232 kB)
Installing collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [1]:
# (Re)load the autoreload extension and switch it to mode 2 so that edits to
# imported modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [54]:
# Load settings from a local .env file before any client is constructed.
# NOTE(review): the OpenAI API key is presumably expected to come from this file — confirm.
from dotenv import load_dotenv
load_dotenv()  # take environment variables from .env.

True

In [55]:
# Chat model used by every chain in this notebook; no arguments are passed,
# so the library defaults apply.
# NOTE(review): credentials are presumably read from the environment — confirm.
from langchain_openai import ChatOpenAI

llm = ChatOpenAI()

In [56]:
# Smoke test: send a plain question straight to the model, with no prompt
# template and no contract context.
llm.invoke("What is a legally binding contract and its termination period?")

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-RtkFm***************************************oyCA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

## Working with LangChain Expression Language (LCEL) to create a chain

In [5]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# String parser appended to the chain so the final output is plain text.
output_parser = StrOutputParser()

# The system message fixes the reviewer persona; the user message is filled
# in from the `input` key supplied at invoke time.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert in reviewing a contract between Cloud Investments and Mr Jack Robinson",
        ),
        ("user", "{input}"),
    ]
)

In [6]:
# LCEL pipeline: fill the prompt, call the chat model, then parse the reply to a string.
chain = prompt | llm | output_parser

## Asking our LLM a question regarding the contract
Since the model has not seen the actual contract between the two parties, it cannot give us the specific, correct answer we want.

In [24]:
# Baseline without retrieval: the chain has no access to the contract text,
# so expect a generic answer rather than one taken from the document.
chain.invoke({"input": "Who owns the IP"})

'In the contract between Cloud Investments and Mr. Jack Robinson, the ownership of intellectual property (IP) rights should be clearly outlined. Typically, in a business contract, the ownership of IP created by an employee or contractor during the course of their work for the company would belong to the company, unless otherwise specified in the contract. It is important to review the specific terms of the contract to determine who owns the IP rights in this particular case.'

## Working on the retriever

In [47]:
# Load the contract PDF into a list of Document objects.
# NOTE(review): `pages` presumably holds one Document per PDF page — confirm.
# NOTE(review): the recorded outputs further down cite
# '../data/Raptor Contract.docx.pdf' as the source, which differs from the
# path used here — confirm which file the persisted index was built from.
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("../data/Raptor Contract.pdf")
pages = loader.load()

In [48]:
# Number of Document objects returned by the loader.
len(pages)

73

In [49]:
# Break the loaded pages into overlapping chunks; sizes are measured with `len`.
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)
document_chunks = text_splitter.split_documents(pages)
len(document_chunks)


302

In [50]:
# storing the chunks in a vector store
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

# Embed and persist the chunks produced by the text splitter. The previous
# version indexed `pages`, so the splitter output was never used and retrieval
# returned entire pages instead of focused ~1000-character chunks.
# NOTE(review): an index already persisted in ../data/chroma_db still holds the
# page-level entries, and re-running this cell presumably appends to it —
# confirm, and clear the directory before rebuilding if so.
db = Chroma.from_documents(document_chunks, embeddings, persist_directory="../data/chroma_db")


In [51]:
# Re-open the persisted index from disk, using the same embedding function
# that was used to build it.
vector_store = Chroma(
    embedding_function=embeddings,
    persist_directory="../data/chroma_db",
)

# Quick retrieval check: print the text of the best-matching document.
query = "What does Closing Debt Amount mean?"
docs_retrieved = vector_store.similarity_search(query)
print(docs_retrieved[0].page_content)

case
of
each
of
(a)
and
(b),
as
a
result
of
the 
execution
and
delivery
of
this
Agreement
or
the
consummation
of
the
Contemplated 
Transactions.
“
Closing
Cash
Amount
”
means
the
sum
of
all
cash
and
cash
equivalents
of
the
Acquired 
Companies
as
of
immediately
prior
to
the
Closing,
less
(without
duplication)
the
amount
of 
outstanding
checks
as
of
the
Closing.
“
Closing
Debt
Amount
”
means
the
amount
of
Debt
of
the
Acquired
Companies 
(disregarding
Debt
of
any
Acquired
Company
to
another
Acquired
Company)
as
of
immediately 
prior
to
the
Closing
determined
without
giving
effect
to
any
reduction
to
such
amount
occurring 
as
a
result
of
the
repayment
of
Debt
on
the
Closing
Date.
-
3
-
112923184_5


## Create a RAG Chain

In [29]:
# create chain for documents
from langchain.chains.combine_documents import create_stuff_documents_chain

# Prompt for the RAG step: `{context}` receives the retrieved documents and
# `{input}` the user's question. The opening delimiter is exactly three quotes;
# the previous four-quote form put a stray `"` at the start of every prompt.
template = """Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}
"""
# Note: this rebinds `prompt`, replacing the chat prompt defined earlier in the
# notebook; `chain` keeps the old prompt only as long as it is not rebuilt.
prompt = ChatPromptTemplate.from_template(template)
document_chain = create_stuff_documents_chain(llm, prompt)

In [30]:
# create retrieval chain
# The retriever is built from the vector store and wired in front of the
# document chain, so each question is answered from retrieved documents.

from langchain.chains import create_retrieval_chain

retriever = vector_store.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [32]:
# Ask the RAG chain for a defined term; the response is a dict that includes
# the original 'input' and the retrieved 'context' documents.
response = retrieval_chain.invoke({
    "input": "What does Closing Debt Amount mean?"
})
response

{'input': 'What does Closing Debt Amount mean?',
 'context': [Document(page_content='case\nof\neach\nof\n(a)\nand\n(b),\nas\na\nresult\nof\nthe \nexecution\nand\ndelivery\nof\nthis\nAgreement\nor\nthe\nconsummation\nof\nthe\nContemplated \nTransactions.\n“\nClosing\nCash\nAmount\n”\nmeans\nthe\nsum\nof\nall\ncash\nand\ncash\nequivalents\nof\nthe\nAcquired \nCompanies\nas\nof\nimmediately\nprior\nto\nthe\nClosing,\nless\n(without\nduplication)\nthe\namount\nof \noutstanding\nchecks\nas\nof\nthe\nClosing.\n“\nClosing\nDebt\nAmount\n”\nmeans\nthe\namount\nof\nDebt\nof\nthe\nAcquired\nCompanies \n(disregarding\nDebt\nof\nany\nAcquired\nCompany\nto\nanother\nAcquired\nCompany)\nas\nof\nimmediately \nprior\nto\nthe\nClosing\ndetermined\nwithout\ngiving\neffect\nto\nany\nreduction\nto\nsuch\namount\noccurring \nas\na\nresult\nof\nthe\nrepayment\nof\nDebt\non\nthe\nClosing\nDate.\n-\n3\n-\n112923184_5', metadata={'page': 7, 'source': '../data/Raptor Contract.docx.pdf'}),
  Document(page_conten

In [41]:
# Ask a broader question about the Sellers' liability; this overwrites `response`.
response = retrieval_chain.invoke({
    "input": "Under what circumstances and to what extent, the Sellers are responsible for a breach of representations and warranties?"
})
response

{'input': 'Under what circumstances and to what extent, the Sellers are responsible for a breach of representations and warranties?',
 'context': [Document(page_content='or\nthe \nEscrow\nAgreement,\nor\nany\nconsent,\nacknowledgment\nor\nrelease\nrelating\nto\nthis\nAgreement\nor\nthe \nEscrow\nAgreement;\nand\n(v)\ntake\nall\nother\nactions\npermitted\nor\nrequired\nto\nbe\ntaken\nby\nor\non\nbehalf\nof\nthe \nSellers\nunder\nthis\nAgreement\nor\nthe\nEscrow\nAgreement\nand\nexercise\nany\nand\nall\nrights\nthat\nthe \nSellers\nor\nthe\nSellers’\nRepresentative\nare\npermitted\nor\nrequired\nto\ndo\nor\nexercise\nunder\nthis \nAgreement\nor\nthe\nEscrow\nAgreement.\n(d)\nLiability\n.\nThe\nSellers’\nRepresentative\nshall\nnot\nbe\nheld\nliable\nby\nany\nof\nthe \nSellers\nfor\nactions\nor\nomissions\nin\nexercising\nor\nfailing\nto\nexercise\nall\nor\nany\nof\nthe\npower\nand \nauthority\nof\nthe\nSellers’\nRepresentative\npursuant\nto\nthis\nAgreement,\nexcept\nin\nthe\ncase\nof\nth

In [42]:
# Ask for the escrow amount; this overwrites `response` again.
response = retrieval_chain.invoke({
    "input": "How much is the escrow amount?"
})
response

{'input': 'How much is the escrow amount?',
 'context': [Document(page_content='the\nBuyer ,\nor\nits\ndesignee,\nin\naccordance\nwith\nthe\nterms\nof \nthe\nEscrow\nAgreement\n(and\nany\nremaining\nbalance\nof\nthe\nEscrow\nAmount\nnot\nrequired\nto\nbe \npaid\nto\nthe\nBuyer\nshall\nbe\nreleased\nto\nCompany\nSecurityholders\nin\naccordance\nwith\nthe\nterms\nof \nthe\nEscrow\nAgreement).\nSection\nI.04\nEscrow\n.\n(a)\nAt\nClosing,\nBuyer\nwill\ndeposit\nthe\nEscrow\nAmount\nin\nescrow\non\nbehalf\nof\nthe \nSellers\nin\naccordance\nwith\nthe\nEscrow\nAgreement.\nThe\nEscrow\nAmount\nshall\nbe\nheld\nand, \nsubject\nto\nSection\n2.07,\nreleased\nto\nthe\nCompany\nSecurityholders\nin\naccordance\nwith\nthe \nprovisions\nof\nthe\nEscrow\nAgreement\nwith\nthe\nCompany\nSecurityholders\nbeing\nentitled\nto\nshare\nin \nsuch\nreleased\namounts\nin\naccordance\nwith\ntheir\nPro\nRata\nPercentages.\nFrom\nand\nafter\nthe \nClosing,\nBuyer\nand\nthe\nSellers’\nRepresentative\nwill\ndirect\n

In [43]:
# Show only the retrieved documents that were supplied as context for the last question.
response['context']

[Document(page_content='the\nBuyer ,\nor\nits\ndesignee,\nin\naccordance\nwith\nthe\nterms\nof \nthe\nEscrow\nAgreement\n(and\nany\nremaining\nbalance\nof\nthe\nEscrow\nAmount\nnot\nrequired\nto\nbe \npaid\nto\nthe\nBuyer\nshall\nbe\nreleased\nto\nCompany\nSecurityholders\nin\naccordance\nwith\nthe\nterms\nof \nthe\nEscrow\nAgreement).\nSection\nI.04\nEscrow\n.\n(a)\nAt\nClosing,\nBuyer\nwill\ndeposit\nthe\nEscrow\nAmount\nin\nescrow\non\nbehalf\nof\nthe \nSellers\nin\naccordance\nwith\nthe\nEscrow\nAgreement.\nThe\nEscrow\nAmount\nshall\nbe\nheld\nand, \nsubject\nto\nSection\n2.07,\nreleased\nto\nthe\nCompany\nSecurityholders\nin\naccordance\nwith\nthe \nprovisions\nof\nthe\nEscrow\nAgreement\nwith\nthe\nCompany\nSecurityholders\nbeing\nentitled\nto\nshare\nin \nsuch\nreleased\namounts\nin\naccordance\nwith\ntheir\nPro\nRata\nPercentages.\nFrom\nand\nafter\nthe \nClosing,\nBuyer\nand\nthe\nSellers’\nRepresentative\nwill\ndirect\nthe\nEscrow\nAgent\nto\ndisburse \npayments\nfrom\nthe\n