In [1]:
from docx import Document

# Contract under evaluation, addressed relative to this notebook.
docx_path = "../Evaluation Sets/Raptor Contract.docx"
doc = Document(docx_path)

# Strip every paragraph's text and keep only the non-empty results
# (filter(None, ...) drops the empty strings).
docx_texts = list(filter(None, (paragraph.text.strip() for paragraph in doc.paragraphs)))


In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter, SentenceTransformersTokenTextSplitter

In [3]:
# First pass: character-based splitting with chunk_size=500 and no overlap,
# using the separators below in the order given.
character_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
    separators=["\n\n", "\n", ". ", " ", ""],
)

# Paragraphs are re-joined with blank lines so "\n\n" marks paragraph boundaries.
full_text = '\n\n'.join(docx_texts)
character_split_texts = character_splitter.split_text(full_text)

# Show one sample chunk, a blank line, then the total chunk count.
print(
    character_split_texts[10],
    f"\nTotal chunks: {len(character_split_texts)}",
    sep="\n",
)

Representative.

Total chunks: 650


In [4]:
# Second pass: re-split every character chunk by tokens (256 per chunk, no overlap).
token_splitter = SentenceTransformersTokenTextSplitter(tokens_per_chunk=256, chunk_overlap=0)

# Flatten the per-chunk results into one list, preserving order.
token_split_texts = [
    piece
    for chunk in character_split_texts
    for piece in token_splitter.split_text(chunk)
]

# Show one sample chunk, a blank line, then the total chunk count.
print(
    token_split_texts[10],
    f"\nTotal chunks: {len(token_split_texts)}",
    sep="\n",
)

  from tqdm.autonotebook import tqdm, trange


representative.

Total chunks: 650


In [5]:
import chromadb
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction

# Embedding function built with its default arguments.
embedding_function = SentenceTransformerEmbeddingFunction()

# Sanity check: embed a single chunk (passed as a one-element list) and print the result.
sample_embedding = embedding_function([token_split_texts[10]])
print(sample_embedding)

[[-0.05328620597720146, -0.002339437836781144, 0.02058427408337593, 0.0042345719411969185, -0.026011338457465172, 0.03291021287441254, 0.03313444182276726, -0.013939635828137398, -0.005626088008284569, -0.016696039587259293, -0.04306734725832939, -0.0321505106985569, 0.022991394624114037, -0.020406104624271393, -0.0048293243162333965, 0.04074518010020256, 0.03148449584841728, 0.007110103033483028, -0.02592775784432888, 0.05327778682112694, 0.013863139785826206, 0.06337061524391174, 0.022657351568341255, 0.05106052756309509, 0.004572986159473658, 0.019555382430553436, -0.029988251626491547, 0.017169199883937836, -0.009934392757713795, -0.11862093210220337, 0.023795006796717644, -0.04086438566446304, 0.009681900031864643, -0.0022818869911134243, -0.0023032110184431076, 0.05665221065282822, 0.029677940532565117, -0.022787921130657196, 0.02758784405887127, -0.09012836962938309, -0.03344545513391495, 0.020511381328105927, -0.042250003665685654, 0.006441268604248762, 0.001968566095456481, 0.

In [6]:
# Chroma client created with default settings.
chroma_client = chromadb.Client()

# get_or_create_collection (instead of create_collection) keeps this cell
# re-runnable: create_collection raises if "Contract1" already exists, which
# happens whenever the cell is executed a second time on the same kernel.
chroma_collection = chroma_client.get_or_create_collection("Contract1", embedding_function=embedding_function)

# Ids are the chunk positions as strings: "0", "1", ...
ids = [str(i) for i in range(len(token_split_texts))]

# upsert (instead of add) so a re-run overwrites the entries for these ids
# rather than colliding with them.
# NOTE(review): if the chunk count shrinks between runs on the same kernel,
# entries with higher ids from the earlier run remain in the collection.
chroma_collection.upsert(ids=ids, documents=token_split_texts)
chroma_collection.count()

650

In [7]:
query = "Under what circumstances and to what extent the Sellers are responsible for a breach of representations and warranties?"

# Ask the collection for 5 results for this single query text.
results = chroma_collection.query(n_results=5, query_texts=[query])
# Only one query text was sent, so take the first entry of 'documents'.
retrieved_documents = results['documents'][0]

# Print each retrieved chunk followed by blank lines as a visual separator.
for document in retrieved_documents:
    print(f"{document}\n\n")

. further, each seller agrees that, ( i ) were such seller to breach any of the covenants contained in this section 6. 08, the damage to the buyer and the acquired companies may be irreparable, ( ii ) the buyer, in addition to any other remedies available to it, shall be entitled to seek preliminary and permanent injunctive relief against any breach or threatened breach by such seller of any such covenants together with an award of its reasonable attorneys ’ fees incurred in enforcing its rights


liability. the sellers ’ representative shall not be held liable by any of the sellers for actions or omissions in exercising or failing to exercise all or any of the power and authority of the sellers ’ representative pursuant to this agreement, except in the case of the sellers ’ representative ’ s gross negligence, bad faith or willful misconduct


provisions concerning the sellers ’ representative.


. the sellers ’ representative shall be entitled to rely on the advice of counsel, public

In [8]:
import os
import openai
#from openai import OpenAI

from dotenv import load_dotenv, find_dotenv
# Read the local .env file, then hand the key to the openai module.
# The os.environ lookup raises KeyError if OPENAI_API_KEY is not set.
env_file = find_dotenv()
_ = load_dotenv(env_file)
openai.api_key = os.environ["OPENAI_API_KEY"]

#openai_client = OpenAI()

In [9]:
def rag(query, retrieved_documents, model="gpt-3.5-turbo"):
    """Answer a question about the contract using retrieved excerpts as context.

    Args:
        query: The user's question as text.
        retrieved_documents: Iterable of text chunks; they are joined with
            blank lines and sent to the model as the "Information" section.
        model: Name of the OpenAI chat model to call.

    Returns:
        The message content of the first choice in the chat completion.
    """
    information = "\n\n".join(retrieved_documents)

    messages = [
        {
            "role": "system",
            # Adjacent string literals are concatenated verbatim, so the first
            # one must end with a space to keep the two sentences apart.
            "content": (
                "You are a helpful expert contract advisor assistant. Your users are asking questions about information contained in the contract. "
                "You will be shown the user's question, and the relevant information from the contract. Answer the user's question using only this information."
            ),
        },
        {"role": "user", "content": f"Question: {query}. \n Information: {information}"}
    ]

    if hasattr(openai, "OpenAI"):
        # openai>=1.0: the legacy ChatCompletion entry point was removed; the
        # module-level client still honours openai.api_key.
        response = openai.chat.completions.create(model=model, messages=messages)
    else:
        # openai<1.0: legacy entry point.
        response = openai.ChatCompletion.create(model=model, messages=messages)

    return response.choices[0].message.content

In [10]:
# Generate an answer for `query` from the chunks currently held in `retrieved_documents`.
output = rag(query, retrieved_documents)
print(output)

The sellers are responsible for a breach of representations and warranties in accordance with Section 6.08 of the contract. If a seller breaches any of the covenants contained in the contract, the buyer may suffer irreparable damage. In such cases, the buyer has the right to seek preliminary and permanent injunctive relief against the breaching seller, in addition to any other remedies available, and may be awarded reasonable attorneys' fees. The sellers' representative will not be held liable for actions or omissions, except in cases of gross negligence, bad faith, or willful misconduct. Each seller is individually responsible for indemnifying the sellers' representative from losses arising out of serving as the representative, except for losses caused by gross negligence, bad faith, or willful misconduct.


In [11]:
query2 = "How much is the escrow amount?"

# Ask the collection for 5 results for this single query text.
# Note: this rebinds `results` and `retrieved_documents` from the earlier query.
results = chroma_collection.query(n_results=5, query_texts=[query2])
# Only one query text was sent, so take the first entry of 'documents'.
retrieved_documents = results['documents'][0]

# Print each retrieved chunk followed by blank lines as a visual separator.
for document in retrieved_documents:
    print(f"{document}\n\n")

“ escrow amount ” means, $ 1, 000, 000 “ facilities ” means any buildings, plants, improvements or structures located on the real property.


escrow.


funds its pro rata percentage of the amount, if any, by which such re - calculated final purchase price exceeds the estimated purchase price paid at closing in accordance with section 2. 05 ( a ) ( i ) and section 2. 07 ( a ) and the escrow agent shall release the escrow amount to company securityholders in accordance with the escrow agreement or ( ii ) the amount, if any, by which such estimated purchase price paid at closing in accordance with section 2. 05 ( a ) ( i ) and section 2. 07 ( a ) exceeds such


at closing, buyer will deposit the escrow amount in escrow on behalf of the sellers in accordance with the escrow agreement. the escrow amount shall be held and, subject to section 2. 07, released to the company securityholders in accordance with the provisions of the escrow agreement with the company securityholders being entitled

In [12]:
# Generate an answer for `query2` from the chunks currently held in `retrieved_documents`.
output = rag(query2, retrieved_documents)
print(output)

The escrow amount is $1,000,000.
