In [1]:
import numpy as np
import pandas as pd
import langchain

In [2]:
from langchain_classic.document_loaders import DirectoryLoader, TextLoader
from langchain_core.documents import Document
from dotenv import load_dotenv

  from pydantic.v1.fields import FieldInfo as FieldInfoV1


In [3]:
load_dotenv()

True

In [4]:
loader = DirectoryLoader(
    path=r'C:\Projects\contracts',
    glob='**/*.txt',
    loader_cls=TextLoader,
    loader_kwargs={'encoding': 'utf-8'}
)

In [5]:
loader

<langchain_community.document_loaders.directory.DirectoryLoader at 0x1e1f930f230>

In [6]:
docs = loader.load()

In [7]:
docs

[Document(metadata={'source': 'C:\\Projects\\contracts\\2ThemartComInc_19990826_10-12G_EX-10.10_6700288_EX-10.10_Co-Branding Agreement_ Agency Agreement.txt'}, page_content='CO-BRANDING AND ADVERTISING AGREEMENT\n\nTHIS CO-BRANDING AND ADVERTISING AGREEMENT (the "Agreement") is made as of June 21, 1999 (the "Effective Date") by and between I-ESCROW, INC., with its principal place of business at 1730 S. Amphlett Blvd., Suite 233, San Mateo, California 94402 ("i-Escrow"), and 2THEMART.COM, INC. having its principal place of business at 18301 Von Karman Avenue, 7th Floor, Irvine, California 92612 ("2TheMart").\n\n1. DEFINITIONS.\n\n(a) "CONTENT" means all content or information, in any medium, provided by a party to the other party for use in conjunction with the performance of its obligations hereunder, including without limitation any text, music, sound, photographs, video, graphics, data or software. Content provided by 2TheMart is referred to herein as "2TheMart Content" and Content p

In [8]:
## split the documents into chunks

In [9]:
import re

def split_into_sections(text):
    """
    Try to split document into sections using flexible patterns.
    If no sections found, returns full text as single section.
    """

    # flexible heading detection
    pattern = r'\n(?=(SECTION|Section|ARTICLE|Article|[A-Z][A-Z\s]{4,}|[0-9]+\.[0-9]+|[0-9]+\.)\s)'

    parts = re.split(pattern, text)

    sections = []
    buffer = ""

    for part in parts:
        if part is None:
            continue

        # detect heading-like text
        if re.match(r'(SECTION|Section|ARTICLE|Article)', part) or \
           re.match(r'[A-Z][A-Z\s]{4,}', part) or \
           re.match(r'[0-9]+\.', part):

            if buffer.strip():
                sections.append(buffer.strip())
            buffer = part
        else:
            buffer += part

    if buffer.strip():
        sections.append(buffer.strip())

    # if no real sections found → return full doc
    if len(sections) <= 1:
        return [text]

    return sections

In [10]:
final_chunks = []

for doc in docs:
    section = split_into_sections(doc.page_content)
    print(f"The section: {section}")
    for sec in section:
        final_chunks.append(Document(
            page_content=sec,
            metadata=doc.metadata
        ))

The section: ['CO-BRANDING AND ADVERTISING AGREEMENT\n\nTHIS CO-BRANDING AND ADVERTISING AGREEMENT (the "Agreement") is made as of June 21, 1999 (the "Effective Date") by and between I-ESCROW, INC., with its principal place of business at 1730 S. Amphlett Blvd., Suite 233, San Mateo, California 94402 ("i-Escrow"), and 2THEMART.COM, INC. having its principal place of business at 18301 Von Karman Avenue, 7th Floor, Irvine, California 92612 ("2TheMart").', '1.', '1. DEFINITIONS.\n\n(a) "CONTENT" means all content or information, in any medium, provided by a party to the other party for use in conjunction with the performance of its obligations hereunder, including without limitation any text, music, sound, photographs, video, graphics, data or software. Content provided by 2TheMart is referred to herein as "2TheMart Content" and Content provided by i-Escrow is referred to herein as "i-Escrow Content."\n\n(b) "CO-BRANDED SITE" means the web-site accessible through Domain Name, for the Serv

In [11]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [12]:
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=500,
    separators=[
        "\n\n",
        "\n",
        ". "
    ]
)

In [13]:
splitted_docs = splitter.split_documents(final_chunks)

In [14]:
print("Total chunks: ",len(splitted_docs))

Total chunks:  459


In [15]:
print(splitted_docs[0].page_content[:300])

CO-BRANDING AND ADVERTISING AGREEMENT

THIS CO-BRANDING AND ADVERTISING AGREEMENT (the "Agreement") is made as of June 21, 1999 (the "Effective Date") by and between I-ESCROW, INC., with its principal place of business at 1730 S. Amphlett Blvd., Suite 233, San Mateo, California 94402 ("i-Escrow"), a


In [16]:
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.retrievers import BM25Retriever
from langchain_classic.retrievers import EnsembleRetriever

In [17]:
embedding_model = OllamaEmbeddings(model='nomic-embed-text')

  embedding_model = OllamaEmbeddings(model='nomic-embed-text')


In [18]:
vector_db = FAISS.from_documents(documents=splitted_docs, embedding=embedding_model)

In [19]:
vector_db.save_local("faiss_index")

In [20]:
### initialize chain and retriever

In [21]:
retriever = vector_db.as_retriever()

In [22]:
bm25_retriever = BM25Retriever.from_documents(documents=splitted_docs)
bm25_retriever.k = 5

In [23]:
from langchain_classic.chains import create_retrieval_chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import PromptTemplate
from langchain_ollama import ChatOllama
from langchain_google_genai import ChatGoogleGenerativeAI

In [24]:
hybrid_retriever = EnsembleRetriever(
    retrievers=[retriever, bm25_retriever],
    weights=[0.7, 0.3]
)

In [25]:
llm = ChatOllama(model='tinyllama:latest')
rewriting_llm = ChatGoogleGenerativeAI(model='gemini-2.5-flash-lite')

In [26]:
query_prompt = PromptTemplate(
    template="""
You are an helpful AI legal document assistant. You help the users in rewriting their query whenever user passes a query to you.

Rewrite the user's question into a clear and complete legal query with proper punctuation and with right grammatical patterns
that will help the retrieve relevant contract clause.

Do not answer the question. Only rewrite it

Follow the guidelines as mentioned below as it is don't miss any guideline
Guidelines:
1) Rewrite the user's question into a clear and complete legal query with proper punctuation and with right grammatical patterns.
2) Do not answer the question
3) Only rewrite the query
4) 

User question:
{input}

Rewritten query:
""",
input_variables=['input'],
validate_template=True
)

In [27]:
prompt = PromptTemplate(
    template="""
You are an helpful AI legal document assistant. You help the users in resolving their queries. Your main task is to resolve the user query
from the provided context from legal contracts. You answer from the provided context don't hallucinate yourself and don't make up of new rules.

Below some guidelines are provided follow them as strictly as possible. Guidelines goes as follows:
Guidelines:
1) Use only provided context
2) Do not make up information and don't hallucinate
3) If the answer is not present in the context, say: "Answer not found in provided context"
4) If the answer is present in the context, then say according to the context.
4) Be precise and professional
5) Be friendly with user don't answer the user in a rude way. Be polite and professional

Context:
{context}

Question:
{input}

AI:
""",
input_variables=['context', 'input'],
validate_template=True
)

In [28]:
document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt
)

In [29]:
from langchain_core.output_parsers import StrOutputParser

In [30]:
rewrite_chain = query_prompt | rewriting_llm | StrOutputParser()

In [31]:
rewritten_query = rewrite_chain.invoke({
    "input": "Late Penalty?"
})

In [32]:
rewritten_query

'What are the contractual provisions regarding late payment penalties?'

In [35]:
retrieval_chain = create_retrieval_chain(hybrid_retriever, document_chain)

In [36]:
response = retrieval_chain.invoke({
    "input": rewritten_query
})

In [37]:
response

{'input': 'What are the contractual provisions regarding late payment penalties?',
 'context': [Document(id='6c9cc2d7-2980-4a8d-879a-08ef279498f3', metadata={'source': 'C:\\Projects\\contracts\\ACCURAYINC_09_01_2010-EX-10.31-DISTRIBUTOR AGREEMENT.txt'}, page_content='. In the event that full payment is not received by Accuray, Accuray shall not be liable to Distributor for any margin or  commission unless and until it has received payment of amounts sufficient to cover the costs incurred by Accuray to provide the  applicable Products to Distributor and the applicable Services to Customer ("Accuray Cost"). Distributor acknowledges and agrees  that it shall not be entitled to receive payment of any margin or commission until Accuray has received payment of the Accuray  Cost amount in relation to the applicable Products and Services.     6. TERM AND TERMINATION     6.1. Term'),
  Document(id='c99e9e6a-bdc4-483d-a2da-bb5aae879c33', metadata={'source': 'C:\\Projects\\contracts\\ABILITYINC_0

In [38]:
sources = vector_db.similarity_search_with_score(query="What is late payment penalty?")

In [39]:
sources

[(Document(id='6c9cc2d7-2980-4a8d-879a-08ef279498f3', metadata={'source': 'C:\\Projects\\contracts\\ACCURAYINC_09_01_2010-EX-10.31-DISTRIBUTOR AGREEMENT.txt'}, page_content='. In the event that full payment is not received by Accuray, Accuray shall not be liable to Distributor for any margin or  commission unless and until it has received payment of amounts sufficient to cover the costs incurred by Accuray to provide the  applicable Products to Distributor and the applicable Services to Customer ("Accuray Cost"). Distributor acknowledges and agrees  that it shall not be entitled to receive payment of any margin or commission until Accuray has received payment of the Accuray  Cost amount in relation to the applicable Products and Services.     6. TERM AND TERMINATION     6.1. Term'),
  np.float32(340.68414)),
 (Document(id='2c5589ba-4806-4bd3-993e-faff2fbc02c6', metadata={'source': 'C:\\Projects\\contracts\\ABILITYINC_06_15_2020-EX-4.25-SERVICES AGREEMENT.txt'}, page_content='. (c) Prov

In [40]:
for doc, score in sources:
    print("SCORE: ",score)
    print("SOURCE: ",doc.metadata['source'])

SCORE:  340.68414
SOURCE:  C:\Projects\contracts\ACCURAYINC_09_01_2010-EX-10.31-DISTRIBUTOR AGREEMENT.txt
SCORE:  352.74683
SOURCE:  C:\Projects\contracts\ABILITYINC_06_15_2020-EX-4.25-SERVICES AGREEMENT.txt
SCORE:  367.83496
SOURCE:  C:\Projects\contracts\ADAMSGOLFINC_03_21_2005-EX-10.17-ENDORSEMENT AGREEMENT.txt
SCORE:  369.27725
SOURCE:  C:\Projects\contracts\ACCURAYINC_09_01_2010-EX-10.31-DISTRIBUTOR AGREEMENT.txt


In [41]:
sources[0][1]

np.float32(340.68414)

In [42]:
filtered_docs = []

for doc, score in sources:
    if score <= sources[0][1] * 1.15:
        filtered_docs.append(doc)

In [43]:
filtered_docs

[Document(id='6c9cc2d7-2980-4a8d-879a-08ef279498f3', metadata={'source': 'C:\\Projects\\contracts\\ACCURAYINC_09_01_2010-EX-10.31-DISTRIBUTOR AGREEMENT.txt'}, page_content='. In the event that full payment is not received by Accuray, Accuray shall not be liable to Distributor for any margin or  commission unless and until it has received payment of amounts sufficient to cover the costs incurred by Accuray to provide the  applicable Products to Distributor and the applicable Services to Customer ("Accuray Cost"). Distributor acknowledges and agrees  that it shall not be entitled to receive payment of any margin or commission until Accuray has received payment of the Accuray  Cost amount in relation to the applicable Products and Services.     6. TERM AND TERMINATION     6.1. Term'),
 Document(id='2c5589ba-4806-4bd3-993e-faff2fbc02c6', metadata={'source': 'C:\\Projects\\contracts\\ABILITYINC_06_15_2020-EX-4.25-SERVICES AGREEMENT.txt'}, page_content='. (c) Provider shall allow the Recipie

In [44]:
response

{'input': 'What are the contractual provisions regarding late payment penalties?',
 'context': [Document(id='6c9cc2d7-2980-4a8d-879a-08ef279498f3', metadata={'source': 'C:\\Projects\\contracts\\ACCURAYINC_09_01_2010-EX-10.31-DISTRIBUTOR AGREEMENT.txt'}, page_content='. In the event that full payment is not received by Accuray, Accuray shall not be liable to Distributor for any margin or  commission unless and until it has received payment of amounts sufficient to cover the costs incurred by Accuray to provide the  applicable Products to Distributor and the applicable Services to Customer ("Accuray Cost"). Distributor acknowledges and agrees  that it shall not be entitled to receive payment of any margin or commission until Accuray has received payment of the Accuray  Cost amount in relation to the applicable Products and Services.     6. TERM AND TERMINATION     6.1. Term'),
  Document(id='c99e9e6a-bdc4-483d-a2da-bb5aae879c33', metadata={'source': 'C:\\Projects\\contracts\\ABILITYINC_0

In [45]:
print("ANSWER: \n")
print(response['answer'])

print("\nSOURCES:\n")
i = 1
for document in filtered_docs:
    print(f"---Evidence {i} ---")
    print(document.metadata['source'])
    print(document.page_content[:400])
    i+=1


ANSWER: 

Section 7.5 of the Straight Alliance Agreement sets out additional contractual provisions regarding late payment penalties. Here are some key points to consider:

1. Late payment penalties: The Straight Alliance Agreement provides that if a CuStomer makes a payment before receiving delivery, the CuStomer shall be deemed to have waived its right to any additional or consequential damages or interest arising from late payment. If the payment is subsequently withheld, the CuStomer agrees to pay accrued interest on the unpaid balance at a rate of 1% per month until the due date.

2. Compounding interest: The Straight Alliance Agreement also provides that if a CuStomer fails to make payment within 7 days of receiving delivery, then AccuraY shall be entitled to an additional 5 days' grace period, which will be compounded daily at a rate of 1% per day until the due date.

3. Default interest: The Straight Alliance Agreement also provides that if a CuStomer fails to make payment afte