### Prerequisites: the virtual environment is already set up and all modules are installed. See `requirements.txt` for the list of required modules.

## Imports

In [1]:
#importing the packages
import langchain
import langgraph
import pinecone
import pypdf
import tiktoken

In [2]:
#loading the .env file
from dotenv import load_dotenv
import os
# The .env file is expected one directory above the notebook's working directory
env_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir, ".env"))
load_dotenv(env_path)

True

In [3]:
from langchain_community.document_loaders import PyPDFLoader
import re

In [4]:
from ipywidgets import FileUpload
# File-picker widget limited to a single .pdf file; the bare name on the last line renders it in the cell output
uploader = FileUpload(accept=".pdf", multiple=False)
uploader

FileUpload(value=(), accept='.pdf', description='Upload')

In [5]:
# Inspect the widget state: a tuple with one dict per uploaded file (empty tuple until a file is chosen)
uploader.value


({'name': 'sample-doc.pdf',
  'type': 'application/pdf',
  'size': 461010,
  'content': <memory at 0x000002546CF6A380>,
  'last_modified': datetime.datetime(2025, 12, 5, 13, 27, 46, 557000, tzinfo=datetime.timezone.utc)},)

In [6]:
# Persist the uploaded PDF to disk so it can later be opened by path.
if uploader.value:
    uploaded_file = uploader.value[0]  # tuple → first element

    # Keep only the final path component so the upload name cannot point outside sampledocs/
    file_name = os.path.basename(uploaded_file['name'])
    content = uploaded_file['content']  # memoryview object

    # Convert memoryview → bytes
    content_bytes = bytes(content)

    # Save file locally; create the target folder first so a fresh checkout does not fail on open()
    os.makedirs("sampledocs", exist_ok=True)
    save_path = f"sampledocs/{file_name}"  # or "uploaded_contract.pdf"
    with open(save_path, "wb") as f:
        f.write(content_bytes)

    print("Saved file as:", save_path)
else:
    # save_path is not assigned on this branch, so cells that read it need an upload first
    print("No file uploaded yet!")


Saved file as: sampledocs/sample-doc.pdf


In [7]:
from langchain_community.document_loaders import PyPDFLoader

# Load the saved PDF; `pages` holds the loaded items, each exposing its text as .page_content (used below)
loader = PyPDFLoader(save_path)
pages = loader.load()

# Quick sanity check on how many items were loaded
print("Pages:", len(pages))


Pages: 66


In [8]:
# Preview the first 500 characters of the second loaded page (index 1) to sanity-check extraction
print(pages[1].page_content[:500])

2 
 
 
1. SALE: 
  The sale deed is the main document by which a seller transfers his right on the property to the 
purchaser, who then acquires absolute ownership of the property. It is also referred to as the 
conveyance deed. On completion of all formalities, a sale deed is prepared. This is the main 
document for transfer of ownership of property. The  deed is executed by all the parties concerned. 
All pages of the deed are to be signed. The deed should be witnessed by at least two witnesse


## Content Cleaning

In [9]:
# Stitch the per-page text together, one newline between consecutive pages
raw_text = "\n".join([doc.page_content for doc in pages])

# One print call; sep="\n" reproduces the line breaks of four separate prints
print(
    "Raw text preview:\n",
    raw_text[:500],
    "\n---",
    f"Total characters: {len(raw_text)}",
    sep="\n",
)


Raw text preview:

1 
 
Introduction: 
Registration refers to the recording of the content s of a document with a Registering 
Officer appointed by the Government. The main purpo se of registration is to ensure information 
about all deals are recorded and maintained apart f rom giving the document its authenticity. It 
gives information to the people regarding legal rig hts and obligations arising or affecting a 
particular property. The registered documents may a fterwards be of legal importance, and also 
aid i

---
Total characters: 175181


In [10]:
import re

def clean_text(text: str) -> str:
    """Normalise extracted text for downstream matching.

    Null bytes are dropped, every run of whitespace (including newlines)
    becomes a single space, and leading/trailing whitespace is trimmed.
    """
    # Drop NULs first so they cannot sit between two whitespace runs
    return re.sub(r"\s+", " ", text.replace("\x00", "")).strip()

cleaned_text = clean_text(raw_text)

# One print call; sep="\n" reproduces the line breaks of four separate prints
print(
    "Preview after cleaning:\n",
    cleaned_text[:500],
    "\n---",
    f"Total words: {len(cleaned_text.split())}",
    sep="\n",
)


Preview after cleaning:

1 Introduction: Registration refers to the recording of the content s of a document with a Registering Officer appointed by the Government. The main purpo se of registration is to ensure information about all deals are recorded and maintained apart f rom giving the document its authenticity. It gives information to the people regarding legal rig hts and obligations arising or affecting a particular property. The registered documents may a fterwards be of legal importance, and also aid in prevent

---
Total words: 28911


In [11]:
# Heuristic: a capital letter followed by at least three more capitals/whitespace characters
# counts as a heading, so inline capitalised words (e.g. party names) are picked up too.
heading_pattern = r"\b([A-Z][A-Z\s]{3,})\b"

headings = [m.group(1) for m in re.finditer(heading_pattern, cleaned_text)]
# Matches can carry trailing whitespace; strip them, then de-duplicate keeping first-seen order
unique_headings = list(dict.fromkeys(map(str.strip, headings)))

unique_headings


['NOC',
 'SALE',
 'GENERAL SALE DEED',
 'VENDOR',
 'VENDEE',
 'NOW THIS DEED OF SALE WITNESSETH AS FOLLOWS',
 'RLSC',
 'ALSC',
 'THE MEASUREMENTS',
 'IN WITNESS WHEREOF',
 'SALE DEED',
 'VENDOR VENDEE WITNESSES',
 'GIFT',
 'DEED OF GIFT OF IMMOVABLE PROPERTY THIS DEED OF GIFT',
 'DONOR',
 'DONEE',
 'WHEREAS',
 'AND WHEREAS',
 'NOW THIS DEED WITNESSETH',
 'AND',
 'DON EE',
 'AND FURTHER',
 'SCHEDULE',
 'MORTGAGE',
 'SIMPLE MORTGAGE DEED',
 'MORTGAGOR',
 'ONE PART',
 'MORTGAGEE',
 'OTHER PART',
 'NOW THIS DEED WITNESSETH THAT',
 'AND THIS DEED FURTHER WITNESSETH THAT',
 'AND IT IS HEREBY AGREED AND DECLARED',
 'AND IT IS FURTHER AGREED AND DECLARED',
 'AND IT IS FURTHER AGREED',
 'AND IT IS FURTHER AGREED BY THE MORTGAGOR',
 'WITNESSES',
 'DEED OF MORTGAGE BY CONDITIONAL SALE THIS DEED OF',
 'VEN DOR',
 'PURCHASER',
 'TO HOLD',
 'THE SCHEDULE ABOVE REFERRED TO',
 'LEASE',
 'GENERAL LEASE DEED',
 'LESSOR',
 'LESSEE',
 'NOW THIS DEED WITNESSETH AS FOLLOWS',
 'THE LESSOR HEREBY DEMISES UNTO

In [12]:
# Split the cleaned text into sections keyed by heading.
#
# Each heading is searched for starting just after the previous heading rather
# than from offset 0. Searching from 0 resolves a heading that is a substring
# of an earlier one (e.g. 'SALE DEED' inside 'GENERAL SALE DEED') to the wrong
# position, which yields end < start and an empty section for its predecessor.
clauses = {}

heading_starts = []  # (heading, start offset) in document order
cursor = 0
for heading in unique_headings:
    pos = cleaned_text.find(heading, cursor)
    if pos == -1:
        # Not present after the previous heading: skip rather than slice from -1
        continue
    heading_starts.append((heading, pos))
    cursor = pos + len(heading)

for i, (heading, start) in enumerate(heading_starts):
    # A section runs up to the next located heading, or to the end of the text
    if i < len(heading_starts) - 1:
        end = heading_starts[i + 1][1]
    else:
        end = len(cleaned_text)

    clauses[heading] = cleaned_text[start:end].strip()

# Show first clause block as sample
first_key = list(clauses.keys())[0]
clauses[first_key][:800]


'NOC from local authorities if the document for regi stration conveys land converted as house site without the approval layout. (i) Patta transfer application duly filled and signed. Compulsory registrable documents: Sl.No Situation / documents 1. Instruments of gift of immovable property 2. Other non-testamentary instruments which purport or operate to create, declare, assign, limit or extinguish, whether in present or in future, any right, title or interest, whether vested or contingent, of the value of one h undred rupees, and upwards, to or in immovable property 3. Non-testamentary instruments which acknowledge t he receipt or payment of any consideration on account of the creation, declarati on, assignment, limitation or extinction of any such right, title or interest 4. Leases of immo'

In [13]:
import re

# Line-anchored heading shapes, applied to the raw (un-collapsed) text.
# Because \s also matches "\n", a single match can run across a line break.
# NOTE(review): this rebinds `heading_pattern`; `matches` is not read by any later cell shown here.
heading_pattern = r"""
(
    ^\d{1,2}\.\s+.*?$       # e.g., 1. Payment Terms
  | ^\d{1,2}\.\d{1,2}\s+.*?$  # e.g., 1.1 Sub clause
  | ^[A-Z][A-Z\s]{3,}$      # ALL CAPS headings
)
"""

# The pattern is one capturing group, so group(1) of each match is the full heading text
matches = [
    found.group(1)
    for found in re.finditer(heading_pattern, raw_text, re.VERBOSE | re.MULTILINE)
]
matches


['1. Instruments of gift of immovable property ',
 '2. Other non-testamentary instruments which purport  or operate to create, declare, ',
 '3. Non-testamentary instruments which acknowledge t he receipt or payment of any ',
 '4. Leases of immovable property from year to year, or for any term exceeding one ',
 '5. Non-testamentary instruments transferring or ass igning any decree or order of a ',
 '6. Authority to adopt a son and not conferred by a Will.',
 '1. SALE: ',
 'GENERAL SALE DEED \n ',
 '1.  That in consideration of payment of Rsâ€¦â€¦â€¦â€¦â€¦â€¦â€¦(Rupee sâ€¦â€¦â€¦â€¦â€¦â€¦â€¦â€¦â€¦.) only ',
 '2. \n That the Vendor hereby assures the Vendee that the said property is free from all kinds of ',
 '3.  That the Vendor further covenants with the Vendee t hat knowingly or otherwise he has not ',
 '4.  That the Vendor further assures the Vendee that he has got a clear, effectual, subsisting ',
 '5.  That the Vendor further covenants with the Vendee t hat if there remains any undis

In [None]:
# Rebuild `clauses` from the raw `headings` list (matches in document order,
# repeats included). NOTE(review): this replaces the dict built from
# `unique_headings` in the earlier cell — confirm both cells are needed.
clauses = {}

# Locate each heading occurrence in order, resuming after the previous one.
# Searching from offset 0 every time would pin repeated or substring headings
# to an earlier position and could produce end < start (an empty section).
located = []  # (heading, start offset) in document order
cursor = 0
for heading in headings:  # 'headings' = your extracted list
    pos = cleaned_text.find(heading, cursor)
    if pos == -1:
        # Not present after the previous heading: skip rather than slice from -1
        continue
    located.append((heading, pos))
    cursor = pos + len(heading)

for idx, (heading, start) in enumerate(located):
    # determine end of section: the next located heading, or the end of the text
    end = located[idx + 1][1] if idx + 1 < len(located) else len(cleaned_text)

    # A heading that repeats keeps the section of its first occurrence
    clauses.setdefault(heading, cleaned_text[start:end].strip())

# Show total clauses
len(clauses)

172

In [15]:
# Take the first heading in insertion order as the running example
sample_heading = list(clauses)[0]
# One print call; sep="\n" reproduces the line breaks of three separate prints
print(
    f"Heading: {sample_heading}",
    "\n---\n",
    clauses[sample_heading][:1500],
    sep="\n",
)


Heading: NOC 

---

NOC from local authorities if the document for regi stration conveys land converted as house site without the approval layout. (i) Patta transfer application duly filled and signed. Compulsory registrable documents: Sl.No Situation / documents 1. Instruments of gift of immovable property 2. Other non-testamentary instruments which purport or operate to create, declare, assign, limit or extinguish, whether in present or in future, any right, title or interest, whether vested or contingent, of the value of one h undred rupees, and upwards, to or in immovable property 3. Non-testamentary instruments which acknowledge t he receipt or payment of any consideration on account of the creation, declarati on, assignment, limitation or extinction of any such right, title or interest 4. Leases of immovable property from year to year, or for any term exceeding one year, or reserving a yearly rent 5. Non-testamentary instruments transferring or ass igning any decree or order of a

## Creating AGENTS

As a free tier of the OpenAI API is not available, I am using `langchain-groq` (Groq) to access a Llama model instead.

In [16]:
from langchain_groq import ChatGroq

# Chat model called by analyze_clause_with_agent and planning_module.
# The key comes from the GROQ_API_KEY environment variable; os.getenv returns None when it is unset.
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    api_key=os.getenv("GROQ_API_KEY")
)


In [17]:
# Instruction prompts for the four specialised agents, keyed by agent name
# ("compliance", "finance", "legal", "operations"). analyze_clause_with_agent
# looks a prompt up by key and appends the clause text after it.
AGENT_PROMPTS = {
    "compliance": """
You are a **Regulatory Compliance Analysis Agent** specializing in auditing legal and operational clauses in contracts.

### Your Objectives
- Identify compliance risks, violations, missing mandatory clauses, or inconsistencies.
- Check if the clause follows required government, financial, environmental, and operational regulations.
- Highlight missing approvals/NOCs/permits/licenses required for execution.
- Detect ambiguous or unenforceable compliance language.

### Your Analysis Procedure
1. Interpret the clause in clear terms.
2. Identify every compliance-related requirement.
3. Evaluate if the clause satisfies legal and regulatory obligations.
4. Flag any risks, missing documents, gaps, or violations.
5. Provide clear compliance recommendations.

### Output Format
Return the answer in this structure:

**1. Interpretation**  
A simple explanation of what the clause means.

**2. Compliance Risks**  
- Itemized bullet list of potential risks  
- Cite the specific problematic wording

**3. Missing Requirements**  
List permits, NOCs, statutory filings, approvals, supporting documents.

**4. Recommendations**  
Clear actionable steps to make the clause compliant.
""",
"finance": """
You are a **Financial Terms Analysis Agent** specializing in monetary clauses in legal contracts.

### Your Objectives
- Extract and interpret financial obligations, penalties, fees, deposits, rent, consideration amounts, etc.
- Detect vague or risky financial language.
- Identify payment timelines, interest rates, liabilities, default penalties.
- Flag inconsistencies that could create financial disputes.

### Your Analysis Procedure
1. Break down every financial term and obligation.
2. Identify unclear, missing, or high-risk monetary conditions.
3. Detect obligations tied to taxes, duties, maintenance, reimbursements.
4. Highlight risk exposure for both parties.

### Output Format

**1. Financial Breakdown**  
List all monetary elements in the clause.

**2. Financial Risks**  
- Vague or missing payment terms  
- Potential liabilities  
- Ambiguous obligations

**3. Inconsistencies**  
Compare obligations mentioned vs. implied vs. missing.

**4. Recommendations**  
Financial clarity and risk-reduction steps.
""", 
"legal": """
You are a **Legal Interpretation Agent** trained to read, analyze, and break down contractual clauses for accuracy, enforceability, and legal soundness.

### Your Objectives
- Interpret legal meaning precisely but explain simply.
- Identify rights, duties, liabilities of each party.
- Detect loopholes, ambiguous phrasing, unenforceable terms.
- Identify legal risks or missing elements required for a valid clause.

### Your Analysis Procedure
1. Precisely interpret the clause in plain English.
2. Identify legal obligations of all parties.
3. Detect ambiguous, contradictory, or unenforceable language.
4. Spot missing legal components essential for validity.
5. Highlight legal risks.

### Output Format
Respond in this structured format:

**1. Legal Interpretation**  
Explain the meaning clearly.

**2. Obligations Identified**  
- Party A responsibilities  
- Party B responsibilities  
- Conditions, timelines, dependencies

**3. Legal Issues / Risks**  
List loopholes, unclear terms, unenforceability problems.

**4. Missing Legal Elements**  
Documents, approvals, signatures, timelines, etc.

**5. Recommendations**  
Add clarity, precision, enforceability.
""",
"operations": """
You are an **Operations Workflow Analysis Agent** specializing in identifying deliverables, timelines, processes, and practical execution steps in a contract.

### Your Objectives
- Extract all operational duties and workflows.
- Identify deliverables, timelines, responsibilities, and dependencies.
- Detect vague instructions or missing execution details.
- Highlight operational risks or inconsistencies.

### Your Analysis Procedure
1. Break the clause into operational actions.
2. Identify who is responsible for what.
3. Detect missing timelines, unclear deliverables, or contradictions.
4. Highlight operational bottlenecks or risk-prone areas.

### Output Format

**1. Operational Breakdown**  
List the tasks, responsibilities, and processes.

**2. Missing Details**  
Timelines, deliverables, performance measures, reporting steps.

**3. Risks & Ambiguities**  
Operational failure points.

**4. Recommendations**  
Clear actions to improve operational clarity.
"""
}

In [20]:
def analyze_clause_with_agent(agent_type: str, clause_text: str):
    """Send one clause to the model under the chosen agent's instructions.

    Args:
        agent_type: Key into AGENT_PROMPTS selecting the agent prompt.
        clause_text: Text of the clause to analyse.

    Returns:
        The ``content`` attribute of the object returned by ``llm.invoke``.

    Raises:
        ValueError: If ``agent_type`` is not a key of AGENT_PROMPTS.
    """
    try:
        agent_instructions = AGENT_PROMPTS[agent_type]
    except KeyError:
        raise ValueError(f"Unknown agent: {agent_type}") from None

    # Agent instructions first, then the clause between two '---' rules, then the closing cue
    full_prompt = "".join([
        agent_instructions,
        "\n\n---\nCLAUSE TO ANALYZE:\n",
        clause_text,
        "\n---\nProvide your structured analysis below.",
    ])

    return llm.invoke(full_prompt).content


In [22]:
# Smoke test: run the "legal" agent on the sample clause and print the model's reply
analysis_test = analyze_clause_with_agent("legal", clauses[sample_heading])
print(analysis_test)

**1. Legal Interpretation**
The given clause pertains to the registration of immovable property and the necessary documents required for such registration. It mentions the need for a No Objection Certificate (NOC) from local authorities if the land is being converted into a house site without an approved layout. Additionally, it lists various types of compulsory registrable documents, including gifts, leases, and other non-testamentary instruments that affect the rights, title, or interest in immovable property. These documents must be duly filled, signed, and registered to be considered valid.

**2. Obligations Identified**
- **Party A (Seller/Transferor) Responsibilities**: 
  - To obtain a NOC from local authorities if the land is being converted into a house site without an approved layout.
  - To provide duly filled and signed Patta transfer application.
  - To ensure all necessary documents for registration are provided and are in order.
- **Party B (Buyer/Transferee) Responsibil

## Week-2 Tasks

### 1. Developing a Planning module to handle multi-agent collaboration

In [33]:
import json
import re

def extract_json(text):
    """
    Extract the first valid JSON object from the LLM response.

    The whole response is tried first, so a reply that is pure JSON is
    returned as parsed. Otherwise each ``{`` in the text is tried, in order,
    as the start of an object and the first one that decodes is returned.
    Surrounding prose, Markdown code fences and anything after the object
    are ignored.

    Args:
        text: Raw model output.

    Returns:
        The parsed JSON value (always a ``dict`` when it had to be pulled out
        of surrounding text).

    Raises:
        ValueError: If nothing in ``text`` decodes as JSON.
    """
    try:
        # Try direct load
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # If the model added extra text, decode from each '{' in turn. raw_decode
    # stops at the end of the object, so trailing text or a second object does
    # not break parsing the way a greedy "first { to last }" span does.
    decoder = json.JSONDecoder()
    brace = text.find("{")
    while brace != -1:
        try:
            parsed, _end = decoder.raw_decode(text, brace)
            return parsed
        except json.JSONDecodeError:
            brace = text.find("{", brace + 1)

    raise ValueError("No valid JSON found in model output:\n" + text)


In [23]:
from langchain_groq import ChatGroq

# Second Groq chat model, set aside for the planning step.
# NOTE(review): no cell shown here uses `planner_llm` — planning_module calls `llm`; confirm which is intended.
planner_llm = ChatGroq(
    model="llama-3.1-8b-instant",   # fast + perfect for planning
    api_key=os.getenv("GROQ_API_KEY")
)

In [24]:
# Prompt template for the planning step; the clause is meant to replace the literal {{CLAUSE}} marker.
# NOTE(review): no cell shown here reads PLANNING_PROMPT — planning_module builds its own prompt inline.
PLANNING_PROMPT = """
You are a Planning Controller Agent.

Your tasks:
- Read the contract clause.
- Decide which specialized agents are needed:
    - compliance
    - finance
    - legal
    - operations
- Give the decision in a structured JSON plan.

The JSON must be in this format:

{
  "agents": ["legal", "compliance"],
  "reason": "Short explanation of why these agents are needed.",
  "steps": [
      "Run legal agent for interpretation",
      "Run compliance agent for regulatory validation"
  ]
}

Now read the clause and generate a plan.

CLAUSE:
{{CLAUSE}}
"""


In [34]:
import json

def planning_module(clause_text):
    """Ask the model which agents should analyse a clause.

    The clause is embedded in a fixed instruction prompt that requests a JSON
    plan with "agents", "reason" and "steps" keys. The reply is parsed with
    extract_json.

    NOTE(review): this calls the module-level `llm`; `planner_llm` is defined
    in the notebook but is not used here — confirm which model is intended.

    Args:
        clause_text: Text of the clause to plan for.

    Returns:
        Whatever extract_json parses out of the model's reply.
    """
    planning_prompt = f"""
You are a Contract Planning Module.

Read the clause below and decide which AI agents must be activated.

Return ONLY a JSON in this exact structure:

{{
  "agents": ["legal", "compliance"],
  "reason": "why these agents are needed",
  "steps": [
      "analyze with legal",
      "analyze with compliance"
  ]
}}

Clause:
\"\"\"{clause_text}\"\"\"
"""

    raw_reply = llm.invoke(planning_prompt).content
    return extract_json(raw_reply)



In [35]:
# Use the sample clause picked earlier as the planner's input
sample_clause = clauses[sample_heading]  # or choose from your parsed dict

# Ask the planning step which agents to run; the bare name below displays the parsed plan
plan = planning_module(sample_clause)
plan

{'agents': ['legal', 'compliance'],
 'reason': 'The clause deals with legal documents, registration, and property transfer, which requires analysis by legal and compliance agents to ensure adherence to regulations and laws.',
 'steps': ['analyze with legal', 'analyze with compliance']}

In [36]:
def run_agents_on_clause(clause_text, plan):
    """Run every agent named in the plan against a single clause.

    Args:
        clause_text: Text of the clause to analyse.
        plan: Mapping whose "agents" entry lists the agent names to run.

    Returns:
        dict mapping each agent name to the value returned by
        analyze_clause_with_agent for that agent.

    Raises:
        KeyError: If ``plan`` has no "agents" entry.
    """
    results = {}

    for agent in plan["agents"]:
        # Progress marker; the bullet was stored as mis-decoded bytes and is restored to the real emoji
        print(f"\n🔹 Running agent: {agent} ...")

        results[agent] = analyze_clause_with_agent(agent, clause_text)

    return results


In [37]:
def analyze_clause_pipeline(clause_text):
    """Plan, then run the planned agents, for a single clause.

    Progress is printed at each step. The status markers were stored as
    mis-decoded bytes and are restored to the real emoji here.

    Args:
        clause_text: Text of the clause to analyse.

    Returns:
        dict with two keys: "plan" (the value returned by planning_module)
        and "agents" (the value returned by run_agents_on_clause).
    """
    print("📌 Step 1: Planning...")
    plan = planning_module(clause_text)
    print("➡ Plan:", plan)

    print("\n📌 Step 2: Running agents...")
    agent_outputs = run_agents_on_clause(clause_text, plan)

    print("\n📌 Step 3: Final Combined Output Ready.")
    return {
        "plan": plan,
        "agents": agent_outputs
    }


In [38]:
# End-to-end run on the sample clause; the bare name below displays the combined plan and agent outputs
result = analyze_clause_pipeline(sample_clause)
result

ðŸ“Œ Step 1: Planning...
âž¡ Plan: {'agents': ['legal', 'compliance'], 'reason': 'The clause deals with legal documents, registration, and property rights, requiring analysis by legal and compliance agents to ensure adherence to regulations and laws.', 'steps': ['analyze with legal', 'analyze with compliance']}

ðŸ“Œ Step 2: Running agents...

ðŸ”¹ Running agent: legal ...

ðŸ”¹ Running agent: compliance ...

ðŸ“Œ Step 3: Final Combined Output Ready.


{'plan': {'agents': ['legal', 'compliance'],
  'reason': 'The clause deals with legal documents, registration, and property rights, requiring analysis by legal and compliance agents to ensure adherence to regulations and laws.',
  'steps': ['analyze with legal', 'analyze with compliance']},
 'agents': {'legal': '**1. Legal Interpretation**\nThe given clause pertains to the requirement of a No Objection Certificate (NOC) from local authorities for the registration of land converted into a house site without an approved layout. It also lists various types of documents that are compulsorily registrable, including gifts, leases, and assignments of immovable property, among others. These documents must be duly filled, signed, and meet specific criteria such as value thresholds to be considered registrable.\n\n**2. Obligations Identified**\n- **Party A Responsibilities**: Obtain NOC from local authorities for land conversion, prepare and sign necessary documents for registration (e.g., patta