In [2]:
from groq import Groq
from dotenv import load_dotenv
import os
import pdfplumber
import pandas as pd
import re

In [3]:
load_dotenv()
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
print("Groq client ready")


Groq client ready


In [4]:
def call_llm(text):
    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",  # current working model
        messages=[{"role": "user", "content": text}],
        temperature=0
    )
    return response.choices[0].message.content


In [5]:
def load_contract(path):
    ext = os.path.splitext(path)[1].lower()

    if ext == ".txt":
        with open(path, "r", encoding="utf-8") as f:
            return f.read()

    elif ext == ".pdf":
        text = []
        with pdfplumber.open(path) as pdf:
            for page in pdf.pages:
                page_text = page.extract_text()
                if page_text:
                    text.append(page_text)
        return "\n".join(text)

    else:
        raise ValueError("Unsupported file type")


In [6]:
def chunk_text(text, chunk_size=8000, overlap=500):
    chunks = []
    start = 0

    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        start = end - overlap

    return chunks


In [7]:
CONTRACT_GUIDELINES = """

1. Scope
- Detailed functional scope along with detailed Business Requirement Specifications (BRS) should be included in every contract.
- Scope should be clear, unambiguous, and quantifiable to the extent possible.
- For packaged solutions, desired functionality with modules and sub-modules should be mentioned.
- For custom-built solutions, prioritized functional BRS should be included as part of annexures.
- For services contracts, services should be clearly defined including asset coverage, service window, service levels, and locations.
- A reasonable margin (e.g., 15%) for Change Requests (CRs) should be defined.

2. Duration
- Deadline for project completion should be clearly mentioned.
- AMC for a minimum of 3 years should accompany product implementation contracts.
- A post go-live support period of at least 4 weeks should be included.

3. Deliverables
- Expected deliverables should be clearly specified.
- Timelines for deliverables should be defined.
- Responsibility for delivery should be clearly mentioned.
- Acceptance and performance criteria for deliverables should be specified.

4. Location / Geographical Scope
- Locations covered under the contract should be clearly defined.
- Level of coverage across locations should be specified.

5. Timelines
- A high-level project plan with timelines and responsibilities should be included.

6. Technical Architecture
- Proposed technical architecture should be mentioned for technical implementations.
- Architecture should cover front end, servers, platform, databases, and middleware.

7. Scope Assumptions and Exclusions
- Any assumptions or exclusions should be clearly specified.

8. Service Level Agreements (SLAs)
- Expected service levels for each service should be defined.
- SLA monitoring frequency and mechanism should be specified.
- Bonus and penalty criteria related to SLAs should be mentioned.

9. Project Structure, Roles and Responsibilities
- Ownership of deliverables should be clearly defined.
- Roles and responsibilities of vendor, business, and IT stakeholders should be documented.

10. Handling Changes to Baseline
- A clearly defined change management process should be included.

11. Commercial Model
- Commercial model (fixed price, T&M, hybrid) should be specified.
- Mechanism for handling scope changes should be predefined.
- Man-month rates should be defined where applicable.

12. Billing Model
- Billing milestones or billing cycles should be clearly defined.
- Payment cycles should align with milestones and progress.
- Payments should be linked to acceptance of deliverables where possible.

13. Overhead Expenses
- Mechanism for managing vendor out-of-pocket expenses (OPEs) should be specified.
- Applicable limits should be mentioned.

14. Incentives / Disincentives
- Bonus and penalty criteria should be specified.
- Penalties should be capped at mutually agreed levels (e.g., 10%).

15. Procurement and Licensing Model
- Responsibility for procurement of hardware and software licenses should be defined.

16. Operations Model
- Service delivery model should be defined for ongoing operations.

17. Acceptance and Performance Criteria
- Acceptance criteria and process should be clearly specified.
- Acceptance should be linked to vendor payments.

18. Performance Criteria
- Criteria for measuring project performance should be defined.

19. Discretionary Hours / Development Effort
- Discretionary or buffer hours should be clearly defined.

20. Minimum Skill Set
- Expected skill sets for vendor resources should be defined.

21. Resource Deployment Model
- Onsite deployment should be preferred.
- Minimum onsite resource requirements should be specified where remote work is used.

22. Subcontracting / Consortium Guidelines
- Subcontracting should be avoided or limited (preferably less than 30%).

23. Handholding Support (Post Go-Live)
- Post go-live handholding support period should be specified.
- Handholding support should be at least one month and separate from AMC.

24. Ongoing Operations Support
- Functional and technical support model should be defined.

25. Technical SOPs and Training to Operations Team
- High-level transition and training plan should be included.
- Number of trainings and training man-days should be specified.

26. Training and Awareness of End Users
- High-level end-user training plan should be included.
- Training coverage and effort should be specified.

27. Intellectual Property and Source Code Ownership
- Ownership of IP and source code should be clearly mentioned for custom-built solutions.

28. Resource Replacement / Transitioning Guidelines
- Vendor should avoid replacing project resources.
- If required, a clear transition and change management plan should be defined.

29. Requirements / Support from Client
- Vendor should specify required client support such as:
  - Hardware procurement
  - License procurement
  - Dedicated project manager
  - Logistics and operational support

30. Governance (Risk, Issue, Change and Quality Management) and Reporting
- Governance and reporting should follow PMO guidelines.

31. Organization Change Management and Communication
- Vendor should support organization change management as per PMO guidelines.

32. Contract Exit Mechanism
- Contract exit terms and conditions should be clearly defined.
"""


In [8]:
def summarize_chunk(chunk, guidelines):
    prompt = f"""
You are a contract intelligence assistant.

Contract Review Guidelines:
{guidelines}

Task:
Summarize the following contract text STRICTLY in the context of the guidelines.

Rules:
- Only extract information relevant to the guidelines
- Map clauses to guideline-related topics where possible
- If something is unclear or incomplete, mention it explicitly
- Do NOT analyze risk
- Do NOT provide opinions
- Keep output concise and factual

Contract Text:
{chunk}

Output:
Concise factual notes aligned to the guidelines.
"""
    return call_llm(prompt)


In [9]:
def batch_reduce(summaries, batch_size=5):
    reduced = []

    for i in range(0, len(summaries), batch_size):
        batch = summaries[i:i+batch_size]
        batch_text = "\n\n".join(batch)

        prompt = f"""
Combine the following notes into a concise unified summary.
Do not lose important details.

Notes:
{batch_text}
"""
        reduced.append(call_llm(prompt))

    return reduced


In [10]:
REVIEW_SECTIONS = [
    "Scope",
    "Duration",
    "Deliverables",
    "Location / Geographical Scope",
    "Timelines",
    "Technical Architecture",
    "Scope Assumptions and Exclusions",
    "Service Level Agreements (SLAs)",
    "Project Structure, Roles and Responsibilities",
    "Handling Changes to Baseline",
    "Commercial Model",
    "Billing Model",
    "Overhead Expenses",
    "Incentives / Disincentives",
    "Procurement and Licensing Model",
    "Operations Model",
    "Acceptance Criteria",
    "Performance Criteria",
    "Discretionary Hours / Development Effort",
    "Minimum Skill Set",
    "Resource Deployment / Replacement Guidelines",
    "Subcontracting / Consortium Guidelines",
    "Handholding Support (Post Go-Live)",
    "Ongoing Operations Support",
    "Technical SOPs and Training to Ops Team",
    "Training and Awareness of End Users",
    "Intellectual Property and Source Code Ownership",
    "Requirements / Support from Client",
    "Governance and Reporting",
    "Contract Exit Mechanism",
    "Payment Terms",
    "Non-Functional Requirements"
]


In [11]:
def generate_contract_review(notes):
    sections = "\n".join(
        f"{i+1}. {section}"
        for i, section in enumerate(REVIEW_SECTIONS)
    )

    prompt = f"""
You are a contract review assistant.

Generate a contract review strictly following internal PMO review style.

Rules:
- Follow the exact section order.
- For EACH section include:
  Status: Covered / Partially Covered / Not Covered / NA
  Impact: Low / Medium / High / NA
  Key Points: bullet points
  Remarks: only if applicable
- If information is missing, write "Not specified in the contract".
- Base your assessment ONLY on the provided contract notes.
- Do NOT provide legal advice.

Sections:
{sections}

Contract notes:
{notes}
"""
    return call_llm(prompt)


In [12]:
# 1. Load contract
contract_text = load_contract("contract.pdf")
print("Contract loaded:", len(contract_text), "characters")

Contract loaded: 17691 characters


In [13]:
# 2. Chunk
chunks = chunk_text(contract_text)
print("Chunks:", len(chunks))

Chunks: 3


In [14]:
# 3. FIRST PASS (compress chunks)
chunk_summaries = [summarize_chunk(chunk, CONTRACT_GUIDELINES) 
                   for chunk in chunks]
print("Chunk summaries done")

Chunk summaries done


In [15]:
# 4. REDUCTION PASSES
level_2 = batch_reduce(chunk_summaries)
level_3 = batch_reduce(level_2)


In [16]:
combined_notes = "\n\n".join(level_3)
print("Final notes size:", len(combined_notes))

Final notes size: 5532


In [17]:
# 5. FINAL REVIEW
final_review = generate_contract_review(combined_notes)


In [None]:
# CHECKLIST_COLUMNS = [
#     "Clause Name",
#     "Clause Text",
#     "Risk Level",
#     "Issue Identified",
#     "Recommendation",
#     "Comments"
# ]

In [18]:
rows = []
sections = re.split(r"\*\*(\d+\.\s.+?)\*\*", final_review)

for i in range(1, len(sections), 2):
    header = sections[i].strip()
    body = sections[i + 1]

    clause_number, clause_title = header.split(".", 1)
    clause_number = clause_number.strip()
    clause_title = clause_title.strip()

    status = re.search(r"Status:\s*(.+)", body)
    impact = re.search(r"Impact:\s*(.+)", body)
    remarks = re.search(r"Remarks:\s*(.+)", body, re.DOTALL)

    key_points_match = re.search(r"Key Points:(.*?)- Remarks:", body, re.DOTALL)
    key_points = ""
    if key_points_match:
        key_points = "\n".join(
            line.replace("•", "").strip()
            for line in key_points_match.group(1).splitlines()
            if "•" in line
        )

    rows.append({
        "Clause Number": clause_number,
        "Clause Title": clause_title,
        "Status": status.group(1).strip() if status else "",
        "Impact": impact.group(1).strip() if impact else "",
        "Key Points": key_points,
        "Remarks": remarks.group(1).strip() if remarks else ""
    })


df = pd.DataFrame(rows)
df.to_csv("final_contract_review.csv", index=False)

print("✅ CSV generated: final_contract_review_version3"".csv")

✅ CSV generated: final_contract_review_version3.csv
