In [1]:
from groq import Groq
from dotenv import load_dotenv
import os
import pdfplumber


In [2]:
load_dotenv()
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
print("Groq client ready")


Groq client ready


In [3]:
def call_llm(text):
    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",  # current working model
        messages=[{"role": "user", "content": text}],
        temperature=0
    )
    return response.choices[0].message.content


In [4]:
def load_contract(path):
    ext = os.path.splitext(path)[1].lower()

    if ext == ".txt":
        with open(path, "r", encoding="utf-8") as f:
            return f.read()

    elif ext == ".pdf":
        text = []
        with pdfplumber.open(path) as pdf:
            for page in pdf.pages:
                page_text = page.extract_text()
                if page_text:
                    text.append(page_text)
        return "\n".join(text)

    else:
        raise ValueError("Unsupported file type")


In [5]:
def chunk_text(text, chunk_size=8000, overlap=500):
    chunks = []
    start = 0

    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        start = end - overlap

    return chunks


In [6]:
def summarize_chunk(chunk):
    prompt = f"""
Summarize the following contract text into concise factual notes.
- Extract clauses, obligations, payments, timelines, responsibilities
- Do NOT analyze
- Do NOT format
- Keep it short and factual

Text:
{chunk}
"""
    return call_llm(prompt)


In [7]:
def batch_reduce(summaries, batch_size=5):
    reduced = []

    for i in range(0, len(summaries), batch_size):
        batch = summaries[i:i+batch_size]
        batch_text = "\n\n".join(batch)

        prompt = f"""
Combine the following notes into a concise unified summary.
Do not lose important details.

Notes:
{batch_text}
"""
        reduced.append(call_llm(prompt))

    return reduced


In [8]:
REVIEW_SECTIONS = [
    "Scope",
    "Duration",
    "Deliverables",
    "Location / Geographical Scope",
    "Timelines",
    "Technical Architecture",
    "Scope Assumptions and Exclusions",
    "Service Level Agreements (SLAs)",
    "Project Structure, Roles and Responsibilities",
    "Handling Changes to Baseline",
    "Commercial Model",
    "Billing Model",
    "Overhead Expenses",
    "Incentives / Disincentives",
    "Procurement and Licensing Model",
    "Operations Model",
    "Acceptance Criteria",
    "Performance Criteria",
    "Discretionary Hours / Development Effort",
    "Minimum Skill Set",
    "Resource Deployment / Replacement Guidelines",
    "Subcontracting / Consortium Guidelines",
    "Handholding Support (Post Go-Live)",
    "Ongoing Operations Support",
    "Technical SOPs and Training to Ops Team",
    "Training and Awareness of End Users",
    "Intellectual Property and Source Code Ownership",
    "Requirements / Support from Client",
    "Governance and Reporting",
    "Contract Exit Mechanism",
    "Payment Terms",
    "Non-Functional Requirements"
]


In [9]:
def generate_contract_review(notes):
    sections = "\n".join(
        f"{i+1}. {section}"
        for i, section in enumerate(REVIEW_SECTIONS)
    )

    prompt = f"""
You are a contract review assistant.

Generate a contract review strictly following internal PMO review style.

Rules:
- Follow the exact section order.
- For EACH section include:
  Status: Covered / Partially Covered / Not Covered / NA
  Impact: Low / Medium / High / NA
  Key Points: bullet points
  Remarks: only if applicable
- If information is missing, write "Not specified in the contract".
- Do NOT provide legal advice.

Sections:
{sections}

Contract notes:
{notes}
"""
    return call_llm(prompt)


In [14]:
# 1. Load contract
contract_text = load_contract("contract.pdf")
print("Contract loaded:", len(contract_text), "characters")

Contract loaded: 17691 characters


In [15]:
# 2. Chunk
chunks = chunk_text(contract_text)
print("Chunks:", len(chunks))

Chunks: 3


In [16]:
# 3. FIRST PASS (compress chunks)
chunk_summaries = [summarize_chunk(chunk) for chunk in chunks]
print("Chunk summaries done")

Chunk summaries done


In [17]:
# 4. REDUCTION PASSES
level_2 = batch_reduce(chunk_summaries)
level_3 = batch_reduce(level_2)


In [18]:
combined_notes = "\n\n".join(level_3)
print("Final notes size:", len(combined_notes))

Final notes size: 2736


In [19]:
# 5. FINAL REVIEW
final_review = generate_contract_review(combined_notes)
print(final_review)

**Contract Review**

**1. Scope**

- Status: Covered
- Impact: Medium
- Key Points:
  • The parties will engage in the internet-based sale and promotion of tour products.
  • Party B will provide business consultancy and technical services to Party A and its subsidiaries.
  • Exclusive cooperation between Party A and Party B.
- Remarks: The scope of the cooperation is clearly defined, but it may be beneficial to include more specific details about the services to be provided by Party B.

**2. Duration**

- Status: Covered
- Impact: Medium
- Key Points:
  • The term of cooperation commences from the execution date.
  • The term of cooperation ends on the expiration date of the operation term of Party B.
  • Termination prior to expiration requires specific circumstances, including bankruptcy, force majeure, or failure to provide services for more than three consecutive years.
- Remarks: The duration of the cooperation is clearly defined, but it may be beneficial to include a specific en