In [1]:
!pip install pandas

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import pandas as pd

df = pd.read_csv("../data/contract_evaluation_dataset_pavan-srinivas.csv")
df.head()

Unnamed: 0,contract_id,contract_text,expected_apr,expected_term,expected_monthly_payment,expected_penalty
0,1,This contract is entered between Prime Auto Fi...,9.75,36.0,825.0,late payment fee of $50
1,2,This leasing agreement between ABC Motors and ...,6.2,48.0,640.0,
2,3,This agreement is signed between DriveEasy Lea...,,24.0,510.0,early termination fee of $300
3,4,This auto loan contract between National Motor...,11.4,60.0,940.0,
4,5,The contract between CarHub Leasing and Priya ...,,36.0,720.0,late payment fee of $25


In [3]:
expected_output_schema = {
    "apr": None,
    "term_months": None,
    "monthly_payment": None,
    "penalty_clause": None
}


In [4]:
PROMPT_TEMPLATE = """
You are an information extraction system.

Extract ONLY the following fields from the contract text below:
- APR
- Term (in months)
- Monthly payment
- Penalty clause

Rules:
- Return output in valid JSON format only
- If a value is NOT explicitly mentioned, return null
- Do NOT infer, calculate, or assume any values
- Do NOT add extra fields
- Do NOT explain anything

Return JSON in this exact structure:
{{
  "apr": null,
  "term_months": null,
  "monthly_payment": null,
  "penalty_clause": null
}}

Contract text:
\"\"\"{contract_text}\"\"\"
"""


In [5]:
def extract_contract_fields(contract_text, llm_client):
    prompt = PROMPT_TEMPLATE.format(contract_text=contract_text)
    response = llm_client(prompt)
    return json.loads(response)

In [6]:
sample_df = df.sample(5, random_state=42)
sample_df

Unnamed: 0,contract_id,contract_text,expected_apr,expected_term,expected_monthly_payment,expected_penalty
27,28,This auto lease agreement between CarEase Ltd ...,,30.0,575.0,
15,16,This auto financing agreement between National...,,72.0,890.0,
23,24,This agreement between DriveSmart Leasing and ...,,12.0,390.0,
17,18,This vehicle loan document between SpeedTrack ...,,48.0,,foreclosure penalty of $400
8,9,This lease contract signed by Urban Auto and K...,,18.0,495.0,early termination fee of $200


In [7]:
def dummy_llm_client(prompt):
    return """
    {
        "apr": null,
        "term_months": null,
        "monthly_payment": null,
        "penalty_clause": null
    }
    """


In [8]:
import json
results = []

for _, row in sample_df.iterrows():
    extracted = extract_contract_fields(
        row["contract_text"],
        dummy_llm_client
    )

    results.append({
        "contract_text": row["contract_text"],
        "llm_apr": extracted["apr"],
        "llm_term": extracted["term_months"],
        "llm_payment": extracted["monthly_payment"],
        "llm_penalty": extracted["penalty_clause"],
        "expected_apr": row["expected_apr"],
        "expected_term": row["expected_term"],
        "expected_payment": row["expected_monthly_payment"],
        "expected_penalty": row["expected_penalty"]
    })

results_df = pd.DataFrame(results)
results_df


Unnamed: 0,contract_text,llm_apr,llm_term,llm_payment,llm_penalty,expected_apr,expected_term,expected_payment,expected_penalty
0,This auto lease agreement between CarEase Ltd ...,,,,,,30.0,575.0,
1,This auto financing agreement between National...,,,,,,72.0,890.0,
2,This agreement between DriveSmart Leasing and ...,,,,,,12.0,390.0,
3,This vehicle loan document between SpeedTrack ...,,,,,,48.0,,foreclosure penalty of $400
4,This lease contract signed by Urban Auto and K...,,,,,,18.0,495.0,early termination fee of $200


In [9]:
def exact_match(llm_value, expected_value):
    return int(str(llm_value).strip() == str(expected_value).strip())

In [10]:
results_df["apr_match"] = results_df.apply(
    lambda r: exact_match(r["llm_apr"], r["expected_apr"]), axis=1
)

results_df["term_match"] = results_df.apply(
    lambda r: exact_match(r["llm_term"], r["expected_term"]), axis=1
)

results_df["payment_match"] = results_df.apply(
    lambda r: exact_match(r["llm_payment"], r["expected_payment"]), axis=1
)

results_df["penalty_match"] = results_df.apply(
    lambda r: exact_match(r["llm_penalty"], r["expected_penalty"]), axis=1
)

results_df


Unnamed: 0,contract_text,llm_apr,llm_term,llm_payment,llm_penalty,expected_apr,expected_term,expected_payment,expected_penalty,apr_match,term_match,payment_match,penalty_match
0,This auto lease agreement between CarEase Ltd ...,,,,,,30.0,575.0,,0,0,0,0
1,This auto financing agreement between National...,,,,,,72.0,890.0,,0,0,0,0
2,This agreement between DriveSmart Leasing and ...,,,,,,12.0,390.0,,0,0,0,0
3,This vehicle loan document between SpeedTrack ...,,,,,,48.0,,foreclosure penalty of $400,0,0,0,0
4,This lease contract signed by Urban Auto and K...,,,,,,18.0,495.0,early termination fee of $200,0,0,0,0
