# 🚀 Autonomous Insurance Claims Processing Agent

This notebook implements a lightweight rule-based insurance claims agent that:

- Extracts key fields from FNOL (First Notice of Loss) documents
- Validates mandatory fields
- Classifies and routes claims
- Provides reasoning for routing decisions


## 📦 Step 1: Install and Import Dependencies


In [2]:
# Install if needed
# !pip install pypdf

from pypdf import PdfReader
import json


## 🔎 Step 2: Extract Form Fields from PDF


In [4]:
def extract_form_fields(file_path):
    """Read the form fields of a PDF into a plain dictionary.

    Args:
        file_path: Path of the PDF handed to ``PdfReader``.

    Returns:
        A dict keyed by form-field name. Each entry is the field's ``"/V"``
        value, except that ``"/Yes"`` is stored as ``True`` and ``"/Off"``
        as ``False`` (presumably checkbox states -- confirm against the form).
        An empty dict is returned when the reader reports no fields.
    """

    def _normalise(raw):
        # Equality checks (not a dict lookup) so unhashable values still work.
        if raw == "/Yes":
            return True
        if raw == "/Off":
            return False
        return raw

    pdf_fields = PdfReader(file_path).get_fields()
    if not pdf_fields:
        return {}

    return {
        name: _normalise(entry.get("/V"))
        for name, entry in pdf_fields.items()
    }


## 🧰 Step 3: Helper Functions


In [5]:
def safe_int(val):
    """Convert *val* to an ``int``, or return ``None`` if it cannot be converted.

    Args:
        val: Any value; typically a form-field string or ``None``.

    Returns:
        ``int(val)`` on success, otherwise ``None``.
    """
    try:
        return int(val)
    # Only the failures int() raises for unconvertible input are treated as
    # "no value": TypeError (e.g. None), ValueError (e.g. "" or "12.5"),
    # OverflowError (e.g. float infinity). A bare ``except`` would also hide
    # KeyboardInterrupt/SystemExit and unrelated programming errors.
    except (TypeError, ValueError, OverflowError):
        return None


def is_invalid(value):
    """Tell whether *value* counts as missing.

    A value is missing when it is ``None`` or when its string form, stripped
    of surrounding whitespace and lower-cased, is empty, ``"n/a"`` or ``"na"``.
    """
    if value is None:
        return True

    normalized = str(value).strip().lower()
    return normalized in ("", "n/a", "na")


## 📝 Step 4: Dynamic Description Extraction


In [6]:
def extract_description(form_data):
    """Pick the first string field that looks like an incident description.

    The values of *form_data* are scanned in order. The first string value
    containing any of the trigger words (case-insensitive substring match)
    is returned unchanged.

    Returns:
        The matching string, or ``None`` when no string value matches.
    """
    trigger_words = (
        "collision",
        "damage",
        "accident",
        "scratched",
        "injury",
        "injured",
        "fraud",
        "staged",
        "inconsistent",
    )

    for candidate in form_data.values():
        if not isinstance(candidate, str):
            continue
        lowered = candidate.lower()
        for word in trigger_words:
            if word in lowered:
                return candidate

    return None


## 📊 Step 5: Map Required Fields into Structured Format


In [8]:
def map_required_fields(form_data):
    """Reshape the flat form-field dict into the nested claim structure.

    Args:
        form_data: Mapping of raw form-field names to values.

    Returns:
        A dict with the sections ``policyInformation``,
        ``incidentInformation``, ``involvedParties``, ``assetDetails`` and
        ``otherMandatoryFields``. Fields absent from *form_data* come out as
        ``None``; ``claimType`` is always ``"Vehicle"``.

    NOTE(review): ``effectiveDates`` and the incident ``date`` both read
    ``"Text3"``, and ``estimatedDamage`` and ``initialEstimate`` both read
    ``"Text45"`` -- confirm that this sharing is intended.
    """
    # The description is located by keyword search, not by field name.
    narrative = extract_description(form_data)

    lookup = form_data.get
    insured_name = lookup("NAME OF INSURED First Middle Last")
    loss_date = lookup("Text3")
    damage_estimate = safe_int(lookup("Text45"))

    policy_section = {
        "policyNumber": lookup("POLICY NUMBER"),
        "policyholderName": insured_name,
        "effectiveDates": loss_date,
    }
    incident_section = {
        "date": loss_date,
        "time": lookup("Text4"),
        "location": lookup("STREET LOCATION OF LOSS"),
        "description": narrative,
    }
    parties_section = {
        "claimant": insured_name,
        "thirdParties": lookup("NAME  ADDRESSRow1_2"),
        "contactDetails": lookup("PHONE  CELL HOME BUS PRIMARY"),
    }
    asset_section = {
        "assetType": lookup("TYPE BODY"),
        "assetID": lookup("PLATE NUMBER"),
        "estimatedDamage": damage_estimate,
    }
    other_section = {
        "claimType": "Vehicle",
        "attachments": lookup(
            "REMARKS ACORD 101 Additional Remarks Schedule may be attached if more space is required"
        ),
        "initialEstimate": damage_estimate,
    }

    return {
        "policyInformation": policy_section,
        "incidentInformation": incident_section,
        "involvedParties": parties_section,
        "assetDetails": asset_section,
        "otherMandatoryFields": other_section,
    }


## ✅ Step 6: Validate Mandatory Fields


In [9]:
def check_missing_fields(mapped):
    """List the mandatory fields whose values are missing.

    Args:
        mapped: Nested claim dict containing the sections looked up below.

    Returns:
        The names of the mandatory fields for which ``is_invalid`` is true,
        in the fixed order of the table below.
    """
    # (field name, section that holds it) -- order defines the output order.
    mandatory = (
        ("policyNumber", "policyInformation"),
        ("policyholderName", "policyInformation"),
        ("date", "incidentInformation"),
        ("location", "incidentInformation"),
        ("description", "incidentInformation"),
        ("estimatedDamage", "assetDetails"),
        ("claimType", "otherMandatoryFields"),
    )

    return [
        field_name
        for field_name, section in mandatory
        if is_invalid(mapped[section][field_name])
    ]


## 🚦 Step 7: Routing Logic (Priority-Based Workflow)


In [10]:
def route_claim(mapped, missing):
    """Choose a route for a claim and give the reason.

    Rules are applied in priority order:

    1. Any missing mandatory field  -> ``"Manual Review"``
    2. Suspicious wording           -> ``"Investigation Flag"``
    3. Injury wording               -> ``"Specialist Queue"``
    4. Estimate below 25000         -> ``"Fast-track"``
    5. Otherwise                    -> ``"Manual Review"``

    Args:
        mapped: Nested claim dict; the incident description and the
            estimated damage are read from it.
        missing: Collection of missing field names; any truthy value
            triggers rule 1.

    Returns:
        A ``(route, reason)`` tuple of strings.
    """
    narrative = mapped["incidentInformation"]["description"]
    narrative = narrative.lower() if narrative else ""

    amount = mapped["assetDetails"]["estimatedDamage"]
    if not amount:
        # A missing or zero estimate is treated as 0.
        amount = 0

    if missing:
        return "Manual Review", "Mandatory fields missing."

    # Keyword rules, highest priority first: (keywords, route, reason).
    keyword_rules = (
        (
            ("fraud", "staged", "inconsistent"),
            "Investigation Flag",
            "Suspicious keywords detected.",
        ),
        (
            ("injury", "injured"),
            "Specialist Queue",
            "Injury-related claim.",
        ),
    )
    for keywords, route, reason in keyword_rules:
        for keyword in keywords:
            if keyword in narrative:
                return route, reason

    if amount < 25000:
        return "Fast-track", "Estimated damage below 25,000."

    return "Manual Review", "High damage value."


## ⚙️ Step 8: Main Processing Function


In [11]:
def process_claim(file_path):
    """Run the full pipeline on one PDF: extract, map, validate, route.

    Args:
        file_path: Path of the PDF to process.

    Returns:
        A dict with the keys ``extractedFields`` (the mapped claim),
        ``missingFields`` (names of missing mandatory fields),
        ``recommendedRoute`` and ``reasoning``.
    """
    structured = map_required_fields(extract_form_fields(file_path))
    absent = check_missing_fields(structured)
    recommended, explanation = route_claim(structured, absent)

    outcome = {}
    outcome["extractedFields"] = structured
    outcome["missingFields"] = absent
    outcome["recommendedRoute"] = recommended
    outcome["reasoning"] = explanation
    return outcome


## 🧪 Step 9: Test the Agent


In [13]:
# Demo cell: run the whole pipeline on one sample PDF (relative path) and
# pretty-print the resulting decision dict as JSON.
file_path = "claim4_injury.pdf"   # change file name for testing

# process_claim returns the mapped fields, missing fields, route and reasoning.
result = process_claim(file_path)
print(json.dumps(result, indent=4))


{
    "extractedFields": {
        "policyInformation": {
            "policyNumber": "POL-INJ-4040",
            "policyholderName": "RAHUL MEHTA",
            "effectiveDates": "02/09/2026 "
        },
        "incidentInformation": {
            "date": "02/09/2026 ",
            "time": "07:20",
            "location": "DWARAKA SECTOR 12 MAIN ROAD",
            "description": "Vehicle collided with another car at intersection.\rDriver sustained minor injury to left arm.\rPassenger reported neck pain after accident.\rMedical assistance was required at the scene.\r"
        },
        "involvedParties": {
            "claimant": "RAHUL MEHTA",
            "thirdParties": "Sarah Miller",
            "contactDetails": "987654321"
        },
        "assetDetails": {
            "assetType": "suv",
            "assetID": "DL09AA1234",
            "estimatedDamage": 32000
        },
        "otherMandatoryFields": {
            "claimType": "Vehicle",
            "attachments": null,
   

In [20]:
# Demo cell: run the whole pipeline on another sample PDF (relative path) and
# pretty-print the resulting decision dict as JSON.
file_path = "claim3_investigation.pdf"   

# process_claim returns the mapped fields, missing fields, route and reasoning.
result = process_claim(file_path)
print(json.dumps(result, indent=4))


{
    "extractedFields": {
        "policyInformation": {
            "policyNumber": "POL-INV-3030",
            "policyholderName": "ANITA SHARMA",
            "effectiveDates": "01/14/2026 "
        },
        "incidentInformation": {
            "date": "01/14/2026 ",
            "time": "08:15",
            "location": "LINK ROAD NEAR METRO STATION",
            "description": "Rear -end collision reported at traffic signal.\rClaim circumstances appear inconsistent based on initial review.\rPossibility of staged accident cannot be ruled out.\rDamage pattern seems unusual.\r"
        },
        "involvedParties": {
            "claimant": "ANITA SHARMA",
            "thirdParties": "Sarah Miller",
            "contactDetails": "9988012345"
        },
        "assetDetails": {
            "assetType": "sedan",
            "assetID": "MH02AB9988",
            "estimatedDamage": 32000
        },
        "otherMandatoryFields": {
            "claimType": "Vehicle",
            "attachm

In [19]:
# Demo cell: run the whole pipeline on another sample PDF (relative path) and
# pretty-print the resulting decision dict as JSON.
file_path = "claim2_manual_review.pdf"   

# process_claim returns the mapped fields, missing fields, route and reasoning.
result = process_claim(file_path)
print(json.dumps(result, indent=4))


{
    "extractedFields": {
        "policyInformation": {
            "policyNumber": null,
            "policyholderName": "Verma",
            "effectiveDates": "01/14/2026 "
        },
        "incidentInformation": {
            "date": "01/14/2026 ",
            "time": "05:45",
            "location": "parking area inorbit mall",
            "description": "vehicle was scratched while parked.\runknown vehicle likely caused side panel damage. no injuries reported.\r"
        },
        "involvedParties": {
            "claimant": "Verma",
            "thirdParties": "Sarah Miller",
            "contactDetails": "9988776655"
        },
        "assetDetails": {
            "assetType": "hatchback",
            "assetID": "TS09XY7788",
            "estimatedDamage": null
        },
        "otherMandatoryFields": {
            "claimType": "Vehicle",
            "attachments": null,
            "initialEstimate": null
        }
    },
    "missingFields": [
        "policyNumber",
 

In [18]:
# Demo cell: run the whole pipeline on another sample PDF (relative path) and
# pretty-print the resulting decision dict as JSON.
file_path = "claim1_fasttrack.pdf"   


# process_claim returns the mapped fields, missing fields, route and reasoning.
result = process_claim(file_path)
print(json.dumps(result, indent=4))


{
    "extractedFields": {
        "policyInformation": {
            "policyNumber": "N/A",
            "policyholderName": "David Alan Thompson",
            "effectiveDates": "02/02/2026 "
        },
        "incidentInformation": {
            "date": "02/02/2026 ",
            "time": "10:30",
            "location": "Intersection of 5th Ave & Pine Street",
            "description": "Minor rear-end collision at a traffic light.Insured\u2019s vehicle was struck at low speed by another vehicle.\rNo injuries reported. Damage limited to rear bumper."
        },
        "involvedParties": {
            "claimant": "David Alan Thompson",
            "thirdParties": "Sarah Miller",
            "contactDetails": "(206) 555-2147"
        },
        "assetDetails": {
            "assetType": "Sedan",
            "assetID": "WA-AB1234",
            "estimatedDamage": 18000
        },
        "otherMandatoryFields": {
            "claimType": "Vehicle",
            "attachments": null,
     