In [1]:
# Peek at the raw file to see the header line and the first few records.
# The encoding is stated explicitly (the same one clean_csv_data uses) so the
# read does not depend on the platform's default locale encoding.
with open("Insurance_auto_data.csv", "r", encoding="utf-8") as f:
    lines = f.readlines()
lines[:5]

['CLAIM_ID,CLAIM_DATE,CUSTOMER_ID,CLAIM_AMOUNT,PREMIUM_COLLECTED,PAID_AMOUNT,CITY,REJECTION_REMARKS\n',
 'CLM100021,2025-04-01,CUST14285,10419.0,2198.59,6964.46,PUNE,\n',
 'CLM100013,2025-04-01,CUST26471,42468.0,8982.2,30119.67,GUWAHATI,\n',
 'CLM100099,2025-04-02,CUST29309,55897.0,1861.78,55657.15,GUWAHATI,\n',
 'CLM100044,2025-04-02,CUST30275,71785.0,13154.99,53629.3,PUNE,\n']

In [11]:
def clean_csv_data(file_path):
    """Load a CSV file into a list of cleaned row dictionaries.

    The first record of the file is used as the header. Every following
    record whose field count matches the header becomes one dict keyed by
    the (whitespace-stripped) header names; records with a different field
    count, including blank lines, are skipped.

    Cleaning applied to each kept row:
      * CLAIM_AMOUNT, PREMIUM_COLLECTED and PAID_AMOUNT are converted to
        float; a missing, empty or non-numeric value becomes 0.0.
      * Any remaining value that is an empty string becomes None.

    Parsing is done with the standard ``csv`` reader, so quoted fields that
    contain commas are kept as a single value instead of being split (which
    previously made the row fail the field-count check and get dropped).

    Args:
        file_path: Path of the UTF-8 encoded CSV file to read.

    Returns:
        list[dict]: One dict per kept record, in file order. An empty file
        yields an empty list.
    """
    import csv  # local import so this cell runs without a separate import cell

    numeric_columns = ('CLAIM_AMOUNT', 'PREMIUM_COLLECTED', 'PAID_AMOUNT')
    cleaned_data = []

    # newline='' is what the csv module asks for so that it can handle line
    # endings (including ones inside quoted fields) itself.
    with open(file_path, 'r', encoding='utf-8', newline='') as file:
        reader = csv.reader(file)

        header_record = next(reader, None)
        if header_record is None:
            # Completely empty file: nothing to parse.
            return cleaned_data
        headers = [h.strip() for h in header_record]

        for record in reader:
            values = [v.strip() for v in record]
            if len(values) != len(headers):
                continue  # malformed or blank record

            row = dict(zip(headers, values))

            for key in numeric_columns:
                raw_value = row.get(key)  # None when the column is absent
                try:
                    row[key] = float(raw_value) if raw_value else 0.0
                except ValueError:
                    # Non-numeric text in a numeric column falls back to 0.0.
                    row[key] = 0.0

            for key in headers:
                if isinstance(row[key], str) and row[key].strip() == "":
                    row[key] = None

            cleaned_data.append(row)

    return cleaned_data


In [12]:
# Parse the claims file into cleaned row dicts and preview the first three.
data = clean_csv_data(file_path="Insurance_auto_data.csv")
data[0:3]


[{'CLAIM_ID': 'CLM100021',
  'CLAIM_DATE': '2025-04-01',
  'CUSTOMER_ID': 'CUST14285',
  'CLAIM_AMOUNT': 10419.0,
  'PREMIUM_COLLECTED': 2198.59,
  'PAID_AMOUNT': 6964.46,
  'CITY': 'PUNE',
  'REJECTION_REMARKS': None},
 {'CLAIM_ID': 'CLM100013',
  'CLAIM_DATE': '2025-04-01',
  'CUSTOMER_ID': 'CUST26471',
  'CLAIM_AMOUNT': 42468.0,
  'PREMIUM_COLLECTED': 8982.2,
  'PAID_AMOUNT': 30119.67,
  'CITY': 'GUWAHATI',
  'REJECTION_REMARKS': None},
 {'CLAIM_ID': 'CLM100099',
  'CLAIM_DATE': '2025-04-02',
  'CUSTOMER_ID': 'CUST29309',
  'CLAIM_AMOUNT': 55897.0,
  'PREMIUM_COLLECTED': 1861.78,
  'PAID_AMOUNT': 55657.15,
  'CITY': 'GUWAHATI',
  'REJECTION_REMARKS': None}]

In [13]:
def analyze_city_performance(data, cities=None):
    """Aggregate claim statistics per city.

    Args:
        data: Iterable of row dicts. Each row that belongs to a tracked city
            must provide numeric "PREMIUM_COLLECTED" and "CLAIM_AMOUNT"
            values and a "REJECTION_REMARKS" entry.
        cities: Optional iterable of city names to report on. When omitted,
            the four cities PUNE, KOLKATA, RANCHI and GUWAHATI are used.

    Returns:
        dict: Maps each tracked city (in the order given) to a dict with
        "total_claims", "total_premium", "total_claim_amount" and
        "rejected_claims". A claim counts as rejected when its
        "REJECTION_REMARKS" value is not None. Rows whose "CITY" is not
        tracked are ignored; tracked cities with no rows keep zero totals.
    """
    if cities is None:
        cities = ["PUNE", "KOLKATA", "RANCHI", "GUWAHATI"]

    summary = {
        city: {
            "total_claims": 0,
            "total_premium": 0.0,
            "total_claim_amount": 0.0,
            "rejected_claims": 0,
        }
        for city in cities
    }

    for row in data:
        city = row.get("CITY")
        if city not in summary:
            continue  # city not tracked (or missing)

        stats = summary[city]
        stats["total_claims"] += 1
        stats["total_premium"] += row["PREMIUM_COLLECTED"]
        stats["total_claim_amount"] += row["CLAIM_AMOUNT"]
        if row["REJECTION_REMARKS"] is not None:
            stats["rejected_claims"] += 1

    return summary


In [14]:
# Build the per-city summary and print one indented section per city.
city_report = analyze_city_performance(data)
for city, stats in city_report.items():
    print(f"\n📍 {city}")
    for metric, value in stats.items():
        # "total_claims" -> "Total Claims"
        label = metric.replace('_', ' ').title()
        print(f"   {label}: {value}")



📍 PUNE
   Total Claims: 37
   Total Premium: 369254.7900000001
   Total Claim Amount: 1543919.0
   Rejected Claims: 3

📍 KOLKATA
   Total Claims: 16
   Total Premium: 140279.78
   Total Claim Amount: 847893.0
   Rejected Claims: 0

📍 RANCHI
   Total Claims: 17
   Total Premium: 148858.6
   Total Claim Amount: 570328.0
   Rejected Claims: 2

📍 GUWAHATI
   Total Claims: 24
   Total Premium: 261314.84000000005
   Total Claim Amount: 1148030.0
   Rejected Claims: 3


In [15]:

# Keyword searched for (case-insensitively) in a rejection remark, mapped to
# the class label reported for it. Entries are checked in this order.
REJECTION_REASONS_MAP = dict(
    fake_document="Fake_document",
    not_covered="Not_Covered",
    policy_expired="Policy_expired",
)

def handle_error(error_message):
    """Print ``error_message`` with an "Error: " prefix and return "Error"."""
    report_line = f"Error: {error_message}"
    print(report_line)
    return "Error"

def contains_rejection_reason(rejection_text, reason):
    """Return True when ``reason`` occurs in ``rejection_text``, ignoring case.

    An empty or non-string ``rejection_text`` gives False. Any exception
    raised during the check is reported through ``handle_error`` and the
    result is False.
    """
    try:
        # Guard first: only non-empty strings can contain a reason.
        if not rejection_text or not isinstance(rejection_text, str):
            return False
        needle = reason.lower()
        haystack = rejection_text.lower()
        return needle in haystack
    except Exception as exc:
        handle_error(f"Error in contains_rejection_reason: {str(exc)}")
        return False

def map_rejection_reason(rejection_text):
    """Map a rejection remark to a class label via REJECTION_REASONS_MAP.

    Returns:
        "NoRemark" when ``rejection_text`` is empty or not a string; the
        label of the first map entry whose keyword is found in the text;
        "Unknown" when no keyword matches; "Error" (after reporting through
        ``handle_error``) when an exception is raised.
    """
    try:
        if not rejection_text or not isinstance(rejection_text, str):
            return "NoRemark"

        # Lazily scan the map in order; next() stops at the first match.
        matching_labels = (
            label
            for keyword, label in REJECTION_REASONS_MAP.items()
            if contains_rejection_reason(rejection_text, keyword)
        )
        return next(matching_labels, "Unknown")
    except Exception as exc:
        handle_error(f"Error in map_rejection_reason: {str(exc)}")
        return "Error"

def complex_rejection_classifier(remark_text):
    """Classify a rejection remark into a rejection class label.

    The keyword-to-label rules live only in REJECTION_REASONS_MAP and are
    applied by ``map_rejection_reason``. This function used to repeat the
    same keywords in its own if/elif chain before delegating, which gave two
    places to keep in sync; it now only validates the input and delegates.

    Args:
        remark_text: The raw remark value; may be None or a non-string.

    Returns:
        "Invalid Remark" when ``remark_text`` is not a string or is blank
        (empty or whitespace only); otherwise whatever
        ``map_rejection_reason`` returns for the text (a mapped label or
        "Unknown"). "Error" is returned, after reporting through
        ``handle_error``, if an exception is raised here.
    """
    try:
        if not isinstance(remark_text, str) or len(remark_text.strip()) == 0:
            return "Invalid Remark"

        # Single source of truth for keyword matching.
        return map_rejection_reason(remark_text)
    except Exception as e:
        handle_error(f"Error in complex_rejection_classifier: {str(e)}")
        return "Error"


In [16]:

# Tag every record in place with the class derived from its rejection remark.
for row in data:
    remark = row.get("REJECTION_REMARKS")
    row.update(REJECTION_CLASS=complex_rejection_classifier(remark))

# Preview up to five records whose remark was classified (not "Invalid Remark").
[record for record in data if record["REJECTION_CLASS"] != "Invalid Remark"][0:5]


[{'CLAIM_ID': 'CLM100038',
  'CLAIM_DATE': '2025-04-10',
  'CUSTOMER_ID': 'CUST55227',
  'CLAIM_AMOUNT': 15564.0,
  'PREMIUM_COLLECTED': 19124.37,
  'PAID_AMOUNT': 0.0,
  'CITY': 'PUNE',
  'REJECTION_REMARKS': 'Rejection reason: Policy_expired in verification.',
  'REJECTION_CLASS': 'Policy_expired'},
 {'CLAIM_ID': 'CLM100059',
  'CLAIM_DATE': '2025-04-10',
  'CUSTOMER_ID': 'CUST37384',
  'CLAIM_AMOUNT': 72187.0,
  'PREMIUM_COLLECTED': 6783.05,
  'PAID_AMOUNT': 0.0,
  'CITY': 'RANCHI',
  'REJECTION_REMARKS': 'Policy rejected: Policy_expired noted.',
  'REJECTION_CLASS': 'Policy_expired'},
 {'CLAIM_ID': 'CLM100096',
  'CLAIM_DATE': '2025-04-14',
  'CUSTOMER_ID': 'CUST24471',
  'CLAIM_AMOUNT': 15373.0,
  'PREMIUM_COLLECTED': 10412.56,
  'PAID_AMOUNT': 0.0,
  'CITY': 'PUNE',
  'REJECTION_REMARKS': 'Fake_document reason led to rejection.',
  'REJECTION_CLASS': 'Fake_document'},
 {'CLAIM_ID': 'CLM100090',
  'CLAIM_DATE': '2025-04-17',
  'CUSTOMER_ID': 'CUST61146',
  'CLAIM_AMOUNT': 85451.0,