# TASK - 1

In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive so the /content/drive/MyDrive/... CSV path used below resolves.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
def preprocess_insurance_data(filepath):
    """Load the insurance CSV at ``filepath`` into a list of cleaned row dicts.

    Cleaning rules:
      * Column names are stripped and upper-cased to form the dict keys.
      * Every cell is stripped; "", "null", "none" and "na" (any case)
        become None.
      * CLAIM_AMOUNT, PREMIUM_COLLECTED and PAID_AMOUNT are converted to
        float; values that cannot be parsed, or that are NaN/infinite,
        become None.
      * A missing CLAIM_AMOUNT is filled from PAID_AMOUNT when that is present.
      * A missing PAID_AMOUNT becomes 0.0 when CLAIM_AMOUNT is present.
      * A missing PREMIUM_COLLECTED becomes 0.0.
      * Rows whose field count differs from the header's are skipped.

    The file is parsed with the ``csv`` module, so a quoted field may contain
    commas (e.g. a free-text rejection remark) without the row being dropped.

    Args:
        filepath: Path to a comma-separated file whose first line is the header.

    Returns:
        list[dict]: One dict per accepted row, in file order. Empty when the
        file has no header line.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Imported locally so this cell runs on its own in a fresh kernel.
    import csv
    import math

    missing_markers = {"", "null", "none", "na"}
    numeric_columns = {'CLAIM_AMOUNT', 'PREMIUM_COLLECTED', 'PAID_AMOUNT'}
    cleaned_data = []

    # newline='' lets the csv module handle line endings itself.
    with open(filepath, 'r', newline='') as file:
        reader = csv.reader(file, skipinitialspace=True)

        header_row = next(reader, None)
        if header_row is None:
            return cleaned_data
        # Normalise the column names once instead of once per cell.
        headers = [name.strip().upper() for name in header_row]

        for row in reader:
            # Blank or malformed lines do not line up with the header.
            if len(row) != len(headers):
                continue

            data_dict = {}
            for key, value in zip(headers, row):
                val = value.strip()

                if val.lower() in missing_markers:
                    val = None

                if key in numeric_columns and val is not None:
                    try:
                        val = float(val)
                    except ValueError:
                        val = None
                    else:
                        # float() accepts "nan"/"inf"; those would poison any
                        # later sum, so treat them as missing too.
                        if not math.isfinite(val):
                            val = None

                data_dict[key] = val

            # Back-fill amounts from each other before defaulting to zero.
            if data_dict.get('CLAIM_AMOUNT') is None and data_dict.get('PAID_AMOUNT') is not None:
                data_dict['CLAIM_AMOUNT'] = data_dict['PAID_AMOUNT']

            if data_dict.get('PAID_AMOUNT') is None and data_dict.get('CLAIM_AMOUNT') is not None:
                data_dict['PAID_AMOUNT'] = 0.0

            if data_dict.get('PREMIUM_COLLECTED') is None:
                data_dict['PREMIUM_COLLECTED'] = 0.0

            cleaned_data.append(data_dict)

    return cleaned_data


# Location of the raw claims CSV on the mounted Google Drive.
filename = '/content/drive/MyDrive/Insurance_auto_data.csv'


# Parse and clean the file once; the later cells in this notebook reuse `cleaned_data`.
cleaned_data = preprocess_insurance_data(filename)

In [3]:
# NOTE(review): this re-reads the same CSV path that was already loaded into
# `cleaned_data`, and print() emits every record as one long line of dicts.
data = preprocess_insurance_data('/content/drive/MyDrive/Insurance_auto_data.csv')
print(data)


[{'CLAIM_ID': 'CLM100021', 'CLAIM_DATE': '2025-04-01', 'CUSTOMER_ID': 'CUST14285', 'CLAIM_AMOUNT': 10419.0, 'PREMIUM_COLLECTED': 2198.59, 'PAID_AMOUNT': 6964.46, 'CITY': 'PUNE', 'REJECTION_REMARKS': None}, {'CLAIM_ID': 'CLM100013', 'CLAIM_DATE': '2025-04-01', 'CUSTOMER_ID': 'CUST26471', 'CLAIM_AMOUNT': 42468.0, 'PREMIUM_COLLECTED': 8982.2, 'PAID_AMOUNT': 30119.67, 'CITY': 'GUWAHATI', 'REJECTION_REMARKS': None}, {'CLAIM_ID': 'CLM100099', 'CLAIM_DATE': '2025-04-02', 'CUSTOMER_ID': 'CUST29309', 'CLAIM_AMOUNT': 55897.0, 'PREMIUM_COLLECTED': 1861.78, 'PAID_AMOUNT': 55657.15, 'CITY': 'GUWAHATI', 'REJECTION_REMARKS': None}, {'CLAIM_ID': 'CLM100044', 'CLAIM_DATE': '2025-04-02', 'CUSTOMER_ID': 'CUST30275', 'CLAIM_AMOUNT': 71785.0, 'PREMIUM_COLLECTED': 13154.99, 'PAID_AMOUNT': 53629.3, 'CITY': 'PUNE', 'REJECTION_REMARKS': None}, {'CLAIM_ID': 'CLM100014', 'CLAIM_DATE': '2025-04-02', 'CUSTOMER_ID': 'CUST38169', 'CLAIM_AMOUNT': 18565.0, 'PREMIUM_COLLECTED': 2606.4, 'PAID_AMOUNT': 12849.24, 'CITY': 

# Display the cleaned data in tabular form



In [4]:
def print_full_table(data):
    """Print every record in ``data`` as a left-aligned, pipe-separated table.

    Column names and their order come from the keys of the first record; a
    key missing from a later record is rendered as an empty string. Each
    column is as wide as its longest cell or its name, whichever is longer.
    When ``data`` is empty, only "No data to display." is printed.

    Args:
        data: Sequence of dict records.

    Returns:
        None. The table is written to standard output.
    """
    if not data:
        print("No data to display.")
        return

    columns = list(data[0].keys())

    # Stringify every cell once so width calculation and rendering agree.
    cells = [[str(record.get(name, "")) for name in columns] for record in data]

    widths = []
    for position, name in enumerate(columns):
        longest_cell = max(len(line[position]) for line in cells)
        widths.append(max(len(name), longest_cell))

    def render(values):
        # Pad each value to its column width and join with the separator.
        return " | ".join(text.ljust(width) for text, width in zip(values, widths))

    title = render(columns)
    print(title)
    print("-" * len(title))

    for line in cells:
        print(render(line))


# Renders every cleaned record, so the output grows with the number of rows.
print_full_table(cleaned_data)


CLAIM_ID  | CLAIM_DATE | CUSTOMER_ID | CLAIM_AMOUNT | PREMIUM_COLLECTED | PAID_AMOUNT | CITY     | REJECTION_REMARKS                                
----------------------------------------------------------------------------------------------------------------------------------------------------
CLM100021 | 2025-04-01 | CUST14285   | 10419.0      | 2198.59           | 6964.46     | PUNE     | None                                             
CLM100013 | 2025-04-01 | CUST26471   | 42468.0      | 8982.2            | 30119.67    | GUWAHATI | None                                             
CLM100099 | 2025-04-02 | CUST29309   | 55897.0      | 1861.78           | 55657.15    | GUWAHATI | None                                             
CLM100044 | 2025-04-02 | CUST30275   | 71785.0      | 13154.99          | 53629.3     | PUNE     | None                                             
CLM100014 | 2025-04-02 | CUST38169   | 18565.0      | 2606.4            | 12849.24    | RANCHI   | None   

# TASK - 2

In [5]:
def analyze_city_shutdown(data, target_cities=None):
    """Summarise claims per city and pick the city with the lowest profit.

    Profit is computed as total PREMIUM_COLLECTED minus total CLAIM_AMOUNT for
    the city. Every record for a city counts toward its totals, including
    records that carry a rejection remark.

    Args:
        data: Iterable of record dicts with the keys CITY, PREMIUM_COLLECTED,
            CLAIM_AMOUNT and REJECTION_REMARKS. Records whose CITY is not a
            string, or is not one of the target cities, are ignored.
            Non-numeric premium/claim values are left out of the sums.
        target_cities: Optional iterable of city names to analyse, matched
            case-insensitively. Defaults to PUNE, KOLKATA, RANCHI and GUWAHATI.

    Returns:
        tuple: ``(report, recommended_city)``. ``report`` is a list with one
        dict per city, in order of first appearance in ``data``, with the keys
        'City', 'Total Claims', 'Total Premium Collected',
        'Total Claim Amount', 'Rejected Claims', 'Rejection Rate' and
        'Profit' (monetary values and the rate rounded to 2 decimals).
        ``recommended_city`` is the city with the lowest unrounded profit
        (the first one on ties), or None when no record matched.
    """
    if target_cities is None:
        target_cities = ("PUNE", "KOLKATA", "RANCHI", "GUWAHATI")
    # A set gives constant-time membership and normalises caller input.
    wanted = {str(name).strip().upper() for name in target_cities}

    city_stats = {}

    for entry in data:
        raw_city = entry.get("CITY")
        # Guard against None and non-text values instead of raising on .upper().
        if not isinstance(raw_city, str):
            continue

        city = raw_city.strip().upper()
        if city not in wanted:
            continue

        stats = city_stats.setdefault(city, {
            'claims': 0,
            'total_premium': 0.0,
            'total_claim_amount': 0.0,
            'rejected': 0
        })

        stats['claims'] += 1

        premium = entry.get("PREMIUM_COLLECTED")
        if isinstance(premium, (int, float)):
            stats['total_premium'] += premium

        claim_amt = entry.get("CLAIM_AMOUNT")
        if isinstance(claim_amt, (int, float)):
            stats['total_claim_amount'] += claim_amt

        # Any non-empty remark marks the claim as rejected.
        if entry.get("REJECTION_REMARKS"):
            stats['rejected'] += 1

    result = []
    recommended_city = None
    lowest_profit = float('inf')

    for city, stats in city_stats.items():
        profit = stats['total_premium'] - stats['total_claim_amount']
        # 'claims' is at least 1 for every city that made it into city_stats.
        rejection_rate = stats['rejected'] / stats['claims']

        result.append({
            'City': city,
            'Total Claims': stats['claims'],
            'Total Premium Collected': round(stats['total_premium'], 2),
            'Total Claim Amount': round(stats['total_claim_amount'], 2),
            'Rejected Claims': stats['rejected'],
            'Rejection Rate': round(rejection_rate, 2),
            'Profit': round(profit, 2)
        })

        # Strict comparison keeps the first city on ties.
        if profit < lowest_profit:
            lowest_profit = profit
            recommended_city = city

    return result, recommended_city


In [6]:
# Build the per-city summary and the recommended shutdown city from the cleaned records.
city_report, shutdown_city = analyze_city_shutdown(cleaned_data)


def print_city_report(report):
    """Print the per-city report as a left-aligned, pipe-separated table.

    Column names and order come from the keys of the first row, and every row
    is expected to carry those keys. Each column is as wide as its longest
    cell or its name, whichever is longer.

    Args:
        report: List of row dicts. When it is empty (or None), only
            "No data to display." is printed instead of raising on
            ``report[0]``.

    Returns:
        None. The table is written to standard output.
    """
    # An empty report has no first row to take the headers from.
    if not report:
        print("No data to display.")
        return

    headers = list(report[0].keys())
    # Stringify once; reused for both width calculation and rendering.
    table = [[str(row[h]) for h in headers] for row in report]

    col_widths = [
        max([len(h)] + [len(cells[i]) for cells in table])
        for i, h in enumerate(headers)
    ]

    header_row = " | ".join(h.ljust(w) for h, w in zip(headers, col_widths))
    print(header_row)
    print("-" * len(header_row))

    for cells in table:
        print(" | ".join(cell.ljust(w) for cell, w in zip(cells, col_widths)))


# Show the per-city table, then the city returned by analyze_city_shutdown.
print_city_report(city_report)
print(f"\n Recommended city for shutdown: {shutdown_city}")


City     | Total Claims | Total Premium Collected | Total Claim Amount | Rejected Claims | Rejection Rate | Profit    
----------------------------------------------------------------------------------------------------------------------
PUNE     | 37           | 369254.79               | 1661971.39         | 3               | 0.08           | -1292716.6
GUWAHATI | 24           | 261314.84               | 1204306.31         | 3               | 0.12           | -942991.47
RANCHI   | 17           | 148858.6                | 700079.16          | 2               | 0.12           | -551220.56
KOLKATA  | 16           | 140279.78               | 870815.66          | 0               | 0.0            | -730535.88

📉 Recommended city for shutdown: PUNE


# TASK - 3

In [7]:
# Maps a lower-case keyword that is searched for inside a rejection remark to
# the class label reported for that remark (iterated by map_rejection_reason,
# first match wins).
REJECTION_REASONS_MAP = {
    "fake_document": "Fake_document",
    "not_covered": "Not_Covered",
    "policy_expired": "Policy_expired"
}


def handle_error(error_message):
    """Print ``error_message`` prefixed with "Error: " and return "Error".

    Args:
        error_message: Text (or any printable value) describing the failure.

    Returns:
        str: Always the literal string "Error".
    """
    report = "Error: {}".format(error_message)
    print(report)
    return "Error"


def contains_rejection_reason(rejection_text, reason):
    """Report whether ``reason`` occurs in ``rejection_text``, ignoring case.

    Args:
        rejection_text: The remark to search. Anything that is empty or not a
            string yields False.
        reason: The keyword to look for.

    Returns:
        bool: True when the lower-cased keyword is a substring of the
        lower-cased remark; False otherwise, including when an exception is
        raised (it is reported through handle_error).
    """
    try:
        if not rejection_text or not isinstance(rejection_text, str):
            return False
        needle = reason.lower()
        haystack = rejection_text.lower()
        return needle in haystack
    except Exception as exc:
        handle_error(f"Error in contains_rejection_reason: {str(exc)}")
        return False


def map_rejection_reason(rejection_text):
    """Translate a rejection remark into a class label via REJECTION_REASONS_MAP.

    Args:
        rejection_text: The remark to classify.

    Returns:
        str: "NoRemark" when the remark is empty or not a string; the label of
        the first keyword in REJECTION_REASONS_MAP found in the remark;
        "Unknown" when no keyword matches; "Error" when an exception is raised
        (it is reported through handle_error).
    """
    try:
        if not (rejection_text and isinstance(rejection_text, str)):
            return "NoRemark"

        # Lazily scan the table in its own order; the first hit decides.
        matching_labels = (
            label
            for keyword, label in REJECTION_REASONS_MAP.items()
            if contains_rejection_reason(rejection_text, keyword)
        )
        return next(matching_labels, "Unknown")
    except Exception as exc:
        handle_error(f"Error in map_rejection_reason: {str(exc)}")
        return "Error"


def complex_rejection_classifier(remark_text):
    """Classify a rejection remark into one of the known rejection classes.

    Args:
        remark_text: The remark to classify.

    Returns:
        str: "Invalid Remark" when the remark is not a string or is blank;
        otherwise whatever map_rejection_reason returns for it (a label from
        REJECTION_REASONS_MAP, or "Unknown" when no keyword matches);
        "Error" when an exception is raised (it is reported through
        handle_error).
    """
    try:
        if not isinstance(remark_text, str) or not remark_text.strip():
            return "Invalid Remark"

        # The keyword-to-label table lives in REJECTION_REASONS_MAP. Delegating
        # to map_rejection_reason, instead of repeating the same three keyword
        # checks here, keeps the two from drifting apart when the table changes.
        return map_rejection_reason(remark_text)
    except Exception as e:
        handle_error(f"Error in complex_rejection_classifier: {str(e)}")
        return "Error"


In [8]:
def classify_rejections(data):
    """Attach a REJECTION_CLASS to every record in ``data``.

    Each record is modified in place: a record with a non-empty
    REJECTION_REMARKS gets the label from complex_rejection_classifier,
    and every other record gets "No Remark".

    Args:
        data: Iterable of record dicts.

    Returns:
        The same ``data`` object that was passed in.
    """
    for record in data:
        remark_text = record.get("REJECTION_REMARKS")
        if remark_text:
            label = complex_rejection_classifier(remark_text)
        else:
            label = "No Remark"
        record["REJECTION_CLASS"] = label
    return data


# classify_rejections adds REJECTION_CLASS to each record in place and returns its
# argument, so `classified_data` is the same list object as `cleaned_data`.
classified_data = classify_rejections(cleaned_data)


In [9]:
def print_classification_sample(data, limit=None):
    """Print CLAIM_ID, REJECTION_REMARKS and REJECTION_CLASS as an aligned table.

    Column widths are derived from the printed values, so the header, the
    separator and the rows line up whatever the length of the claim ids and
    remarks. A key missing from a record is shown as "None".

    Args:
        data: Sequence of record dicts.
        limit: Optional maximum number of records to print. The default,
            None, prints every record.

    Returns:
        None. The table is written to standard output.
    """
    columns = ("CLAIM_ID", "REJECTION_REMARKS", "REJECTION_CLASS")
    rows = data if limit is None else data[:limit]

    table = [[str(row.get(name)) for name in columns] for row in rows]
    widths = [
        max([len(name)] + [len(cells[i]) for cells in table])
        for i, name in enumerate(columns)
    ]

    def render(values):
        # Pad every column except the last to avoid trailing whitespace.
        padded = [text.ljust(width) for text, width in zip(values[:-1], widths)]
        padded.append(values[-1])
        return " | ".join(padded)

    print(render(list(columns)))
    # Separator spans the full table width: all columns plus the " | " joints.
    print("-" * (sum(widths) + 3 * (len(columns) - 1)))
    for cells in table:
        print(render(cells))


# Prints one line per classified record (all rows, not a subset).
print_classification_sample(classified_data)


CLAIM_ID | REJECTION_REMARKS                                | REJECTION_CLASS
--------------------------------------------------------------------------
CLM100021 | None                                          | No Remark
CLM100013 | None                                          | No Remark
CLM100099 | None                                          | No Remark
CLM100044 | None                                          | No Remark
CLM100014 | None                                          | No Remark
CLM100062 | None                                          | No Remark
CLM100010 | None                                          | No Remark
CLM100012 | None                                          | No Remark
CLM100029 | None                                          | No Remark
CLM100053 | None                                          | No Remark
CLM100054 | None                                          | No Remark
CLM100006 | None                                          | No Remark
CLM1000