In [None]:
### Assignment - 2

In [6]:
def read_insurance_data(filepath=r"C:\Users\sarth\Downloads\Star_Assignments\Insurance_auto_data.csv", encoding=None):
    """
    Read the auto insurance claims CSV file into a list of dictionaries.

    The first row of the file supplies the column names, which become the
    dictionary keys of every record. Expected layout:
    CLAIM_ID,CLAIM_DATE,CUSTOMER_ID,CLAIM_AMOUNT,PREMIUM_COLLECTED,PAID_AMOUNT,CITY,REJECTION_REMARKS

    Parsing rules:
      * Rows are parsed with the csv module, so a quoted field that contains
        a comma (e.g. a free-text rejection remark) stays in one column.
      * Blank lines are skipped.
      * A row with fewer values than headers gets "" for the missing text
        columns and 0.0 for the missing monetary columns.
      * CLAIM_AMOUNT, PREMIUM_COLLECTED and PAID_AMOUNT are converted to
        float; an empty value becomes 0.0.
      * An empty or absent CITY is stored as 'UNKNOWN'.

    Args:
        filepath: Path of the CSV file to read.
        encoding: Text encoding passed to open(). None (the default) keeps
            the platform default encoding.

    Returns:
        list[dict]: One dictionary per claim row, in file order. An empty
        file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a monetary column holds a non-numeric value; the
            message names the file, line and column.
    """
    import csv  # local import: the notebook has no shared import cell

    money_fields = ('CLAIM_AMOUNT', 'PREMIUM_COLLECTED', 'PAID_AMOUNT')
    data = []

    # newline='' is what the csv module requires to handle embedded newlines.
    with open(filepath, newline='', encoding=encoding) as f:
        reader = csv.reader(f)

        header_row = next(reader, None)
        if header_row is None:
            return data  # completely empty file

        # Drop a UTF-8 byte-order mark if it survived decoding; otherwise the
        # first column name would not match 'CLAIM_ID'.
        header_row[0] = header_row[0].lstrip('\ufeff')
        headers = [h.strip() for h in header_row]

        for values in reader:
            # csv.reader yields [] (or one whitespace-only field) for blank lines.
            if len(values) <= 1 and not ''.join(values).strip():
                continue

            record = {}
            for i, header in enumerate(headers):
                # Short rows (missing trailing values) are padded with "".
                value = values[i].strip() if i < len(values) else ""

                if header in money_fields:
                    try:
                        record[header] = float(value) if value else 0.0
                    except ValueError:
                        raise ValueError(
                            f"{filepath}: line {reader.line_num}: column "
                            f"{header!r} has non-numeric value {value!r}"
                        ) from None
                else:
                    record[header] = value

            # .get() so a file without a CITY column does not raise KeyError.
            if not record.get('CITY'):
                record['CITY'] = 'UNKNOWN'

            data.append(record)

    return data

In [7]:
def analyze_cities(data, branch_cities=None):
    """
    Aggregate premiums and payouts per branch city and pick the city with
    the largest loss (payouts minus premiums).

    Args:
        data: Iterable of claim dictionaries. Each claim must provide the
            keys 'CITY', 'PREMIUM_COLLECTED' and 'PAID_AMOUNT'.
        branch_cities: Optional iterable of city names to track. Defaults to
            the four branches PUNE, KOLKATA, RANCHI and GUWAHATI. City names
            are compared exactly (case-sensitive); claims for any other city
            are ignored.

    Returns:
        tuple: (worst_city, stats) where stats maps each tracked city to a
        dict with 'premium', 'paid', 'claims' and 'loss'. worst_city is the
        city with the highest loss; on a tie the city listed first wins. It
        is None only when no cities are tracked.

    Raises:
        KeyError: If a claim lacks 'CITY', or a claim for a tracked city
            lacks 'PREMIUM_COLLECTED' or 'PAID_AMOUNT'.
    """
    if branch_cities is None:
        branch_cities = ('PUNE', 'KOLKATA', 'RANCHI', 'GUWAHATI')

    # A fresh counter dict per city so no state is shared between calls.
    cities = {
        name: {'premium': 0, 'paid': 0, 'claims': 0}
        for name in branch_cities
    }

    # Roll every claim up into its city's totals.
    for claim in data:
        totals = cities.get(claim['CITY'])
        if totals is None:
            continue  # city is not one of the tracked branches
        totals['premium'] += claim['PREMIUM_COLLECTED']
        totals['paid'] += claim['PAID_AMOUNT']
        totals['claims'] += 1

    worst_city = None
    max_loss = -float('inf')

    for name, totals in cities.items():
        # Positive loss means more was paid out than was collected.
        loss = totals['paid'] - totals['premium']
        totals['loss'] = loss  # stored for reporting

        # Strict '>' keeps the first-listed city on ties.
        if loss > max_loss:
            max_loss = loss
            worst_city = name

    return worst_city, cities

In [8]:
# Main program: load the claims file, rank the branch cities, print the report.
filepath = r"C:\Users\sarth\Downloads\Star_Assignments\Insurance_auto_data.csv"

print("Processing insurance data...")

# Step 1: parse the raw CSV into one dictionary per claim.
data = read_insurance_data(filepath)

# Step 2: aggregate per city and find the branch with the largest loss.
city_to_close, stats = analyze_cities(data)

# Step 3: headline recommendation (amounts shown in rupees with thousands separators).
worst_stats = stats[city_to_close]
print("\nRECOMMENDATION:")
print(f"Close operations in: {city_to_close}")
print(f"Reason: Highest financial loss of ₹{worst_stats['loss']:,.2f}")

# Step 4: full per-branch breakdown.
print("\nCITY STATISTICS:")
for city, city_stats in stats.items():
    print(f"\n{city}:")
    print(f"  Total claims: {city_stats['claims']}")
    print(f"  Premium collected: ₹{city_stats['premium']:,.2f}")
    print(f"  Claims paid: ₹{city_stats['paid']:,.2f}")
    # 'loss' is payouts minus premiums: a positive figure is a loss,
    # a negative figure means the branch was profitable.
    print(f"  Net loss: ₹{city_stats['loss']:,.2f}")

Processing insurance data...

RECOMMENDATION:
Close operations in: PUNE
Reason: Highest financial loss of ₹724,018.49

CITY STATISTICS:

PUNE:
  Total claims: 37
  Premium collected: ₹369,254.79
  Claims paid: ₹1,093,273.28
  Net loss: ₹724,018.49

KOLKATA:
  Total claims: 16
  Premium collected: ₹140,279.78
  Claims paid: ₹636,392.57
  Net loss: ₹496,112.79

RANCHI:
  Total claims: 17
  Premium collected: ₹148,858.60
  Claims paid: ₹401,142.67
  Net loss: ₹252,284.07

GUWAHATI:
  Total claims: 24
  Premium collected: ₹261,314.84
  Claims paid: ₹753,189.43
  Net loss: ₹491,874.59


In [1]:
# 3. Rejection Reasons

In [None]:
#### Dictionary mapping each rejection keyword to the class label reported for it.
#### Keys and values are currently identical.
REJECTION_REASONS_MAP = {
    label: label
    for label in ("Fake_document", "Not_Covered", "Policy_expired")
}

##### Function 1 #######
def handle_error(error_message):
    """
    Print an error message and return the error sentinel string.

    Args:
        error_message: Text (or any object) describing the failure.

    Returns:
        str: The literal "Errror". The spelling is kept as-is because the
        classifier functions in this notebook return the same literal.
    """
    report = "Error: {}".format(error_message)
    print(report)
    return "Errror"

#### Function 2 #########
def contains_rejection_reason(rejection_texts, reason):
    """
    Report whether a rejection remark mentions a given reason,
    ignoring letter case.

    Both the remark and the reason are lower-cased before the substring
    check. Lower-casing only the remark would make mixed-case reasons such
    as "Fake_document" impossible to match.

    Args:
        rejection_texts: The remark text to search.
        reason: The reason keyword to look for.

    Returns:
        bool: True if reason occurs in rejection_texts (case-insensitive).
        False if either argument is not a string or is empty.
    """
    if not isinstance(rejection_texts, str) or not isinstance(reason, str):
        return False
    # An empty reason would otherwise "match" every remark.
    if not rejection_texts or not reason:
        return False
    return reason.lower() in rejection_texts.lower()

####### Function 3 #######
def map_rejection_reason(rejection_text):
    """
    Translate a rejection remark into a rejection class label.

    Args:
        rejection_text: The remark text to classify.

    Returns:
        str: The class from REJECTION_REASONS_MAP for the first reason that
        contains_rejection_reason() finds in the remark; "Unknown" when no
        reason matches; "NoRemark" when the remark is empty or not a string;
        "Errror" when an unexpected exception occurs (it is reported through
        handle_error()).
    """
    try:
        # Guard clause: anything falsy or non-string counts as "no remark".
        if not (rejection_text and isinstance(rejection_text, str)):
            return "NoRemark"

        # Lazily walk the map in insertion order; stop at the first hit.
        matching_classes = (
            rejection_class
            for reason, rejection_class in REJECTION_REASONS_MAP.items()
            if contains_rejection_reason(rejection_text, reason)
        )
        return next(matching_classes, "Unknown")
    except Exception as e:
        handle_error(f"Error in map_rejection_reason: {str(e)}")
        return "Errror"
    
######## Function 4 ##########
def complex_rejection_classifier(remark_text):
    """
    Classify a rejection remark into one of the known rejection reasons.

    Reasons are checked in priority order: Fake_document, Not_Covered,
    Policy_expired. The first one found in the remark is returned. If none
    is found, the result of map_rejection_reason() is returned.

    Args:
        remark_text: The remark text to classify.

    Returns:
        str: "Fake_document", "Not_Covered" or "Policy_expired" for the
        first reason found; otherwise whatever map_rejection_reason()
        returns; "Invalid Remark" when the input is not a string or is
        blank; "Errror" when an unexpected exception occurs (it is reported
        through handle_error()).
    """
    try:
        # Validate against str: the remark must be non-blank text.
        # (Checking against int rejected every string remark.)
        if not isinstance(remark_text, str) or not remark_text.strip():
            return "Invalid Remark"

        # Priority order matters when a remark mentions several reasons.
        for reason in ("Fake_document", "Not_Covered", "Policy_expired"):
            if contains_rejection_reason(remark_text, reason):
                return reason

        # No known reason found: defer to the generic mapper.
        return map_rejection_reason(remark_text)
    except Exception as e:
        handle_error(f"Error in complex_rejection_classifier: {str(e)}")
        return "Errror"
