In [None]:
# Credit Risk Segmentation

## Objective
Segment customers into risk levels (LOW, MEDIUM, HIGH) based on each customer's total
outstanding debt and average days past due, to support credit decision-making.

In [1]:
import pandas as pd
# Load the transaction-level records from the CSV in the working directory.
df = pd.read_csv(filepath_or_buffer="credit_transactions.csv")
# Preview the first five rows to sanity-check the columns that were read.
df.head(n=5)

Unnamed: 0,transaction_id,customer_id,transaction_date,total_amount,paid_amount,days_past_due
0,1,C001,2024-01-05,1500,1500,0
1,2,C001,2024-02-10,1800,1800,0
2,3,C002,2024-01-12,2200,1800,15
3,4,C002,2024-03-01,2000,1500,30
4,5,C003,2024-01-20,3000,1000,60


In [2]:
# Unpaid balance per transaction: amount billed minus amount actually paid.
df["outstanding_debt"] = df["total_amount"].sub(df["paid_amount"])

# Collapse to one row per customer: total unpaid balance and the mean of
# days_past_due across that customer's transactions.
per_customer = df.groupby("customer_id")
summary = per_customer.agg(
    outstanding_debt=("outstanding_debt", "sum"),
    days_past_due=("days_past_due", "mean"),
).reset_index()

summary

Unnamed: 0,customer_id,outstanding_debt,days_past_due
0,C001,0,0.0
1,C002,900,22.5
2,C003,4000,75.0
3,C004,0,0.0
4,C005,3500,52.5
5,C006,0,0.0
6,C007,1200,27.5
7,C008,2500,75.0
8,C009,0,0.0


In [3]:
def risk_level(row, high_debt=3000, high_days=60, medium_debt=1000):
    """Classify one customer summary row as "HIGH", "MEDIUM", or "LOW" risk.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by key (e.g. the Series passed by
        ``DataFrame.apply(..., axis=1)``) providing ``"outstanding_debt"``
        and ``"days_past_due"``.
    high_debt : number, default 3000
        Outstanding debt strictly above this value is rated HIGH.
    high_days : number, default 60
        A days-past-due value strictly above this is rated HIGH.
    medium_debt : number, default 1000
        Outstanding debt strictly above this value (and not already HIGH)
        is rated MEDIUM.

    Returns
    -------
    str
        One of ``"HIGH"``, ``"MEDIUM"``, ``"LOW"``.

    Notes
    -----
    All comparisons are strict (``>``), so a value exactly equal to a
    threshold falls into the lower tier. Payment delay only influences the
    HIGH tier; the MEDIUM tier is decided by debt alone.
    """
    debt = row["outstanding_debt"]
    # Either signal on its own is enough to flag a customer as HIGH.
    # "days_past_due" is only read when the debt test did not already decide.
    if debt > high_debt or row["days_past_due"] > high_days:
        return "HIGH"
    if debt > medium_debt:
        return "MEDIUM"
    return "LOW"

# Rate every customer by running the classifier across the rows of the summary.
risk_labels = summary.apply(risk_level, axis="columns")
summary["risk_level"] = risk_labels
summary

Unnamed: 0,customer_id,outstanding_debt,days_past_due,risk_level
0,C001,0,0.0,LOW
1,C002,900,22.5,LOW
2,C003,4000,75.0,HIGH
3,C004,0,0.0,LOW
4,C005,3500,52.5,HIGH
5,C006,0,0.0,LOW
6,C007,1200,27.5,MEDIUM
7,C008,2500,75.0,HIGH
8,C009,0,0.0,LOW


In [4]:
# Number of customers in each risk level, most frequent level first.
summary["risk_level"].value_counts(sort=True, ascending=False)

Unnamed: 0_level_0,count
risk_level,Unnamed: 1_level_1
LOW,5
HIGH,3
MEDIUM,1


In [None]:
## Key Insights

- Three of the nine customers (C003, C005, C008) are classified as **HIGH risk**; they hold
  the largest outstanding debt and the longest average payment delays.
- MEDIUM risk customers represent an opportunity for early intervention.
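- LOW risk customers have lower exposure: most carry no outstanding debt, although C002 is
  rated LOW despite a 900 balance and 22.5 average days past due, because payment delay
  only affects the HIGH rule.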

This segmentation can be used to prioritize collections and adjust credit policies.