In [None]:
# Credit Risk Segmentation

## Objective
Segment customers into risk levels (LOW, MEDIUM, HIGH) based on outstanding debt
and payment delay metrics to support credit decision-making.

In [None]:
import pandas as pd

# Read the raw credit transactions (path is relative to the notebook's
# working directory) and preview the first rows as a sanity check.
df = pd.read_csv(filepath_or_buffer="../data/credit_transactions.csv")
df.head(n=5)

In [None]:
# Per-transaction amount still owed: total_amount minus paid_amount.
df["outstanding_debt"] = df["total_amount"].sub(df["paid_amount"])

# Collapse to one row per customer: summed outstanding debt and the
# mean of days_past_due across that customer's transactions.
summary = (
    df.groupby("customer_id")
    .agg(
        outstanding_debt=("outstanding_debt", "sum"),
        days_past_due=("days_past_due", "mean"),
    )
    .reset_index()
)

summary

In [None]:
def risk_level(row, high_debt=3000, high_days_past_due=60, medium_debt=1000):
    """Classify one customer as "HIGH", "MEDIUM" or "LOW" credit risk.

    Parameters
    ----------
    row : mapping-like
        Any object indexable by "outstanding_debt" and "days_past_due",
        e.g. a row handed over by ``DataFrame.apply(..., axis=1)`` or a dict.
    high_debt : number, default 3000
        Outstanding debt strictly above this value yields "HIGH".
    high_days_past_due : number, default 60
        Days past due strictly above this value yields "HIGH".
    medium_debt : number, default 1000
        Outstanding debt strictly above this value (and not already "HIGH")
        yields "MEDIUM".

    Returns
    -------
    str
        One of "HIGH", "MEDIUM", "LOW".

    Notes
    -----
    All comparisons are strict, so a value exactly on a threshold falls
    into the lower level.
    NOTE(review): NaN compares False against every threshold, so a row with
    missing values ends up "LOW" - confirm that this is intended.
    """
    debt = row["outstanding_debt"]

    # Either signal on its own is enough to flag a customer as HIGH.
    if debt > high_debt or row["days_past_due"] > high_days_past_due:
        return "HIGH"

    # MEDIUM is driven by debt only; payment delay below the HIGH cut-off
    # does not influence this level.
    if debt > medium_debt:
        return "MEDIUM"

    return "LOW"

# Run the classifier once per customer row and store the resulting label
# as a new "risk_level" column on the summary table.
summary["risk_level"] = summary.apply(func=risk_level, axis="columns")
summary

In [None]:
# Number of customers in each risk level, most frequent level first.
summary.loc[:, "risk_level"].value_counts()

In [None]:
## Key Insights

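- A small group of customers is classified as **HIGH risk** (outstanding debt
  above 3,000 or an average of more than 60 days past due), concentrating
  the highest outstanding debt and/or the longest payment delays.
- **MEDIUM risk** customers (outstanding debt above 1,000 and up to 3,000) represent an opportunity for early intervention.
- **LOW risk** customers show comparatively stable payment behavior (an average of 60 days past due or less) and lower exposure (outstanding debt of 1,000 or less).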

This segmentation can be used to prioritize collections and adjust credit policies.