# EWS 2 — Fixed-Term Loans (Rolling Features via rolling.py)

This notebook builds an **Early Warning System (EWS)** for a **fixed-term loan portfolio** (e.g., personal loans) using the same rolling feature generator (`src/features/rolling.py`).

Compared to a revolving-credit portfolio:
- No utilization; focus on **scheduled payment shortfall** and **delinquency transitions**
- Rolling features capture **recurrence/persistence** (miss counts, any DPD) and **volatility** of shortfall

**What you get**
- Synthetic loan-month panel data (no real customer data, so it is safe to publish on GitHub)
- Rolling features via `RollingSpec`
- Simple transition-derived features (lags) + EWS rules
- Monitoring view and an alert list


In [None]:
import sys
from pathlib import Path

# Make repo src importable
# REPO_ROOT is the resolved current working directory, so this assumes the
# notebook is run from the repository root (the directory that holds `src/`).
REPO_ROOT = Path('.').resolve()
SRC = REPO_ROOT / 'src'
# Prepend so modules under src/ are found first; the membership check keeps
# sys.path free of duplicates when the cell is re-run.
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Seed NumPy's global RNG so code drawing from `np.random` is repeatable across runs.
np.random.seed(42)
# Let pandas display up to 200 columns before truncating wide frames.
pd.set_option('display.max_columns', 200)


In [None]:
from features.rolling import RollingSpec, add_rolling_features

## 1) Simulate fixed-term loan panel (synthetic)

In [None]:
def simulate_fixed_term_panel(n_loans=5000, n_months=30, start="2023-01-31", seed=None) -> pd.DataFrame:
    """Simulate a synthetic loan-month panel for a fixed-term loan portfolio.

    Each loan gets static attributes (term, channel, risk band, principal,
    scheduled payment, latent stress level). For every month the function then
    draws a payment ratio and a missed-payment flag, moves the delinquency
    bucket by at most one step (0 <-> 30 <-> 60 <-> 90), and draws a default
    flag from a logistic score.

    Parameters
    ----------
    n_loans : int
        Number of loans to simulate.
    n_months : int
        Number of month-end observations per loan; must be at least 1.
    start : str or datetime-like
        Anchor for the first observation date. Dates are month-ends, so a
        value that is not a month-end rolls forward to the next one.
    seed : int or None
        If given, draws come from a private ``np.random.RandomState(seed)``
        and the global NumPy RNG is left untouched. If ``None`` (default) the
        global ``np.random`` state is used, e.g. as seeded by
        ``np.random.seed``.

    Returns
    -------
    pd.DataFrame
        One row per (loan_id, date), sorted by loan_id then date, with the
        columns: loan_id, date, term, channel, risk_band, principal,
        sched_pmt, actual_pmt, pay_ratio, shortfall_ratio, missed_payment,
        dpd_bucket, default_next_6m.

    Raises
    ------
    ValueError
        If ``n_months`` is less than 1.

    Notes
    -----
    ``default_next_6m`` is an independent Bernoulli draw per row based on the
    same month's state; it is not derived from later rows of the panel.
    """
    if n_months < 1:
        # Fail early with a clear message instead of an obscure error further down.
        raise ValueError("n_months must be at least 1")

    # Either the module-level functions (global state) or a private generator;
    # both expose the same choice/normal/beta/rand API.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # An explicit MonthEnd offset avoids the deprecated "M" frequency alias.
    dates = pd.date_range(pd.to_datetime(start), periods=n_months, freq=pd.offsets.MonthEnd())

    # ---- static, per-loan attributes ----
    loan_id = np.arange(n_loans)
    term = rng.choice([24, 36, 48], size=n_loans, p=[0.35, 0.45, 0.20])
    channel = rng.choice(["Branch", "Online", "Partner"], size=n_loans, p=[0.30, 0.55, 0.15])
    risk_band = rng.choice(["A", "B", "C", "D"], size=n_loans, p=[0.30, 0.35, 0.25, 0.10])

    # Mean loan amount falls with worse risk bands; amounts are floored at 2000.
    band_amt_mu = {"A": 12000, "B": 10000, "C": 8500, "D": 7000}
    amt_mu = np.array([band_amt_mu[b] for b in risk_band], dtype=float)
    principal = np.round(np.maximum(2000, rng.normal(amt_mu, 2500)), 0)

    # Latent stress: Beta(2, 10) noise shifted up by a band-specific offset.
    band_stress = {"A": 0.10, "B": 0.20, "C": 0.33, "D": 0.48}
    stress_shift = np.array([band_stress[b] for b in risk_band], dtype=float)
    base_stress = np.clip(rng.beta(2, 10, size=n_loans) + stress_shift, 0, 1)

    # Scheduled payment is principal / term plus noise; the floor of 20 also
    # keeps the divisions by sched_pmt below well-defined.
    sched_pmt = np.round(principal / term + rng.normal(0, 20, size=n_loans), 2)
    sched_pmt = np.clip(sched_pmt, 20, None)

    dpd_prev = np.zeros(n_loans, dtype=int)
    rows = []

    for t, d in enumerate(dates):
        # Sinusoidal portfolio-wide factor with a 12-month period.
        macro = 0.06 * np.sin(2 * np.pi * (t / 12))

        pay_ratio = (
            0.85
            + 0.10 * rng.normal(0, 1, size=n_loans)
            - 0.45 * base_stress
            - 0.10 * macro
        )
        pay_ratio = np.clip(pay_ratio, 0, 1.2)

        p_miss = np.clip(
            0.02 + 0.35 * base_stress + 0.10 * (1 - np.clip(pay_ratio, 0, 1)) + 0.08 * macro,
            0,
            0.95,
        )
        missed = (rng.rand(n_loans) < p_miss).astype(int)

        # A missed payment overrides the drawn payment ratio.
        actual_pmt = sched_pmt * pay_ratio
        actual_pmt[missed == 1] = 0.0

        shortfall_ratio = np.clip((sched_pmt - actual_pmt) / sched_pmt, 0, 1)

        # The two conditions are mutually exclusive, so each loan moves at most
        # one bucket per month: up (capped at 90), down (floored at 0), or stays.
        deteriorate = (missed == 1) | (shortfall_ratio > 0.6)
        cure = (missed == 0) & (shortfall_ratio < 0.2)
        dpd = np.where(
            deteriorate,
            np.minimum(dpd_prev + 30, 90),
            np.where(cure, np.maximum(dpd_prev - 30, 0), dpd_prev),
        )

        score = (
            -4.2
            + 2.2 * (dpd >= 30).astype(int)
            + 3.0 * (dpd >= 60).astype(int)
            + 3.6 * (dpd >= 90).astype(int)
            + 2.0 * np.maximum(shortfall_ratio - 0.3, 0)
            + 1.5 * missed
            + 1.6 * base_stress
            + 0.7 * macro
        )
        p_default = 1 / (1 + np.exp(-score))
        default_next_6m = (rng.rand(n_loans) < p_default).astype(int)

        rows.append(pd.DataFrame({
            "loan_id": loan_id,
            "date": d,
            "term": term,
            "channel": channel,
            "risk_band": risk_band,
            "principal": principal,
            "sched_pmt": sched_pmt,
            "actual_pmt": actual_pmt,
            # Realised ratio, i.e. 0 for missed payments.
            "pay_ratio": np.clip(actual_pmt / sched_pmt, 0, 1.2),
            "shortfall_ratio": shortfall_ratio,
            "missed_payment": missed,
            "dpd_bucket": dpd,
            "default_next_6m": default_next_6m,
        }))

        dpd_prev = dpd

    return pd.concat(rows, ignore_index=True).sort_values(["loan_id", "date"]).reset_index(drop=True)

# Build the panel with the default arguments and preview the first rows.
df = simulate_fixed_term_panel()
df.head()

## 2) Rolling features via `rolling.py`

In [None]:
# Rolling-window specs, grouped by the source column they aggregate.
# Positional arguments as used here: source column, window length,
# aggregation name, output column name.
shortfall_specs = [
    RollingSpec("shortfall_ratio", 3, "mean", "shortfall_3m_avg"),
    RollingSpec("shortfall_ratio", 6, "mean", "shortfall_6m_avg"),
    RollingSpec("shortfall_ratio", 6, "std",  "shortfall_6m_std"),
]
missed_specs = [
    RollingSpec("missed_payment", 3, "sum", "miss_3m_sum"),
    RollingSpec("missed_payment", 6, "sum", "miss_6m_sum"),
]
dpd_specs = [
    RollingSpec("dpd_bucket", 6, "any_ge", "any_dpd30_6m", threshold=30),
    RollingSpec("dpd_bucket", 12, "any_ge", "any_dpd60_12m", threshold=60),
]
specs = shortfall_specs + missed_specs + dpd_specs

# NOTE(review): presumably returns the input frame plus one column per spec,
# computed per loan in date order; confirm against src/features/rolling.py.
df_feat = add_rolling_features(df, group_cols="loan_id", date_col="date", specs=specs)

# Transition features: previous month's bucket within each loan, and a 0/1 flag
# for a strictly higher bucket than last month. The first row of a loan has no
# lag (NaN), so its comparison is False and the flag is 0.
g = df_feat.groupby("loan_id", sort=False)
df_feat["dpd_lag1"] = g["dpd_bucket"].shift(1)
dpd_change = df_feat["dpd_bucket"] - df_feat["dpd_lag1"]
df_feat["dpd_worsened"] = (dpd_change > 0).astype(int)

# Keep only rows where every feature and the target are present.
feat_cols = [*(s.out_col for s in specs), "dpd_lag1", "dpd_worsened"]
required_cols = [*feat_cols, "default_next_6m"]
model_df = df_feat.dropna(subset=required_cols).copy()
model_df.shape

## 3) Rule-based alerts + monitoring view

In [None]:
# Alert thresholds.
TH_SHORTFALL_AVG = 0.35  # 3-month average shortfall ratio at or above this fires
TH_MISS_6M = 2           # this many (or more) missed payments in 6 months fires

# Individual rules; a loan-month is alerted when at least one of them fires.
recent_dpd30 = model_df["any_dpd30_6m"].eq(1)
repeated_misses = model_df["miss_6m_sum"].ge(TH_MISS_6M)
high_shortfall = model_df["shortfall_3m_avg"].ge(TH_SHORTFALL_AVG)
bucket_worsened = model_df["dpd_worsened"].eq(1)
model_df["alert_rule"] = (
    recent_dpd30 | repeated_misses | high_shortfall | bucket_worsened
).astype(int)

# Monthly monitoring view: distinct loans, share alerted, share with the default flag.
monthly = model_df.groupby("date").agg(
    loans=("loan_id", "nunique"),
    alert_rate=("alert_rule", "mean"),
    default_rate=("default_next_6m", "mean"),
)

# Plot both rates over time on a shared axis.
fig, ax = plt.subplots(figsize=(10, 4))
for rate_col, rate_label in (
    ("alert_rate", "Alert rate"),
    ("default_rate", "Default (next 6m) rate"),
):
    ax.plot(monthly.index, monthly[rate_col], label=rate_label)
ax.set_title("Fixed-term EWS: Alert Rate vs Default Rate")
ax.set_xlabel("Month")
ax.set_ylabel("Rate")
ax.legend()
fig.tight_layout()
plt.show()

monthly.tail()

## 4) Latest-month alert list

In [None]:
# Restrict to the most recent month present in the modelling frame.
latest = model_df["date"].max()
in_latest_month = model_df["date"] == latest

# Rank descending on every key: alerted loans first, then higher DPD bucket,
# higher 3-month shortfall and more misses in the last 6 months.
ranking_keys = ["alert_rule", "dpd_bucket", "shortfall_3m_avg", "miss_6m_sum"]
alert_list = model_df.loc[in_latest_month].copy().sort_values(ranking_keys, ascending=False)

# Columns shown in the alert list: identifiers, rule output, current state,
# rolling features, and the target.
cols = [
    "loan_id", "date", "risk_band", "channel", "term",
    "alert_rule", "dpd_bucket", "dpd_lag1", "dpd_worsened",
    "shortfall_ratio", "shortfall_3m_avg", "shortfall_6m_std",
    "missed_payment", "miss_6m_sum",
    "any_dpd30_6m", "any_dpd60_12m",
    "default_next_6m",
]
alert_list[cols].head(25)