In [1]:
# compare_lr_vs_simple_meta.py
# ------------------------------------------------------------
# Apples-to-apples comparison: Logistic Regression vs simple_meta NN
# - Same split
# - Same preprocessing (numeric: impute+scale; categorical: impute+OHE)
# - Same business objective threshold sweep
# ------------------------------------------------------------

import numpy as np
import pandas as pd
from dataclasses import dataclass
from typing import Tuple, List, Dict

# sklearn
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, classification_report
)

# tensorflow / keras
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense, Dropout, Concatenate
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
# ------------------------------------------------------------
# Config
# ------------------------------------------------------------
@dataclass
class Config:
    """Settings shared by the Logistic Regression and simple_meta NN runs.

    One instance carries the data location, the train/test split settings,
    the payoff assumptions used for profit scoring, the threshold grid, and
    the hyperparameters of both models.
    """

    # Data source and split.
    # NOTE(review): csv_path is a machine-specific absolute path.
    csv_path: str = "/Users/peekay/Downloads/Loan_default.csv"
    # Label column; split_cols casts it to int.
    target: str = "Default"
    # Columns load_data removes when they are present in the CSV.
    drop_cols: Tuple[str, ...] = ("LoanID",)
    test_size: float = 0.2
    # Reused for the RNG seeds, the train/test split, the CV folds and the LR model.
    random_state: int = 42
    cv_folds: int = 5

    # Business economics (adjust as needed)
    # Revenue is credited per approved non-defaulter and loss is charged per
    # approved defaulter (see business_eval).
    # NOTE(review): the constants look like rounded per-class mean
    # LoanAmount x mean InterestRate -- confirm the intended derivation.
    revenue_per_good: float = 125_000 * 0.13   # ~16,250
    loss_per_default: float = 144_000 * 0.16   # ~23,040

    # Threshold sweep grid
    # Fed to np.linspace(low, high, points); both endpoints are included, so
    # the defaults give a step of 0.025.
    threshold_low: float = 0.05
    threshold_high: float = 0.95
    threshold_points: int = 37

    # Logistic Regression hyperparams
    lr_use_class_weight_balanced: bool = False
    # build_logistic_regression passes n_jobs=None for "liblinear" and -1 otherwise.
    lr_solver: str = "liblinear"   # or "lbfgs"
    lr_C: float = 1.0
    lr_max_iter: int = 2000

    # simple_meta NN hyperparams
    nn_use_class_weight_balanced: bool = False
    # Upper bound on epochs; main() also installs EarlyStopping on val_loss.
    nn_epochs: int = 200          # raised from an earlier, smaller value
    nn_batch_size: int = 256
    # Passed to Keras fit() as validation_split.
    nn_val_split: float = 0.2
    # EarlyStopping patience (epochs without val_loss improvement).
    nn_patience: int = 12
    # Adam learning rate.
    nn_lr: float = 1e-3
    # Rate shared by every Dropout layer in the network.
    nn_dropout: float = 0.3

# Module-level default configuration; the script entry point passes it to main().
CFG = Config()

# Reproducibility: seed NumPy's and TensorFlow's global RNGs with the same value.
np.random.seed(CFG.random_state)
tf.random.set_seed(CFG.random_state)

# ------------------------------------------------------------
# Data helpers
# ------------------------------------------------------------
def load_data(cfg: Config) -> pd.DataFrame:
    """Read the CSV at ``cfg.csv_path`` and remove the ``cfg.drop_cols`` columns.

    Names in ``cfg.drop_cols`` that are not present in the file are skipped
    rather than treated as an error.
    """
    frame = pd.read_csv(cfg.csv_path)
    # errors="ignore" skips names that are absent from the frame.
    return frame.drop(columns=list(cfg.drop_cols), errors="ignore")

def split_cols(df: pd.DataFrame, target: str) -> Tuple[pd.DataFrame, pd.Series, List[str], List[str]]:
    """Separate features from the label and classify feature columns by dtype.

    Args:
        df: Full dataset including the ``target`` column.
        target: Name of the label column; its values are cast to ``int``.

    Returns:
        ``(X, y, num_cols, cat_cols)`` where ``X`` is ``df`` without the
        target, ``num_cols`` are the columns with a NumPy numeric dtype and
        ``cat_cols`` are all remaining feature columns, in original order.
    """
    features = df.drop(columns=target)
    labels = df[target].astype(int)

    numeric_names = features.select_dtypes(include=[np.number]).columns.tolist()
    numeric_lookup = frozenset(numeric_names)
    # Everything that is not numeric is treated as categorical.
    categorical_names = [name for name in features.columns if name not in numeric_lookup]

    return features, labels, numeric_names, categorical_names

# ------------------------------------------------------------
# Preprocessing
# ------------------------------------------------------------
def make_numeric_preproc():
    """Build the numeric branch: median imputation, then standard scaling."""
    steps = [
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
    return Pipeline(steps)

def make_categorical_preproc():
    """Build the categorical branch: most-frequent imputation, then dense one-hot.

    Categories unseen during fit are ignored at transform time
    (``handle_unknown="ignore"``).
    """
    steps = [
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("ohe", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
    ]
    return Pipeline(steps)

def fit_transform_preprocessors(X_train, X_test, num_cols, cat_cols):
    """Fit the numeric and categorical preprocessors on train and apply to both splits.

    Each preprocessor is fitted on ``X_train`` only and then used to transform
    ``X_test``. A column group that is empty yields zero-width arrays (and its
    preprocessor is returned unfitted) instead of being passed to sklearn;
    this now holds for the numeric group as well as the categorical one.

    Args:
        X_train: Training features (DataFrame).
        X_test: Test features (DataFrame).
        num_cols: Names of the numeric columns.
        cat_cols: Names of the categorical columns.

    Returns:
        ``(num_pre, cat_pre, X_train_num, X_test_num, X_train_cat, X_test_cat)``.
    """
    num_pre = make_numeric_preproc()
    cat_pre = make_categorical_preproc()

    def _fit_apply(pre, cols):
        # No columns in this group: skip sklearn and emit (n_rows, 0) blocks.
        if len(cols) == 0:
            return np.empty((len(X_train), 0)), np.empty((len(X_test), 0))
        return pre.fit_transform(X_train[cols]), pre.transform(X_test[cols])

    X_train_num, X_test_num = _fit_apply(num_pre, num_cols)
    X_train_cat, X_test_cat = _fit_apply(cat_pre, cat_cols)
    return num_pre, cat_pre, X_train_num, X_test_num, X_train_cat, X_test_cat

def build_lr_preprocessor(num_cols, cat_cols):
    """Combine the numeric and categorical branches into one ColumnTransformer.

    Uses the same branch factories as the NN inputs so both models see the
    same kind of preprocessing.
    """
    transformers = [
        ("num", make_numeric_preproc(), num_cols),
        ("cat", make_categorical_preproc(), cat_cols),
    ]
    return ColumnTransformer(transformers)

# ------------------------------------------------------------
# Models
# ------------------------------------------------------------
def build_logistic_regression(cfg: Config) -> LogisticRegression:
    """Create an unfitted LogisticRegression from the ``lr_*`` settings in ``cfg``."""
    if cfg.lr_use_class_weight_balanced:
        weighting = "balanced"
    else:
        weighting = None

    # n_jobs is left unset for liblinear; every other solver gets all cores.
    workers = -1 if cfg.lr_solver != "liblinear" else None

    params = {
        "solver": cfg.lr_solver,
        "C": cfg.lr_C,
        "max_iter": cfg.lr_max_iter,
        "class_weight": weighting,
        "n_jobs": workers,
        "random_state": cfg.random_state,
    }
    return LogisticRegression(**params)

def build_simple_meta_nn(num_dim: int, cat_dim: int, cfg: Config) -> Model:
    """Build and compile the two-branch ``simple_meta`` network.

    The numeric and categorical inputs each pass through an identical
    Dense(64) -> Dropout -> Dense(32) tower; the towers are concatenated and
    fed to a Dense(32) -> Dropout -> Dense(16) -> sigmoid head.

    Args:
        num_dim: Width of the numeric input.
        cat_dim: Width of the (one-hot) categorical input.
        cfg: Supplies ``nn_dropout`` and ``nn_lr``.

    Returns:
        A compiled Keras model taking ``[num_input, cat_input]``.
    """
    drop_rate = cfg.nn_dropout

    def _tower(inputs):
        # Per-branch encoder; layers are created in call order.
        hidden = Dense(64, activation="relu")(inputs)
        hidden = Dropout(drop_rate)(hidden)
        return Dense(32, activation="relu")(hidden)

    in_num = Input(shape=(num_dim,), name="num_input")
    num_branch = _tower(in_num)

    in_cat = Input(shape=(cat_dim,), name="cat_input")
    cat_branch = _tower(in_cat)

    head = Concatenate(name="fuse")([num_branch, cat_branch])
    head = Dense(32, activation="relu")(head)
    head = Dropout(drop_rate)(head)
    head = Dense(16, activation="relu")(head)
    risk = Dense(1, activation="sigmoid", name="default_risk")(head)

    model = Model(inputs=[in_num, in_cat], outputs=risk, name="simple_meta")
    optimizer = tf.keras.optimizers.Adam(learning_rate=cfg.nn_lr)
    model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])
    return model

# ------------------------------------------------------------
# Business scoring
# ------------------------------------------------------------
def business_eval(y_true: np.ndarray,
                  y_prob: np.ndarray,
                  threshold: float,
                  revenue_per_good: float,
                  loss_per_default: float) -> Dict:
    """Score one decision threshold in both classification and profit terms.

    A predicted probability ``>= threshold`` means *decline* (predicted 1);
    anything below means *approve* (predicted 0). Only approved applicants
    affect the payoff: each approved non-defaulter (TN) earns
    ``revenue_per_good`` and each approved defaulter (FN) costs
    ``loss_per_default``. Declined applicants contribute nothing.

    All metrics are derived from the four confusion counts in a single pass
    over the data. Ratios whose denominator is zero are reported as 0.0.

    Args:
        y_true: Binary labels (1 = default, 0 = no default); flattened to 1-D.
        y_prob: Predicted default probabilities; flattened to 1-D.
        threshold: Decline cut-off applied to ``y_prob``.
        revenue_per_good: Payoff per approved non-defaulter.
        loss_per_default: Cost per approved defaulter.

    Returns:
        Dict with ``threshold``, the counts ``tn/fp/fn/tp`` (ints),
        ``precision``, ``recall``, ``f1``, ``accuracy`` (floats, positive
        class = 1) and ``revenue``, ``loss``, ``profit``.
    """
    labels = np.asarray(y_true).ravel()
    scores = np.asarray(y_prob).ravel()

    is_default = labels == 1
    is_good = labels == 0
    # y_pred==1 => decline; y_pred==0 => approve
    declined = scores >= threshold
    approved = ~declined

    tn = int(np.count_nonzero(is_good & approved))
    fp = int(np.count_nonzero(is_good & declined))
    fn = int(np.count_nonzero(is_default & approved))
    tp = int(np.count_nonzero(is_default & declined))

    def _ratio(numerator, denominator):
        # Zero denominators map to 0.0 rather than raising.
        return numerator / denominator if denominator > 0 else 0.0

    revenue = tn * revenue_per_good
    loss = fn * loss_per_default
    profit = revenue - loss
    return dict(
        threshold=threshold, tn=tn, fp=fp, fn=fn, tp=tp,
        precision=_ratio(tp, tp + fp),
        recall=_ratio(tp, tp + fn),
        f1=_ratio(2 * tp, 2 * tp + fp + fn),
        accuracy=_ratio(tn + tp, tn + fp + fn + tp),
        revenue=revenue, loss=loss, profit=profit
    )

def sweep_thresholds(y_true: np.ndarray,
                     y_prob: np.ndarray,
                     low: float, high: float, points: int,
                     revenue_per_good: float, loss_per_default: float):
    """Evaluate an evenly spaced threshold grid and pick the most profitable entry.

    Args:
        y_true: Binary labels.
        y_prob: Predicted default probabilities.
        low, high, points: Grid definition passed to ``np.linspace``.
        revenue_per_good, loss_per_default: Payoffs forwarded to ``business_eval``.

    Returns:
        ``(best, grid)``: the ``business_eval`` result with the highest
        ``profit`` (the lowest threshold wins ties) and the full list of
        results in threshold order.
    """
    def _profit(row):
        return row["profit"]

    grid = []
    for cutoff in np.linspace(low, high, points):
        grid.append(business_eval(y_true, y_prob, cutoff, revenue_per_good, loss_per_default))

    return max(grid, key=_profit), grid

# ------------------------------------------------------------
# Debug: payoff checks & sensitivity
# ------------------------------------------------------------
def print_payoff_debug(df_all: pd.DataFrame, y: pd.Series, cfg: Config):
    """Print the payoff assumptions and a sanity check against the raw columns.

    Shows the configured revenue/loss, the break-even default probability
    ``revenue / (revenue + loss)``, and, when columns named ``LoanAmount`` or
    ``InterestRate`` exist (matched case-insensitively), their mean per class.

    Args:
        df_all: Feature frame to inspect.
        y: Labels aligned positionally with ``df_all``.
        cfg: Supplies ``revenue_per_good`` and ``loss_per_default``.
    """
    print("\n=== PAYOFF DEBUG ===")
    revenue_unit = cfg.revenue_per_good
    loss_unit = cfg.loss_per_default
    print(f"Assumptions: revenue_per_good=${revenue_unit:,.0f}, loss_per_default=${loss_unit:,.0f}")

    payoff_sum = revenue_unit + loss_unit
    if payoff_sum > 0:
        break_even = revenue_unit / payoff_sum
    else:
        break_even = np.nan
    print(f"Break-even default probability (per approved): p*loss = (1-p)*revenue -> p* = {break_even:.4f}")

    # Map lower-cased name -> first column carrying that name.
    first_by_lower = {}
    for column in df_all.columns:
        first_by_lower.setdefault(column.lower(), column)

    checks = (
        ("loanamount", "\nLoanAmount means by class (train+test):"),
        ("interestrate", "\nInterestRate means by class (train+test):"),
    )
    found = [(first_by_lower[key], heading) for key, heading in checks if key in first_by_lower]

    if not found:
        print("Note: Could not find LoanAmount/InterestRate columns for sanity check.")
        return

    labelled = df_all.copy()
    labelled["__y"] = y.values
    class_names = {0: "No Default", 1: "Default"}
    for column, heading in found:
        print(heading)
        print(labelled.groupby("__y")[column].mean().rename(class_names))

def print_threshold_debug(name: str, best: Dict):
    """Print approval statistics for a model at its chosen threshold.

    Approvals are the applicants predicted 0, i.e. ``tn + fn``. Every ratio
    falls back to zero when its denominator is zero, including the share of
    the test set when all four counts are zero.

    Args:
        name: Model label used in the heading.
        best: A ``business_eval`` result; reads ``tn``, ``fp``, ``fn``,
            ``tp``, ``revenue`` and ``loss``.
    """
    approvals = best["tn"] + best["fn"]
    total = approvals + best["fp"] + best["tp"]
    approval_share = (approvals / total) if total > 0 else 0.0
    approved_default_rate = (best["fn"] / approvals) if approvals > 0 else 0.0
    print(f"\n{name} — Approval stats @ business-optimal threshold")
    print(f"Approvals: {approvals:,} ({approval_share:.1%} of test set)")
    print(f"Default rate among approved (FN / approvals): {approved_default_rate:.3%}")
    # Avoid dividing by zero when nothing was approved.
    denom = approvals if approvals > 0 else 1
    print(f"Revenue per approved: ${best['revenue']/denom:,.0f} | Loss per approved: ${best['loss']/denom:,.0f}")

def sensitivity_table(name: str, best: Dict, cfg: Config):
    """Print profit at the chosen threshold under scaled payoff assumptions.

    Revenue and loss are each scaled by 0.8, 1.0 and 1.2 (nine combinations)
    while the approved counts ``tn`` and ``fn`` from ``best`` stay fixed.

    Args:
        name: Model label used in the heading.
        best: A ``business_eval`` result; reads ``tn`` and ``fn``.
        cfg: Supplies the baseline ``revenue_per_good`` and ``loss_per_default``.
    """
    multipliers = (0.8, 1.0, 1.2)
    base_revenue = best["tn"] * cfg.revenue_per_good
    base_loss = best["fn"] * cfg.loss_per_default

    print(f"\n{name} — Profit sensitivity @ best threshold")
    print("rev_mult  loss_mult   Revenue ($)     Loss ($)        Profit ($)")
    for rev_mult in multipliers:
        scaled_revenue = base_revenue * rev_mult
        for loss_mult in multipliers:
            scaled_loss = base_loss * loss_mult
            net = scaled_revenue - scaled_loss
            print(f"{rev_mult:7.1f}  {loss_mult:8.1f}   {scaled_revenue:12,.0f}   {scaled_loss:12,.0f}   {net:12,.0f}")
        # Blank line between revenue-multiplier groups.
        print()

# ------------------------------------------------------------
# Utilities
# ------------------------------------------------------------
def print_metrics_header(title: str):
    """Print a blank line, ``title``, and a dashed underline of the same length."""
    underline = "-" * len(title)
    print(f"\n{title}\n{underline}")

def print_summary_block(name: str, prob, y_test, cfg: Config):
    """Print test metrics at threshold 0.50 and at the profit-optimal threshold.

    The threshold sweep reads its grid and payoffs from the ``cfg`` argument,
    so a caller-supplied configuration is honoured rather than the
    module-level default.

    NOTE(review): the profit-optimal threshold is selected on the same
    ``y_test`` labels it is then reported on, so the reported profit is an
    optimistic estimate.

    Args:
        name: Model label used in headings.
        prob: Predicted default probabilities for the test rows.
        y_test: True binary labels for the test rows.
        cfg: Supplies the threshold grid and payoff assumptions.

    Returns:
        The ``business_eval`` dict for the most profitable threshold.
    """
    # Default 0.50
    y_pred = (prob >= 0.5).astype(int)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0,1]).ravel()
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, zero_division=0)
    rec = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    auc = roc_auc_score(y_test, prob)

    print_metrics_header(f"{name} — Test @ threshold=0.50")
    print(f"Accuracy:  {acc:.4f} | Precision: {prec:.4f} | Recall: {rec:.4f} | F1: {f1:.4f} | ROC AUC: {auc:.4f}")
    print(f"Confusion  TN={tn:,}  FP={fp:,}  FN={fn:,}  TP={tp:,}")
    print("\nClassification report:")
    print(classification_report(y_test, y_pred, target_names=["No Default","Default"]))

    # Use the cfg passed in by the caller; the per-threshold grid is not needed here.
    best, _ = sweep_thresholds(y_test, prob,
                               cfg.threshold_low, cfg.threshold_high, cfg.threshold_points,
                               cfg.revenue_per_good, cfg.loss_per_default)
    print_metrics_header(f"{name} — Business-optimal threshold")
    print(f"Threshold: {best['threshold']:.4f} | Profit: ${best['profit']:,.0f}")
    print(f"Revenue:   ${best['revenue']:,.0f} | Loss: ${best['loss']:,.0f}")
    print(f"Accuracy:  {best['accuracy']:.4f} | Precision: {best['precision']:.4f} | Recall: {best['recall']:.4f} | F1: {best['f1']:.4f}")
    print(f"Confusion  TN={best['tn']:,}  FP={best['fp']:,}  FN={best['fn']:,}  TP={best['tp']:,}")
    return best

# ------------------------------------------------------------
# Main
# ------------------------------------------------------------
def main(cfg: Config):
    """Run the Logistic Regression vs simple_meta NN comparison end to end.

    Steps: load the CSV, make one stratified train/test split shared by both
    models, cross-validate and fit the LR pipeline, fit the NN with early
    stopping, then print per-model metrics, the profit-optimal threshold,
    approval statistics, a payoff sensitivity table, a ranking by profit and
    the NN-minus-LR deltas.

    NOTE(review): each model's threshold is chosen by sweeping on the test
    labels, so the reported profits are optimistic estimates.

    Args:
        cfg: Run configuration (data path, split, payoffs, hyperparameters).

    Returns:
        None. All results are printed.
    """
    # Local import: the file's top-level sklearn import list does not include it.
    from sklearn.model_selection import cross_validate

    print("=== LR vs simple_meta NN: unified preprocessing, split, and business scoring ===")
    df = load_data(cfg)
    X, y, num_cols, cat_cols = split_cols(df, cfg.target)
    print(f"Dataset: {df.shape} | Default rate={y.mean():.3%}")
    print(f"Numeric: {len(num_cols)} | Categorical: {len(cat_cols)}")

    # One stratified split reused by BOTH models
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=cfg.test_size, random_state=cfg.random_state, stratify=y
    )

    # Debug: payoff sanity check against raw columns
    print_payoff_debug(pd.concat([X_train, X_test], axis=0), pd.concat([y_train, y_test], axis=0), cfg)

    # Fit separate preprocessors for numeric and categorical (for NN inputs).
    # The fitted preprocessor objects themselves are not needed afterwards.
    _num_pre, _cat_pre, Xtr_num, Xte_num, Xtr_cat, Xte_cat = fit_transform_preprocessors(
        X_train, X_test, num_cols, cat_cols
    )

    # -----------------------------
    # Logistic Regression
    # -----------------------------
    lr_pre = build_lr_preprocessor(num_cols, cat_cols)
    lr = build_logistic_regression(cfg)
    lr_pipe = Pipeline([("pre", lr_pre), ("clf", lr)])

    # One multi-metric pass: each fold is fitted once and scored on both
    # metrics, instead of refitting the identical folds once per metric.
    skf = StratifiedKFold(n_splits=cfg.cv_folds, shuffle=True, random_state=cfg.random_state)
    cv_scores = cross_validate(lr_pipe, X_train, y_train, cv=skf, scoring=["accuracy", "f1"])
    cv_acc = cv_scores["test_accuracy"]
    cv_f1 = cv_scores["test_f1"]
    print(f"\nLR CV {cfg.cv_folds}-fold | Acc: {cv_acc.mean():.4f} ± {cv_acc.std():.4f} | F1: {cv_f1.mean():.4f} ± {cv_f1.std():.4f}")

    lr_pipe.fit(X_train, y_train)
    lr_prob = lr_pipe.predict_proba(X_test)[:, 1]

    # -----------------------------
    # simple_meta NN
    # -----------------------------
    num_dim, cat_dim = Xtr_num.shape[1], Xtr_cat.shape[1]
    simple_meta = build_simple_meta_nn(num_dim, cat_dim, cfg)

    cw = None
    if cfg.nn_use_class_weight_balanced:
        # "Balanced" weights: n_samples / (n_classes * class_count),
        # keyed by plain ints with plain float values.
        classes = np.unique(y_train)
        counts = np.bincount(y_train)
        total = counts.sum()
        cw = {int(cls): float(total / (len(classes) * counts[cls])) for cls in classes}

    es = EarlyStopping(monitor="val_loss", patience=cfg.nn_patience, restore_best_weights=True, verbose=0)
    _ = simple_meta.fit(
        [Xtr_num, Xtr_cat], y_train.values,
        epochs=cfg.nn_epochs,
        batch_size=cfg.nn_batch_size,
        validation_split=cfg.nn_val_split,
        callbacks=[es],
        class_weight=cw,
        verbose=0
    )
    nn_prob = simple_meta.predict([Xte_num, Xte_cat], verbose=0).ravel()

    # -----------------------------
    # Reports (common scoring)
    # -----------------------------
    best_lr = print_summary_block("Logistic Regression", lr_prob, y_test.values, cfg)
    best_nn = print_summary_block("simple_meta NN", nn_prob, y_test.values, cfg)

    # Extra debug on approvals at best thresholds
    print_threshold_debug("Logistic Regression", best_lr)
    print_threshold_debug("simple_meta NN", best_nn)

    # Sensitivity (±20% payoff multipliers) at each model's best threshold
    sensitivity_table("Logistic Regression", best_lr, cfg)
    sensitivity_table("simple_meta NN", best_nn, cfg)

    # -----------------------------
    # Head-to-head leaderboard
    # -----------------------------
    print("\nRANKING BY BUSINESS PERFORMANCE")
    print("-" * 80)
    rows = [
        ("Logistic Regression", best_lr["profit"], best_lr["threshold"],
         best_lr["accuracy"], best_lr["precision"], best_lr["recall"], best_lr["f1"]),
        ("simple_meta NN", best_nn["profit"], best_nn["threshold"],
         best_nn["accuracy"], best_nn["precision"], best_nn["recall"], best_nn["f1"]),
    ]
    # Highest profit first.
    rows_sorted = sorted(rows, key=lambda r: r[1], reverse=True)
    for i, r in enumerate(rows_sorted, 1):
        print(f"{i}. {r[0]:20s} | Profit: ${r[1]:>12,.0f} | Threshold: {r[2]:.4f} | "
              f"Acc: {r[3]:.4f} | Prec: {r[4]:.4f} | Rec: {r[5]:.4f} | F1: {r[6]:.4f}")

    # -----------------------------
    # Delta: simple_meta NN minus LR (positive means the NN is higher)
    # -----------------------------
    print("\nDELTA (simple_meta NN - LR) @ business-optimal for each model")
    print("-" * 80)
    print(f"Profit delta:  ${best_nn['profit'] - best_lr['profit']:,.0f}")
    print(f"Revenue delta: ${best_nn['revenue'] - best_lr['revenue']:,.0f}")
    print(f"Loss delta:    ${best_nn['loss'] - best_lr['loss']:,.0f}")

# Script entry point: run the comparison with the module-level default config.
if __name__ == "__main__":
    main(CFG)


=== LR vs simple_meta NN: unified preprocessing, split, and business scoring ===
Dataset: (255347, 17) | Default rate=11.613%
Numeric: 9 | Categorical: 7

=== PAYOFF DEBUG ===
Assumptions: revenue_per_good=$16,250, loss_per_default=$23,040
Break-even default probability (per approved): p*loss = (1-p)*revenue -> p* = 0.4136

LoanAmount means by class (train+test):
__y
No Default    125353.656017
Default       144515.311469
Name: LoanAmount, dtype: float64

InterestRate means by class (train+test):
__y
No Default    13.176994
Default       15.896227
Name: InterestRate, dtype: float64

LR CV 5-fold | Acc: 0.8852 ± 0.0002 | F1: 0.0641 ± 0.0042

Logistic Regression — Test @ threshold=0.50
-------------------------------------------
Accuracy:  0.8853 | Precision: 0.6084 | Recall: 0.0341 | F1: 0.0645 | ROC AUC: 0.7531
Confusion  TN=45,009  FP=130  FN=5,729  TP=202

Classification report:
              precision    recall  f1-score   support

  No Default       0.89      1.00      0.94     451