# Unified Risk Pipeline Synthetic Demo
This notebook generates a synthetic dataset and then runs the unified risk pipeline on it, first step by step and then in a single call.

In [None]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# Build a reproducible synthetic dataset. The global NumPy seed is fixed and
# the random draws below happen in a fixed order, so rerunning this cell
# regenerates exactly the same frame.
np.random.seed(42)
n = 5000

# Application dates: a fixed start date plus a random whole-day offset in [0, 365).
dt0 = datetime(2023, 1, 1)
day_offsets = np.random.randint(0, 365, size=n)
app_dt = [dt0 + timedelta(days=int(offset)) for offset in day_offsets]

# Sequential identifiers 1..n.
app_id = np.arange(1, n + 1)

# Features: standard normal, exponential (scale 1.0), Bernoulli(0.3) and a
# categorical whose fifth level is None (drawn with probability 0.05).
x1 = np.random.normal(0, 1, n)
x2 = np.random.exponential(1.0, n)
x3 = np.random.binomial(1, 0.3, n)
cat_levels = ['A', 'B', 'C', 'D', None]
cat_probs = [0.3, 0.3, 0.2, 0.15, 0.05]
cat = np.random.choice(cat_levels, size=n, p=cat_probs)

# Per-row contribution of the category to the log-odds; rows with a missing
# category map to NaN and are then filled with 0.
cat_effect = (
    pd.Series(cat)
    .map({'A': 0.2, 'B': 0.0, 'C': -0.1, 'D': 0.3})
    .fillna(0)
    .values
)

# Log-odds of the target, accumulated term by term (intercept, x1, an
# indicator for x2 > 1, x3, category effect).
logit = -1.0 + 0.8 * x1
logit = logit + 0.5 * (x2 > 1.0).astype(int)
logit = logit + 0.6 * x3
logit = logit + cat_effect

# Logistic transform to a probability, then one Bernoulli draw per row.
p = 1 / (1 + np.exp(-logit))
y = (np.random.rand(n) < p).astype(int)

columns = {
    'app_id': app_id,
    'app_dt': app_dt,
    'x1': x1,
    'x2': x2,
    'x3': x3,
    'cat': cat,
    'target': y,
}
df = pd.DataFrame(columns)
df.head()

In [None]:
from risk_pipeline.core.config import Config
from risk_pipeline.unified_pipeline import UnifiedRiskPipeline

# Configure the pipeline. One keyword per line, grouped by apparent concern.
# NOTE(review): the meaning of each option is defined by risk_pipeline's
# Config, which is not visible here; the group headings are best guesses
# from the option names and should be confirmed against its documentation.
cfg = Config(
    # Column names, matching the synthetic frame `df`.
    target_col='target',
    id_col='app_id',
    time_col='app_dt',
    # Scoring / calibration switches.
    enable_scoring=False,
    enable_calibration=True,
    stage2_method='lower_mean',
    # Feature transformation and the selection steps to use
    # (presumably applied in the listed order; confirm).
    enable_woe=True,
    selection_order=['psi', 'vif', 'correlation', 'iv', 'boruta', 'stepwise'],
    # Model choice and tuning.
    use_optuna=False,
    model_type='all',
    # Data splitting.
    use_test_split=True,
    oot_months=3,
    equal_default_splits=True,
    # Risk banding.
    n_risk_bands=10,
    band_method='quantile',
)

# Fit the pipeline on the synthetic data in a single call and list the keys
# of the returned results object.
pipe = UnifiedRiskPipeline(cfg)
results = pipe.fit(df)
list(results.keys())

In [None]:
# Display the 'selected_features' entry of the results next to the
# 'best_model_name' entry; the latter is looked up with .get(), so a
# missing key does not raise.
(
    results['selected_features'],
    results.get('best_model_name'),
)
