# Credit Underwriting Decisioning System - Quick Start

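This notebook walks through the complete workflow of the credit underwriting decisioning system: generating synthetic application data, training and evaluating models, mapping predicted default probabilities to scorecard points, monitoring drift, and scoring a new application.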

## 1. Setup and Imports

In [None]:
import sys
sys.path.insert(0, '../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from data.synthetic_generator import generate_synthetic_credit_data
from models.train import (
    CreditModel, 
    time_based_split, 
    prepare_features, 
    evaluate_model
)
from scoring.scorecard import ScorecardMapper
from monitoring.drift import DriftMonitor, calculate_psi

# Fix the seed of NumPy's legacy global RNG so that Restart & Run All is repeatable.
# NOTE(review): this only affects code that draws from np.random's global state;
# whether the project's data generator and models do so is not visible here - confirm,
# and pass SEED to them explicitly if they accept a seed / random_state argument.
SEED = 42
np.random.seed(SEED)

# Plot defaults shared by every figure in this notebook.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

## 2. Generate Synthetic Data

In [None]:
# Build the synthetic application dataset that the following sections work on
df = generate_synthetic_credit_data(n_samples=5000, default_rate=0.15)

# Report the row count and the realised default rate, then preview the frame
n_applications = len(df)
observed_default_rate = df['default'].mean()
print(f"Generated {n_applications} applications")
print(f"Default rate: {observed_default_rate:.2%}")
print("\nFirst few rows:")
df.head()

In [None]:
# Histogram grid: one panel per selected feature, filled row by row
features_to_plot = [
    'credit_score', 'annual_income', 'debt_to_income',
    'credit_utilization', 'num_delinquencies', 'employment_length_years',
]
fig, axes = plt.subplots(2, 3, figsize=(15, 8))

# axes.flat walks the 2x3 grid in row-major order, so panel k shows feature k
for ax, feature in zip(axes.flat, features_to_plot):
    ax.hist(df[feature], bins=30, alpha=0.7, edgecolor='black')
    ax.set(title=feature, xlabel='Value', ylabel='Frequency')

plt.tight_layout()
plt.show()

## 3. Train Models

In [None]:
# Time-based split on the application date: train / calibration / test frames
train_df, cal_df, test_df = time_based_split(df, 'application_date')
n_train, n_cal, n_test = map(len, (train_df, cal_df, test_df))
print(f"Train: {n_train}, Calibration: {n_cal}, Test: {n_test}")

# Run prepare_features on each partition in turn; each call yields an (X, y) pair
(X_train, y_train), (X_cal, y_cal), (X_test, y_test) = map(
    prepare_features, (train_df, cal_df, test_df)
)

In [None]:
def _fit_calibrated(model_type):
    """Create a CreditModel of ``model_type`` with calibrate=True and train it.

    Training uses the notebook's X_train / y_train together with the
    calibration pair X_cal / y_cal; the trained model is returned.
    """
    model = CreditModel(model_type=model_type, calibrate=True)
    model.train(X_train, y_train, X_cal, y_cal)
    return model


# Same data, two estimators: logistic regression first, then gradient boosting
lr_model = _fit_calibrated('logistic')
gb_model = _fit_calibrated('gradient_boosting')

print("Models trained successfully!")

In [None]:
# Predictions on the test features, logistic regression first
y_pred_lr, y_pred_gb = (
    fitted.predict_proba(X_test) for fitted in (lr_model, gb_model)
)

# Evaluate each prediction vector against y_test under its display name
model_names = ['Logistic Regression', 'Gradient Boosting']
lr_metrics, gb_metrics = (
    evaluate_model(y_test, preds, label)
    for preds, label in zip((y_pred_lr, y_pred_gb), model_names)
)

# One row per model, then transpose so each model becomes a column
metrics_df = pd.DataFrame([lr_metrics, gb_metrics], index=model_names).T
print("\nModel Performance Comparison:")
print(metrics_df)

## 4. Scorecard Mapping

In [None]:
# Scorecard parameters handed to ScorecardMapper.
# NOTE(review): presumably a base score of 600 at odds of 50 with 20 points
# to double the odds - confirm against ScorecardMapper's documentation.
scorecard_params = {'score0': 600, 'odds0': 50, 'pdo': 20}
mapper = ScorecardMapper(**scorecard_params)
mapper.describe()

In [None]:
# Map the gradient-boosting probabilities through the scorecard mapper
scores = mapper.prob_to_score(y_pred_gb)

# Side-by-side histograms: raw probabilities (left) and mapped scores (right)
score_fig, (prob_ax, score_ax) = plt.subplots(1, 2, figsize=(12, 5))

prob_ax.hist(y_pred_gb, bins=50, alpha=0.7, edgecolor='black')
prob_ax.set(
    xlabel='Default Probability',
    ylabel='Frequency',
    title='Probability Distribution',
)

score_ax.hist(scores, bins=50, alpha=0.7, edgecolor='black', color='orange')
score_ax.set(
    xlabel='Credit Score',
    ylabel='Frequency',
    title='Score Distribution',
)

plt.tight_layout()
plt.show()

# Summary statistics of the mapped scores, pre-formatted so the loop only prints
score_summary = [
    ("Mean", f"{scores.mean():.1f}"),
    ("Median", f"{np.median(scores):.1f}"),
    ("Std Dev", f"{scores.std():.1f}"),
    ("Range", f"[{scores.min():.0f}, {scores.max():.0f}]"),
]
print("\nScore Statistics:")
for stat_name, stat_text in score_summary:
    print(f"  {stat_name}: {stat_text}")

## 5. Drift Monitoring

In [None]:
# Baseline = training frame; current = test frame with its scores attached
monitor = DriftMonitor(baseline_data=train_df)

# assign() returns a new frame, so test_df itself is left untouched
test_with_scores = test_df.assign(
    predicted_score=mapper.prob_to_score(y_pred_gb)
)

# Compare the current frame against the baseline on three features plus the score
monitored_features = ['credit_score', 'annual_income', 'debt_to_income']
report = monitor.monitor(
    test_with_scores,
    feature_cols=monitored_features,
    score_col='predicted_score',
)

print("Drift Monitoring Report")
print("=" * 60)
print(f"Baseline samples: {report['baseline_samples']}")
print(f"Current samples: {report['current_samples']}")

# Per-feature PSI values
print("\nFeature PSI:")
for feature_name, psi_info in report['feature_psi'].items():
    print(f"  {feature_name}: PSI = {psi_info['psi']:.4f}")

# Score PSI is printed only when the report carries a truthy entry for it
score_psi = report['score_psi']
if score_psi:
    print(f"\nScore PSI: {score_psi['psi']:.4f}")

# Alerts, if any, are listed with their severity in upper case
alerts = report['alerts']
if alerts:
    print(f"\nAlerts: {len(alerts)}")
    for entry in alerts:
        print(f"  [{entry['severity'].upper()}] {entry['message']}")

## 6. Score New Applications

In [None]:
# One hand-written applicant, wrapped in a single-row frame for the model.
# NOTE(review): months_since_last_delinq = -1 presumably encodes "never
# delinquent" - confirm against the synthetic data generator.
applicant = {
    'credit_score': 720,
    'num_credit_lines': 5,
    'credit_utilization': 0.3,
    'num_delinquencies': 0,
    'months_since_last_delinq': -1,
    'annual_income': 75000,
    'employment_length_years': 5,
    'debt_to_income': 0.25,
    'loan_amount': 15000,
    'loan_term_months': 36,
    'interest_rate': 8.5,
    'num_inquiries_6m': 1,
    'revolving_balance': 5000,
    'has_mortgage': 1,
    'has_car_loan': 0,
}
sample_app = pd.DataFrame([applicant])

# Default probability from the gradient-boosting model, then its scorecard value
prob = gb_model.predict_proba(sample_app)[0]
score = mapper.prob_to_score(np.array([prob]))[0]

# Decision bands: below 0.10 approve, below 0.20 manual review, otherwise decline
decision = (
    "Approve" if prob < 0.10
    else "Review" if prob < 0.20
    else "Decline"
)

print("Sample Application Scoring Result:")
print("=" * 40)
for result_line in (
    f"Default Probability: {prob:.4f}",
    f"Credit Score: {score:.0f}",
    f"Decision: {decision}",
):
    print(result_line)