In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Bayesian Credit Default Risk Modeling\n",
    "\n",
    "Full end-to-end pipeline.\n",
    "\n",
    "- Load and preprocess data\n",
    "- Build hierarchical Bayesian model\n",
    "- Run inference\n",
    "- Validate and calibrate\n",
    "- Apply business policy (expected loss thresholding)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('../src')  # make the project-local modules in ../src importable\n",
    "from data_preprocessing import load_data, preprocess_data\n",
    "from bayesian_model import hierarchical_logistic_model, run_inference\n",
    "# posterior_predictive is called in the evaluation cell below, so it has to be\n",
    "# imported here; without it a fresh-kernel Run All stops with a NameError.\n",
    "from evaluation import (\n",
    "    evaluate_model,\n",
    "    expected_loss_threshold,\n",
    "    plot_calibration,\n",
    "    plot_ppc,\n",
    "    posterior_predictive,\n",
    ")\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "\n",
    "# Load data\n",
    "df = load_data()\n",
    "print(df.head())\n",
    "\n",
    "# Preprocess\n",
    "X_train, X_test, y_train, y_test, feature_names, groups = preprocess_data(df)\n",
    "\n",
    "# Encode groups to integers for indexing (e.g., purpose categories).\n",
    "# NOTE(review): the slicing below assumes `groups` is ordered as all training rows\n",
    "# followed by all test rows -- confirm against preprocess_data.\n",
    "le = LabelEncoder()\n",
    "groups_train = le.fit_transform(groups[:len(y_train)])\n",
    "# LabelEncoder.transform raises ValueError for labels that were not seen during fit.\n",
    "groups_test = le.transform(groups[len(y_train):])\n",
    "num_groups = len(le.classes_)\n",
    "\n",
    "# Convert to NumPy arrays (np.array is used here; these are not JAX arrays)\n",
    "X_train = np.array(X_train)\n",
    "X_test = np.array(X_test)\n",
    "y_train = np.array(y_train)\n",
    "y_test = np.array(y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run inference for hierarchical_logistic_model on the training data,\n",
    "# then print the summary of the returned object.\n",
    "mcmc = run_inference(\n",
    "    hierarchical_logistic_model,\n",
    "    X_train,\n",
    "    groups_train,\n",
    "    y_train,\n",
    "    num_groups,\n",
    ")\n",
    "mcmc.print_summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate on the test split\n",
    "pred_prob, pred_uncertainty = evaluate_model(\n",
    "    mcmc, X_test, groups_test, y_test, num_groups\n",
    ")\n",
    "\n",
    "# Posterior predictive checks (posterior_predictive comes from evaluation.py)\n",
    "post_pred = posterior_predictive(\n",
    "    mcmc, X_test, groups_test, num_groups\n",
    ")\n",
    "plot_ppc(post_pred, y_test)\n",
    "\n",
    "# Calibration of the predicted probabilities against the test labels\n",
    "plot_calibration(y_test, pred_prob)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Business policy: derive decisions by thresholding on expected loss,\n",
    "# then show the first ten as a spot check.\n",
    "decisions = expected_loss_threshold(\n",
    "    pred_prob,\n",
    "    pred_uncertainty,\n",
    "    loss_threshold=0.2,\n",
    ")\n",
    "print(\"Sample decisions:\", decisions[:10])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}