# Model Performance Validations



In [None]:
import numpy as np
import pandas as pd


from sklearn.preprocessing import KBinsDiscretizer
import shap
from sklearn.ensemble import RandomForestClassifier

#------------------------------------Plots
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.figure_factory as ff
import plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import joblib 
import model_calibration as mc


# Sets the Spark SQL option "spark.sql.execution.arrow.pyspark.enabled" to "false".
# NOTE(review): `spark` is not created anywhere in the visible notebook — presumably the
# runtime injects a SparkSession under that name; confirm, otherwise this line raises NameError.
spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "false")
# Raise the pandas display limit to 500 columns so wide frames are not truncated when shown.
pd.set_option('display.max_columns', 500)

#--------------------Model

import importlib

from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.metrics import precision_recall_curve, roc_auc_score, classification_report, f1_score, confusion_matrix, roc_curve, auc, precision_score, recall_score,  brier_score_loss
from modeling_utils import plot_model_evaluation_summary, generate_classification_report
import joblib
from risk_validation_phase1 import *
# Remove the pandas row limit: any DataFrame displayed after this point is rendered in full.
# NOTE(review): the frames loaded below have millions of rows — display them with .head()/.sample().
pd.set_option("display.max_rows", None)  


# Global variables

In [5]:
# Predictor columns fed to the logistic-regression models (baseline, weighted, undersampled).
lr_training_vars = [
    'home_ownership',
    'addr_state',
    'dti',
    'fico_range_high',
    'all_util',
    'inq_last_12m',
    'acc_open_past_24mths',
    'avg_cur_bal',
    'bc_open_to_buy',
    'mo_sin_old_rev_tl_op',
    'mo_sin_rcnt_tl',
    'mort_acc',
    'mths_since_recent_bc',
    'mths_since_recent_inq',
    'num_actv_rev_tl',
    'emp_length_numeric',
    'emp_title_final_grouped',
    'title_grouped',
    'region_median_income',
    'loan_to_income_ratio',
    'open_acc_ratio',
]

print(f'Len of the features {len(lr_training_vars)}')

# Predictor columns fed to the XGBoost model: the logistic-regression set plus
# 'loan_amnt' and 'annual_inc'. The element order is kept exactly as listed here.
xgboost_training_vars = [
    'loan_amnt',
    'home_ownership',
    'annual_inc',
    'addr_state',
    'dti',
    'fico_range_high',
    'all_util',
    'inq_last_12m',
    'acc_open_past_24mths',
    'avg_cur_bal',
    'bc_open_to_buy',
    'mo_sin_old_rev_tl_op',
    'mo_sin_rcnt_tl',
    'mort_acc',
    'mths_since_recent_bc',
    'mths_since_recent_inq',
    'num_actv_rev_tl',
    'emp_length_numeric',
    'emp_title_final_grouped',
    'title_grouped',
    'region_median_income',
    'loan_to_income_ratio',
    'open_acc_ratio',
]

print(f'Len of the features {len(xgboost_training_vars)}')

Len of the features 21
Len of the features 23


# Global Functions

In [6]:
def calculate_model_scores(df):
    """
    Compute the calibrated score of every model and attach them to the data.

    For each model, the positive-class probability (column 1 of ``predict_proba``)
    is passed through that model's calibrator via ``predict``. The classifiers,
    calibrators and feature lists are read from module-level names
    (``base_model``, ``weighted_model``, ``under_model``, ``xgboost_model``, their
    ``*_cal`` counterparts, ``lr_training_vars`` and ``xgboost_training_vars``),
    so those must be defined before this function is called.

    Parameters:
    df: DataFrame with the input data; must contain every column listed in
        ``lr_training_vars`` and ``xgboost_training_vars``.

    Returns:
    A copy of ``df`` with the additional columns ``baseline_score``,
    ``weighted_score``, ``undersampled_score`` and ``xgboost_score``
    (added in that order). The input frame is not modified.
    """
    # The three logistic models share one feature matrix; XGBoost uses the wider list.
    features_lr = df[lr_training_vars]
    features_xgb = df[xgboost_training_vars]

    # (output column, classifier, calibrator, feature matrix) in output-column order
    scoring_plan = (
        ('baseline_score', base_model, base_model_cal, features_lr),
        ('weighted_score', weighted_model, weighted_model_cal, features_lr),
        ('undersampled_score', under_model, under_model_cal, features_lr),
        ('xgboost_score', xgboost_model, xgboost_model_cal, features_xgb),
    )

    # Work on a copy so the caller's frame is left untouched.
    scored = df.copy()
    for column, classifier, calibrator, features in scoring_plan:
        positive_proba = classifier.predict_proba(features)[:, 1]
        scored[column] = calibrator.predict(positive_proba)

    return scored

# Load models

In [7]:

# Load the fitted classifiers and their calibrators from joblib artifacts in the working directory.
# NOTE(review): joblib.load unpickles the file contents, which can execute arbitrary code —
# only load artifacts produced by a trusted training run.

# XGBoost model and its calibrator.
# Fix: the previous call passed 'wb' as a second positional argument. joblib.load's second
# parameter is mmap_mode, and 'wb' (a file-open mode) is not a valid memmap mode, so it is dropped.
xgboost_model = joblib.load('final_xgboost_model.joblib')

xgboost_model_cal = joblib.load('xgboost_isotonic_calibrator.joblib')

#-------------------------------------------------------------------------------------------------------------

# Baseline logistic model and its calibrator.
base_model = joblib.load('final_logistic_model.joblib')

base_model_cal = joblib.load('lr_isotonic_calibrator.joblib')

# Logistic model trained with weights (per the artifact name) and its calibrator.
weighted_model = joblib.load('final_logistic_model_weights.joblib')

weighted_model_cal = joblib.load('lr_isotonic_calibrator_weights.joblib')

# Logistic model trained with undersampling (per the artifact name) and its calibrator.
under_model = joblib.load('final_logistic_model_undersampling.joblib')

under_model_cal = joblib.load('lr_isotonic_calibrator_undersampling.joblib')


# Load Data

Load the necessary data for performance validation.

In [8]:
# Read the validation extract and shape it in a single chain:
#   - 'funded_amnt' is exposed as 'loan_amnt' (the name listed in xgboost_training_vars),
#   - 'd_vintage' is parsed to datetime,
#   - 'int_rate' is removed from the frame.
df = (
    pd.read_csv('df_total_woe_validations.csv')
    .rename(columns={'funded_amnt': 'loan_amnt'})
    .assign(d_vintage=lambda frame: pd.to_datetime(frame['d_vintage']))
    .drop(columns=['int_rate'])
)

print('Dimension of the data:', df.shape)

Dimension of the data: (2260698, 33)


# Calculating the scores

In [9]:
# Score the full dataset once with all four calibrated models.
df_with_scores = calculate_model_scores(df)

# Keep vintages from 2016-07-01 onward.
# .copy() makes df_filter an independent frame, so adding the column below is a well-defined
# write (no SettingWithCopyWarning, no ambiguity about whether a view of df_with_scores is touched).
df_filter = df_with_scores[df_with_scores['d_vintage'] >= pd.to_datetime('2016-07-01')].copy()

# Month bucket: first day of the vintage month at midnight.
# Vectorized replacement for the row-wise `.apply(lambda x: pd.Timestamp(year, month, 1))`,
# which built one Timestamp per row in Python.
df_filter["m_vintage"] = df_filter["d_vintage"].dt.to_period("M").dt.to_timestamp()

print('Dimension filtered data', df_filter.shape)

Dimension filtered data (1141486, 38)


## Performance Validation Over Time


In [25]:
# Performance sample: rows with a non-null target whose vintage date is on or before 2018-06-01.
has_outcome = df_filter['target'].notna()
within_window = df_filter['d_vintage'] <= pd.to_datetime('2018-06-01')
df_filter_target = df_filter[has_outcome & within_window]

In [26]:

# Calibrated score columns produced by calculate_model_scores, one per model.
score_cols = [
    f'{variant}_score'
    for variant in ('baseline', 'weighted', 'undersampled', 'xgboost')
]

# Monthly metrics on the performance sample, grouped by the month bucket.
results = mc.run_temporal_analysis(
    df_filter_target,
    score_cols,
    target_col='target',
    date_col='m_vintage',
)

# Show the AUC plot first, then the KS plot.
for plot_key in ('auc_plot', 'ks_plot'):
    results[plot_key].show()

Calculating AUC by month...
Calculating KS by month...
Calculating Brier score by month...
Calculating O/E ratio by month...
Calculating ECE by month...
Calculating PSI over time...
Creating plots...


In [14]:
# List the entries returned by mc.run_temporal_analysis (per-month tables and plots).
results.keys()

dict_keys(['auc_by_month', 'ks_by_month', 'brier_by_month', 'oe_ratio_by_month', 'ece_by_month', 'psi_by_month', 'auc_plot', 'ks_plot', 'brier_plot', 'oe_plot', 'ece_plot', 'psi_plot', 'calibration_curve', 'score_distribution_plots'])

In [34]:
# Display the monthly PSI table (one PSI column per score column).
results['psi_by_month']

Unnamed: 0,month_year,baseline_score_psi,weighted_score_psi,undersampled_score_psi,xgboost_score_psi
0,2016-08,0.0021,0.001876,0.001097,0.002079
1,2016-09,0.00936,0.009181,0.008988,0.007551
2,2016-10,0.026863,0.026642,0.028967,0.027767
3,2016-11,0.015961,0.015971,0.017398,0.017547
4,2016-12,0.007405,0.00715,0.007933,0.006429
5,2017-01,0.025176,0.025507,0.025899,0.023427
6,2017-02,0.030371,0.030548,0.032634,0.026842
7,2017-03,0.033522,0.03353,0.035816,0.033024
8,2017-04,0.029146,0.028865,0.028307,0.027271
9,2017-05,0.024746,0.024544,0.02517,0.023484


# Temporal Stability

In [36]:
# Stability helpers live in local modules.
import stability_utils as stu
import stability_plots as stp

# Re-import both modules so edits to their .py files are picked up without restarting the kernel.
from importlib import reload

stu, stp = reload(stu), reload(stp)

# Column holding the month bucket used as the time axis.
period_col = "m_vintage"

# One row per model: (display name, score column, line colour from the thesis palette).
_MODEL_TABLE = [
    ("Baseline", "baseline_score", "#1560bd"),
    ("Weighted", "weighted_score", "#75caed"),
    ("Undersampled", "undersampled_score", "#8B7EC8"),
    ("XGBoost", "xgboost_score", "#d62728"),
]

# Display name -> score column.
model_scores = {label: column for label, column, _ in _MODEL_TABLE}

# Display name -> colour.
model_colors = {label: colour for label, _, colour in _MODEL_TABLE}

# Reference period for PSI and the fixed deciles; None lets the downstream code
# pick the earliest period (a specific value such as "2022-01" can be set instead).
reference_period = None

## PSI

In [37]:
# Settings shared by every model's PSI computation.
psi_settings = dict(
    df=df_filter,
    period_col=period_col,
    reference_period=reference_period,  # None => earliest period
    n_bins=10,
    clip_perc=1e-6,
    dropna_scores=True,
)

# One PSI table per model, each tagged with the model's display name.
psi_list = []
for model_name, score_col in model_scores.items():
    psi_df, details_by_p, edges, ref_used = stu.compute_psi_over_time(
        score_col=score_col,
        **psi_settings,
    )
    psi_df = psi_df.assign(Model=model_name)
    psi_list.append(psi_df)

# Long-format table covering all models.
psi_all = pd.concat(psi_list, ignore_index=True)

fig_psi = stp.plot_psi_over_time(
    psi_df=psi_all,
    period_col="period",
    psi_col="psi",
    model_col="Model",
    title="Score Stability (PSI) Over Time",
    colors=model_colors,  # keys must match the values in the "Model" column
    thresholds=(0.10, 0.25),
    width=1100,
    height=500,
)

axis_titles = {"xaxis_title": "Month-Period", "yaxis_title": "PSI Value"}
fig_psi.update_layout(**axis_titles)

fig_psi.show()

## Distribution

In [137]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def sorted_periods_unique(values):
    """
    Return the distinct non-null entries of ``values`` as a sorted list.

    Entries are ordered by ``pd.to_datetime(str(entry))`` when that works for all of
    them; if that attempt raises, the entries are ordered with a plain ``sorted``.
    """
    distinct = pd.Series(values).dropna().unique()

    def _as_timestamp(entry):
        return pd.to_datetime(str(entry))

    try:
        ordered = sorted(distinct, key=_as_timestamp)
    except Exception:
        # Not date-like: fall back to the values' natural ordering.
        ordered = sorted(distinct)
    return ordered

# 1) Determine the reference period
# An explicit reference_period wins. Otherwise take the earliest period found in psi_all["period"]
# when that table exists and has non-null periods, else the earliest value of df_filter[period_col].
# NOTE(review): ref_period_used is later compared with df_filter[period_col]; this assumes
# psi_all["period"] holds values of the same type as that column — confirm.
if reference_period is not None:
    ref_period_used = reference_period
else:
    psi_periods_available = (
        "psi_all" in globals()
        and hasattr(psi_all, "columns")
        and "period" in psi_all.columns
        and not psi_all["period"].dropna().empty
    )
    period_source = psi_all["period"] if psi_periods_available else df_filter[period_col]
    ref_period_used = sorted_periods_unique(period_source)[0]

# 2) Define the order of models for the grid (must match keys in model_scores)
models_order = ["Baseline", "Weighted", "Undersampled", "XGBoost"]

# Colours used for the subplot titles (thesis palette), keyed by model display name.
model_colors = {
    "Baseline": "#1560bd",
    "Weighted": "#75caed",
    "Undersampled": "#8B7EC8",
    "XGBoost": "#d62728",
}

# Overlay controls
show_all_boundaries = True         # set to False to only show the first boundary
highlight_first_boundary = True    # thicker solid line for the first boundary

# 3) Build the 2x2 grid, one subplot per model in models_order
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=models_order,
    horizontal_spacing=0.08,
    vertical_spacing=0.12
)

# 4) Common decile palette (consistent across subplots)
decile_levels = list(range(1, 10 + 1))
base = px.colors.diverging.RdYlGn[::-1]  # reversed colour scale
# Pick len(decile_levels) evenly spaced positions along the scale, rounded to valid indices.
color_idx = np.linspace(0, len(base) - 1, num=len(decile_levels)).round().astype(int)
# One colour per decile. The list always has exactly len(decile_levels) entries because
# color_idx does, so the former "pad the palette if too short" branch could never run
# and has been removed.
decile_colors = [base[i] for i in color_idx]

# 5) Iterate models and add stacked-area traces per decile in each quadrant
def _add_reference_line(figure, x_values, level, row, col,
                        width=1.2, dash="dot", color="rgba(0,0,0,0.55)"):
    """Add a flat line at height ``level`` spanning ``x_values`` to one subplot (no hover, no legend)."""
    figure.add_trace(
        go.Scatter(
            x=x_values,
            y=[float(level)] * len(x_values),
            mode="lines",
            line=dict(color=color, width=width, dash=dash),
            hoverinfo="skip",
            showlegend=False,
            name=None
        ),
        row=row, col=col
    )


# Line styles for the reference boundaries.
_DEFAULT_BOUNDARY_STYLE = dict(width=1.2, dash="dot", color="rgba(0,0,0,0.55)")
_HIGHLIGHT_BOUNDARY_STYLE = dict(width=2.0, dash="solid", color="rgba(0,0,0,0.85)")

for m_idx, model_name in enumerate(models_order):
    # Fill the 2x2 grid row by row (1-based subplot coordinates).
    row, col = (pos + 1 for pos in divmod(m_idx, 2))

    # Score column for this model
    score_col = model_scores[model_name]

    # Fixed bin edges taken from this model's scores in the reference period
    in_reference = df_filter[period_col] == ref_period_used
    edges = stu.get_fixed_bin_edges_from_reference(
        reference_series=df_filter.loc[in_reference, score_col],
        n_bins=10,
        method="quantile"
    )

    # Decile distribution (proportions by period) using those fixed edges
    decile_df = stu.compute_decile_distribution_by_period(
        df=df_filter,
        score_col=score_col,
        period_col=period_col,
        bin_edges=edges,
        decile_col_name="decile",
        normalize=True,
        dropna_scores=True
    )

    # Chronological x-axis shared by every trace of this subplot
    periods_sorted = sorted_periods_unique(decile_df[period_col])

    # One stacked trace per decile, added in decile order
    for decile in decile_levels:
        decile_rows = decile_df[decile_df["decile"] == decile][[period_col, "proportion"]].copy()
        # Start from zeros over the full period list, then fill in the observed proportions
        shares = pd.Series(0.0, index=pd.Index(periods_sorted, name=period_col))
        if not decile_rows.empty:
            aligned = decile_rows.set_index(period_col).reindex(periods_sorted)
            shares.loc[aligned.index] = aligned["proportion"].fillna(0.0).values

        area_trace = go.Scatter(
            x=periods_sorted,
            y=shares.values,
            name=f"Decile {decile}",
            mode="lines",
            line=dict(width=0.8, color=decile_colors[decile - 1]),
            stackgroup=f"group{m_idx}",   # stack within this subplot only
            groupnorm="fraction",         # ensures stacks sum to 1
            showlegend=False              # keep grid clean
        )
        fig.add_trace(area_trace, row=row, col=col)

    # --- Overlay: fixed cumulative decile boundaries from the reference period ---
    ref_slice = decile_df.loc[decile_df[period_col] == ref_period_used, ["decile", "proportion"]].copy()
    if ref_slice.empty:
        # Fallback: evenly spaced boundaries at 10%, 20%, ..., 90%
        ylines = np.linspace(0.1, 0.9, 9)
    else:
        reference_shares = (
            ref_slice.set_index("decile")
            .reindex(decile_levels)["proportion"]  # ensure 1..10 order
            .fillna(0.0)
            .sort_index()
        )
        ylines = reference_shares.cumsum().values[:-1]  # exclude the top boundary

    # Lines are added AFTER the area traces so they are drawn on top.
    # The first boundary is optionally highlighted; the rest are drawn only when requested.
    boundaries = ylines if show_all_boundaries else ylines[:1]
    for position, boundary in enumerate(boundaries):
        if position == 0 and highlight_first_boundary:
            style = _HIGHLIGHT_BOUNDARY_STYLE
        else:
            style = _DEFAULT_BOUNDARY_STYLE
        _add_reference_line(fig, periods_sorted, boundary, row, col, **style)
    # --- End overlay ---

    # Axes and styling per subplot
    frame_style = dict(showline=True, linewidth=1, linecolor="black", mirror=True, gridcolor="lightgrey")
    fig.update_xaxes(row=row, col=col, title_text="Booked Month", **frame_style)
    fig.update_yaxes(
        title_text="Proportion", range=[0, 1], row=row, col=col,
        tickformat=".0%",
        **frame_style
    )

# 6) Global layout (white theme) + color the subplot titles with model colors
layout_settings = {
    "title_text": f"Decile Distribution by Period (Stacked Area) — Reference: {ref_period_used}",
    "title_x": 0.5,
    "plot_bgcolor": "white",
    "paper_bgcolor": "white",
    "width": 1300,
    "height": 800,
}
fig.update_layout(**layout_settings)

# The first len(models_order) annotations are treated as the subplot titles, in models_order
# order; pairing with zip leaves any further annotations untouched.
for m, ann in zip(models_order, fig.layout.annotations):
    ann.font.color = model_colors.get(m, "black")

fig.show()