In [4]:
import warnings
import datetime as dt
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import re
import string 
import copy
import seaborn as sns 
import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency

# Scorecard Modelling:
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.calibration import calibration_curve
from sklearn.linear_model import LogisticRegression
from optbinning import Scorecard
from optbinning import BinningProcess

# Extras:
import pickle
from scipy import stats
from typing import Tuple
from typing import Union

In [5]:
def missing_zero_values_table(df):
    """
    Data-audit helper: per-column counts of zeros and missing values.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to audit.

    Returns
    -------
    pd.DataFrame
        One row per column of ``df`` with the columns 'No of 0s', '% of 0s',
        'No of Missing Values', '% of Missing Values', 'Rows',
        'No of Unique' and 'Data Type'.
        NOTE: the two '%' columns hold fractions of the row count (0-1),
        not values scaled to 0-100, and the missing-value fraction is
        rounded to one decimal place.

    Side effects
    ------------
    Prints the shape of ``df`` and how many columns contain at least one
    missing value.
    """
    n_rows = len(df)

    # Build the zero counts once and reuse them for the fraction.
    zero_val = (df == 0.00).astype(int).sum(axis=0)
    zero_val_percent = zero_val / n_rows

    mis_val = df.isnull().sum()
    mis_val_percent = round(mis_val / n_rows, 1)

    mz_table = pd.concat([zero_val, zero_val_percent, mis_val, mis_val_percent], axis=1)
    mz_table = mz_table.rename(
        columns={
            0: 'No of 0s',
            1: '% of 0s',
            2: 'No of Missing Values',
            3: '% of Missing Values',
        }
    )

    mz_table['Rows'] = n_rows
    mz_table['No of Unique'] = df.nunique()
    mz_table['Data Type'] = df.dtypes

    # Count only the columns that really have missing values; the table has
    # one row per column, so its length is NOT that number.
    n_cols_with_missing = int((mis_val > 0).sum())

    print("Your selected dataframe has " + str(df.shape[1]) + " columns and " + str(df.shape[0]) + " Rows.\n"
          "There are " + str(n_cols_with_missing) +
          " columns that have missing values.")

    return mz_table


In [33]:
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve, auc
from optbinning.scorecard import plot_auc_roc, plot_cap, plot_ks
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay


def plot_all_metrics(y_train, y_test, train_pred, test_pred, model, X_train, X_test):
    """
    Draw a 5x2 grid of diagnostics with the training set on the left and the
    test set on the right: AUC-ROC, CAP, KS, Precision-Recall and confusion
    matrix (one row each), then show the figure.

    Parameters:
    - y_train (array-like): True labels for the training set.
    - y_test (array-like): True labels for the test set.
    - train_pred (array-like): Predictions for the training set (used by rows 1-4).
    - test_pred (array-like): Predictions for the test set (used by rows 1-4).
    - model: Fitted model exposing ``predict_proba``; used only for the confusion matrices.
    - X_train (array-like): Training features passed to ``model.predict_proba``.
    - X_test (array-like): Test features passed to ``model.predict_proba``.

    Returns nothing; the figure is rendered with ``plt.show()``.
    """
    fig, axs = plt.subplots(5, 2, figsize=(15, 25))

    # Rows 0-2: the optbinning plotters are called without an axes argument,
    # so the target panel is made the current pyplot axes before each call.
    curve_rows = (
        (plot_auc_roc, 'AUC-ROC for Training Set', 'AUC-ROC for Test Set'),
        (plot_cap, 'CAP for Training Set', 'CAP for Test Set'),
        (plot_ks, 'KS for Training Set', 'KS for Test Set'),
    )
    for row, (plotter, train_title, test_title) in enumerate(curve_rows):
        per_split = ((y_train, train_pred, train_title), (y_test, test_pred, test_title))
        for col, (labels, preds, title) in enumerate(per_split):
            panel = axs[row, col]
            plt.sca(panel)
            plotter(labels, preds)
            panel.set_title(title)

    # Row 3: precision-recall curves, annotated with the area under the curve.
    pr_panels = (
        (axs[3, 0], y_train, train_pred, 'Precision-Recall Curve for Training Set'),
        (axs[3, 1], y_test, test_pred, 'Precision-Recall Curve for Test Set'),
    )
    for panel, labels, preds, title in pr_panels:
        plt.sca(panel)
        precision, recall, _ = precision_recall_curve(labels, preds)
        pr_area = auc(recall, precision)
        panel.plot(recall, precision, label=f"PR AUC = {pr_area:.2f}")
        panel.set_title(title)
        panel.set_xlabel('Recall')
        panel.set_ylabel('Precision')
        panel.legend()

    # Row 4: confusion matrices at a fixed 0.5 cut-off on the model's
    # positive-class probability. Test set goes on the right panel and is
    # computed first, training set on the left panel.
    threshold = 0.5
    class_names = ['Non-Default', 'Default']
    cm_panels = (
        (axs[4, 1], X_test, y_test, 'Confusion Matrix for Test Set'),
        (axs[4, 0], X_train, y_train, 'Confusion Matrix for Training Set'),
    )
    for panel, features, labels, title in cm_panels:
        plt.sca(axs[4, 0])
        positive_proba = model.predict_proba(features)[:, 1]
        hard_labels = (positive_proba >= threshold).astype(int)
        matrix = confusion_matrix(labels, hard_labels)
        ConfusionMatrixDisplay(matrix, display_labels=class_names).plot(cmap='Blues', ax=panel)
        panel.set_title(title)

    # Keep titles and tick labels from overlapping, then render.
    plt.tight_layout()
    plt.show()


In [1]:
# import pandas as pd
# import matplotlib.pyplot as plt
# # from sklearn.metrics import brier_score_loss, calibration_curve
# from sklearn.linear_model import LogisticRegression
# from sklearn.calibration import CalibratedClassifierCV
# from sklearn.preprocessing import StandardScaler
# from sklearn.compose import ColumnTransformer
# from sklearn.pipeline import Pipeline
# import category_encoders as ce

# def calibration_analysis(X_train, X_test, y_train, y_test, scorecard_model):
#     """
#     Perform calibration analysis using Platt scaling and isotonic regression.

#     Parameters:
#     - X_train (DataFrame): Training feature set.
#     - X_test (DataFrame): Test feature set.
#     - y_train (Series): Training target variable.
#     - y_test (Series): Test target variable.
#     - scorecard_model: A fitted scorecard model to compare non-calibrated results.

#     Returns:
#     - Plots calibration curves and prints Brier scores.
#     """

#     # Identify categorical and numerical features
#     categorical_features = X_train.select_dtypes(include=['object', 'category']).columns.tolist()
#     numerical_features = X_train.select_dtypes(include=['float64', 'int64']).columns.tolist()

#     # Logistic regression estimator
#     estimator = LogisticRegression(solver="lbfgs", class_weight='balanced')

#     # Preprocessing pipeline for numerical and categorical columns
#     preprocessor = ColumnTransformer(
#         transformers=[
#             ('num', StandardScaler(), numerical_features),
#             ('cat', ce.TargetEncoder(cols=categorical_features), categorical_features)
#         ])

#     # Logistic regression pipeline
#     logreg_pipeline = Pipeline(steps=[
#         ('preprocessor', preprocessor),
#         ('classifier', estimator)
#     ])

#     # Non-calibrated predictions from your scorecard model
#     y_train_pred_proba = scorecard_model.predict_proba(X_train)[:, -1]
#     y_test_pred_proba = scorecard_model.predict_proba(X_test)[:, -1]

#     # Brier score for non-calibrated predictions from scorecard
#     non_calibrated_brier = brier_score_loss(y_test, y_test_pred_proba)

#     # Calibrate using Platt scaling
#     platt_model = CalibratedClassifierCV(base_estimator=logreg_pipeline, method='sigmoid')
#     platt_model.fit(X_train, y_train)
#     platt_pred_calibrated = platt_model.predict_proba(X_test)[:, 1]
#     platt_train_pred_proba = platt_model.predict_proba(X_train)[:, 1]

#     # Calibrate using Isotonic regression
#     iso_model = CalibratedClassifierCV(base_estimator=logreg_pipeline, method='isotonic')
#     iso_model.fit(X_train, y_train)
#     iso_pred_calibrated = iso_model.predict_proba(X_test)[:, 1]

#     # Calculate Brier scores for calibrated predictions
#     platt_brier = brier_score_loss(y_test, platt_pred_calibrated)
#     iso_brier = brier_score_loss(y_test, iso_pred_calibrated)

#     # Print Brier scores
#     print(f'Non-Calibrated Brier Score (Scorecard): {non_calibrated_brier:.4f}')
#     print(f'Platt Scaling Brier Score: {platt_brier:.4f}')
#     print(f'Isotonic Regression Brier Score: {iso_brier:.4f}')

#     # Calibration curve using the correct test labels
#     prob_true, prob_pred_non_calibrated = calibration_curve(y_test, y_test_pred_proba, n_bins=10)
#     prob_true_calibrated_platt, prob_pred_calibrated_platt = calibration_curve(y_test, platt_pred_calibrated, n_bins=10)
#     prob_true_calibrated_isotonic, prob_pred_calibrated_isotonic = calibration_curve(y_test, iso_pred_calibrated, n_bins=10)

#     # Plotting calibration curves with Brier score annotations
#     plt.figure(figsize=(10, 6))
#     plt.plot(prob_pred_non_calibrated, prob_true, marker='o', label=f'Non-calibrated (Scorecard, Brier: {non_calibrated_brier:.3f})', color='blue')
#     plt.plot(prob_pred_calibrated_platt, prob_true_calibrated_platt, marker='o', label=f'Platt Calibrated (Brier: {platt_brier:.3f})', color='green')
#     plt.plot(prob_pred_calibrated_isotonic, prob_true_calibrated_isotonic, marker='o', label=f'Isotonic Calibrated (Brier: {iso_brier:.3f})', color='red')

#     plt.plot([0, 1], [0, 1], linestyle='--', color='black')  # Perfect calibration line

#     plt.xlabel('Predicted Probability')
#     plt.ylabel('True Probability')
#     plt.title('Calibration Plot: Scorecard vs Platt & Isotonic Calibration')
#     plt.legend()
#     plt.grid()
#     plt.show()

#     return platt_model

In [2]:
from sklearn.calibration import CalibratedClassifierCV, calibration_curve
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
import category_encoders as ce
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import brier_score_loss, roc_auc_score, precision_score, recall_score
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

def gini_coefficient(y_true, y_scores):
    """Return the Gini coefficient, computed as ``2 * ROC AUC - 1``."""
    return 2 * roc_auc_score(y_true, y_scores) - 1

def cap(y_true, y_scores, n_bins=10):
    """
    Calculate the Cumulative Accuracy Profile (CAP).

    Rows are ordered by descending score and the running share of captured
    positives is returned for every row.

    Parameters:
    - y_true (Series / array-like): Binary labels. A pandas Series is used
      as-is (its index is kept in the result); lists and arrays are wrapped
      in a Series so positional indexing works for them too.
    - y_scores (array-like): Scores, higher meaning more likely positive.
    - n_bins (int): Only used for the degenerate "no positives" case, where
      an all-zero array of this length is returned.

    Returns:
    - pd.Series with one value per row (cumulative positives / total
      positives), or ``np.zeros(n_bins)`` when ``y_true`` has no positives.
      NOTE: the two cases have different lengths; callers should not assume
      the result always has ``n_bins`` entries.
    """
    # Accept lists / ndarrays as well as Series (the original required a
    # Series because it relied on .sum() and .iloc).
    labels = y_true if isinstance(y_true, pd.Series) else pd.Series(np.asarray(y_true))
    scores = np.asarray(y_scores)

    total_positive = labels.sum()
    if total_positive == 0:
        return np.zeros(n_bins)  # Avoid division by zero if no positives

    # Positions of the rows from highest to lowest score.
    descending = np.argsort(scores)[::-1]

    # .iloc: the order is positional, independent of the Series index.
    captured = labels.iloc[descending].cumsum()
    return captured / total_positive

def calibration_analysis(X_train, X_test, y_train, y_test, scorecard_model):
    """
    Compare the scorecard's raw probabilities with two calibrated
    logistic-regression pipelines (Platt / sigmoid and isotonic).

    Parameters:
    - X_train (DataFrame): Training feature set.
    - X_test (DataFrame): Test feature set.
    - y_train (Series): Training target variable.
    - y_test (Series): Test target variable.
    - scorecard_model: A fitted model exposing ``predict_proba``; its last
      probability column is used as the non-calibrated baseline.

    Side effects:
    - Shows a calibration plot for the three models on the test set, with
      each model's Brier score in the legend.
    - Displays a table with AUC-ROC, precision, recall (both at a 0.5
      cut-off) and Gini for train and test.

    Returns:
    - None.
    """
    threshold = 0.5  # cut-off used for precision / recall
    model_names = ['Scorecard', 'Platt', 'Isotonic']

    # Only these dtypes are routed to a transformer.
    categorical_features = X_train.select_dtypes(include=['object', 'category']).columns.tolist()
    numerical_features = X_train.select_dtypes(include=['float64', 'int64']).columns.tolist()

    # Preprocessing: scale numeric columns, target-encode categorical ones.
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numerical_features),
            ('cat', ce.TargetEncoder(cols=categorical_features), categorical_features)
        ])

    estimator = LogisticRegression(solver="lbfgs", class_weight='balanced')

    logreg_pipeline = Pipeline(steps=[
        ('preprocessor', preprocessor),
        ('classifier', estimator)
    ])

    # The pipeline is not fitted on its own here: CalibratedClassifierCV
    # (with its default cross-validation) clones the estimator and fits the
    # clones itself, so a separate fit would be discarded.
    platt_model = CalibratedClassifierCV(estimator=logreg_pipeline, method='sigmoid')
    platt_model.fit(X_train, y_train)

    iso_model = CalibratedClassifierCV(estimator=logreg_pipeline, method='isotonic')
    iso_model.fit(X_train, y_train)

    # Predict once per model and split, then reuse for every metric.
    train_proba = {
        'Scorecard': scorecard_model.predict_proba(X_train)[:, -1],
        'Platt': platt_model.predict_proba(X_train)[:, 1],
        'Isotonic': iso_model.predict_proba(X_train)[:, 1],
    }
    test_proba = {
        'Scorecard': scorecard_model.predict_proba(X_test)[:, -1],
        'Platt': platt_model.predict_proba(X_test)[:, 1],
        'Isotonic': iso_model.predict_proba(X_test)[:, 1],
    }

    # Calibration curves on the test set, Brier score shown in the legend.
    curve_specs = [
        ('Scorecard', 'Non-calibrated (Scorecard, Brier: {:.3f})', 'blue'),
        ('Platt', 'Platt Calibrated (Brier: {:.3f})', 'green'),
        ('Isotonic', 'Isotonic Calibrated (Brier: {:.3f})', 'red'),
    ]
    plt.figure(figsize=(10, 6))
    for name, label_template, color in curve_specs:
        brier = brier_score_loss(y_test, test_proba[name])
        prob_true, prob_pred = calibration_curve(y_test, test_proba[name], n_bins=10)
        plt.plot(prob_pred, prob_true, marker='o', label=label_template.format(brier), color=color)

    plt.plot([0, 1], [0, 1], linestyle='--', color='black')  # Perfect calibration line

    plt.xlabel('Predicted Probability')
    plt.ylabel('True Probability')
    plt.title('Calibration Plot: Scorecard vs Platt & Isotonic Calibration')
    plt.legend()
    plt.grid()
    plt.show()

    def _hard_labels(proba):
        # Binarise probabilities at the shared cut-off.
        return (proba >= threshold).astype(int)

    metric_fns = (
        ('AUC-ROC', lambda y, p: roc_auc_score(y, p)),
        ('Precision', lambda y, p: precision_score(y, _hard_labels(p))),
        ('Recall', lambda y, p: recall_score(y, _hard_labels(p))),
        ('Gini', lambda y, p: gini_coefficient(y, p)),
    )
    splits = (('Train', y_train, train_proba), ('Test', y_test, test_proba))

    # Column order: for each metric, Train then Test.
    # CAP is left out of the table: cap() yields a per-row curve, not a scalar.
    metrics = {'Model': model_names}
    for metric_name, metric_fn in metric_fns:
        for split_name, y_split, proba_by_model in splits:
            metrics[f'{metric_name} {split_name}'] = [
                metric_fn(y_split, proba_by_model[name]) for name in model_names
            ]

    metrics_df = pd.DataFrame(metrics)
    display(metrics_df)



In [8]:
# Label for each credit level, keyed 1 ("Very Poor") through 9 ("Exceptional").
credit_levels_decriptions = dict(
    enumerate(
        (
            "Very Poor",
            "Poor",
            "Below Average",
            "Average",
            "Above Average",
            "Good",
            "Very Good",
            "Excellent",
            "Exceptional",
        ),
        start=1,
    )
)

def get_credit_levels(
    df: pd.DataFrame,
    target_col: str = "credit_score",
    left_bound = -np.inf,
    level_1 = 350,
    level_2 = 400,
    level_3 = 450,
    level_4 = 500,
    level_5 = 550,
    level_6 = 600,
    level_7 = 650,
    level_8 = 700,
    right_bound = np.inf
) -> pd.DataFrame:
    """
    Assign every row a credit level (1-9) and the bounds of its score band.

    Bands are half-open on the left: a score ``s`` belongs to level ``k``
    when ``lower < s <= upper``.

    Args:
        df (pd.DataFrame): Frame holding the credit score. It is modified in
            place (three columns are added / overwritten) and also returned.
        target_col (str): Name of the credit-score column.
        left_bound: Exclusive lower edge of level 1.
        level_1 ... level_8: Inclusive upper edge of levels 1 to 8; each one
            is also the exclusive lower edge of the next level.
        right_bound: Inclusive upper edge of level 9.

    Returns:
        pd.DataFrame: ``df`` itself, with the columns ``credit_level``,
        ``credit_lower_bound`` and ``credit_upper_bound``. Rows that fall in
        no band (for example missing scores) get 0 in all three columns,
        which is ``np.select``'s default.
    """
    scores = df[target_col]

    # Consecutive edges define the nine bands.
    edges = [
        left_bound, level_1, level_2, level_3, level_4,
        level_5, level_6, level_7, level_8, right_bound,
    ]
    lowers = edges[:-1]
    uppers = edges[1:]

    in_band = [(scores > low) & (scores <= high) for low, high in zip(lowers, uppers)]

    df["credit_level"] = np.select(in_band, list(range(1, 10)))
    df["credit_lower_bound"] = np.select(in_band, lowers)
    df["credit_upper_bound"] = np.select(in_band, uppers)
    return df

In [28]:
def roc_auc(y_true: Union[list, np.array], y_pred_proba: Union[list, np.array]) -> float:
    """
    Area under the ROC curve, integrated from the ``roc_curve`` points.

    Args:
        y_true (Union[list, np.array]): True labels.
        y_pred_proba (Union[list, np.array]): Predicted probability of the target class `1`.
    Returns:
        float: ROC AUC score.
    """
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_true, y_pred_proba)
    area = auc(false_pos_rate, true_pos_rate)
    return area

def pr_auc(y_true: Union[list, np.array], y_pred_proba: Union[list, np.array]) -> float:
    """
    Area under the precision-recall curve (recall on x, precision on y).

    Args:
        y_true (Union[list, np.array]): True labels.
        y_pred_proba (Union[list, np.array]): Predicted probability of the target class `1`.
    Returns:
        float: PR AUC score.
    """
    curve = precision_recall_curve(y_true, y_pred_proba)
    precision_values, recall_values = curve[0], curve[1]
    return auc(recall_values, precision_values)

def gini(y_true: Union[list, np.array], y_pred_proba: Union[list, np.array]) -> float:
    """
    Gini coefficient, derived from the ROC curve as ``2 * AUC - 1``.

    Args:
        y_true (Union[list, np.array]): True labels.
        y_pred_proba (Union[list, np.array]): Predicted probability of the target class `1`.
    Returns:
        float: Gini coefficient.
    """
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_true, y_pred_proba)
    return 2 * auc(false_pos_rate, true_pos_rate) - 1

def ks(y_true: Union[list, np.array], y_pred_proba: Union[list, np.array]) -> float:
    """
    Calculate the Kolmogorov-Smirnov (KS) statistic between the predicted
    probabilities of the two classes.

    Args:
        y_true (Union[list, np.array]): True labels (0 = not default, 1 = default).
        y_pred_proba (Union[list, np.array]): Predicted probability of the target class `1`.
    Returns:
        float: KS statistic from a two-sample KS test of the class-0 scores
        against the class-1 scores.

    Note:
        ``scipy.stats.ks_2samp`` needs both samples to be non-empty, so both
        classes must be present in ``y_true`` (presumably it raises
        otherwise - verify against the installed scipy version).
    """
    # Coerce to arrays so boolean masking also works for plain lists and is
    # positional for pandas inputs.
    labels = np.asarray(y_true)
    scores = np.asarray(y_pred_proba)

    scores_not_default = scores[labels == 0]
    scores_default = scores[labels == 1]

    ks_stat, _ = stats.ks_2samp(scores_not_default, scores_default)
    return ks_stat

def plot_calibration_curve(y_true: np.array, y_pred_proba: np.array, model_name: str, figsize: Tuple[int, int], n_bins=10) -> plt.Figure:
    """
    Plot a calibration curve against the perfectly-calibrated diagonal.

    Args:
        y_true (np.array): True binary labels (0 for not default, 1 for default).
        y_pred_proba (np.array): Predicted probabilities for the positive class (default).
        model_name (str): Name of the model for labeling the plot.
        figsize (Tuple[int, int]): size of the plot.
        n_bins (int): Number of bins to use for calibration curve.
    Return:
        plt.Figure: The Matplotlib figure holding the plot (the figure, not
        the axes, is what this function returns).

    Note:
        ``plt.style.use("fivethirtyeight")`` is applied globally, so it also
        affects figures created after this call.
    """
    prob_true, prob_pred = calibration_curve(y_true, y_pred_proba, n_bins=n_bins)

    plt.style.use("fivethirtyeight")
    fig, ax = plt.subplots(figsize=figsize)

    # Reference diagonal first, then the model's curve.
    ax.plot([0, 1], [0, 1], linestyle="--", label="Perfectly calibrated")
    ax.plot(prob_pred, prob_true, marker="o", label=model_name)

    ax.set_xlabel("Mean predicted probability")
    ax.set_ylabel("Fraction of positives")
    ax.set_title("Calibration plot")
    ax.legend()
    ax.grid(True)

    return fig

def print_side_by_side(dict1: dict, dict2: dict) -> None:
    """
    Prints the content of two dictionaries side by side ("Train" | "Test").

    Rows follow the key order of ``dict1``; keys that only exist in
    ``dict2`` are appended afterwards. A key missing from one dictionary is
    shown as ``N/A`` in that column. Float values are rounded to 2 decimals.

    Args:
        dict1 (dict): The first dictionary to be printed (left column).
        dict2 (dict): The second dictionary to be printed (right column).

    Returns:
        None
    """
    missing_marker = "N/A"

    def _printable(source: dict, key):
        # Value to place in a 10-wide, left-aligned cell.
        if key not in source:
            return missing_marker
        value = source[key]
        if isinstance(value, float):
            value = round(value, 2)
        try:
            format(value, "<10")
        except (TypeError, ValueError):
            # Objects such as None reject a width spec; show their str() form.
            value = str(value)
        return value

    # Union of keys, dict1 order first, so nothing from dict2 is dropped.
    keys = list(dict1) + [key for key in dict2 if key not in dict1]

    # default=0 keeps empty dictionaries from raising in max().
    max_key_len = max((len(str(key)) for key in keys), default=0)

    # Define the format string for printing
    format_str = "{:<{key_len}}: {:<10} | {:<10}"

    # Print header
    print(format_str.format("Metric", "Train", "Test", key_len=max_key_len))

    # Print separator
    print("-" * (max_key_len + 24))

    # Print key-value pairs side by side
    for key in keys:
        print(format_str.format(
            str(key),
            _printable(dict1, key),
            _printable(dict2, key),
            key_len=max_key_len,
        ))


In [19]:
def plot_credit_score_distributions(data, features):
    """
    Plot distributions of credit score by a list of categorical features.

    One box plot per feature is drawn on a grid that is two panels wide (a
    single panel when only one feature is given). Panels left over on the
    last row are hidden. Nothing is drawn when ``features`` is empty.

    Parameters:
    - data: DataFrame containing the data; must have a 'credit_score' column.
    - features: List of strings representing the categorical features to plot.
    """
    features = list(features)
    if not features:
        # A grid with zero rows cannot be created; there is nothing to show.
        return

    num_features = len(features)
    # Calculate the number of rows/columns needed for the subplots
    nrows = (num_features + 1) // 2
    ncols = 2 if num_features > 1 else 1

    # squeeze=False always yields a 2-D array, so one code path covers the
    # single-panel and multi-panel layouts.
    f, axes = plt.subplots(nrows, ncols, figsize=(18, 6 * nrows), squeeze=False)
    axes = axes.ravel()

    # Plot distributions of credit score by specified features
    for ax, feature in zip(axes, features):
        sns.boxplot(x=feature, y='credit_score', data=data, ax=ax)
        ax.set_title(f'Credit Score by {feature.capitalize()}')
        # Rotate the tick labels without re-assigning the label texts.
        ax.tick_params(axis='x', labelrotation=45)

    # Hide the spare panel that appears with an odd number of features.
    for spare_ax in axes[num_features:]:
        spare_ax.set_visible(False)

    plt.tight_layout()
    plt.show()