In [1]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import numpy as np

from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    log_loss,
    confusion_matrix
)

import xgboost as xgb
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

import seaborn as sns
from sklearn import model_selection

## Data import

In [8]:
# Dataset selection. Two variants of the same file exist: the default one and a
# "_scaled" one. Flip USE_SCALED instead of commenting code in and out.
USE_SCALED = False

DATA_PATH = (
    "credit_risk_dataset_red_ml_ord_scaled.csv"
    if USE_SCALED
    else "credit_risk_dataset_red_ml_ord.csv"
)

df = pd.read_csv(DATA_PATH)

In [23]:
# Separate the target column from the predictor columns
label = df['loan_status']
features = df.drop(columns='loan_status')

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    features,
    label,
    test_size=0.30,
    random_state=42,
)

# Aliases for the complete dataset (every row, train and test together)
x_all, y_all = features, label

## Auxiliary functions

In [5]:
def create_prediction_df(model, X, y):
    """
    Build a DataFrame of true labels, predicted labels, and prediction confidence.

    Confidence is the highest class probability returned by
    model.predict_proba for each row.

    Parameters:
    - model: A trained estimator (must have .predict(), ideally .predict_proba())
    - X: Input features (DataFrame or array-like)
    - y: True labels (Series, list, tuple or array-like)

    Returns:
    - pd.DataFrame with columns: ['y_true', 'y_pred', 'confidence'].
      Rows carry y's index when y has a pandas Index; otherwise the default
      integer index is used. 'confidence' is NaN for every row when the model
      has no predict_proba.
    """

    # 1. Generate Predictions
    predictions = model.predict(X)

    # 2. Initialize the DataFrame
    # Reuse y's index only when it is a real pandas Index. A plain hasattr check
    # is not enough: lists and tuples expose an unrelated .index() method, and
    # passing that bound method as the DataFrame index raises an error.
    y_index = getattr(y, 'index', None)
    index = y_index if isinstance(y_index, pd.Index) else None
    results_df = pd.DataFrame({
        'y_true': y,
        'y_pred': predictions
    }, index=index)

    # 3. Calculate Confidence (if supported)
    if hasattr(model, "predict_proba"):
        # Probabilities for all classes, one row per sample
        probas = model.predict_proba(X)

        # The largest probability in each row is the confidence for that sample
        results_df['confidence'] = np.max(probas, axis=1)
    else:
        # Fallback for models that don't support probabilities (e.g., SVM without probability=True)
        results_df['confidence'] = np.nan
        print("Warning: Model does not support 'predict_proba'. Confidence set to NaN.")

    return results_df

In [16]:
def evaluate_classification(model, X, y):
    """
    Evaluate a fitted binary classifier: print and return its metrics and
    per-row prediction confidence.

    Parameters:
    - model: The trained classification model (must have a .predict() method).
      .predict_proba() or, failing that, .decision_function() is used for the
      ROC AUC when available.
    - X: The features to predict on (DataFrame or array-like).
    - y: The true labels. Precision/recall/F1 use scikit-learn's binary
      defaults, so the positive class is expected to be labelled 1.

    Returns:
    - metrics: A dictionary with keys 'accuracy', 'Precision', 'Recall',
      'F1-Score', 'confusion_matrix' and 'roc_auc' ('roc_auc' is NaN when the
      model exposes neither predict_proba nor decision_function).
    - report_df: A pandas DataFrame containing precision, recall, f1-score, and support per class.
    - prediction_df: The result of create_prediction_df(model, X, y).
    - df_confidence: The features of X with a 'confidence' column appended.
    """
    # 1. Generate Predictions
    y_pred = model.predict(X)

    # 2. Calculate headline metrics (binary averaging for precision/recall/F1)
    accuracy = accuracy_score(y, y_pred)
    precision = precision_score(y, y_pred)
    recall = recall_score(y, y_pred)
    f1 = f1_score(y, y_pred)

    # 3. Per-class Precision, Recall, F1, and Support
    # Passing the model's own class list keeps the per-class arrays the same
    # length as the row labels even when a class is absent from y and y_pred.
    labels = getattr(model, "classes_", None)
    precision_l, recall_l, f1_l, support_l = precision_recall_fscore_support(
        y, y_pred, labels=labels
    )

    # 4. Create a DataFrame for per-class metrics
    # Row names are the model's classes when known, otherwise simple indices
    row_names = labels if labels is not None else list(range(len(precision_l)))
    report_df = pd.DataFrame({
        'Precision': precision_l,
        'Recall': recall_l,
        'F1-Score': f1_l,
        'Support': support_l
    }, index=row_names)

    # 5. Calculate Confusion Matrix (same class order as the report rows)
    conf_matrix = confusion_matrix(y, y_pred, labels=labels)

    # 6. ROC AUC Score
    # Measures ability to distinguish between classes. 1.0 is perfect, 0.5 is random guessing.
    # Needs a continuous score per row: prefer the probability of the second
    # class, fall back to decision_function, otherwise report NaN.
    if hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X)[:, 1]
    elif hasattr(model, "decision_function"):
        y_score = model.decision_function(X)
    else:
        y_score = None

    if y_score is None:
        roc_auc = float("nan")
        print("Warning: Model supports neither 'predict_proba' nor 'decision_function'. ROC AUC set to NaN.")
    else:
        roc_auc = roc_auc_score(y, y_score)

    # Bundle everything into a dictionary
    metrics = {
        "accuracy": accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1,
        "confusion_matrix": conf_matrix,
        "roc_auc": roc_auc
    }

    print(report_df)

    # The confusion matrix is one of the entries, so it is printed here too
    for key, value in metrics.items():
        print(key, value)

    prediction_df = create_prediction_df(model, X, y)

    # Attach confidence to the features by row position: wrap array-like X in a
    # DataFrame and re-index the confidence values onto X's index, so differing
    # indexes on X and y cannot misalign rows.
    if isinstance(X, pd.DataFrame):
        features_df = X
    else:
        features_df = pd.DataFrame(X, index=prediction_df.index)
    confidence = pd.Series(
        prediction_df['confidence'].to_numpy(),
        index=features_df.index,
        name='confidence',
    )
    df_confidence = pd.concat([features_df, confidence], axis=1)

    return metrics, report_df, prediction_df, df_confidence

In [7]:
def get_confidence_stats(df):
    """
    Summarise prediction confidence for correct vs. incorrect predictions.

    Parameters:
    - df: DataFrame with 'y_true', 'y_pred' and 'confidence' columns
      (the layout produced by create_prediction_df). It is not modified.

    Returns:
    - pd.DataFrame indexed by 'prediction_status' ('Correct' / 'Incorrect')
      with the mean, std, min, max and count of 'confidence' per group.
    """
    # Label every row with one vectorised comparison rather than a row-wise apply
    is_correct = (df['y_true'] == df['y_pred']).to_numpy()
    prediction_status = pd.Series(
        np.where(is_correct, 'Correct', 'Incorrect'),
        index=df.index,
        name='prediction_status',
    )

    # Group the confidence values by that label and compute the statistics
    summary = df['confidence'].groupby(prediction_status).agg(
        ['mean', 'std', 'min', 'max', 'count']
    )

    return summary

In [19]:
def lowest_x_confidence(df, x):
    """
    Return the x rows of df with the smallest 'confidence' values.

    Parameters:
    - df: DataFrame that has a 'confidence' column.
    - x: Number of rows to return.

    Returns:
    - pd.DataFrame with the x least-confident rows, lowest confidence first.
    """
    least_confident = df.nsmallest(n=x, columns='confidence')
    return least_confident

## XGBoost training and evaluation

In [17]:
# Binary logistic XGBoost classifier, seeded for reproducibility
xgb_model = xgb.XGBClassifier(
    objective="binary:logistic",
    random_state=42,
)
xgb_model.fit(x_train, y_train)

# Evaluate on the test split. To score another split, pass
# (x_train, y_train) or (x_all, y_all) instead of (x_test, y_test).
metrics, report_df, prediction_df, df_confidence = evaluate_classification(
    xgb_model,
    x_test,
    y_test,
)

   Precision    Recall  F1-Score  Support
0   0.925729  0.978962  0.951602      713
1   0.897260  0.700535  0.786787      187
accuracy 0.9211111111111111
Precision 0.8972602739726028
Recall 0.7005347593582888
F1-Score 0.7867867867867868
confusion_matrix [[698  15]
 [ 56 131]]
roc_auc 0.9149635118614576
[[698  15]
 [ 56 131]]


In [20]:
# Display the five evaluated rows with the lowest prediction confidence
lowest_x_confidence(df_confidence, x=5)

Unnamed: 0,person_age,person_income,person_emp_length,loan_grade,loan_amnt,loan_int_rate,loan_percent_income,cb_person_default_on_file,cb_person_cred_hist_length,person_home_ownership_MORTGAGE,person_home_ownership_OTHER,person_home_ownership_OWN,person_home_ownership_RENT,loan_intent_DEBTCONSOLIDATION,loan_intent_EDUCATION,loan_intent_HOMEIMPROVEMENT,loan_intent_MEDICAL,loan_intent_PERSONAL,loan_intent_VENTURE,confidence
1351,30,106000,11.0,5,10000,10.99,0.09,0,9,True,False,False,False,False,False,False,False,False,True,0.506864
1569,28,33996,5.0,0,12000,10.99,0.35,0,8,True,False,False,False,False,False,False,False,True,False,0.515431
1442,22,74000,1.0,4,25000,14.38,0.34,1,3,True,False,False,False,False,False,False,False,False,True,0.527062
2659,22,29361,4.0,1,6450,10.99,0.22,0,3,False,False,False,True,False,False,True,False,False,False,0.530956
798,29,56033,7.0,2,16000,14.65,0.29,0,6,True,False,False,False,False,False,False,False,False,True,0.539907
