In [None]:
import pandas as pd
import XPER

from pathlib import Path
from src.utils.models_pkl import load_pickle
from src.modeling.create_data_split import split_data
from config.config_modeling import CAT_COLS

In [None]:
# Location of the input CSV; a relative path, so it resolves against the current working directory.
# NOTE(review): despite its name, OUT_PATH is only read from in this cell.
OUT_PATH = Path("../data/data.csv")
# Load the whole file into a DataFrame.
df = pd.read_csv(OUT_PATH)

In [None]:
# Location of the serialized model; a relative path, so it resolves against the current working directory.
MODEL_PATH = Path("../models/XGB.pkl")
# NOTE(review): load_pickle is a project helper whose body is not visible here; presumably it
# unpickles the file, which can execute arbitrary code — only load model files from a trusted source.
model = load_pickle(MODEL_PATH)

In [None]:
# Show the loaded model's repr as the cell output (quick sanity check of what was loaded).
model

In [None]:
# Split df with the project's split_data helper. The result is indexed further down by
# "train", "val" and "test", and each entry is unpacked as X (index 0) and y (index 1).
# NOTE(review): how CAT_COLS is used inside split_data is not visible from this notebook.
data = split_data(cols=CAT_COLS, df=df)

In [None]:
# Preview element 0 of the "train" entry (the object bound to X_train further down).
data["train"][0]

In [None]:
# Unpack every split into its X / y pair (index 0 and index 1 of each entry), one split per line.
X_train, y_train = data["train"][0], data["train"][1]
X_val, y_val = data["val"][0], data["val"][1]
X_test, y_test = data["test"][0], data["test"][1]

# XPER Values

In [None]:
from XPER.compute.Performance import ModelPerformance

# Bundle the loaded model with the train split and the test split.
# NOTE: this rebinds the notebook-level name `XPER` (imported as a module in the
# first cell) to a ModelPerformance instance; later cells read it under that name.
XPER = ModelPerformance(
    X_train,
    y_train,
    X_test,
    y_test,
    model,
)

# Score the model using AUC as the only requested metric.
PM = XPER.evaluate(["AUC"])

# Report the score rounded to three decimals.
print("Performance Metrics: ", round(PM, 3))

In [None]:
from XPER.compute.Performance import ModelPerformance

# Same evaluation as the previous cell, but the training split is passed in the two
# positions where that cell passed the test split, so the AUC is measured on X_train / y_train.
# NOTE: `XPER` and `PM` are rebound here, replacing the values from the previous cell;
# the XPER-values cell that follows therefore works on this train-on-train instance.
XPER = ModelPerformance(
    X_train,
    y_train,
    X_train,
    y_train,
    model,
)

# Score the model using AUC as the only requested metric.
PM = XPER.evaluate(["AUC"])

# Report the score rounded to three decimals.
print("Performance Metrics: ", round(PM, 3))

In [None]:
# Calculate XPER values for the model's performance, using AUC as the metric.
# `XPER` is whichever ModelPerformance instance was bound most recently; in a
# top-to-bottom run that is the instance built from the training split in the previous cell.
# Author's note: AUC takes much longer than Precision, so Precision is the better choice
# when the run is for illustration purposes only.
XPER_values = XPER.calculate_XPER_values(["AUC"])

# Permutation Importance

In [None]:
import eli5
from eli5.sklearn import PermutationImportance
from sklearn.metrics import roc_auc_score

# Configure permutation importance around the already-loaded model: scored with
# ROC AUC, 30 shuffling rounds, fixed seed so the result is repeatable.
perm_importance = PermutationImportance(
    model,
    scoring="roc_auc",
    n_iter=30,
    random_state=42,
)

# Measure the importances on the test split.
perm_importance.fit(X_test, y_test)

# Render the importance table as the cell output; column labels are taken from X_train.
eli5.show_weights(perm_importance, feature_names=list(X_train.columns))

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Per-feature importances from the PermutationImportance object fitted earlier.
importances = perm_importance.feature_importances_
feature_names = list(X_train.columns)

# The labels come from X_train while the importances were fitted on X_test, so make
# sure the two line up before pairing them; a mismatch would mislabel the bars.
if len(feature_names) != len(importances):
    raise ValueError(
        f"Got {len(importances)} importances but {len(feature_names)} feature names; "
        "X_train and X_test must have the same columns."
    )

# Permutation importances can be zero or negative, so their sum is not guaranteed to
# be positive. Dividing by a zero, negative or non-finite total would produce NaN/inf
# or sign-flipped "percentages", so fail loudly instead of plotting a misleading chart.
total_importance = importances.sum()
if not np.isfinite(total_importance) or total_importance <= 0:
    raise ValueError(
        f"Permutation importances sum to {total_importance!r}; percentage contributions "
        "are only defined for a positive, finite total."
    )
percentage_contributions = (importances / total_importance) * 100

# Order the features from largest to smallest contribution for plotting.
sorted_indices = np.argsort(percentage_contributions)[::-1]
sorted_features = [feature_names[i] for i in sorted_indices]
sorted_contributions = list(percentage_contributions[sorted_indices])

# Bar chart of each feature's share of the total permutation importance.
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(range(len(sorted_features)), sorted_contributions, tick_label=sorted_features)
ax.set_title("Percentage Contribution to AUC (Permutation Importance)")
ax.set_xlabel("Features")
ax.set_ylabel("Percentage Contribution (%)")
# Feature names are typically long; rotate them so they do not overlap.
ax.tick_params(axis="x", labelrotation=90)
plt.show()