# In [None]:
#import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, roc_auc_score, average_precision_score
from sklearn.preprocessing import StandardScaler
import shap
import lime
import lime.lime_tabular

# Data loading
df = pd.read_csv('your_data.csv')

# Example feature engineering (customize as per your data)
X = df.drop('target', axis=1)
y = df['target']

# Split the data BEFORE fitting any preprocessing. Fitting the scaler on the
# full dataset would let the mean/variance of the held-out rows influence the
# transformation applied to the training rows (data leakage), which makes the
# test metrics computed later optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Simple preprocessing: learn the scaling statistics from the training split
# only, then apply that same fitted transformation to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix scaled with the train-fitted scaler. Kept so that the
# name `X_scaled` remains available to any later code that refers to it.
X_scaled = scaler.transform(X)

# Model training: fit a gradient-boosting classifier on the training split.
# (scikit-learn's fit() returns the estimator itself, so the call is chained.)
model = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)

# Predictions and evaluations
# Hard class labels are used for the classification report; column 1 of
# predict_proba (the second entry of model.classes_) is used as the score for
# the ranking metrics below.
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)[:, 1]
print("Classification report:\n", classification_report(y_test, y_pred))

# Both ranking metrics share the (y_true, y_score) signature, so they are
# evaluated and printed one after the other in a single loop.
for metric_label, metric_fn in (
    ("ROC AUC score:", roc_auc_score),
    ("Average Precision (PR AUC):", average_precision_score),
):
    print(metric_label, metric_fn(y_test, y_pred_proba))

# SHAP analysis: build an explainer using the training split as background
# data, explain every test row, and draw the summary plot.
explainer_shap = shap.Explainer(model, X_train)
shap_values = explainer_shap(X_test)
shap.summary_plot(shap_values, X_test, feature_names=X.columns)

# Top 10 SHAP features: rank columns by their mean absolute SHAP value over
# the explained rows, largest first.
mean_shap = np.mean(np.abs(shap_values.values), axis=0)
top_features = np.argsort(mean_shap)[::-1][:10]
print("Top 10 SHAP features:")
for feature_name, importance in zip(X.columns[top_features], mean_shap[top_features]):
    print(f"{feature_name}: {importance:.4f}")

# LIME analysis for a single test instance
# LIME draws random perturbations around the instance, so without a seed the
# explanation changes from run to run. The explainer is seeded with the same
# random_state used for the split and the model to keep the script reproducible.
explainer_lime = lime.lime_tabular.LimeTabularExplainer(
    X_train,
    feature_names=list(X.columns),  # plain list of column labels
    class_names=['0', '1'],
    discretize_continuous=True,
    mode='classification',
    random_state=42,
)
i = 0  # Index of the instance you want to explain
exp = explainer_lime.explain_instance(X_test[i], model.predict_proba, num_features=10)
print("LIME explanation for first test instance:")
print(exp.as_list())

# Comparative text-based summary
# The template has two positional placeholders: first the SHAP ranking
# (column names only), then the LIME explanation (feature description plus
# weight, four decimals).
summary_template = """
Text-based Comparative Analysis:
Both SHAP and LIME offer insights into feature importances for local predictions.
SHAP provides additive feature attributions based on game theory, ensuring consistency and local accuracy.
LIME approximates the model locally using an interpretable linear model.
In this analysis, SHAP's top features were: {}
LIME's top features (for instance 0) were: {}
"""
shap_feature_text = ', '.join(X.columns[idx] for idx in top_features)
lime_feature_text = ', '.join(
    f"{description} ({weight:.4f})" for description, weight in exp.as_list()
)
print(summary_template.format(shap_feature_text, lime_feature_text))
