# 04: Stability Analysis

**Student**: Keisuke Nishioka (Matrikelnummer: 10081049)  
**Project**: Stability and Faithfulness Analysis of SHAP Explanations

In [None]:
import sys
import os
sys.path.append('../src')

import pandas as pd
import numpy as np
from shap_analysis import load_shap_values
from stability_metrics import compute_stability_metrics, compare_models_stability
import config

## Load SHAP Values

In [None]:
# Helper: load the saved SHAP values of a single model type, one entry per seed
def load_all_shap_values(model_type, seeds):
    """Collect the stored SHAP values of one model type, keyed by seed.

    For every seed the expected file is
    ``../results/shap_values/{model_type}_seed_{seed}_shap.npz``. Seeds whose
    file does not exist are left out of the result.

    Args:
        model_type: Model identifier used as the file-name prefix.
        seeds: Iterable of seeds to look up.

    Returns:
        dict mapping each seed that has a file on disk to whatever
        ``load_shap_values`` returns for that file.
    """
    # Pair each seed with its expected path lazily, so existence is checked
    # and the file loaded one seed at a time.
    candidates = (
        (seed, f'../results/shap_values/{model_type}_seed_{seed}_shap.npz')
        for seed in seeds
    )
    return {
        seed: load_shap_values(path)
        for seed, path in candidates
        if os.path.exists(path)
    }

# Load the per-seed SHAP values of every model type
xgboost_shap = load_all_shap_values('xgboost', config.RANDOM_SEEDS)
rf_shap = load_all_shap_values('random_forest', config.RANDOM_SEEDS)
# NOTE(review): logistic regression only looks at the first five seeds --
# presumably intentional; confirm before comparing run counts across models.
lr_shap = load_all_shap_values('logistic_regression', config.RANDOM_SEEDS[:5])

# Report how many runs were actually found on disk for each model
print(
    "Loaded SHAP values:",
    f"  XGBoost: {len(xgboost_shap)} runs",
    f"  Random Forest: {len(rf_shap)} runs",
    f"  Logistic Regression: {len(lr_shap)} runs",
    sep="\n",
)

## Compute Stability Metrics

### XGBoost Stability

In [None]:
# Run compute_stability_metrics on the XGBoost SHAP runs, then print three
# summary entries from the returned nested dict.
xgboost_stability = compute_stability_metrics(xgboost_shap)

print(
    "XGBoost Stability Metrics:",
    f"  Ranking Correlation: {xgboost_stability['ranking_correlation']['mean']:.4f}",
    f"  SHAP Variance: {xgboost_stability['variance']['overall']:.4f}",
    f"  Top-5 Consistency: {xgboost_stability['consistency']['top_5']['overall']:.4f}",
    sep="\n",
)

### Random Forest Stability

In [None]:
# Run compute_stability_metrics on the Random Forest SHAP runs, then print
# three summary entries from the returned nested dict.
rf_stability = compute_stability_metrics(rf_shap)

print(
    "Random Forest Stability Metrics:",
    f"  Ranking Correlation: {rf_stability['ranking_correlation']['mean']:.4f}",
    f"  SHAP Variance: {rf_stability['variance']['overall']:.4f}",
    f"  Top-5 Consistency: {rf_stability['consistency']['top_5']['overall']:.4f}",
    sep="\n",
)

### Logistic Regression Stability

In [None]:
# Run compute_stability_metrics on the Logistic Regression SHAP runs, then
# print three summary entries from the returned nested dict.
lr_stability = compute_stability_metrics(lr_shap)

print(
    "Logistic Regression Stability Metrics:",
    f"  Ranking Correlation: {lr_stability['ranking_correlation']['mean']:.4f}",
    f"  SHAP Variance: {lr_stability['variance']['overall']:.4f}",
    f"  Top-5 Consistency: {lr_stability['consistency']['top_5']['overall']:.4f}",
    sep="\n",
)

## Model Comparison

In [None]:
# Compare all models: collect each model's stability metrics under its
# display name and build the comparison table from them.
stability_results = {
    'XGBoost': xgboost_stability,
    'Random Forest': rf_stability,
    'Logistic Regression': lr_stability
}

comparison_df = compare_models_stability(stability_results)
print("\nModel Comparison:")
print(comparison_df)

# Save comparison table. Create the output directory first so that a fresh
# checkout without ../results/tables does not fail on the write.
os.makedirs('../results/tables', exist_ok=True)
# NOTE(review): index=False drops the DataFrame index -- confirm that the
# model names live in a regular column of comparison_df, not in its index.
comparison_df.to_csv('../results/tables/model_stability_comparison.csv', index=False)

## Save Stability Results

In [None]:
import json

# Save stability metrics (convert numpy arrays to lists for JSON)
def convert_to_serializable(obj):
    """Recursively replace NumPy values with JSON-friendly built-ins.

    Conversions applied:
      * ``np.ndarray``          -> nested list (via ``tolist()``)
      * ``np.bool_``            -> ``bool``
      * any ``np.integer``      -> ``int``
      * any ``np.floating``     -> ``float``
      * ``dict``                -> dict with converted values; NumPy scalar
                                   keys are converted as well
      * ``list`` / ``tuple``    -> list with converted items

    Anything else is returned unchanged.

    Args:
        obj: Arbitrary (possibly nested) object to convert.

    Returns:
        An equivalent structure built from plain Python types wherever one
        of the conversions above applies.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    # np.bool_ is not a subclass of np.integer, so it needs its own branch;
    # json cannot encode it as-is.
    if isinstance(obj, np.bool_):
        return bool(obj)
    # The abstract scalar types cover every width (int8..int64, unsigned,
    # float16..float64), not only the 32/64-bit ones.
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, dict):
        # Only NumPy scalar keys are converted: json rejects e.g. np.int64
        # keys, while other key types must stay as they are to remain hashable.
        return {
            (convert_to_serializable(k) if isinstance(k, np.generic) else k):
                convert_to_serializable(v)
            for k, v in obj.items()
        }
    # json writes tuples as arrays anyway, but their items still need
    # converting, so tuples are walked like lists.
    if isinstance(obj, (list, tuple)):
        return [convert_to_serializable(item) for item in obj]
    return obj

# Write one JSON file per model; the file name is the display name lower-cased
# with spaces replaced by underscores, followed by "_stability.json".
for model_name, metrics in stability_results.items():
    serializable_metrics = convert_to_serializable(metrics)
    out_path = f'../results/tables/{model_name.lower().replace(" ", "_")}_stability.json'
    with open(out_path, 'w') as f:
        f.write(json.dumps(serializable_metrics, indent=2))

print("Stability results saved!")