In [22]:
# Shapash Model Interpretation Notebook
import sys
import os
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from shapash.explainer.smart_explainer import SmartExplainer
import shapash.explainer.smart_plotter
#from shapash.utils.model_synoptic import model_synoptic

In [13]:
# Add the src directory to path so we can import modules
sys.path.append(os.path.abspath('../src'))
from data_loader import load_data
import config

In [14]:
# Models to analyze
MODELS = ['xgboost', 'lightgbm', 'catboost', 'randomforest']
SCORE_TYPES = ['FRIED', 'FRAGIRE18']

In [21]:
def load_model(model_name, score_type):
    """Load a trained model from disk"""
    model_path = os.path.join(config.MODEL_OUTPUT, 'classifiers', f"{model_name}_{score_type.lower()}.pkl")
    with open(model_path, 'rb') as f:
        model = joblib.load(f)
    return model

In [20]:
import pickle
import os
import joblib   

# Try with explicit path
model_dir = '../models/classifiers'  # Adjust if needed
model_file = 'lightgbm_fried.pkl'
full_path = os.path.join(model_dir, model_file)

print(f"Checking if file exists: {full_path}")
print(f"File exists: {os.path.exists(full_path)}")

if os.path.exists(full_path):
    with open(full_path, 'rb') as f:
        model = joblib.load(f)
    print("Model loaded successfully!")

Checking if file exists: ../models/classifiers\lightgbm_fried.pkl
File exists: True
Model loaded successfully!



Trying to unpickle estimator LabelEncoder from version 1.6.0 when using version 1.5.2. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations



In [7]:
response_dict = {0: 'non-frail', 1: 'frail'}

In [8]:
def analyze_model(model_name, score_type):
    """Analyze a model using Shapash"""
    print(f"\n{'='*50}")
    print(f"Analyzing {model_name.upper()} model for {score_type}")
    print(f"{'='*50}")
    
    # Load the model
    try:
        model = load_model(model_name, score_type)
    except FileNotFoundError:
        print(f"Model file not found for {model_name} - {score_type}")
        return
    
    # Load the data
    X, y = load_data(target_score=score_type, selected_features=False)
    
    # Convert target to int for classification
    y = y.astype(int)
    
    # Create output directory for Shapash visualizations
    output_dir = os.path.join(config.VISUALIZATION_OUTPUT, model_name, score_type.upper(), 'shapash')
    os.makedirs(output_dir, exist_ok=True)

    # Create a Shapash SmartExplainer
    xpl = SmartExplainer(model=model)
    xpl.compile(
        label_dict = response_dict,
        x=X,
        y=y,
        preprocessing=None,  # No preprocessing here since X is already processed
        features_dict=None,  # Use column names as feature names
    )
    
    # Generate model synoptic (overview)
    model_synoptic(model, x=X)
    
    # Generate visualizations
    
    # 1. Global feature importance
    contrib_plot = xpl.plot.features_importance()
    plt.title(f"Feature Importance - {model_name.upper()} - {score_type}")
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, f"feature_importance.png"), dpi=300)
    plt.close()
    
    # 2. Local explanations for a sample of predictions
    # Choose 5 random samples for explanation
    sample_indices = np.random.choice(X.shape[0], size=min(5, X.shape[0]), replace=False)
    
    # Local explanations
    for i, idx in enumerate(sample_indices):
        local_plot = xpl.plot.local_plot(index=idx)
        plt.title(f"Local Explanation - Sample {i+1}")
        plt.tight_layout()
        plt.savefig(os.path.join(output_dir, f"local_explanation_{i+1}.png"), dpi=300)
        plt.close()
    
    # 3. Generate a Shapash report
    xpl.save(os.path.join(output_dir, f"shapash_report_{model_name}_{score_type.lower()}.pickle"))
    
    # 4. Feature contributions for all individuals (heatmap)
    contrib_heatmap = xpl.plot.contribution_plot(max_features=10)
    plt.title(f"Feature Contributions - {model_name.upper()} - {score_type}")
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, f"feature_contributions.png"), dpi=300)
    plt.close()
    
    # 5. Interactive dashboard (only works in notebook)
    print("To launch interactive dashboard, run: app = xpl.run_app()")
    
    # Return the explainer for further analysis
    return xpl

In [23]:
# Get the explainer for a specific model (for example, lightgbm and FRIED)
xpl = analyze_model('lightgbm', 'FRIED')


Analyzing LIGHTGBM model for FRIED
Model file not found for lightgbm - FRIED


In [9]:
# Run analysis for all models and score types
for model_name in MODELS:
    for score_type in SCORE_TYPES:
        try:
            analyze_model(model_name, score_type)
        except Exception as e:
            print(f"Error analyzing {model_name} - {score_type}: {str(e)}")

print("\nAnalysis complete. Visualizations saved in the respective model/score directories.")


Analyzing LIGHTGBM model for FRIED
Model file not found for lightgbm - FRIED

Analyzing LIGHTGBM model for FRAGIRE18
Model file not found for lightgbm - FRAGIRE18

Analyzing CATBOOST model for FRIED
Model file not found for catboost - FRIED

Analyzing CATBOOST model for FRAGIRE18
Model file not found for catboost - FRAGIRE18

Analyzing RANDOMFOREST model for FRIED
Model file not found for randomforest - FRIED

Analyzing RANDOMFOREST model for FRAGIRE18
Model file not found for randomforest - FRAGIRE18

Analysis complete. Visualizations saved in the respective model/score directories.
