In [1]:
# Shapash Model Interpretation Notebook
import sys
import os
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from shapash.explainer.smart_explainer import SmartExplainer
import shapash.explainer.smart_plotter
from sklearn.model_selection import train_test_split
#from shapash.utils.model_synoptic import model_synoptic

In [2]:
# Add the src directory to path so we can import modules
sys.path.append(os.path.abspath('../src'))
from data_loader import load_data
import config

In [3]:
# 1. Load the GP assessment data with all preprocessing steps
# Use FRAGIRE18 for GP assessment, with imputation
X, y = load_data(
    target_score='FRAGIRE18',
    imputation=True,
    feature_selection_method='wrapper'  # Use embedded method for selecting features
)

Loading data for FRAGIRE18 with imputation=True
Dropped 3 rows with NaN in target
Loaded 10 pre-selected features from ../output\models\imputed_wrapper_fs\feature_importances\wrapper_selected_features_fragire18_classification.txt


In [4]:
print(f"Working with dataset of shape: {X.shape}")
print(f"Selected features ({X.shape[1]}):")
for feature in X.columns:
    print(f"- {feature}")

Working with dataset of shape: (144, 10)
Selected features (10):
- SCORE ICOPE STEP1 REPERAGE GLOBAL
- SCORE FROP COM HORS AMTS SCORE FROPCOM0011
- Variation du poids
- SF36-36
- frequency_dispersion_Power_Spectrum_Density_ML_YF_follow-up
- critical_time_Diffusion_AP_YF_baseline
- DoubleStance_follow-up
- Smo_Ent_AccZWaist_follow-up
- Sym_P1P2_aCC_LB2_follow-up
- leftHandV2_baseline


In [5]:
# Split the data into train/test sets with stratification
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, stratify=y)
print(f"Train set: {X_train.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples")
print(f"Class distribution in train set: {pd.Series(y_train).value_counts(normalize=True).to_dict()}")
print(f"Class distribution in test set: {pd.Series(y_test).value_counts(normalize=True).to_dict()}")

Train set: 100 samples
Test set: 44 samples
Class distribution in train set: {0.0: 0.72, 1.0: 0.28}
Class distribution in test set: {0.0: 0.7045454545454546, 1.0: 0.29545454545454547}


In [6]:
# 3. Load the model
model_path = '../output/models/imputed_wrapper_fs/classifiers/lightgbm/FRAGIRE18/lightgbm_model.joblib'
print(f"Checking if model exists: {os.path.exists(model_path)}")

# Load the model
model = joblib.load(model_path)
print("Model loaded successfully!")

Checking if model exists: True
Model loaded successfully!


In [7]:
# 4 Feature dict
features_dict = {
    "SCORE ICOPE STEP1 REPERAGE GLOBAL": "Icope Score (Step 1)",
    "SCORE FROP COM HORS AMTS SCORE FROPCOM0011": "Overall fall risk score",
    "Variation du poids": "Weight variation",
    "SF36-36": "Patient self-reported global health feeling",
    "frequency_dispersion_Power_Spectrum_Density_ML_YF_follow-up": "Freq_dispersion of Power Spectrum density ML Eyes closed follow-up",
    "critical_time_Diffusion_AP_YF_baseline": "Critical time at AP dir. Eyes closed(baseline)",
    "DoubleStance_follow-up": "Doube stance time follow-up",
    "Smo_Ent_AccZWaist_follow-up": "Smoothness of the trunk signal (follow-up)",
    "Sym_P1P2_aCC_LB2_follow-up": "symmetry of the resultant acceleration at the trunk level (follow-up)",
    "leftHandV2_baseline": "Grip strength of the left hand (baseline)"
}

In [8]:
# Targert variable
y_test = y_test.astype(int)
response_dict_fragire18 = {0: 'non-frail', 1: 'frail'}

In [9]:
# Lancement du module explainer

xpl_fragire18 = SmartExplainer(
    label_dict = response_dict_fragire18,
    model = model,
    features_dict = features_dict
)

In [10]:
# Compile the explainer - using test data for explanation
# Reset indices to make them match
X_test = X_test.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

xpl_fragire18.compile(
    x=X_test,
    y_target = y_test
    )

INFO: Shap explainer type - <shap.explainers._tree.TreeExplainer object at 0x00000233364C4170>


In [11]:
app = xpl_fragire18.run_app(port=7055)

INFO:root:Your Shapash application run on http://LAPTOP-SIM-STELER33:7055/
INFO:root:Use the method .kill() to down your app.


In [12]:
#pp.kill

In [13]:
from explainerdashboard import ClassifierExplainer, ExplainerDashboard

explainer = ClassifierExplainer(
                model, X_test, y_test,
                # optional:
                labels=['non-frail', 'frail'],
                descriptions = features_dict)

# db = ExplainerDashboard(explainer, title="Expert's Eye",
#                     whatif=False, # you can switch off tabs with bools
#                     shap_interaction=False,
#                     decision_trees=False)

ExplainerDashboard(explainer).run()

Note: model_output=='probability'. For LGBMClassifier shap values normally get calculated against X_background, but paramater X_background=None, so using X instead
Generating self.shap_explainer = shap.TreeExplainer(model, X, model_output='probability', feature_perturbation='interventional')...
Note: Shap interaction values will not be available. If shap values in probability space are not necessary you can pass model_output='logodds' to get shap values in logodds without the need for a background dataset and also working shap interaction values...
Building ExplainerDashboard..
Detected notebook environment, consider setting mode='external', mode='inline' or mode='jupyterlab' to keep the notebook interactive while the dashboard is running...
For this type of model and model_output interactions don't work, so setting shap_interaction=False...
The explainer object has no decision_trees property. so setting decision_trees=False...
Generating layout...
Calculating shap values...
Calculatin

Exception in thread Thread-5 (<lambda>):
Traceback (most recent call last):
  File "c:\Users\matth\miniconda3\envs\thesis_env\Lib\site-packages\urllib3\connection.py", line 198, in _new_conn
    sock = connection.create_connection(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\matth\miniconda3\envs\thesis_env\Lib\site-packages\urllib3\util\connection.py", line 85, in create_connection
    raise err
  File "c:\Users\matth\miniconda3\envs\thesis_env\Lib\site-packages\urllib3\util\connection.py", line 73, in create_connection
    sock.connect(sa)
OSError: [WinError 10049] The requested address is not valid in its context

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "c:\Users\matth\miniconda3\envs\thesis_env\Lib\site-packages\urllib3\connectionpool.py", line 787, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "c:\Users\matth\miniconda3\envs\thesis_env\Lib\site-packages\u

ConnectionError: HTTPConnectionPool(host='0.0.0.0', port=8050): Max retries exceeded with url: /_alive_13093824-3db9-4e9d-a3cb-935d2ad2aa6a (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x0000023339DEC200>: Failed to establish a new connection: [WinError 10049] The requested address is not valid in its context'))

Empty DataFrame
Columns: [col, contribution, value]
Index: []
                                      col  contribution      value
2                      Variation du poids     -0.002942   0.000000
5  critical_time_Diffusion_AP_YF_baseline     -0.028548   0.323537
7             Smo_Ent_AccZWaist_follow-up      0.007726   0.267099
8              Sym_P1P2_aCC_LB2_follow-up     -0.019497   1.011461
9                     leftHandV2_baseline      0.000000  15.500000
Empty DataFrame
Columns: [col, contribution, value]
Index: []
Empty DataFrame
Columns: [col, contribution, value]
Index: []
