In [1]:
import torch as T
import shap
from src.pecan_dataport.participant_preprocessing import PecanParticipantPreProcessing
from src.regressors.gru_regression import ConsumptionGRURegressor
from src.dataset import PecanDataset, PecanDataModule
from torch.utils.data import DataLoader

In [None]:
# Load and pre-process the data for one participant. The constructor reports the
# train/validation/test dataframe shapes while it runs.
# NOTE(review): the meaning of the third argument (1028) is not visible from this
# notebook — confirm against PecanParticipantPreProcessing.
pecan_dataset = PecanParticipantPreProcessing(
    '661_test_30_all',
    'data/participants_data/1min/',
    1028,
)

# Run on the first CUDA device when one is available, otherwise on the CPU.
device = T.device('cuda:0') if T.cuda.is_available() else T.device('cpu')

[!] - Trainable dataframe shape - (129086, 28)
[*] Train dataframe shape: (90360, 28)
[*] Validation dataframe shape: (25817, 28)
[*] Test dataframe shape: (12909, 28)


 20%|██████████████▉                                                          | 18250/89332 [00:01<00:05, 13195.30it/s]

In [None]:
# Checkpoint of the GRU regressor to explain, written as directory + file name so
# the long path stays readable. The two literals concatenate to a single string.
ckpt = (
    'lib/ckpts/participants/661_test_30_all/sigmoid/GRU/best/'
    'best-GRU-chpkt-pecanstreet-participant-id-661_test_30_all_epoch=080-val_loss=0.00000.ckpt'
)

# Reuse the scaler held by the pre-processing object; it is handed to the model
# when the checkpoint is loaded.
scaler = pecan_dataset.scaler

In [None]:
# Wrap the two splits used for the SHAP analysis in PecanDataset objects.
# NOTE(review): the "train" (background) dataset is built from the *validation*
# sequences — confirm this is intended rather than a leftover.
shap_train_sequence, shap_test_sequence = (
    PecanDataset(split_sequences, device)
    for split_sequences in (pecan_dataset.val_sequences, pecan_dataset.test_sequences)
)

In [None]:
# Options shared by both loaders: keep the original sample order and load in the
# main process.
# NOTE(review): pin_memory applies to CPU tensors; if PecanDataset already returns
# tensors on `device`, confirm that pinning is still wanted here.
_shap_loader_options = dict(shuffle=False, num_workers=0, pin_memory=True)

# Small batches for the background set, one large batch for the samples to explain.
shap_train_loader = DataLoader(shap_train_sequence, batch_size=32, **_shap_loader_options)
shap_test_loader = DataLoader(shap_test_sequence, batch_size=512, **_shap_loader_options)

In [None]:
# Only the first batch of each loader is used for the analysis below.
shap_train_batch, shap_test_batch = (
    next(iter(loader)) for loader in (shap_train_loader, shap_test_loader)
)

In [None]:
# Split the background batch into its inputs and targets.
sequences, labels = (shap_train_batch[key] for key in ("sequence", "label"))

# The input sequences, moved to the compute device, serve as the SHAP background.
background = sequences.to(device)
background.shape

In [None]:
# Hyperparameters passed alongside the checkpoint.
# NOTE(review): presumably these must match the configuration the checkpoint was
# trained with — confirm against the training run.
_gru_hparams = dict(
    n_features=pecan_dataset.get_n_features(),
    lr=1e-5,
    n_hidden=256,
    n_layers=3,
    dropout=0.2,
    activation_function='sigmoid',
)

# Restore the trained regressor from the checkpoint file.
model = ConsumptionGRURegressor.load_from_checkpoint(
    checkpoint_path=ckpt,
    scaler=scaler,
    device=device,
    **_gru_hparams,
)


In [None]:
# Unwrap the `model` attribute of the loaded regressor; this inner object is what
# gets passed to the SHAP explainer.
# NOTE(review): presumably the underlying torch module — confirm against
# ConsumptionGRURegressor.
gru_model = model.model

In [None]:
# Build a SHAP DeepExplainer for the GRU, with the `background` batch as the
# reference sample set.
explainer = shap.DeepExplainer(gru_model, background)

In [None]:
# Split the test batch into its inputs and targets.
test_sequences, test_labels = (shap_test_batch[key] for key in ("sequence", "label"))

# Inputs to be explained, moved to the compute device.
shap_test = test_sequences.to(device)


In [None]:
# Compute SHAP values for the test batch with cuDNN switched off.
# NOTE(review): presumably cuDNN is disabled because its RNN backward pass cannot
# run while the model is in eval mode — confirm.
# The previous setting is restored afterwards (even if SHAP raises) so that the
# flag does not stay disabled for the rest of the kernel session.
_cudnn_was_enabled = T.backends.cudnn.enabled
T.backends.cudnn.enabled = False
try:
    shap_values = explainer.shap_values(shap_test)
finally:
    T.backends.cudnn.enabled = _cudnn_was_enabled

In [None]:
# Load the JavaScript needed to render interactive SHAP plots in the notebook.
shap.initjs()

# Force plot for the [0][0] entry of the attribution array (first index on each of
# the two leading axes). The names are passed as `feature_names` explicitly: the
# third positional slot of force_plot is `features`, and a name list given there
# is only reinterpreted as feature names by a fallback inside SHAP.
shap.force_plot(
    base_value=explainer.expected_value[0],
    shap_values=shap_values[0][0],
    feature_names=pecan_dataset.get_features_names(),
)

In [None]:
# Check the dimensions of the attribution array. The cells below index it with
# three axes; NOTE(review): presumably (sample, time step, feature) — confirm.
shap_values.shape

In [None]:
# Shape of the attribution slice for the last entry along the first axis.
shap_values[-1, :, :].shape

In [None]:
import pandas as pd
import numpy as np

# Absolute attributions for the last entry along the first axis of `shap_values`.
# NOTE(review): the statistics below therefore come from this single slice only,
# reduced over its first remaining axis — confirm that this (rather than an
# aggregate over the whole batch) is what "mean_abs_shap" should represent.
_abs_shap_last = np.abs(shap_values[-1, :, :])

# One row per feature: mean and standard deviation of the absolute SHAP values.
df = pd.DataFrame({
    "mean_abs_shap": _abs_shap_last.mean(axis=0),
    "stdev_abs_shap": _abs_shap_last.std(axis=0),
    "name": pecan_dataset.get_features_names(),
})

# Ten features with the largest mean absolute attribution.
df.sort_values("mean_abs_shap", ascending=False).head(10)

In [None]:
# Inspect the raw attributions for the first entry along the first axis.
shap_values[0,:, :]

In [None]:
import matplotlib.pyplot as plt

# Summary plot of the attributions for the first entry of the test batch.
# `test_sequences` is a torch tensor (possibly on the GPU), while summary_plot
# expects the feature values as a NumPy array or DataFrame, so the slice is
# detached, moved to the CPU and converted before plotting.
_first_sample_features = test_sequences[0, :, :].detach().cpu().numpy()
shap.summary_plot(
    shap_values[0, :, :],
    features=_first_sample_features,
    feature_names=pecan_dataset.get_features_names(),
)
# plt.savefig(f'etc/imgs/features/Shap_values.svg', dpi=600, bbox_inches='tight')

In [None]:
# waterfall_plot draws a single explanation and requires a shap.Explanation
# object; a raw 2-D attribution array is rejected. Build one for the first entry
# of the test batch: attributions are summed over the first axis of the slice so
# there is exactly one value per feature, and the base value is the explainer's
# expected output.
# NOTE(review): summing assumes the slice is laid out as (time step, feature) —
# confirm; pick a single row instead if a per-step waterfall is wanted.
_first_sample_explanation = shap.Explanation(
    values=shap_values[0, :, :].sum(axis=0),
    base_values=float(explainer.expected_value[0]),
    feature_names=list(pecan_dataset.get_features_names()),
)
shap.waterfall_plot(_first_sample_explanation)