In [1]:
import torch as T
import shap
import pandas as pd
import matplotlib.pyplot as plt
from src.regressors.linear_regression import ConsumptionLinearRegressor, ConsumptionMLPRegressor
from src.regressors.lstm_regressor import ConsumptionLSTMRegressor
from src.regressors.gru_regression import ConsumptionGRURegressor
from src.regressors.rnn_regressor import ConsumptionRNNRegressor
from src.regressors.conv_rnn_regressor import ConsumptionConvRNNRegressor
from src.regressors.transformer_regressor import ConsumptionTransformerRegressor, ConsumptionTSTRegressor
from src.regressors.fcn_regressor import ConsumptionFCNRegressor
from src.regressors.tcn_regressor import ConsumptionTCNRegressor
from src.regressors.resnet_regressor import ConsumptionResNetRegressor

from src.pecan_dataport.participant_preprocessing import PecanParticipantPreProcessing
from src.dataset import PecanDataset, PecanDataModule
from src.utils.functions import create_sequences, mkdir_if_not_exists
T.manual_seed(123)

<torch._C.Generator at 0x1bda406bab0>

In [2]:
model_name = "TST"
participant_id = "661_test_30"

mkdir_if_not_exists('etc/')
mkdir_if_not_exists('etc/imgs')
mkdir_if_not_exists('etc/imgs/features')
mkdir_if_not_exists(f'etc/imgs/features/{participant_id}')
mkdir_if_not_exists(f'etc/imgs/features/{participant_id}/shap_values')
mkdir_if_not_exists(f'etc/imgs/features/{participant_id}/shap_values/summary_plots')
mkdir_if_not_exists(f'etc/imgs/features/{participant_id}/shap_values/summary_plots/{model_name}')
mkdir_if_not_exists(f'etc/imgs/features/{participant_id}/shap_values/force_plots')
mkdir_if_not_exists(f'etc/imgs/features/{participant_id}/shap_values/force_plots/{model_name}')


In [3]:
pecan_dataset = PecanParticipantPreProcessing('661_test_30', 'data/participants_data/1min/',
                                                   181)
_, _, train_sequences, test_sequences, val_sequences = pecan_dataset.get_sequences()
n_features = pecan_dataset.get_n_features()
scaler = pecan_dataset.get_scaler()
ckpt = f'checkpoints/participants/661_test_30/sigmoid/{model_name}/best/best-TST-chpkt-pecanstreet-participant-id-661_test_30_epoch=177-val_loss=0.00000.ckpt'
device = T.device('cuda:0' if T.cuda.is_available() else 'cpu')

[!] - Trainable dataframe shape - (129086, 28)
[*] Train dataframe shape: (90360, 28)
[*] Validation dataframe shape: (25817, 28)
[*] Test dataframe shape: (12909, 28)


100%|█████████████████████████████████████████████████████████████████████████| 64513/64513 [00:05<00:00, 11033.26it/s]
100%|█████████████████████████████████████████████████████████████████████████| 64513/64513 [00:06<00:00, 10673.18it/s]
100%|█████████████████████████████████████████████████████████████████████████| 90179/90179 [00:08<00:00, 10764.69it/s]
100%|█████████████████████████████████████████████████████████████████████████| 12728/12728 [00:01<00:00, 12172.58it/s]
100%|█████████████████████████████████████████████████████████████████████████| 25636/25636 [00:02<00:00, 11808.54it/s]

[!] Train sequence shape: (181, 28)
[!] Test sequence shape: (181, 28)
[!] Val sequence shape: (181, 28)
[!] Len of train, val and test sequence: 90179 25636 12728





In [10]:
total_df = pecan_dataset.get_standart_df_features()
background_sequence = create_sequences(total_df[:int(len(total_df)*.5)], 'consumption', 60)
test_sequence = create_sequences(total_df[int(len(total_df)*.5):], 'consumption', 60)
background_sequence[0][0].shape, test_sequence[0][0].shape

100%|█████████████████████████████████████████████████████████████████████████| 64483/64483 [00:06<00:00, 10236.45it/s]
100%|█████████████████████████████████████████████████████████████████████████| 64483/64483 [00:05<00:00, 11927.67it/s]


((60, 28), (60, 28))

In [11]:
background_sequence = PecanDataset(background_sequence, device)
test_sequence = PecanDataset(test_sequence, device)

background_data_module = T.utils.data.DataLoader(
    background_sequence,
    batch_size=512,
    shuffle = False,
    num_workers=0,
    pin_memory=False
)
test_data_module = T.utils.data.DataLoader(
    test_sequence,
    batch_size=128,
    shuffle = False,
    num_workers=0,
    pin_memory=False
)


In [12]:
train_batch = next(iter(background_data_module))
background = train_batch["sequence"].to(device)

test_batch = next(iter(test_data_module))
test = test_batch["sequence"].to(device)

In [13]:
background.shape, test.shape

(torch.Size([512, 60, 28]), torch.Size([128, 60, 28]))

In [14]:
lr = 1e-5
n_hidden = 256
n_layers = 3
dropout = 0.2
activation_fn = 'sigmoid'
d_model = 128
n_head = 16
d_ffn = 256
tst_activation_fn = 'gelu'
sequence_length = 60
max_seq_len = 120
d_k = 32
d_v = 32
res_dropout = 0.1
fc_dropout = 0.1

model = ConsumptionTSTRegressor.load_from_checkpoint(checkpoint_path=ckpt, strict=False, device=device,
                                                             n_features=n_features, seq_len=sequence_length,
                                        max_seq_len=max_seq_len, d_model=d_model, n_head=n_head,
                                        d_k=d_k, d_v=d_v, d_ffn=d_ffn, res_dropout=res_dropout,
                                        n_layers=n_layers, lr=lr,  activation_function=tst_activation_fn,
                                        fc_dropout=fc_dropout, scaler=scaler)

In [15]:
explainer = shap.DeepExplainer(model.model, background.to(device))

In [16]:
T.backends.cudnn.enabled = False
shap_values = explainer.shap_values(test.to(device))
shap_values[0][0].shape

Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. Please use register_full_backward_hook to get the documented behavior.




RuntimeError: CUDA out of memory. Tried to allocate 120.00 MiB (GPU 0; 6.00 GiB total capacity; 3.61 GiB already allocated; 0 bytes free; 4.31 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
shap.initjs()
shap.force_plot(explainer.expected_value[0], shap_values[0][0], pecan_dataset.get_features_names(), show = False, matplotlib=True)
plt.savefig(
    f'etc/imgs/features/{participant_id}/shap_values/force_plots/{model_name}/{participant_id}_{model_name}_force.svg',
    dpi=600, bbox_inches='tight')


In [None]:
shap_values_2D = shap_values.reshape(-1,28)
X_test_2D = test.reshape(-1,28)
shap_values_2D.shape, X_test_2D.shape

In [None]:
x_test_2d = pd.DataFrame(data=X_test_2D, columns = pecan_dataset.get_features_names())
x_test_2d

In [None]:
x_test_2d.corr()

In [None]:
shap.summary_plot(shap_values_2D, x_test_2d, show = False)
plt.savefig(
    f'etc/imgs/features/{participant_id}/shap_values/summary_plots/{model_name}/{participant_id}_{model_name}_bee_summ.svg',
    dpi=600, bbox_inches='tight')


In [None]:
shap.summary_plot(shap_values_2D, x_test_2d, plot_type="bar", show=False)
plt.savefig(
    f'etc/imgs/features/{participant_id}/shap_values/summary_plots/{model_name}/{participant_id}_{model_name}_bar_summ.svg',
    dpi=600, bbox_inches='tight')
