In [1]:
#! pip install shap
import sys
# Put the local 'shap-0.35.0' directory at the front of the import path so that
# `import shap` resolves there before any installed copy.
# NOTE(review): presumably this is a locally patched SHAP 0.35.0 checkout -- confirm.
sys.path.insert(0,'shap-0.35.0')

In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np  
import torch       
import shap

In [3]:
# data 
import argparse
from data.cashflow import get_dataloader
from pathlib import Path

In [4]:
# model
from model.utils_shap import load_model
from model.cnn import CNN

In [5]:
# Run configuration shared by the cells below.

# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Input data file and the checkpoint to restore (adjacent literals are
# concatenated into a single path string).
data_file = 'datasets/Personatics_mini.hdf5'
load_path = (
    'archive/old_checkpoint/'
    'personatics_binary_cnn_adam_k4_chmax512_nfc1024_wc010.0_drop0.0_bn0_l1_wr10.0_bs512_lr0.0001/'
    'models/iter-144000_ap-0.8387.pkl'
)
metric_name = 'ap'  # handed to load_model together with the checkpoint path

# Batch size handed to get_dataloader.
batch_size = 14000  # 512

# Arguments forwarded to the CNN constructor.
kernel_size = 4
ch_max = 512
nfc = 1024

threshold = 0.9


In [6]:
# Build the dataloaders for `data_file` with the configured batch size.
# The result is indexed by split name further down (dataloaders['train']).
dataloaders = get_dataloader(data_file, batch_size)

In [7]:
# Directory that contains the checkpoint file referenced by `load_path`.
model_dir = Path(load_path).parent

In [8]:
# Instantiate the CNN from the configured hyper-parameters (dropout disabled,
# one output class), place it on `device`, and switch it to evaluation mode.
cnn_kwargs = dict(
    dropout_p=0,
    kernel_size=kernel_size,
    ch_max=ch_max,
    nfc=nfc,
    num_class=1,
)
model = CNN(**cnn_kwargs).to(device)
model.eval()

# Restore the weights from the checkpoint; the two returned values are not needed.
_, _ = load_model(model, load_path, device, metric_name)

load model from iter: 144000 ap: 0.8387


In [9]:
# Pick the batch that the SHAP cells below work on.
#
# Only the final batch yielded by the training loader is kept, so the
# relabelling and the device transfer are done once, after the loop, rather
# than for every batch that is immediately thrown away.
batch = None
for batch in dataloaders['train']:
    pass

if batch is None:
    # Without this check an empty loader would leave x, y and b undefined and
    # surface later as an unrelated NameError.
    raise RuntimeError("dataloaders['train'] yielded no batches; nothing to explain")

x, y, b = batch
y[y == 2] = 1 # change label 2 to 1

x, y, b = x.to(device), y.to(device), b.to(device)

#with torch.no_grad():
#    y_logits, b_preds = model(x)
        


In [10]:
# Sanity check: shapes of the inputs and labels of the batch kept above.
x.shape, y.shape

(torch.Size([12283, 11, 60]), torch.Size([12283]))

In [11]:
# SHAP
# The batch selected above serves both as the explainer's background data and
# as the pool of samples that get explained below.
features = x
labels = y

# IMPORTANT: change the code for y, b = model(x)
explainer = shap.DeepExplainer(model, features)

In [12]:
# SHAP values for the first 500 samples of the batch.
# NOTE(review): SHAP documents `ranked_outputs` as None or an int (number of
# top-ranked outputs to explain); confirm that passing True is intended.
shap_values = explainer.shap_values(features[:500], ranked_outputs=True)

In [13]:
# The SHAP array should have the same shape as the slice of inputs it explains.
shap_values.shape, features[:500].shape

((500, 11, 60), torch.Size([500, 11, 60]))

## Summarize the effects of all the features

In [None]:
new_size = 60
chanel = 1

# Collapse axis 1 of both arrays so that one value remains per position along
# the last axis, then draw the summary plot for the explained rows only.
n_explained = shap_values.shape[0]
shap_summed = np.sum(shap_values, axis=chanel)
features_summed = np.sum(features[:n_explained, :, :].cpu().numpy(), axis=chanel)

# show=False is required here: by default summary_plot displays the figure
# itself and returns nothing, so a savefig issued afterwards would write an
# empty canvas instead of the plot.
shap.summary_plot(shap_summed,
                  features=features_summed,
                  feature_names=None, plot_type='violin', show=False)

# Grab the figure SHAP drew on, save it, and only then display it.
fig = plt.gcf()
fig.savefig('shap_features_summary.png', bbox_inches='tight')
plt.show()

In [None]:
subject = 4
# Shapes of the SHAP values and of the input features for one subject.
shap_values[subject, :].shape, features[subject, :, :].cpu().numpy().shape

In [None]:
# Slicing with subject:subject+1 keeps the leading (sample) axis, giving a batch of one.
features[subject:subject+1,:,:].shape

In [None]:
subject = 20

# The cells below index the batch array as shap_values[subject, :, :], so the
# subject must be one of the rows that were explained above.
if not 0 <= subject < len(shap_values):
    raise IndexError(
        f"subject={subject} is outside the {len(shap_values)} samples in shap_values"
    )

# SHAP values for this single subject, kept under their own name.
# Assigning them back to `shap_values` would replace the batch array with a
# length-1 array and make shap_values[subject, :, :] fail for any subject != 0.
subject_shap_values = explainer.shap_values(features[subject:subject+1,:,:], ranked_outputs=True)

In [None]:


new_size = 60
chanel = 1

# Load the JavaScript that the interactive force plot needs in the notebook.
shap.initjs()

# Gather the inputs for the chosen subject. Both per-subject arrays are
# transposed so that the last axis of the original data becomes the leading one.
base_value = explainer.expected_value[0]
subject_shap = shap_values[subject, :, :].T
subject_features = features[subject, :, :].T.cpu().numpy()

# Explain the prediction for this one subject.
shap.force_plot(base_value,
                subject_shap,
                features=subject_features,
                feature_names=None)

## Get the ordered important features

In [None]:
# Shape of the SHAP values for the selected subject.
shap_values[subject, :, :].shape

### Important days for a single account

In [None]:
# Importance per position along the last axis for the selected subject:
# the sum of absolute SHAP values over axis 0.
vals = np.sum(np.abs(shap_values[subject, :, :]), axis=0)

# Index the positions by the actual length of `vals` instead of a fixed 60, so
# that no entry is silently dropped if the input length changes, and sort into
# a new frame rather than mutating in place.
feature_importance = pd.DataFrame(
    {'col_name': np.arange(len(vals)), 'feature_importance_vals': vals}
).sort_values(by='feature_importance_vals', ascending=False)
feature_importance.head()

In [None]:
# Importance per position along the last axis for the selected subject:
# the mean of absolute SHAP values over axis 0. This is the axis-0 sum divided
# by the axis length, so the resulting ordering is the same as with the sum.
vals = np.abs(shap_values[subject, :, :]).mean(0)

# Index the positions by the actual length of `vals` instead of a fixed 60, so
# that no entry is silently dropped if the input length changes, and sort into
# a new frame rather than mutating in place.
feature_importance = pd.DataFrame(
    {'col_name': np.arange(len(vals)), 'feature_importance_vals': vals}
).sort_values(by='feature_importance_vals', ascending=False)
feature_importance.head()