In [2]:
from config import DATA_DIR, CACHE_DIR
from common import cache, utils, ignore_lists
from tasks.common import return_response
from tasks.modeling.pytorch_model import LSTMTextClassifier, CNNTextClassifier, DfTrainingDataset

from os import listdir, path, makedirs

import statistics
from scipy import stats
import numpy as np
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer, StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import recall_score, precision_score, f1_score, accuracy_score

import json
import joblib

import torch

In [3]:
from captum.attr import IntegratedGradients
from captum.attr import InterpretableEmbeddingBase, TokenReferenceBase
from captum.attr import visualization

In [5]:
# Locate the raw dataset archive under DATA_DIR and fingerprint it; the
# resulting hash is what the cached dataset/model file names are built from.
archive_name = 'Francis.zip'
file_path = path.join(DATA_DIR, archive_name)
file_hash = cache.get_file_hash(file_path)

print(file_hash)

405bd27891a2e0d04806a92d996c58d6


In [6]:
# Load the preprocessed dataset that was cached under this archive's hash.
#
# NOTE(review): torch.load unpickles arbitrary Python objects, so this must only
# ever be pointed at cache files produced by this project. Newer PyTorch
# releases default to weights_only=True, which may refuse a pickled dataset
# object -- confirm against the installed version before upgrading.
data_cache_path = path.join(CACHE_DIR, f'torchdata_{file_hash}.pt')

# map_location='cpu': later cells call .numpy() on tensors derived from this
# dataset, which only works for CPU tensors, and it lets a cache written on a
# GPU machine load on a CPU-only one.
train_dataset = torch.load(data_cache_path, map_location='cpu')

In [7]:
# Rebuild the classifier with the dimensions recorded on the cached dataset.
clf = LSTMTextClassifier({
    'embedding_size': train_dataset.embedding_size,
    'context_size': train_dataset.context_size,
    'num_classes': train_dataset.num_classes
}, train_dataset)

# This notebook only runs inference and attribution, so put the module in
# evaluation mode: training-only layers (dropout, batch norm, if the model has
# any) would otherwise make predictions and attributions non-deterministic.
clf.eval()

cache_path = path.join(CACHE_DIR, f'torchmodel_{file_hash}.pt')

# map_location='cpu' lets weights saved on a GPU machine load on a CPU-only one;
# the rest of the notebook works on CPU tensors (it calls .numpy() on outputs).
# Kept as the last expression so the cell still displays the key-matching report.
clf.load_state_dict(torch.load(cache_path, map_location='cpu'))

<All keys matched successfully>

In [8]:
ig = IntegratedGradients(clf)

In [12]:
test_idx = 10
print(train_dataset.raw_examples[test_idx])

can i enjoy cis rates under fibre entertainment bundle?


In [41]:
# Forward pass for the single selected example, wrapped as a batch of one.
clf.zero_grad()
input_emb = train_dataset.X_train[test_idx].unsqueeze(0)
input_context = train_dataset.X_contexts[test_idx].unsqueeze(0)

# Model scores -> probabilities over the last axis -> top probability and its index.
raw_scores = clf(input_emb, input_context)
class_probs = torch.softmax(raw_scores, dim=-1)
preds_proba, preds_idx = class_probs.max(dim=-1)

In [42]:
# Report the predicted class label and its probability.
# preds_idx / preds_proba come from a batch of one, so they should each hold a
# single element; .item() extracts it as a Python scalar. This avoids calling
# int()/float() on a non-0-d NumPy array (deprecated in recent NumPy) and
# matches how attr_target is derived from preds_idx elsewhere in this notebook.
print(f'Predicted: {train_dataset.le.classes_[preds_idx.item()]} - {preds_proba.item()}')

Predicted: CIS-MobilePlan-EligibilityFEB - 0.8132966756820679


In [57]:
# Class index to attribute against: the model's own top prediction, as a Python int.
attr_target = preds_idx.item()

In [44]:
# Integrated Gradients attribution of the predicted class w.r.t. the embedded
# input. The context tensor is forwarded to the model unchanged and is not
# attributed. No baseline is passed, so Captum's default baseline is used
# (presumably all zeros -- confirm against the Captum docs).
ig_kwargs = dict(
    target=attr_target,
    additional_forward_args=(input_context,),
    n_steps=500,
    return_convergence_delta=True,
)
attributions_ig, delta = ig.attribute(input_emb, **ig_kwargs)

In [45]:
# Display the raw attribution tensor returned by ig.attribute for input_emb.
attributions_ig

tensor([[[-0.0532,  0.0124, -0.0329,  ..., -0.0546,  0.0017, -0.0703],
         [-0.0009, -0.0120,  0.0155,  ...,  0.0669,  0.0098,  0.0275],
         [-0.1229,  0.0176, -0.0293,  ..., -0.0044, -0.0081, -0.0669],
         ...,
         [-0.0000,  0.0000, -0.0000,  ..., -0.0000,  0.0000,  0.0000],
         [-0.0000,  0.0000, -0.0000,  ..., -0.0000,  0.0000,  0.0000],
         [-0.0000, -0.0000, -0.0000,  ..., -0.0000,  0.0000, -0.0000]]])

In [46]:
# Display the convergence delta that ig.attribute returned alongside the
# attributions (requested via return_convergence_delta=True).
delta

tensor([-0.0010])

In [47]:
# Shape check on the attribution tensor.
# presumably (batch, token positions, embedding dims) -- TODO confirm
attributions_ig.shape

torch.Size([1, 80, 300])

In [61]:
# Largest attribution value along the last axis, for the first (only) batch entry.
attributions_ig.max(dim=-1).values[0]

tensor([0.1270, 0.1639, 0.4556, 0.3845, 0.6495, 0.8603, 0.2893, 1.0582, 0.3348,
        0.1755, -0.0000, -0.0000, -0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, -0.0000, -0.0000, -0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, -0.0000])

In [59]:
# Scratch check: build an empty tensor with 0 rows and 5 columns.
# NOTE(review): nothing else in the visible cells uses this -- looks like
# leftover experimentation and is a candidate for removal.
torch.zeros(0, 5)

tensor([], size=(0, 5))