# Set-up<br>
This set-up assumes that the working directory (`os.curdir`) is where the notebook is.

In [1]:
import os
import sys
# Locate the project root (two directory levels above the notebook directory)
# and register it on sys.path so the `src` package can be imported below.
this_notebook_dir = os.curdir
project_root_dir = os.path.relpath(
    os.path.join(os.pardir, os.pardir),
    start=this_notebook_dir,
)
# Guard against duplicate entries when this cell is re-run in the same kernel.
if project_root_dir not in sys.path:
    sys.path.append(project_root_dir)
from pprint import pprint

# Loading data and model<br>
We will now load a dataset

In [2]:
from src.data.dataload import *
# Load the dataset; `load_sst` is provided by the star import from src.data.dataload above.
data = load_sst()
print(f'loaded dataset {data.NAME}')
# Unpack the three splits exposed by the dataset object (train / validation / test order,
# going by the attribute name). `dev` is the split fed to the explainer cell below.
train, dev, test = data.train_val_test

loaded dataset sst


Loading a model for the dataset

In [3]:
from src.models.bcnmodel import *
from src.models.bertmodel import *
# Create the BCN model wrapper (class from src.models.bcnmodel, brought in by the star import above).
model = BCNModel()
# Print the path where the model file is expected *before* loading, so that a missing
# file is easy to diagnose from the cell output.
# NOTE(review): this relies on an underscore-prefixed (private) method of the model class.
print(f'expecting location for the model file at '
      f'"{model._get_model_filepath_for_dataset(data)}"')
# Load the model for the dataset selected in the previous cell.
model.load_model(data)
# Confirm what was loaded: the model object, its MODELTYPE and the dataset name.
print(f'loaded model {model} of type {model.MODELTYPE} for {data.NAME}')

expecting location for the model file at "../../models/bcn-sst_output/model.tar.gz"
loaded model <src.models.bcnmodel.BCNModel object at 0x7faca645dee0> of type allennlp for sst


# Explainers<br>
Creating an explainer for the model

In [4]:
from src.explainers.explainers import *
# Build a LIME explainer around the loaded model (num_samples=2000), then report
# which explainer class, model object and fine-tuning dataset are in use.
explainer = LimeExplainer(model, num_samples=2000)
print(
    f'using explainer {type(explainer)} '
    f'with model {explainer.model} '
    f'and dataset {explainer.model.dataset_finetune.NAME}'
)

using explainer <class 'src.explainers.explainers.LimeExplainer'> with model <src.models.bcnmodel.BCNModel object at 0x7faca645dee0> and dataset sst


Run explainer

In [5]:
# Import numpy explicitly: the notebook otherwise only has `np` if one of the
# star imports above happens to leak it into the namespace.
import numpy as np

# Explain a small batch of dev-set sentences (rows 5..9) and display the
# explanation of the first sentence in that batch.
inds = np.arange(5, 10)
X = explainer.explain_instances(dev.sentence[inds])

first_sentence = dev.sentence[inds[0]]
print('SENTENCE:', first_sentence)
tokenized = model.tokenizer.tokenize(first_sentence)

# The structure of X differs per explainer, so dispatch on the exact explainer type.
if type(explainer) is LimeExplainer:
    # Unpack the third element into `lime_inds`, NOT `inds`: reusing `inds` would
    # overwrite the sample indices defined above and leave the notebook in a state
    # where re-running the lines that use `inds` picks up the wrong array.
    # NOTE(review): the exact meaning of `pred` and `lime_inds` is defined by
    # LimeExplainer.explain_instances — confirm against its implementation.
    scores, pred, lime_inds, tokens = X
    # tokens[0] holds positions into the tokenized first sentence.
    print('tokens', [tokenized[t] for t in tokens[0]])
    print('scores', ['%.3f' % s for s in scores[0]])
elif type(explainer) is SHAPExplainer:
    shap_values = X
    print('shap values', shap_values[0])
elif type(explainer) is AllenNLPExplainer:
    grads, labels = X
    print(tokenized)
    print('gradients', ['%.3f' % g for g in grads[0]])

SENTENCE: Half Submarine flick , Half Ghost Story , All in one criminally neglected film
tokens [in, All, Ghost, Half, Half, flick, ,, one, Submarine, ,, Story]
scores ['0.130', '0.053', '0.039', '0.026', '0.020', '-0.002', '-0.002', '-0.003', '-0.004', '-0.005', '-0.009']
