# Set-up<br>
This set-up assumes that the working directory (`os.curdir`) is where the notebook is.

In [109]:
import os
import sys
this_notebook_dir = os.curdir
# all imports will be relative to the root directory of the project
# we thus add it here:
project_root_dir = os.path.relpath(os.path.join('..', '..'), this_notebook_dir)
if project_root_dir not in sys.path:
    sys.path += [project_root_dir]

# Loading data and model

We will now a dataset

In [3]:
from src.data.dataload import *
data = load_sst()
print(f'loaded dataset {data.NAME}')
train, dev, test = data.train_val_test
dev

loaded dataset sst


Unnamed: 0,sentence,label
0,It 's a lovely film with lovely performances b...,3
1,"No one goes unindicted here , which is probabl...",2
2,And if you 're not nearly moved to tears by a ...,3
3,"A warm , funny , engaging film .",4
4,Uses sharp humor and insight into human nature...,4
...,...,...
1096,it seems to me the film is about the art of ri...,1
1097,It 's just disappointingly superficial -- a mo...,1
1098,The title not only describes its main characte...,1
1099,Sometimes it feels as if it might have been ma...,2


Loading a model for the dataset

In [255]:
from src.models.bcnmodel import *
from src.models.bertmodel import *
#model = BertModel()
model = BCNModel(device=torch.device('cpu'))
print(f'expecting location for the model file at '
      f'"{model._get_model_filepath_for_dataset(data)}"')
model.load_model(data)
print(f'loaded model {type(model)} of kind {model.MODELTYPE} for {data.NAME}')

expecting location for the model file at "../../models/bcn-sst_output/model.tar.gz"
loaded model <class 'src.models.bcnmodel.BCNModel'> of kind allennlp for sst


# Perturbations

We will now load all the necessary tools to perturb data

In [256]:
from src.data.perturbations import add_perturbations
from src.data.perturbations import \
    remove_commas, \
    remove_all_punctuation, \
    switch_gender, \
    strip_trailing_punct, \
    add_typo, \
    change_first_name, \
    change_last_name, \
    change_location, \
    contraction, \
    swap_adjectives

perturbation_reduction = ['strip_punct', 'remove_commas', 'remove_all_punct']
perturbations_named = ['change_first_name','change_last_name', 'change_location']
perturbations_other = ['contraction', 'add_typo', 'switch_gender', 'swap_adj']
perturbations_list = perturbation_reduction + perturbations_named + ['contraction', 'add_typo', 'switch_gender', 'swap_adj']

perturbations_named_f = [change_first_name, change_last_name, change_location]
perturbations_unnamed_f = [remove_commas, remove_all_punctuation, switch_gender,
                           strip_trailing_punct, add_typo, contraction, swap_adjectives]
perturbations_all_f = perturbations_named_f + perturbations_unnamed_f

def bert_detokenize(token_list):
    """
    Reverse BERT tokenization accounting for ##s representing subwords. Based on https://github.com/huggingface/transformers/issues/36
    """
    text = ' '.join(token_list)
    text = text.replace(' ##', '')
    text = text.replace("` `", "``")
    text = text.replace("' '", "''")
    return text

def run_detokenizer_on_perturbations(df):
    for perturbation in perturbations_list:
        if f'{perturbation}_concat' not in df.columns:
            continue
        df[f'{perturbation}_concat'] = df[f'{perturbation}_tokens'].apply(bert_detokenize)
        df[f'{perturbation}_tokens'] = df[f'{perturbation}_concat'].apply(lambda x: model.tokenizer.tokenize(x))
    return df

# merge cased and uncased perturbations
def merge_perturbations(df, df_cased):
    for perturbation in perturbations_named:
        for suffix in ['tokens', 'pert_ind', 'success', 'concat']:
            df[f'{perturbation}_{suffix}'] = df_cased[f'{perturbation}_{suffix}'].tolist()
    return df

Now we are ready to generate a dataframe with the perturbations

In [257]:
inds = np.arange(5, 10)
inds = inds[model.predict_label_batch(dev.sentence[inds]) == dev.label[inds]]
if type(model) == BCNModel:
    df_perturbations = add_perturbations(
        df=dev.iloc[inds],
        tokenizer=model.tokenizer,
        sentence_col_name=data.SENTENCE,
        perturbation_functions=perturbations_all_f,
    )
elif type(model) == BertModel:
    # uncased perturbations
    df_perturbations = add_perturbations(
        df=dev.iloc[inds],
        tokenizer=model.tokenizer,
        sentence_col_name=data.SENTENCE,
        perturbation_functions=perturbations_unnamed_f,
    )
    # with cased tokenizer
    df_perturbations_named = add_perturbations(
        df=dev.iloc[inds],
        tokenizer=BertTokenizer.from_pretrained('bert-base-cased'),
        sentence_col_name=data.SENTENCE,
        perturbation_functions=perturbations_named_f,
    )
    # merge the two together
    df_perturbations = merge_perturbations(df_perturbations, df_perturbations_named)
    # remove ## in the reconstructed sentences
    df_perturbations = run_detokenizer_on_perturbations(df_perturbations)
df_perturbations.head()

Unnamed: 0,sentence,label,tokens_orig,change_first_name_concat,change_first_name_tokens,change_first_name_success,change_first_name_pert_ind,change_last_name_concat,change_last_name_tokens,change_last_name_success,...,add_typo_success,add_typo_pert_ind,contraction_concat,contraction_tokens,contraction_success,contraction_pert_ind,swap_adj_concat,swap_adj_tokens,swap_adj_success,swap_adj_pert_ind
7,"Dazzles with its fully-written characters , it...",4,"[Dazzles, with, its, fully, -, written, charac...","Dazzles with its fully-written characters , it...","[Dazzles, with, its, fully, -, written, charac...",0,,"Dazzles with its fully-written characters , it...","[Dazzles, with, its, fully, -, written, charac...",0,...,1,"[26, 27]","Dazzles with its fully-written characters , it...","[Dazzles, with, its, fully, -, written, charac...",0,,"Dazzles with its fully-written characters , it...","[Dazzles, with, its, fully, -, written, charac...",0,
8,"Visually imaginative , thematically instructiv...",4,"[Visually, imaginative, ,, thematically, instr...","Visually imaginative , thematically instructiv...","[Visually, imaginative, ,, thematically, instr...",0,,"Visually imaginative , thematically instructiv...","[Visually, imaginative, ,, thematically, instr...",0,...,1,"[25, 26]","Visually imaginative , thematically instructiv...","[Visually, imaginative, ,, thematically, instr...",0,,"Visually imaginative , thematically instructiv...","[Visually, imaginative, ,, thematically, instr...",0,
9,"Nothing 's at stake , just a twisty double-cro...",3,"[Nothing, 's, at, stake, ,, just, a, twisty, d...","Nothing 's at stake , just a twisty double-cro...","[Nothing, 's, at, stake, ,, just, a, twisty, d...",0,,"Nothing 's at stake , just a twisty double-cro...","[Nothing, 's, at, stake, ,, just, a, twisty, d...",0,...,1,"[27, 28]","Nothing 's at stake , just a twisty double-cro...","[Nothing, 's, at, stake, ,, just, a, twisty, d...",0,,"Nothing 's at stake , just a twisty double-cro...","[Nothing, 's, at, stake, ,, just, a, twisty, d...",0,


# Explainers and Evaluation

Explainers are run in an evaluation loop, where metrics are applied to each suitable perturbation of each sentence:

In [258]:
from src.explainers.explainers import *

import tqdm
import scipy as sp

df_perturbations.reset_index(drop=True, inplace=True)
preds = model.predict_label_batch(df_perturbations.sentence)


def get_sorted_tokens(scores, tokens):
    N = len(scores)
    sorted_inds = sorted(range(N), key=lambda i: abs(scores[i]), reverse=True)
    return [str(tokens[i]).lower() for i in sorted_inds]


def metric_top_k(k, original_scores, original_tokens, perturbed_scores, perturbed_tokens, **kwargs):
    N = len(original_tokens)
    k = min(k, N)
    original_top_k = get_sorted_tokens(scores=original_scores, tokens=original_tokens)[:k]
    perturbed_top_k = get_sorted_tokens(scores=perturbed_scores, tokens=perturbed_tokens)[:k]
    num_common = 0
    for word in set([str(t) for t in original_top_k]):
        num_common += min(original_top_k.count(word), perturbed_top_k.count(word))
    print(f'TOP-{k}')
    print(original_top_k)
    print(perturbed_top_k)
    return num_common / k

def metric_spearman(original_scores, perturbed_scores, **kwargs):
    return sp.stats.spearmanr(original_scores, perturbed_scores).correlation


def run_evaluation(explainer, show_f, metric_proxy):
    for i in tqdm.trange(len(df_perturbations)):
        prediction = preds[i]
        original_sentence = df_perturbations.sentence[i]
        original_tokens = model.tokenizer.tokenize(original_sentence)
        original_explanation = None
        for p in perturbations_list:
            perturbed_sentence = df_perturbations[f'{p}_concat'][i]
            perturbed_tokens = model.tokenizer.tokenize(perturbed_sentence)
            # perturbation wasn't successful:
            if df_perturbations[f'{p}_success'][i] == 0:
                print('skipped:', p, 'unsuccessful')
                continue
            # different numbers of tokens
            elif len(original_tokens) != len(perturbed_tokens):
                print('skipped:', p, 'length changed')
                continue
            # or didn't reach the same correct prediction
            elif model.predict_label(perturbed_sentence) != preds[i]:
                print('skipped', p, 'prediction changed')
                continue
            # we will run the prediction on the perturbed sentence now
            print('running explainer with ', p)
            if original_explanation is None:
                original_explanation = explainer.explain_instances([original_sentence])
                original_tokens, original_scores = metric_proxy(sentence=original_sentence,
                                                                explanation=original_explanation,
                                                                prediction=prediction)
                original = dict(
                    original_sentence=original_sentence,
                    original_tokens=original_tokens,
                    original_explanation=original_explanation,
                    original_scores=original_scores,
                    prediction=preds[i],
                )
            perturbed_explanation = explainer.explain_instances([perturbed_sentence])
            perturbed_tokens, perturbed_scores = metric_proxy(sentence=perturbed_sentence,
                                                              explanation=perturbed_explanation,
                                                              prediction=prediction)
            perturbed = dict(
                perturbed_sentence=perturbed_sentence,
                perturbed_tokens=perturbed_tokens,
                perturbed_explanation=perturbed_explanation,
                perturbed_scores=perturbed_scores,
            )

            print('-'*100)
            # show metrics
            K = 5
            print(f'TOP-{K} score:', metric_top_k(k=K, **original, **perturbed))
            print('SPEARMAN:', metric_spearman(k=5, original_scores=original_scores, perturbed_scores=perturbed_scores))
            # show perturbed explanation
            print('ORIGINAL:', original_sentence)
            show_f(sentence=original_sentence,
                   explanation=original_explanation,
                   **original, **perturbed)
            print('PERTURBED:', perturbed_sentence)
            show_f(sentence=perturbed_sentence,
                   explanation=perturbed_explanation,
                   **original, **perturbed)
            print('-'*100)
        original_explanation = None

### LIME explainer

Construct LIME explainer:

In [259]:
lime_explainer = LimeExplainer(model, num_samples=500)
print(f'using explainer {type(lime_explainer)} with model {lime_explainer.model} and dataset {lime_explainer.model.dataset_finetune.NAME}')

using explainer <class 'src.explainers.explainers.LimeExplainer'> with model <src.models.bcnmodel.BCNModel object at 0x760f849126d0> and dataset sst


Analyse:

In [260]:
def metric_proxy_lime(sentence, explanation, **kwargs):
    scores, pred, inds, tokens = explanation
    tokenized = model.tokenizer.tokenize(sentence)
    tokens = [tokenized[t] for t in tokens[0]]
    scores = scores[0]
    return tokens, scores

def show_lime_sentence(sentence, explanation, **kwargs):
    tokens, scores = metric_proxy_lime(sentence=sentence, explanation=explanation, **kwargs)
    print('tokens', tokens)
    print('scores', ['%.3f' % s for s in scores])

run_evaluation(explainer=lime_explainer,
               show_f=show_lime_sentence,
               metric_proxy=metric_proxy_lime)

  0%|          | 0/3 [00:00<?, ?it/s]

skipped: strip_punct length changed
skipped: remove_commas length changed
skipped: remove_all_punct length changed
skipped: change_first_name unsuccessful
skipped: change_last_name unsuccessful
skipped: change_location unsuccessful
skipped: contraction unsuccessful
running explainer with  add_typo


 33%|███▎      | 1/3 [00:06<00:13,  6.94s/it]

----------------------------------------------------------------------------------------------------
TOP-5
['and', 'characters', 'and', 'dazzles', 'which']
['and', ')', 'characters', 'dazzles', 'and']
TOP-5 score: 0.8
SPEARMAN: 1.0
ORIGINAL: Dazzles with its fully-written characters , its determined stylishness ( which always relates to characters and story ) and Johnny Dankworth 's best soundtrack in years .
tokens [and, characters, and, Dazzles, which, story, determined, -, fully, with, ), ,, its, characters, relates, its, stylishness, (, to, always, written]
scores ['0.227', '0.061', '0.051', '0.042', '0.041', '0.025', '0.022', '0.020', '0.018', '0.014', '0.010', '0.008', '-0.000', '-0.000', '-0.002', '-0.003', '-0.005', '-0.013', '-0.013', '-0.015', '-0.020']
PERTURBED: Dazzles with its fully - written characters , its determined stylishness ( which always relates to characters and story ) and Johnny Dankworth 's best soundtrack i nyears .
tokens [and, characters, Dazzles, which, s

100%|██████████| 3/3 [00:12<00:00,  4.25s/it]

----------------------------------------------------------------------------------------------------
TOP-5
['thoroughly', 'imaginative', 'thematically', 'innocence', 'visually']
['thoroughly', 'thematically', 'imaginative', 'experience', 'hin']
TOP-5 score: 0.6
SPEARMAN: 0.9999999999999998
ORIGINAL: Visually imaginative , thematically instructive and thoroughly delightful , it takes us on a roller-coaster ride from innocence to experience without even a hint of that typical kiddie-flick sentimentality .
tokens [thoroughly, imaginative, thematically, Visually, roller, coaster, it, instructive, from, -, ,, ,, a, takes, ride, even, and, on, experience, without, that, to, us, hint, of, a, delightful, innocence]
scores ['0.152', '0.061', '0.054', '0.046', '0.039', '0.038', '0.032', '0.027', '0.018', '0.014', '0.010', '0.007', '0.000', '-0.000', '-0.001', '-0.001', '-0.003', '-0.005', '-0.007', '-0.012', '-0.015', '-0.019', '-0.022', '-0.023', '-0.024', '-0.024', '-0.025', '-0.050']
PERTURBE




### Alternative explainer

This depends on a model, so we will first prepare show_f and metric_proxy for each explainer:

**AllenNLP explainer helpers**

In [264]:
import html
from IPython.core.display import display, HTML

# Prevent special characters like & and < to cause the browser to display something other than what you intended.
def html_escape(text):
    return html.escape(text)

def visualise_weights(tokens, gradients, max_alpha=.3):
    max_alpha = max_alpha 
    highlighted_text = []
    for i in range(len(tokens)):
        weight = gradients[i]
        highlighted_text.append('<span style="background-color:rgba(135,206,250,' + str(weight / max_alpha) + ');">' + html_escape(tokens[i]) + '</span>')
    highlighted_text = ' '.join(highlighted_text)
    print(display(HTML(highlighted_text)))

def metric_proxy_allennlp(sentence, explanation, **kwargs):
    grads, labels = explanation
    tokenized = model.tokenizer.tokenize(sentence)
    return tokenized, grads[0]

def show_allennlp_sentence(sentence, explanation, **kwargs):
    tokens, scores = metric_proxy_allennlp(sentence=sentence, explanation=explanation, **kwargs)
    visualise_weights([str(t) for t in tokens], scores, max_alpha=.5)

**SHAP explainer helpers**

In [265]:
def metric_proxy_shap(sentence, explanation, prediction, **kwargs):
    tokens = explanation.data[0, 1:-1]
    scores = explanation.values[0][1:-1]
    return tokens, scores[:, prediction]

def show_shap_sentence(sentence, explanation, **kwargs):
    tokens, scores = metric_proxy_shap(sentence=sentence, explanation=explanation, **kwargs)
    print('tokens:', tokens)
    print('scores:', ['%.3f' % s for s in scores])

We will now run evaluation with the alternative explainer:

In [266]:
alternative_explainer = None
metric_proxy_func = None
show_func = None
if type(model) == BCNModel:
    alternative_explainer = AllenNLPExplainer(model)
    show_func = show_allennlp_sentence
    metric_proxy_func = metric_proxy_allennlp
elif type(model) == BertModel:
    alternative_explainer = SHAPExplainer(model)
    show_func = show_shap_sentence
    metric_proxy_func = metric_proxy_shap

print('alternative explainer', type(alternative_explainer))
run_evaluation(explainer=alternative_explainer,
               show_f=show_func,
               metric_proxy=metric_proxy_func)

  0%|          | 0/3 [00:00<?, ?it/s]

alternative explainer <class 'src.explainers.explainers.AllenNLPExplainer'>
skipped: strip_punct length changed
skipped: remove_commas length changed
skipped: remove_all_punct length changed
skipped: change_first_name unsuccessful
skipped: change_last_name unsuccessful
skipped: change_location unsuccessful
skipped: contraction unsuccessful
running explainer with  add_typo
----------------------------------------------------------------------------------------------------
TOP-5
['best', "'s", 'relates', 'stylishness', 'johnny']
['best', 'nyears', 'i', 'relates', 'stylishness']
TOP-5 score: 0.6
SPEARMAN: 0.815270935960591
ORIGINAL: Dazzles with its fully-written characters , its determined stylishness ( which always relates to characters and story ) and Johnny Dankworth 's best soundtrack in years .


None
PERTURBED: Dazzles with its fully - written characters , its determined stylishness ( which always relates to characters and story ) and Johnny Dankworth 's best soundtrack i nyears .


 33%|███▎      | 1/3 [00:00<00:00,  2.63it/s]

None
----------------------------------------------------------------------------------------------------
skipped: switch_gender unsuccessful
skipped: swap_adj unsuccessful
skipped: strip_punct length changed
skipped: remove_commas length changed
skipped: remove_all_punct length changed
skipped: change_first_name unsuccessful
skipped: change_last_name unsuccessful
skipped: change_location unsuccessful
skipped: contraction unsuccessful
running explainer with  add_typo
----------------------------------------------------------------------------------------------------
TOP-5
['delightful', 'thoroughly', 'instructive', 'it', 'thematically']
['delightful', 'thoroughly', 'instructive', 'it', 'thematically']
TOP-5 score: 1.0
SPEARMAN: 0.9150496562261269
ORIGINAL: Visually imaginative , thematically instructive and thoroughly delightful , it takes us on a roller-coaster ride from innocence to experience without even a hint of that typical kiddie-flick sentimentality .


None
PERTURBED: Visually imaginative , thematically instructive and thoroughly delightful , it takes us on a roller - coaster ride from innocence to experience without even a hin tof that typical kiddie - flick sentimentality .


100%|██████████| 3/3 [00:00<00:00,  3.60it/s]

None
----------------------------------------------------------------------------------------------------
skipped: switch_gender unsuccessful
skipped: swap_adj unsuccessful
skipped: strip_punct length changed
skipped: remove_commas length changed
skipped: remove_all_punct length changed
skipped: change_first_name unsuccessful
skipped: change_last_name unsuccessful
skipped: change_location unsuccessful
skipped: contraction unsuccessful
skipped add_typo prediction changed
skipped: switch_gender unsuccessful
skipped: swap_adj unsuccessful



