# Set-up

In [1]:
import os
# Move the working directory up one level.
# NOTE(review): not idempotent - re-running this cell moves up one more level each time.
os.chdir('..')

In [2]:
import spacy
# Load the spaCy 'en_core_web_sm' pipeline.
# NOTE(review): `nlp` is not referenced by any other cell visible in this notebook - confirm it is still needed.
nlp = spacy.load('en_core_web_sm')

import random

import pandas as pd
import numpy as np
import scipy as sp
import importlib
import torch
import spacy
from torch.utils.data import \
    TensorDataset, \
    DataLoader
from transformers import \
    BertTokenizer, \
    BertForSequenceClassification, \
    AdamW, \
    BertConfig, \
    get_linear_schedule_with_warmup

import allennlp
import allennlp_models
from allennlp.models.archival import load_archive
from allennlp.common.util import JsonDict
from allennlp.data import Instance
from allennlp.predictors.predictor import Predictor
from allennlp.data.fields import LabelField
from allennlp.data.tokenizers.spacy_tokenizer import SpacyTokenizer
from typing import List, Dict
from overrides import overrides

from scipy.stats import kendalltau
from scipy.stats import spearmanr

from copy import deepcopy

import html
import random
from IPython.core.display import display, HTML

In [3]:
import sys

# Path two levels above the current working directory, expressed relative to it
project_root_dir = os.path.relpath(os.path.join('..', '..'), os.curdir)

# Register that path for imports only once, so re-running the cell does not add duplicates
already_registered = project_root_dir in sys.path
if not already_registered:
    sys.path.append(project_root_dir)

# Project-local data loaders and the BCN model wrapper
from src.data.dataload import *
from src.models.bcnmodel import *



In [4]:
# Use the first CUDA device when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

## Loading data

In [5]:
import src.data.dataload

# Load both corpora (SST first, then AG News)
sst = load_sst()
agnews = load_agnews()

# Index the loaded datasets by name; `datasets` lists those names in insertion order
input_data = dict(sst=sst, agnews=agnews)
datasets = [*input_data]

In [6]:
# One dict per split, each keyed by dataset name
train, dev, test = {}, {}, {}

for dataset in datasets:
    # `train_val_test` is unpacked into exactly three parts: train, dev and test
    splits = input_data[dataset].train_val_test
    train[dataset], dev[dataset], test[dataset] = splits

Using custom data configuration default
Reusing dataset ag_news (/Users/olivier/.cache/huggingface/datasets/ag_news/default/0.0.0/17ec33e23df9e89565131f989e0fdf78b0cc4672337b582da83fc3c9f79fe34d)


In [7]:
# Work on an independent copy of the first 300 entries of the AG News test split
agnews_test = test['agnews']
subset = deepcopy(agnews_test[:300])

## Loading explainers

In [8]:
import explainers_functions
from explainers_functions import *

In [9]:
# importlib.reload(explainers_functions)

## Loading perturbations

In [10]:
# Show the current working directory (it was changed by the os.chdir('..') call in the first cell)
os.getcwd()

'/Volumes/LaCie/GitHubNLP/ucl-nlp-group-project'

In [11]:
from src.data.perturbations import add_perturbations
from src.data.perturbations import \
    remove_commas, \
    remove_all_punctuation, \
    switch_gender, \
    strip_trailing_punct, \
    add_typo, \
    change_first_name, \
    change_last_name, \
    change_location, \
    contraction, \
    swap_adjectives

In [12]:
# Tokenizer instance passed to add_perturbations in the next cell
tokenizer = SpacyTokenizer()

In [13]:
# Perturbation functions handed to add_perturbations, in this order
perturbation_functions = [
    remove_commas,
    remove_all_punctuation,
    switch_gender,
    strip_trailing_punct,
    add_typo,
    change_first_name,
    change_last_name,
    change_location,
    contraction,
    swap_adjectives,
]

# NOTE(review): despite its name, `train_perturbations` is built from `subset`,
# which is taken from the AG News *test* split.
train_perturbations = add_perturbations(
    df=subset,
    tokenizer=tokenizer,
    sentence_col_name='sentence',
    perturbation_functions=perturbation_functions,
)

In [14]:
# Perturbations that delete tokens; the comparison cell drops the matching
# original scores before comparing explanations for these
perturbation_reduction = ['strip_punct', 'remove_commas', 'remove_all_punct']

# Every perturbation that is evaluated, reduction perturbations first
perturbation_list = [
    *perturbation_reduction,
    'contraction',
    'change_first_name',
    'change_last_name',
    'change_location',
    'add_typo',
    'switch_gender',
    'swap_adj',
]

## Visualisation

Source: https://adataanalyst.com/machine-learning/highlight-text-using-weights/

In [15]:
# Escape special characters such as & and < so that the browser shows them
# literally instead of interpreting them as markup.
def html_escape(text):
    """Return ``text`` with HTML special characters replaced by their entities."""
    escaped_text = html.escape(text, quote=True)
    return escaped_text

def visualise_weights(tokens, gradients, max_alpha = 0.3):
    """Display tokens inline as HTML, shading each one according to its weight.

    Every token is HTML-escaped and wrapped in a ``<span>`` with a light-blue
    background (rgb 135, 206, 250) whose alpha channel is
    ``gradients[i] / max_alpha``.

    Args:
        tokens: sequence of token strings to render.
        gradients: sequence of numeric weights indexed in parallel with
            ``tokens``; it must hold at least ``len(tokens)`` entries.
        max_alpha: divisor applied to each weight, i.e. the weight that maps
            to an alpha of 1.0.

    Returns:
        None. The highlighted text is rendered in the notebook as a side effect.
    """
    highlighted_spans = []
    for i, token in enumerate(tokens):
        alpha = gradients[i] / max_alpha
        highlighted_spans.append('<span style="background-color:rgba(135,206,250,' + str(alpha) + ');">' + html_escape(token) + '</span>')
    # display() renders the markup itself and returns None, so wrapping it in
    # print() only adds a stray "None" line after every rendering.
    display(HTML(' '.join(highlighted_spans)))

# BCN

**Set up your model & tokenizer**


The models can be downloaded from here:

https://upload.disroot.org/r/o8RgEa6y#lwPxvgfr6TDPXo/xl+u6kdGrsC5MIpaHQ3AstSZzZRg=
https://upload.disroot.org/r/_3rS3BsN#C/GGVAC1StHrFX/BEcT8zjGzGAhfWFeZc+wJ1uwyoaw=

Once downloaded, save each model in the matching folder of your local copy of the GitHub repository: '/models/bcn-sst_output' or '/models/bcn-agnews_output'.

In [16]:
# Instantiate the BCN wrapper and load its model for the AG News dataset object.
# NOTE(review): presumably reads the files saved under 'models/bcn-agnews_output' - confirm in BCNModel.load_model.
bcn = BCNModel()
bcn.load_model(agnews)
# BCN_AG_predictor = bcn.predictor

## AllenNLP Interpret

In [17]:
# Explainer built around the BCN model; it is used as `explainer` in the comparison cell below
AllenNLPExplainer1 = AllenNLPExplainer(bcn)

In [22]:
# Compare the explanation of every original sentence with the explanation of
# each of its perturbed versions. For every (row, perturbation) pair the
# Spearman correlation between the two score vectors is stored in
# `results_matrix`; pairs that cannot be compared fairly are stored as NaN.
df = train_perturbations
orig_col_index = df.columns.get_loc('sentence')
tokens_col_index = df.columns.get_loc('tokens_orig')
explainer = AllenNLPExplainer1

# never ask for more rows than the frame holds
rows_to_review = min(40, len(df))

# a token whose original score exceeds this value is treated as important
importance_threshold = 0.05
# a Spearman correlation below this value counts as a significant change in interpretation
similarity_threshold = 0.8

results_matrix = np.full((rows_to_review, len(perturbation_list)), np.nan)

# to count the number of times every perturbation leads to a significant change in interpretation
count = {perturbation: 0 for perturbation in perturbation_list}

for r in range(rows_to_review):
    sentence = {}
    tokens = {}
    success = {}
    pert_indices = {}
    scores = {}
    prediction = {}
    comparison = {}

    sentence['orig'] = df.iloc[r, orig_col_index]
    tokens['orig'] = df.iloc[r, tokens_col_index]
    # explain each sentence once and reuse both outputs (scores, prediction)
    explanation = explainer.explain_instance(sentence['orig'])
    scores['orig'] = np.array(explanation[0])
    prediction['orig'] = explanation[1]

    # creating dict for easier reference
    for perturbation in perturbation_list:
        sentence[perturbation] = df.iloc[r, df.columns.get_loc(perturbation + '_concat')]
        tokens[perturbation] = df.iloc[r, df.columns.get_loc(perturbation + '_tokens')]
        success[perturbation] = df.iloc[r, df.columns.get_loc(perturbation + '_success')]
        pert_indices[perturbation] = df.iloc[r, df.columns.get_loc(perturbation + '_pert_ind')]
        # only sentences that were actually modified need an explanation
        if success[perturbation] != 0:
            explanation = explainer.explain_instance(sentence[perturbation])
            scores[perturbation] = np.array(explanation[0])
            prediction[perturbation] = explanation[1]

    for perturbation in perturbation_list:
        # stays None unless a correlation can be computed below
        comparison[perturbation] = None

        # interpretations should only be compared if a sentence has been modified
        if success[perturbation] == 0:
            continue
        # interpretations should not be compared if prediction has changed
        if prediction[perturbation] != prediction['orig']:
            continue
        # interpretations should not be compared if an important token has been changed
        if any(scores['orig'][i] > importance_threshold for i in pert_indices[perturbation]):
            continue

        if perturbation in perturbation_reduction: # perturbation category 1
            removed_positions = set(pert_indices[perturbation])
            # remove empty tokens from tokenised version of perturbed sentence
            tokens[perturbation] = [token for i, token in enumerate(tokens[perturbation])
                                    if i not in removed_positions]
            # remove scores of tokens that have been deleted as a result of the perturbation
            orig_score_reduced = np.array([score for i, score in enumerate(scores['orig'])
                                           if i not in removed_positions])
            # normalise gradient
            orig_score_for_comparison = orig_score_reduced / np.sum(orig_score_reduced)
        elif perturbation == 'swap_adj': # perturbation category 2
            # assumes pert_indices is a flat list [a1, b1, a2, b2, ...] of swapped
            # positions - TODO confirm against swap_adjectives. Pairs must be read as
            # (0, 1), (2, 3), ...; stepping by one would mix up overlapping pairs as
            # soon as a sentence contains more than one swap.
            swap_positions = pert_indices[perturbation]
            orig_score_swapped = deepcopy(scores['orig'])
            for index_adj_1, index_adj_2 in zip(swap_positions[::2], swap_positions[1::2]):
                orig_score_swapped[index_adj_1] = scores['orig'][index_adj_2]
                orig_score_swapped[index_adj_2] = scores['orig'][index_adj_1]
            orig_score_for_comparison = orig_score_swapped
        else: # remaining perturbations
            orig_score_for_comparison = scores['orig']

        if len(orig_score_for_comparison) != len(scores[perturbation]):
            print('length not matching after perturbation')
            continue

        comparison[perturbation] = spearmanr(orig_score_for_comparison, scores[perturbation])[0]
        if comparison[perturbation] < similarity_threshold:
            count[perturbation] += 1
            visualise_weights(tokens['orig'], scores['orig'])
            visualise_weights(tokens[perturbation], scores[perturbation])

    for p, perturbation in enumerate(perturbation_list):
        # skipped comparisons are stored as NaN so that np.nanmean ignores them
        if comparison[perturbation] is not None:
            results_matrix[r, p] = comparison[perturbation]

None


None
length not matching after perturbation
length not matching after perturbation


None


None
length not matching after perturbation


None


None
length not matching after perturbation
length not matching after perturbation


None


None


None


None


None


None
length not matching after perturbation


None


None


None


None


None


None


None


None
length not matching after perturbation


None


None


None


None


None


None
length not matching after perturbation


None


None


None


None


None


None
length not matching after perturbation


In [25]:
# Mean Spearman correlation per perturbation (columns follow the order of perturbation_list);
# NaN entries, i.e. comparisons that were skipped, are ignored.
np.nanmean(results_matrix, axis = 0)

array([0.97845921, 0.95042059, 0.78822114, 0.97521083, 0.95922501,
       0.9242627 , 0.87250521, 0.8916365 , 0.92874647, 0.99543632])

In [26]:
perturbation_list

['strip_punct',
 'remove_commas',
 'remove_all_punct',
 'contraction',
 'change_first_name',
 'change_last_name',
 'change_location',
 'add_typo',
 'switch_gender',
 'swap_adj']