# Set-up

Change the working directory to the one where you saved your files

In [6]:
cd /content/drive/MyDrive/COMP0087/allenNLP/BCN

/content/drive/MyDrive/COMP0087/allenNLP/BCN


In [7]:
from torch import nn
import torch.optim as optim
import numpy as np
import spacy
nlp = spacy.load('en_core_web_sm')
import pandas as pd

Change path below as needed

In [8]:
test_set = pd.read_json('/content/drive/MyDrive/COMP0087/data/test.jsonl', orient='records', lines=True)

In [None]:
%%shell
pip install allennlp==2.2.0 allennlp-models==2.2.0

In [10]:
# imports from allennlp
from allennlp.models.archival import load_archive
from allennlp.common.util import JsonDict
from allennlp.data import Instance
from allennlp.predictors.predictor import Predictor
from allennlp.data.fields import LabelField
from allennlp.data.tokenizers.spacy_tokenizer import SpacyTokenizer

from typing import List, Dict

from overrides import overrides

from allennlp.interpret.attackers import Attacker, InputReduction

from allennlp.interpret.saliency_interpreters import SimpleGradient

tokenizer = SpacyTokenizer()

In [None]:
%%shell
pip install checklist==0.0.10

In [12]:
import checklist
from checklist.editor import Editor
from checklist.perturb import Perturb

In [13]:
import warnings
warnings.filterwarnings('ignore')

In [14]:
from scipy.stats import kendalltau
from scipy.stats import spearmanr

Background about model: http://docs.allennlp.org/v0.9.0/api/allennlp.models.biattentive_classification_network.html

In [15]:
# importing the dataset reader
import tagging
# importing the BCN model
import BCN_model

# Training

In [57]:
# training model
# here, the output will be saved to a new folder called 'BCN_output_2'. You will get an error message if such a directory already exists.
!pwd; allennlp train --include-package tagging -s BCN_output_2 config_BCN.jsonnet

/content/drive/MyDrive/COMP0087/allenNLP/BCN
2021-04-13 21:09:39.390690: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.11.0
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
2021-04-13 21:09:42,644 - ERROR - allennlp.common.plugins - Plugin allennlp_models could not be loaded: No module named 'nltk.translate.meteor_score'
2021-04-13 21:09:42,894 - INFO - allennlp.common.params - include_in_archive = None
2021-04-13 21:09:42,928 - INFO - allennlp.common.params - random_seed = 13370
2021-04-13 21:09:42,934 - INFO - allennlp.common.params - numpy_seed = 1337
2021-04-13 21:09:42,935 - INFO - allennlp.common.params - pytorch_seed = 133
2021-04-13 21:09:42,956 - INFO - allennlp.common.checks - Pytorch version: 1.8.1+cu101
2021-04-13 21:09:42,957 - INFO - all

# Predictions

In [16]:
@Predictor.register('ag_text_classifier')
class AGNewsClassifier(Predictor):
    """
    Text-classification predictor that reads its input text from the
    `"Description"` key of the request JSON and labels an instance with the
    arg-max of the model's `"class_probabilities"` output.
    """

    def predict(self, sentence: str) -> JsonDict:
        """Wrap `sentence` as `{"Description": sentence}` and pass it to `predict_json`."""
        payload = {"Description": sentence}
        return self.predict_json(payload)

    @overrides
    def _json_to_instance(self, json_dict: JsonDict) -> Instance:
        """
        Expects JSON that looks like `{"Description": "..."}`.

        The text is handed to the dataset reader's `text_to_instance`. When the
        reader exposes neither a `tokenizer` nor a `_tokenizer` attribute, the
        text is tokenized here with a `SpacyTokenizer` first.
        """
        reader = self._dataset_reader
        text = json_dict["Description"]
        if (
            getattr(reader, "tokenizer", None) is None
            and getattr(reader, "_tokenizer", None) is None
        ):
            # The reader has no tokenizer of its own, so tokenize on its behalf.
            text = SpacyTokenizer().tokenize(text)
        return reader.text_to_instance(text)

    @overrides
    def predictions_to_labeled_instances(
        self, instance: Instance, outputs: Dict[str, np.ndarray]
    ) -> List[Instance]:
        """Return a one-element list holding a copy of `instance` with a `"label"` field
        set to the index of the largest entry in `outputs["class_probabilities"]`."""
        labeled = instance.duplicate()
        predicted_index = int(np.argmax(outputs["class_probabilities"]))
        labeled.add_field("label", LabelField(predicted_index, skip_indexing=True))
        return [labeled]

In [17]:
# Load the trained model archive and keep handles to the model and its vocabulary.
# NOTE(review): the training cell above writes to 'BCN_output_2', while this reads
# './BCN_output/model.tar.gz' — confirm this is the intended run.
archive = load_archive("./BCN_output/model.tar.gz")
model = archive.model
vocab = model.vocab

In [18]:
predictor = Predictor.from_archive(archive, 'ag_text_classifier')

In [23]:
# Collect the value at tuple position 3 from each of the first 1000 test rows.
# Position 0 of an `itertuples()` row is the index, so this is the third column.
# NOTE(review): presumably the description text — confirm against the data schema.
sample = [row[3] for row in test_set.head(1000).itertuples()]

# Input reduction

In [None]:
@Predictor.register('ag_text_classifier_with_input_red')
class InputReductionTextClassifierPredictor(AGNewsClassifier):
    """Predictor that returns the plain prediction together with an input-reduction attack."""

    def predict_json(self, json_dict: JsonDict) -> JsonDict:
        """
        Classify `json_dict['Description']` and run `InputReduction` on the same input.

        Returns a dict with the keys `'prediction'` and `'input_reduction_output'`.
        """
        # A separate AGNewsClassifier is used because `AGNewsClassifier.predict` calls
        # `predict_json`, which this class overrides.
        base_predictor = AGNewsClassifier(self._model, self._dataset_reader)
        result = {'prediction': base_predictor.predict(sentence=json_dict['Description'])}

        reducer = InputReduction(base_predictor)
        result['input_reduction_output'] = reducer.attack_from_json(
            inputs=json_dict,
            input_field_to_attack='tokens',
            grad_input_field='grad_input_1',
            ignore_tokens=None,
        )
        return result

In [None]:
predictor_with_input_red = Predictor.from_archive(archive, 'ag_text_classifier_with_input_red')

In [None]:
# Example sentence for the input-reduction demo.
# NOTE(review): the name `input` shadows Python's built-in `input()` for the rest of the session.
input = "The Cleveland Indians pulled within one game of the AL Central lead by beating the Minnesota Twins, 7-1, Saturday night with home runs by Travis Hafner and Victor Martinez."

In [None]:
predictor_with_input_red.predict(input)

{'input_reduction_output': {'final': [['pulled', 'Hafner']],
  'original': ['The',
   'Cleveland',
   'Indians',
   'pulled',
   'within',
   'one',
   'game',
   'of',
   'the',
   'AL',
   'Central',
   'lead',
   'by',
   'beating',
   'the',
   'Minnesota',
   'Twins',
   ',',
   '7',
   '-',
   '1',
   ',',
   'Saturday',
   'night',
   'with',
   'home',
   'runs',
   'by',
   'Travis',
   'Hafner',
   'and',
   'Victor',
   'Martinez',
   '.']},
 'prediction': {'class_probabilities': [0.000256297062151134,
   0.9993062019348145,
   0.0002524169394746423,
   0.00018510186055209488],
  'label': '2',
  'logits': [-1.1653821468353271,
   7.103096961975098,
   -1.1806373596191406,
   -1.4908137321472168]}}

# Visualisation

Source: https://adataanalyst.com/machine-learning/highlight-text-using-weights/

In [25]:
import html
import random
from IPython.core.display import display, HTML

In [26]:
def html_escape(text):
    """Return `text` with HTML special characters (such as & and <) escaped,
    so the browser shows them literally instead of interpreting them."""
    escaped = html.escape(text)
    return escaped

In [27]:
def visualise_weights(tokens, gradients, max_alpha = 0.4):
  """Show `tokens` inline, each on a light-blue background whose opacity follows its weight.

  Args:
    tokens: sequence of token strings.
    gradients: sequence of per-token weights, indexed in parallel with `tokens`
      (must be at least as long as `tokens`).
    max_alpha: divisor applied to each weight to obtain the CSS alpha value
      (alpha = weight / max_alpha).

  Returns:
    None. The HTML is rendered through IPython's `display`.
  """
  spans = []
  for i, token in enumerate(tokens):
    alpha = gradients[i] / max_alpha
    spans.append('<span style="background-color:rgba(135,206,250,' + str(alpha) + ');">' + html_escape(token) + '</span>')
  # `display` returns None, so wrapping it in `print` only emitted a stray "None" line.
  display(HTML(' '.join(spans)))

# Checklist

In [None]:
# Show the SimpleGradient saliency of the first 10 sample sentences as highlighted text.
for sentence in sample[:10]:
  # String forms of the SpacyTokenizer tokens, used as the display text.
  tokens = [str(x) for x in tokenizer.tokenize(sentence)]
  grad = SimpleGradient(predictor).saliency_interpret_from_json({'Description':sentence})
  # NOTE(review): assumes one gradient value per token in `tokens`, i.e. that the
  # predictor tokenizes the sentence the same way — confirm.
  gradient_list = grad['instance_1']['grad_input_1']
  visualise_weights(tokens, gradient_list, 0.4)

In [None]:
# Run the spaCy pipeline `nlp` over every sample sentence and keep the results in a list.
# NOTE(review): `pdata` is not referenced in the cells visible here — confirm it is still needed.
pdata = list(nlp.pipe(sample))

In [None]:
# For the first 100 sample sentences, show the saliency map before and after
# checklist's contraction perturbation (only for sentences where it produces a result).
for i in range(100):
  sentence = sample[i]
  # Skip sentences for which `Perturb.contractions` returns an empty list.
  if Perturb.contractions(sample[i]) != []:

    tokens = [str(x) for x in tokenizer.tokenize(sentence)]

    # Saliency of the original sentence.
    grad_orig = SimpleGradient(predictor).saliency_interpret_from_json({'Description':sentence})
    gradient_list_orig = np.array(grad_orig['instance_1']['grad_input_1'])

    visualise_weights(tokens, gradient_list_orig)

    # Only the first perturbed variant is used.
    perturbed_sentence = Perturb.contractions(sentence)[0]

    # `tokens` is reused for the perturbed sentence from here on.
    tokens = [str(x) for x in tokenizer.tokenize(perturbed_sentence)]

    # Saliency of the perturbed sentence.
    grad_pert = SimpleGradient(predictor).saliency_interpret_from_json({'Description': perturbed_sentence})
    gradient_list_pert = np.array(grad_pert['instance_1']['grad_input_1'])

    visualise_weights(tokens, gradient_list_pert)

# Manual perturbations

- Checklist (contraction, typos, name change, punctuation)
- More manual perturbations (gender, temporal)

In [20]:
# helper functions

# Characters removed by `remove_all_punctuation`.
punctuation = ['!', '"', '&', "'", '(', ')', ',', '-', '.', '/', ':', ';', '?', '[', ']', '_', '`', '{', '}', '—',
 '…', '®', '–', '™', '‐']

def remove_char(text, char):
    """Remove characters from a string, or from every string in a list.

    Inputs:
    text: String, or list that has strings as elements. The transformation is applied to all strings.
    char: character(s) to be removed. This can be a string, or a list of strings (if multiple characters
    need to be removed). Every individual character contained in `char` is removed.

    Output: the modified string if `text` is a string, otherwise a new list of modified strings.
    The list passed in as `text` is not modified.
    """
    # Flatten `char` into one string of characters to delete; a multi-character string
    # is treated as a set of characters rather than raising in `maketrans`.
    chars_to_delete = char if isinstance(char, str) else ''.join(char)
    table = str.maketrans('', '', chars_to_delete)
    if isinstance(text, str):
        return text.translate(table)
    # Build a fresh list so the caller's list is left untouched.
    return [item.translate(table) for item in text]

In [21]:
def switch_to_lower_case(sentence):
  """Lower-case every token of `sentence` (a list of strings) in place and return that same list."""
  for position, token in enumerate(sentence):
    sentence[position] = token.lower()
  return sentence

def remove_comma(sentence):
  """Strip every ',' from the tokens in `sentence` (a list of strings) via `remove_char`."""
  comma = ','
  return remove_char(sentence, comma)

def remove_all_punctuation(sentence):
  """Strip every character listed in the module-level `punctuation` from the tokens
  in `sentence` (a list of strings) via `remove_char`."""
  stripped = remove_char(sentence, punctuation)
  return stripped

dict_names = {'Jason': 'Kelly', 'Kelly': 'Jason'}

def change_name(sentence, dict_names = dict_names):
  """Swap names in a tokenised sentence.

  sentence: list of tokens; it is modified in place and also returned
  dict_names: mapping from a name to its replacement (exact token match)
  """
  for position, token in enumerate(sentence):
    if token in dict_names:
      sentence[position] = dict_names[token]
  return sentence

dict_pronouns = {'he': 'she', 'him':'her', 'his': 'her', 'she':'he', 'her': 'his', 'hers': 'his'}
def change_pronouns(sentence, dict_pronouns = dict_pronouns):
  """Swap pronouns in a tokenised sentence.

  sentence: list of tokens; it is modified in place and also returned
  dict_pronouns: mapping from a pronoun to its replacement (exact token match)
  """
  for position, token in enumerate(sentence):
    if token in dict_pronouns:
      sentence[position] = dict_pronouns[token]
  return sentence

# Two-way lookup built from `pairs`: each word maps to its counterpart and back.
dict_gender = {}
pairs = [['man','woman'],['men','women'],['boy','girl'],['boyfriend','girlfriend'],['wife', 'husband'], ['brother','sister']]
for pair in pairs:
  dict_gender[pair[0]] = pair[1]
  dict_gender[pair[1]] = pair[0]

def change_gender(sentence, dict_genders = dict_gender):
  """Swap gendered words in a tokenised sentence.

  sentence: list of tokens; it is modified in place and also returned
  dict_genders: mapping from a word to its replacement (exact token match);
    defaults to the module-level `dict_gender`
  """
  for i, token in enumerate(sentence):
    # Look up in the `dict_genders` argument, so a caller-supplied mapping is honoured.
    if token in dict_genders:
      sentence[i] = dict_genders[token]
  return sentence

In [45]:
# select perturbation(s) and add them to the list
list_perturbations = [remove_all_punctuation]

def perturb(sentence):
  """Apply every function in `list_perturbations` to `sentence`, in list order,
  feeding each result into the next, and return the final result."""
  perturbed = sentence
  for transform in list_perturbations:
    perturbed = transform(perturbed)
  return perturbed

# Compare saliency maps before and after perturbation for sample sentences 150-159,
# and display the cases where the two rankings disagree.
for s in range(150,160):
  sentence_orig = sample[s]

  # Predicted label and SimpleGradient saliency for the unperturbed sentence.
  pred_orig = predictor.predict(sentence=sentence_orig)['label']
  grad_orig = SimpleGradient(predictor).saliency_interpret_from_json({'Description':sentence_orig})
  gradient_list_orig = np.array(grad_orig['instance_1']['grad_input_1'])

  tokens_orig = [str(x) for x in tokenizer.tokenize(sentence_orig)]
  # Perturb a copy so `tokens_orig` keeps the original tokens.
  tokens_pert = tokens_orig.copy()
  tokens_pert = perturb(tokens_pert)

  # Counts reasons to discard this sentence (high-gradient token removed, or prediction changed).
  change_to_input_or_pred = 0

  # Positions whose token became the empty string after perturbation.
  empty_indices = []
  for i in range(len(tokens_pert)):
    if tokens_pert[i] == '':
      empty_indices.append(i)

  # Flag the sentence if any removed token had an original gradient above 0.04.
  for i in reversed(empty_indices):
    if gradient_list_orig[i] > 0.04:
      change_to_input_or_pred += 1
      # NOTE(review): this `continue` is the last statement of the inner loop body, so it has no effect.
      continue
      # print('token', tokens_orig[i], 'with gradient', gradient_list_orig[i], 'removed')
  
  # remove relevant indices from lists
  gradient_list_orig_reduced = np.array([gradient_list_orig[i] for i in range(len(tokens_pert)) if not i in empty_indices])
  tokens_orig_reduced = [tokens_orig[i] for i in range(len(tokens_pert)) if not i in empty_indices]
  tokens_pert = [tokens_pert[i] for i in range(len(tokens_pert)) if not i in empty_indices]

  # generate sentence
  sentence_pert = " ".join(tokens_pert)
  pred_pert = predictor.predict(sentence=sentence_pert)['label']

  # Skip this sentence when the perturbation changes the predicted label.
  if pred_pert != pred_orig:
    change_to_input_or_pred += 1
    continue
    # print('Prediction has changed')
    # print('initial prediction:', pred_orig)
    # print(predictor.predict(sentence=sentence_orig)['class_probabilities'])
    # print('new prediction:', pred_pert)
    # print(predictor.predict(sentence=sentence_pert)['class_probabilities'])

  # ignore cases where the prediction has changed or an important token has been removed
  # NOTE(review): the prediction-changed case was already skipped by the `continue` above,
  # so at this point the counter can only be non-zero because of a removed high-gradient token.
  if change_to_input_or_pred > 0:
    continue

  # Saliency of the perturbed sentence.
  grad_pert = SimpleGradient(predictor).saliency_interpret_from_json({'Description': sentence_pert})
  gradient_list_pert = np.array(grad_pert['instance_1']['grad_input_1'])

  # normalise initial gradients based on remaining tokens
  # NOTE(review): assumes the remaining gradients do not sum to zero — confirm.
  gradient_list_orig_reduced = gradient_list_orig_reduced/np.sum(gradient_list_orig_reduced)

  # Only report sentences whose gradient lists have equal length and whose
  # Spearman rank correlation is below 0.85; other sentences produce no output.
  if len(gradient_list_orig_reduced) == len(gradient_list_pert) and spearmanr(gradient_list_orig_reduced, gradient_list_pert)[0] < 0.85:
    print(s)
    visualise_weights(tokens_orig, gradient_list_orig)
    visualise_weights(tokens_orig_reduced, gradient_list_orig_reduced)
    visualise_weights(tokens_pert, gradient_list_pert)

150


None


None


None
151


None


None


None
152


None


None


None
157


None


None


None
158


None


None


None


In [32]:
# NOTE(review): this reads variables left over from the comparison loop. `gradient_list_orig_reduced`
# is reassigned on every iteration, but `gradient_list_pert` only on iterations that are not skipped,
# so the two arrays may belong to different sentences.
spearmanr(gradient_list_orig_reduced, gradient_list_pert)[0] < 0.85

False

# SHAP

In [None]:
! pip install shap==0.39.0

In [34]:
import shap

In [35]:
type(predictor.predict)

method

In [36]:
input_df = pd.DataFrame(sample[:10], columns=['sentence'])

In [37]:
input_df

Unnamed: 0,sentence
0,Unions representing workers at Turner Newall...
1,"SPACE.com - TORONTO, Canada -- A second\team o..."
2,AP - A company founded by a chemistry research...
3,AP - It's barely dawn when Mike Fitzpatrick st...
4,AP - Southern California's smog-fighting agenc...
5,The British Department for Education and Skill...
6,"\\""Sven Jaschan, self-confessed author of the ..."
7,\\FOAF/LOAF and bloom filters have a lot of i...
8,"Wiltshire Police warns about ""phishing"" after ..."
9,"In its first two years, the UK's dedicated car..."


In [49]:
def f(input_sentence):
  """Return the predictor's 'class_probabilities' for a single sentence."""
  prediction = predictor.predict(sentence = input_sentence)
  return prediction['class_probabilities']

In [50]:
f(sample[0])

[0.05422848090529442,
 0.003512652823701501,
 0.15680429339408875,
 0.7854545712471008]

In [52]:
# NOTE(review): in the recorded run this call fails with "Provided model function fails when
# applied to the provided data set." (TypeError). `f` forwards its argument to `predictor.predict`
# as one sentence, while here it is paired with the DataFrame `input_df` — presumably
# KernelExplainer calls `f` on a batch of rows; confirm against the SHAP documentation.
# NOTE(review): despite its name, `shap_values` would hold the explainer object.
shap_values = shap.KernelExplainer(f,input_df)

Provided model function fails when applied to the provided data set.


TypeError: ignored

In [56]:
# NOTE(review): this call raised AttributeError in the recorded run. `sample[0]` is a raw
# sentence string — presumably DeepExplainer expects tensor background data; confirm against
# the SHAP documentation.
explainer = shap.DeepExplainer(model, sample[0])

AttributeError: ignored