# Set-up

In [None]:
#!pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz
# !pip install allennlp==2.2.0 allennlp-models==2.2.0
# !pip install lime
# !pip install transformers
# !pip install pytreebank
# !pip install datasets==1.5.0
# !pip install shap==0.39.0

In [1]:
import os

In [71]:
import spacy
# Load the `en_core_web_sm` spaCy pipeline once and keep it as the notebook-level `nlp` object.
nlp = spacy.load('en_core_web_sm')

import random

import pandas as pd
import numpy as np
import scipy as sp
import importlib
import torch
import spacy
from torch.utils.data import \
    TensorDataset, \
    DataLoader
from transformers import \
    BertTokenizer, \
    BertForSequenceClassification, \
    AdamW, \
    BertConfig, \
    get_linear_schedule_with_warmup

import allennlp
import allennlp_models
from allennlp.models.archival import load_archive
from allennlp.common.util import JsonDict
from allennlp.data import Instance
from allennlp.predictors.predictor import Predictor
from allennlp.data.fields import LabelField
from allennlp.data.tokenizers.spacy_tokenizer import SpacyTokenizer
from typing import List, Dict
from overrides import overrides

import warnings
# Install a blanket 'ignore' filter (no category or module restriction), so every
# warning raised after this point in the kernel session is suppressed.
# NOTE(review): this also hides deprecation/runtime warnings from the libraries
# imported above — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [3]:
import sys

# Put the parent of the current working directory on the import path (once)
# so that the project-local `src` package below can be imported.
project_root_dir = os.path.abspath(os.pardir)
if project_root_dir not in sys.path:
    sys.path.append(project_root_dir)

from src.data.dataload import *
from src.models.bcnmodel import *



In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# cd '/content/drive/MyDrive/NLP Project'

In [4]:
# Use the first CUDA GPU when PyTorch reports one as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Loading data

In [126]:
import src.data.dataload
# Load SST through the project loader. The returned object is kept as `sst`
# because it is later handed to `bcn.load_model(sst)`.
sst=src.data.dataload.load_sst()
# `train_val_test` unpacks into three splits; the middle one (presumably the
# validation split, going by the attribute name) is not used in this notebook.
train_sst, _, test_sst = sst.train_val_test

In [17]:
# Load AG News through the project loader. The returned object is kept as
# `ag_news` because it is later handed to `bcn.load_model(ag_news)`.
ag_news=src.data.dataload.load_agnews()
# `train_val_test` unpacks into three splits; the middle one (presumably the
# validation split, going by the attribute name) is not used in this notebook.
train_ag, _, test_ag = ag_news.train_val_test

Using custom data configuration default
Reusing dataset ag_news (C:\Users\Da Wei\.cache\huggingface\datasets\ag_news\default\0.0.0\17ec33e23df9e89565131f989e0fdf78b0cc4672337b582da83fc3c9f79fe34d)
Using custom data configuration default
Reusing dataset ag_news (C:\Users\Da Wei\.cache\huggingface\datasets\ag_news\default\0.0.0\17ec33e23df9e89565131f989e0fdf78b0cc4672337b582da83fc3c9f79fe34d)


# Explainers

In [None]:
# import shap

In [123]:
import explainers_functions
# Reload BEFORE the star-import. importlib.reload() re-executes the module, but
# it does not rebind names that an earlier `from explainers_functions import *`
# already copied into this notebook's namespace. Star-importing first would
# therefore leave those names pointing at the stale, pre-edit definitions.
# (The reload itself is only needed while explainers_functions.py is being edited.)
importlib.reload(explainers_functions)
from explainers_functions import *

<module 'explainers_functions' from 'C:\\Users\\Da Wei\\Desktop\\UCL CSML\\COMP087\\NLP Project\\GitRepo\\notebooks\\explainers_functions.py'>

<module 'explainers_functions' from 'C:\\Users\\Da Wei\\Desktop\\UCL CSML\\COMP087\\NLP Project\\GitRepo\\notebooks\\explainers_functions.py'>

## BERT

**Set up your model & tokenizer**

In [None]:
# Directory holding the saved BERT sequence-classification checkpoint.
# The default is the original Colab / Google Drive location; set the
# BERT_MODEL_DIR environment variable to point at a local copy when the
# notebook is run outside Colab (that Drive path does not exist elsewhere).
BERT_MODEL_DIR = os.environ.get('BERT_MODEL_DIR', '/content/drive/MyDrive/NLP Project/BERT Model')

bert_model = BertForSequenceClassification.from_pretrained(BERT_MODEL_DIR)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

# Label maps as stored in the checkpoint's config.
id2label = bert_model.config.id2label
label2id = bert_model.config.label2id
# Label names sorted by ascending class id (the value they map to in `label2id`).
labels = sorted(label2id, key=label2id.get)

### LIME

#### SST

In [None]:
# Keep the instance under its own name. Rebinding `LimeExplainer` to the
# instance would shadow the class and make the later `LimeExplainer(bcn)`
# cells fail with "object is not callable" on a top-to-bottom run.
lime_bert_explainer = LimeExplainer(bert_model, tokenizer, labels, device, 'BERT')
# Explain the first ten SST training sentences. `train_sst` is the SST training
# split created in the data-loading section; the bare name `train` that was used
# here is not defined anywhere in this notebook.
tokens, values = lime_bert_explainer.explain_instances(train_sst['sentence'][0:10])

tokens, values

#### AG News

In [None]:
# to be added

### SHAP

#### SST

In [None]:
# Keep the instance under its own name. Rebinding `SHAPExplainer` to the
# instance would shadow the class, so any later `SHAPExplainer(...)` call in
# the notebook would fail with "object is not callable".
shap_bert_explainer = SHAPExplainer(bert_model, tokenizer, labels, device)
# Explain the first ten SST training sentences. `train_sst` is the SST training
# split created in the data-loading section; the bare name `train` that was used
# here is not defined anywhere in this notebook.
tokens, values = shap_bert_explainer.explain_instances(train_sst['sentence'][0:10])

tokens, values

#### AG News

In [None]:
# NOTE - the BERT model hasn't been fine-tuned on AG News so this is just to
# verify the mechanics on a different dataset.
#
# The instance gets its own name: rebinding `SHAPExplainer` to the instance
# would shadow the class and make this cell fail when it is run a second time.
shap_bert_explainer_ag = SHAPExplainer(bert_model, tokenizer, labels, device)
# Explain the first five AG News training sentences.
tokens, values = shap_bert_explainer_ag.explain_instances(train_ag['sentence'][0:5])

tokens, values

## BCN

**Set up your model & tokenizer**


The models can be downloaded from here:

https://upload.disroot.org/r/o8RgEa6y#lwPxvgfr6TDPXo/xl+u6kdGrsC5MIpaHQ3AstSZzZRg=
https://upload.disroot.org/r/_3rS3BsN#C/GGVAC1StHrFX/BEcT8zjGzGAhfWFeZc+wJ1uwyoaw=

They should then be saved in the local GitHub repository folders '/models/bcn-sst_output' (SST) or '/models/bcn-agnews_output' (AG News).

In [9]:
# Create the project's BCN wrapper and load the model for the AG News dataset object.
# NOTE(review): presumably this reads the archive saved under
# '/models/bcn-agnews_output' (see the download note) — confirm in BCNModel.load_model.
bcn = BCNModel()
bcn.load_model(ag_news)
# Keep a separate handle on the predictor exposed by the loaded wrapper.
BCN_AG_predictor = bcn.predictor

error loading _jsonnet (this is expected on Windows), treating C:\Users\DAWEI~1\AppData\Local\Temp\tmpr0m48anb\config.json as plain json
error loading _jsonnet (this is expected on Windows), treating C:\Users\DAWEI~1\AppData\Local\Temp\tmpr0m48anb\config.json as plain json


In [None]:
# cd /content/drive/MyDrive/NLP Project/AllenNLP

In [None]:
# # importing the dataset reader
# import tagging
# # importing the BCN model
# import BCN_model
# archive = load_archive("./BCN_output/model.tar.gz")
# BCN_model = archive.model
# vocab = BCN_model.vocab
# BCN_predictor = Predictor.from_archive(archive, 'ag_text_classifier')

### LIME

#### AG News

In [117]:
# Explain a random sample of AG News test sentences with LIME on the loaded BCN model.
LimeExplainer1 = LimeExplainer(bcn)
# Use a seeded generator so the same test rows are drawn on every run.
# The sample size is capped at the test-set size because sampling without
# replacement raises ValueError when more items are requested than exist.
sample_rng = np.random.default_rng(0)
indices = sample_rng.choice(len(test_ag), size=min(20, len(test_ag)), replace=False)
# `indices` are row positions (0..len-1), hence the positional `.iloc` lookup.
instance_array = test_ag['sentence'].iloc[indices]
indices_AG, preds_AG = LimeExplainer1.explain_instances(instance_array)

In [120]:
# Display both outputs of the AG News LIME run as one (indices_AG, preds_AG) tuple.
# NOTE(review): this prints every per-instance list in full; consider showing a slice.
indices_AG,preds_AG

([[0,
   12,
   24,
   14,
   10,
   23,
   13,
   2,
   16,
   8,
   9,
   19,
   1,
   3,
   7,
   17,
   4,
   22,
   6,
   20,
   18,
   15,
   11,
   21,
   5],
  [1,
   19,
   2,
   4,
   6,
   21,
   10,
   5,
   8,
   25,
   3,
   15,
   0,
   13,
   20,
   27,
   9,
   11,
   28,
   16,
   17,
   18,
   12,
   24,
   7,
   22,
   26,
   23,
   14],
  [0,
   29,
   27,
   18,
   10,
   16,
   5,
   1,
   25,
   17,
   23,
   12,
   4,
   30,
   19,
   3,
   13,
   2,
   28,
   6,
   31,
   24,
   11,
   15,
   20,
   26,
   9,
   8,
   7,
   14,
   22,
   21],
  [1, 0, 9, 3, 4, 8, 13, 7, 12, 11, 5, 6, 10, 14, 2],
  [0,
   6,
   4,
   23,
   11,
   14,
   8,
   15,
   7,
   10,
   19,
   16,
   3,
   17,
   9,
   13,
   5,
   2,
   18,
   22,
   20,
   12,
   21,
   1],
  [1,
   21,
   6,
   4,
   22,
   15,
   18,
   12,
   14,
   26,
   8,
   23,
   7,
   16,
   20,
   9,
   19,
   5,
   11,
   13,
   24,
   10,
   25,
   0,
   2,
   3,
   17],
  [16,
   6,
   24,
   9,
   15,

([[0,
   12,
   24,
   14,
   10,
   23,
   13,
   2,
   16,
   8,
   9,
   19,
   1,
   3,
   7,
   17,
   4,
   22,
   6,
   20,
   18,
   15,
   11,
   21,
   5],
  [1,
   19,
   2,
   4,
   6,
   21,
   10,
   5,
   8,
   25,
   3,
   15,
   0,
   13,
   20,
   27,
   9,
   11,
   28,
   16,
   17,
   18,
   12,
   24,
   7,
   22,
   26,
   23,
   14],
  [0,
   29,
   27,
   18,
   10,
   16,
   5,
   1,
   25,
   17,
   23,
   12,
   4,
   30,
   19,
   3,
   13,
   2,
   28,
   6,
   31,
   24,
   11,
   15,
   20,
   26,
   9,
   8,
   7,
   14,
   22,
   21],
  [1, 0, 9, 3, 4, 8, 13, 7, 12, 11, 5, 6, 10, 14, 2],
  [0,
   6,
   4,
   23,
   11,
   14,
   8,
   15,
   7,
   10,
   19,
   16,
   3,
   17,
   9,
   13,
   5,
   2,
   18,
   22,
   20,
   12,
   21,
   1],
  [1,
   21,
   6,
   4,
   22,
   15,
   18,
   12,
   14,
   26,
   8,
   23,
   7,
   16,
   20,
   9,
   19,
   5,
   11,
   13,
   24,
   10,
   25,
   0,
   2,
   3,
   17],
  [16,
   6,
   24,
   9,
   15,

**SST**

In [124]:
# Create a fresh BCN wrapper and load the model for the SST dataset object.
# This rebinds `bcn`, so from here on `bcn` no longer refers to the AG News model.
bcn = BCNModel()
bcn.load_model(sst)
# Keep a separate handle on the predictor exposed by the loaded wrapper.
BCN_SST_predictor = bcn.predictor

error loading _jsonnet (this is expected on Windows), treating C:\Users\DAWEI~1\AppData\Local\Temp\tmp_or8l9aj\config.json as plain json
error loading _jsonnet (this is expected on Windows), treating C:\Users\DAWEI~1\AppData\Local\Temp\tmp_or8l9aj\config.json as plain json


In [128]:
# Explain a random sample of SST test sentences with LIME on the loaded BCN model.
LimeExplainer1 = LimeExplainer(bcn)
# Use a seeded generator so the same test rows are drawn on every run.
# The sample size is capped at the test-set size because sampling without
# replacement raises ValueError when more items are requested than exist.
sample_rng = np.random.default_rng(0)
indices = sample_rng.choice(len(test_sst), size=min(20, len(test_sst)), replace=False)
# `indices` are row positions (0..len-1), hence the positional `.iloc` lookup.
instance_array = test_sst['sentence'].iloc[indices]
indices_SST, preds_SST = LimeExplainer1.explain_instances(instance_array)

In [129]:
# Display both outputs of the SST LIME run as one (indices_SST, preds_SST) tuple.
# NOTE(review): this prints every per-instance list in full; consider showing a slice.
indices_SST,preds_SST

([[13,
   19,
   4,
   23,
   1,
   0,
   15,
   7,
   14,
   16,
   20,
   6,
   18,
   27,
   12,
   11,
   10,
   22,
   5,
   3,
   9,
   2,
   17,
   25,
   26,
   8,
   21,
   24],
  [5, 1, 4, 0, 2, 3],
  [9,
   7,
   23,
   11,
   10,
   19,
   8,
   20,
   18,
   15,
   6,
   14,
   13,
   16,
   12,
   4,
   5,
   0,
   2,
   22,
   21,
   3,
   17,
   1,
   24],
  [8, 2, 14, 6, 7, 9, 3, 5, 0, 1, 13, 11, 4, 10, 12],
  [4, 9, 6, 5, 3, 1, 8, 0, 10, 2, 7],
  [8,
   15,
   24,
   17,
   2,
   23,
   12,
   19,
   5,
   20,
   13,
   11,
   10,
   14,
   21,
   4,
   6,
   0,
   9,
   25,
   16,
   3,
   22,
   1,
   7,
   18],
  [25,
   23,
   21,
   16,
   26,
   14,
   7,
   8,
   4,
   6,
   3,
   18,
   10,
   0,
   20,
   15,
   5,
   9,
   13,
   17,
   1,
   2,
   22,
   24,
   11,
   12,
   19],
  [9, 10, 1, 3, 0, 4, 11, 12, 5, 7, 8, 6, 2],
  [11, 2, 0, 14, 5, 15, 9, 17, 18, 10, 12, 8, 4, 3, 1, 20, 6, 16, 13, 19, 7],
  [2, 5, 0, 6, 9, 8, 1, 7, 4, 3],
  [5, 3, 10, 16, 14, 0

([[13,
   19,
   4,
   23,
   1,
   0,
   15,
   7,
   14,
   16,
   20,
   6,
   18,
   27,
   12,
   11,
   10,
   22,
   5,
   3,
   9,
   2,
   17,
   25,
   26,
   8,
   21,
   24],
  [5, 1, 4, 0, 2, 3],
  [9,
   7,
   23,
   11,
   10,
   19,
   8,
   20,
   18,
   15,
   6,
   14,
   13,
   16,
   12,
   4,
   5,
   0,
   2,
   22,
   21,
   3,
   17,
   1,
   24],
  [8, 2, 14, 6, 7, 9, 3, 5, 0, 1, 13, 11, 4, 10, 12],
  [4, 9, 6, 5, 3, 1, 8, 0, 10, 2, 7],
  [8,
   15,
   24,
   17,
   2,
   23,
   12,
   19,
   5,
   20,
   13,
   11,
   10,
   14,
   21,
   4,
   6,
   0,
   9,
   25,
   16,
   3,
   22,
   1,
   7,
   18],
  [25,
   23,
   21,
   16,
   26,
   14,
   7,
   8,
   4,
   6,
   3,
   18,
   10,
   0,
   20,
   15,
   5,
   9,
   13,
   17,
   1,
   2,
   22,
   24,
   11,
   12,
   19],
  [9, 10, 1, 3, 0, 4, 11, 12, 5, 7, 8, 6, 2],
  [11, 2, 0, 14, 5, 15, 9, 17, 18, 10, 12, 8, 4, 3, 1, 20, 6, 16, 13, 19, 7],
  [2, 5, 0, 6, 9, 8, 1, 7, 4, 3],
  [5, 3, 10, 16, 14, 0

### AllenNLP Interpret

In [None]:
# Take the first AG News test sentence by position. `.iloc[0]` matches the
# positional indexing used for the LIME samples and does not depend on the
# DataFrame's index containing the label 0 (plain `[0]` is a label lookup).
test_input = test_ag['sentence'].iloc[0]

In [None]:
# Wrap the currently loaded BCN model in the AllenNLP Interpret explainer.
# NOTE(review): on a top-to-bottom run `bcn` was last loaded with `sst`, while
# `test_input` is taken from the AG News test split — confirm which model is intended.
AllenNLPExplainer1 = AllenNLPExplainer(bcn)

In [None]:
# Run the explainer on the single sentence in `test_input`; the return value is shown as the cell output.
AllenNLPExplainer1.explain_instance(test_input)