# Set-up

In [1]:
# !pip install allennlp==2.2.0 allennlp-models==2.2.0
# !pip install lime
# !pip install transformers
# !pip install pytreebank
# !pip install datasets==1.5.0
# !pip install shap==0.39.0

In [2]:
import os

# Move the working directory to the parent of the directory the kernel was
# started in. The start directory is recorded only once, so re-running this
# cell is a no-op instead of climbing one level higher on every execution
# (which a bare os.chdir('..') would do).
if 'NOTEBOOK_START_DIR' not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.dirname(NOTEBOOK_START_DIR))

In [3]:
import spacy
# Load spaCy's 'en_core_web_sm' pipeline into `nlp`.
# NOTE(review): `nlp` is not referenced by any other cell visible in this notebook — confirm it is still needed.
nlp = spacy.load('en_core_web_sm')

import random

import pandas as pd
import numpy as np
import scipy as sp
import importlib
import torch
import spacy
from torch.utils.data import \
    TensorDataset, \
    DataLoader
from transformers import \
    BertTokenizer, \
    BertForSequenceClassification, \
    AdamW, \
    BertConfig, \
    get_linear_schedule_with_warmup

import allennlp
import allennlp_models
from allennlp.models.archival import load_archive
from allennlp.common.util import JsonDict
from allennlp.data import Instance
from allennlp.predictors.predictor import Predictor
from allennlp.data.fields import LabelField
from allennlp.data.tokenizers.spacy_tokenizer import SpacyTokenizer
from typing import List, Dict
from overrides import overrides

In [4]:
import sys

# Add the directory two levels above the current working directory (kept as a
# relative path) to the module search path, unless it is already listed.
project_root_dir = os.path.relpath(os.path.join('..', '..'), start=os.curdir)
if project_root_dir not in sys.path:
    sys.path.append(project_root_dir)

# Project-local helpers: dataset loaders and the BCN model wrapper.
from src.data.dataload import *
from src.models.bcnmodel import *



In [5]:
# from google.colab import drive
# drive.mount('/content/drive')

In [6]:
# cd '/content/drive/MyDrive/NLP Project'

In [7]:
# Use the first CUDA GPU when PyTorch reports one as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Loading data

In [8]:
import src.data.dataload

# SST: fetch the dataset object, then unpack its train / validation / test splits.
sst = src.data.dataload.load_sst()
(train, val, test) = sst.train_val_test

In [9]:
# AG News: keep the first and last elements of the split triple (train, test);
# the middle element is discarded.
ag_news = src.data.dataload.load_agnews()
(train_ag, _, test_ag) = ag_news.train_val_test

Using custom data configuration default
Reusing dataset ag_news (/Users/olivier/.cache/huggingface/datasets/ag_news/default/0.0.0/17ec33e23df9e89565131f989e0fdf78b0cc4672337b582da83fc3c9f79fe34d)


# Explainers

In [None]:
# import shap

In [10]:
import explainers_functions
from explainers_functions import *

In [11]:
# Only needed while the helper module is being edited on disk.
# importlib.reload() re-executes the module but does NOT rebind names that were
# previously copied into this namespace by `from explainers_functions import *`,
# so the star import is repeated to pick up the refreshed definitions.
importlib.reload(explainers_functions)
from explainers_functions import *

explainers_functions  # keep showing the module repr as the cell output

<module 'explainers_functions' from '/Volumes/LaCie/GitHubNLP/ucl-nlp-group-project/notebooks/explainers_functions.py'>

## BERT

**Set up your model & tokenizer**

In [None]:
# Location of the saved BERT sequence-classification checkpoint. Defaults to the
# original Google Drive path; set the BERT_MODEL_DIR environment variable to
# load the checkpoint from another machine without editing this cell.
bert_model_dir = os.environ.get('BERT_MODEL_DIR', '/content/drive/MyDrive/NLP Project/BERT Model')
bert_model = BertForSequenceClassification.from_pretrained(bert_model_dir)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

# Label mappings stored in the checkpoint's config.
id2label = bert_model.config.id2label
label2id = bert_model.config.label2id
# Label names ordered by their integer class id.
labels = sorted(label2id, key=label2id.get)

### LIME

#### SST

In [None]:
# Bind the explainer instance to its own name. Assigning it to `LimeExplainer`
# would overwrite the class, so this cell could not be re-run and any later
# cell calling LimeExplainer(...) would fail.
lime_explainer_bert = LimeExplainer(bert_model, tokenizer, labels, device, 'BERT')
# Explain the first ten SST training sentences.
tokens, values = lime_explainer_bert.explain_instances(train['sentence'][0:10])

tokens, values

#### AG News

In [None]:
# TODO: add the LIME explanation for BERT on AG News.

### SHAP

#### SST

In [None]:
# Bind the explainer instance to its own name. Assigning it to `SHAPExplainer`
# would overwrite the class, so this cell could not be re-run and any later
# cell calling SHAPExplainer(...) would fail.
shap_explainer_bert = SHAPExplainer(bert_model, tokenizer, labels, device)
# Explain the first ten SST training sentences.
tokens, values = shap_explainer_bert.explain_instances(train['sentence'][0:10])

tokens, values

#### AG News

In [None]:
# NOTE - the BERT model hasn't been fine-tuned on AG News so this is just to
# verify the mechanics on a different dataset.
#
# The instance gets its own name: assigning it to `SHAPExplainer` would
# overwrite the class and make this cell fail when run a second time.
shap_explainer_ag = SHAPExplainer(bert_model, tokenizer, labels, device)
# Explain the first five AG News training sentences.
tokens, values = shap_explainer_ag.explain_instances(train_ag['sentence'][0:5])

tokens, values

## BCN

**Set up your model & tokenizer**

In [12]:
# Build the BCN wrapper, load it for the AG News dataset object, and keep a
# handle on its predictor for the explainers below.
# NOTE(review): presumably load_model() restores trained weights for AG News —
# confirm in src.models.bcnmodel.
bcn = BCNModel()
bcn.load_model(ag_news)  # must run before `bcn.predictor` is read
BCN_AG_predictor = bcn.predictor

In [None]:
# cd /content/drive/MyDrive/NLP Project/AllenNLP

In [None]:
# # importing the dataset reader
# import tagging
# # importing the BCN model
# import BCN_model
# archive = load_archive("./BCN_output/model.tar.gz")
# BCN_model = archive.model
# vocab = BCN_model.vocab
# BCN_predictor = Predictor.from_archive(archive, 'ag_text_classifier')

### LIME

In [None]:
# cd '/content/drive/MyDrive/NLP Project'

#### AG News

In [None]:
# Class names handed to LIME for the BCN AG News classifier.
# NOTE(review): the order must match the predictor's output class order — confirm.
labels_BCN = ['Sci/Tech', 'Sports','World','Business']

# Bind the explainer instance to its own name; assigning it to `LimeExplainer`
# would overwrite the class and break re-runs of this cell.
lime_explainer_bcn = LimeExplainer(BCN_AG_predictor, None, labels_BCN, device, 'BCN')

# Draw a reproducible random sample of test sentences (without replacement).
# A fixed seed makes "Restart & Run All" explain the same instances every time;
# the sample size is clamped so a small test split cannot raise.
LIME_SAMPLE_SEED = 42
n_lime_samples = min(20, len(test_ag))
indices = np.random.default_rng(LIME_SAMPLE_SEED).choice(len(test_ag), n_lime_samples, replace=False)
instance_array = test_ag['sentence'].iloc[indices]
top_tokens_AG, top_values_AG = lime_explainer_bcn.explain_instances(instance_array)

### AllenNLP Interpret

In [13]:
# Take the first AG News test sentence by position. `.iloc[0]` does not depend
# on the index labels of the split, whereas `[0]` is a label lookup that returns
# a different row (or raises KeyError) when the index is not 0..n-1.
test_input = test_ag['sentence'].iloc[0]

In [14]:
# Wrap the loaded BCN model object in an AllenNLPExplainer.
AllenNLPExplainer1 = AllenNLPExplainer(bcn)

In [16]:
# Explain the single test sentence; the recorded output is a list of 21 floats.
# NOTE(review): presumably one attribution score per input token — confirm in explainers_functions.
AllenNLPExplainer1.explain_instance(test_input)

[0.04113465106580933,
 0.02347537529962658,
 0.20143643756039975,
 0.03416881208462243,
 0.07821835198811906,
 0.06852774958176362,
 0.008980276119335804,
 0.02471138056079196,
 0.006761755534241612,
 0.05469031318094185,
 0.012258247775123621,
 0.04079487203037579,
 0.007463397550701732,
 0.025463750597306743,
 0.01442624182474412,
 0.0019733171764495213,
 0.004629691792762053,
 0.01385116111230194,
 0.08362074665289253,
 0.25021796830065135,
 0.0031955437909933684]