In [1]:
#Hyperparameters:

SHOULD_USE_SEED = True
RADIUS = 10000
BATCH_SIZE = 700
EPSILON = 0.1
SIGMA = 0.0001
MAX_ITERS = 100

NN_BATCH_SIZE = 64

TRAIN_FILE = "snli_1.0_train.txt"
VAL_FILE = "snli_1.0_dev.txt"
TEST_FILE = "snli_1.0_test.txt"


In [2]:
from sklearn import metrics
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

import numpy as np

from data.data_creator import data_create_SNLI

if SHOULD_USE_SEED:
    np.random.seed(42)


In [3]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


## Dataset 

In [4]:
data, labels = data_create_SNLI()

In [5]:
#Splitting the dataset by using the original split

x_train, x_val, x_test = data[TRAIN_FILE], data[VAL_FILE], data[TEST_FILE]
y_train, y_val, y_test = labels[TRAIN_FILE], labels[VAL_FILE], labels[TEST_FILE]

## Black-box model - Multinomial Naive Bayes classifier

The black box model that was used in the original article.

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer

data_multinomial_nb = dict()

for key in data:
    data_multinomial_nb[key] = \
        [first + ', ' + second for first, second in zip(data[key]['premise'], data[key]['hypothesis'])]
    
x_train, x_val, x_test = data_multinomial_nb[TRAIN_FILE], data_multinomial_nb[VAL_FILE], data_multinomial_nb[TEST_FILE]

vect_text = TfidfVectorizer(use_idf = False)
x_vec_train = vect_text.fit_transform(x_train)

clf = MultinomialNB().fit(x_vec_train, y_train)

In [7]:
preds = clf.predict(vect_text.transform(x_val))

In [8]:
print('Val accuracy', metrics.accuracy_score(y_val, preds))

Val accuracy 0.5363


## Black-box model - Neural Network

The black box model above doesn't give a good result. Therefore, we use another black box model presented in the following article:
https://nlp.stanford.edu/pubs/snli_paper.pdf

### Data preperation


In [9]:
#https://jamesmccaffrey.wordpress.com/2021/01/04/creating-a-custom-torchtext-dataset-from-a-text-file/

from torchtext.legacy.data import Field
import torchtext as tt

TEXT = tt.legacy.data.Field(sequential=True,
  init_token='(bos)',  # start of sequence
  eos_token='(eos)',   # replace parens with less, greater
  lower=True,
  tokenize=tt.data.utils.get_tokenizer("basic_english"),)
LABEL = tt.legacy.data.Field(sequential=False,
  use_vocab=True,
  unk_token=None,
  is_target=True)

In [10]:
import os

(train_obj, valid_obj, test_obj) = tt.legacy.data.TabularDataset.splits(
  path=".//data/SNLI/",
  train=TRAIN_FILE,
  validation=VAL_FILE,
  test=TEST_FILE,
  skip_header = True,
  format='csv',
  filter_pred = lambda x: x.gold_label != '-',
    #gold_label	sentence1_binary_parse	sentence2_binary_parse	sentence1_parse	sentence2_parse	sentence1	sentence2	captionID	pairID	label1	label2	label3	label4	label5
  fields=[('gold_label', LABEL), ('sentence1_binary_parse', None), ('sentence2_binary_parse', None), 
          ('sentence1_parse', None), ('sentence1_parse', None), ('sentence1', TEXT), ('sentence2', TEXT), ('captionID', None), 
         ('pairID', None), ('label1', None), ('label2', None), ('label3', None), ('label4', None), ('label5', None)], 
    csv_reader_params = {'delimiter' : '	'})

In [11]:
TEXT.build_vocab(train_obj.sentence1, min_freq=1, vectors='glove.6B.300d')
TEXT.build_vocab(train_obj.sentence2, min_freq=1, vectors='glove.6B.300d')
LABEL.build_vocab(train_obj.gold_label)
pretrained_embeddings = TEXT.vocab.vectors

In [12]:
train_iter, val_iter, test_iter = tt.legacy.data.BucketIterator.splits(
   (train_obj, valid_obj, test_obj), 
    batch_size=NN_BATCH_SIZE, 
    sort_key = lambda x: len(x.sentence1),
    sort_within_batch = False,
    repeat=False, 
    device=device)

In [13]:
#TODO: Should be in separat file!
import torch
import torch.nn as nn
from tqdm import tqdm
import copy

class NeuralNetModel (nn.Module):
    def __init__(self, hidden_size, embeddings, embeddings_size, num_layers, text, tag, num_classes = 3, verbose = True):
        super().__init__()
        self.text = text
        self.tag = tag
        self.N = len(tag.vocab.itos)   # tag vocab size
        self.V = len(text.vocab.itos)  # text vocab size
        self.verbose = verbose

        pad_id = self.text.vocab.stoi[self.tag.pad_token]
        self.loss_function = nn.CrossEntropyLoss()

        self.premise_layer = nn.Sequential(
            nn.Embedding.from_pretrained(embeddings = embeddings, freeze = True),
            nn.LSTM(input_size = embeddings_size, hidden_size = hidden_size, num_layers=num_layers, batch_first=True)
        )
        self.hypothesis_layer = nn.Sequential(
            nn.Embedding.from_pretrained(embeddings = embeddings, freeze = True),
            nn.LSTM(input_size = embeddings_size, hidden_size = hidden_size, num_layers=num_layers, batch_first=True)
        )
        self.model_layers = nn.Sequential(
            nn.Linear(hidden_size*2, 200),
            nn.Tanh(),
            nn.Linear(200, 200),
            nn.Tanh(),
            nn.Linear(200, 200),
            nn.Tanh(),
            nn.Linear(200, num_classes),
            nn.Softmax(dim = -1)
        )
        self.log_soft_max = nn.LogSoftmax(dim = -1)

#     def init_parameters(self, init_low=-0.5, init_high=0.5):
#         """Initialize parameters. We usually use larger initial values for smaller models.
#         See http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf for a more
#         in-depth discussion.
#         """
#         for p in self.parameters():
#             p.data.uniform_(init_low, init_high)
        
    def forward(self, premise_batch, hypothesis_batch):
        """Performs forward computation over a whole text_batch, returns logits.

        Arguments: 
          text_batch: a tensor of size max_length x batch_size
          hidden_0: a tensor of size 1 x batch_size x hidden_size
        Returns:
          logits: a tensor of size max_length x batch_size x N. It provides a logit for each tag of each word of each sentence
          in the batch.
        """
        premise_batch = premise_batch.T
        hypothesis_batch = hypothesis_batch.T
        _, (premise_layer_output, _) = self.premise_layer(premise_batch)
        _, (hypothesis_layer_output, _) = self.hypothesis_layer(hypothesis_batch)
        premise_layer_output = premise_layer_output[-1, :, :].squeeze(0)
        hypothesis_layer_output = hypothesis_layer_output[-1, :, :].squeeze(0)
        if (len(premise_layer_output.shape) == 1):
            premise_layer_output = premise_layer_output.unsqueeze(0)
        if (len(hypothesis_layer_output.shape) == 1):
            hypothesis_layer_output = hypothesis_layer_output.unsqueeze(0)
        #print(premise_layer_output.shape)
        concat_output = torch.cat((premise_layer_output, hypothesis_layer_output), dim = 1)
        #print(concat_output.shape)
        res = self.model_layers(concat_output)
        logits = self.model_layers(concat_output)
        #print(logits)
        return logits

    '''
       Computes the loss for a batch by comparing logits of a batch returned by forward to ground_truth, 
       which stores the true tag ids for the batch. Thus logits is a tensor of size max_length x batch_size x N, 
       and ground_truth is a tensor of size max_length x batch_size. Note that the criterion functions in torch expect 
       outputs of a certain shape, so you might need to perform some shape conversions.

       You might find nn.CrossEntropyLoss from the last project segment useful. 
       Note that if you use nn.CrossEntropyLoss then you should not use a softmax layer at the end since that's 
       already absorbed into the loss function. Alternatively, you can use nn.LogSoftmax as the final sublayer 
       in the forward pass, but then you need to use nn.NLLLoss, which does not contain its own softmax.
       We recommend the former, since working in log space is usually more numerically stable. 
       For reshaping tensors, check out the torch.Tensor.view method.
    '''
    def compute_loss(self, logits, ground_truth):
        return self.loss_function(logits, ground_truth.view(-1))

    '''
      Trains the model on training data generated by the iterator train_iter and validation data val_iter.
      The epochs and learning_rate variables are the number of epochs (number of times to run through the training data) 
      to run for and the learning rate for the optimizer, respectively. You can use the validation data to determine 
      which model was the best one as the epochs go by. Notice that our code below assumes that during training 
      the best model is stored so that rnn_tagger.load_state_dict(rnn_tagger.best_model) restores the 
      parameters of the best model.
    '''
    
    def train_all(self, train_iter, val_iter, epochs=100, learning_rate=0.001):
        # Switch the module to training mode
        self.train()
        # Use Adam to optimize the parameters
        optim = torch.optim.Adam(self.parameters(), lr=learning_rate)
        best_validation_accuracy = -float('inf')
        best_model = None
        # Run the optimization for multiple epochs
        for epoch in range(epochs): 
            total = 0
            running_loss = 0.0
            for batch in tqdm(train_iter, leave=self.verbose):
                # Zero the parameter gradients
                self.zero_grad()

                # Input and target
                premises = batch.sentence1
                hypothesis = batch.sentence2 
                tags = batch.gold_label
                # Run forward pass and compute loss along the way.
                
                logits = self.forward(premises, hypothesis)
                #print(logits.shape)
                loss = self.compute_loss(logits, tags)

#                 # Perform backpropagation
#                 print(logits)
#                 print(tags)
#                 print("#############################################################")
                (loss/premises.size(1)).backward()
                #print(loss)

                # Update parameters
                optim.step()

                # Training stats
                total += 1
                running_loss += loss.item()
                
            # Evaluate and track improvements on the validation dataset
            validation_accuracy = self.evaluate(val_iter)
            if validation_accuracy > best_validation_accuracy:
                best_validation_accuracy = validation_accuracy
                self.best_model = copy.deepcopy(self.state_dict())
            epoch_loss = running_loss / total
            if (self.verbose):
                print (f'Epoch: {epoch} Loss: {epoch_loss:.4f} '
                      f'Validation accuracy: {validation_accuracy:.4f}')

            
    def predict(self, text_batch):
        """Returns the most likely sequence of tags for a sequence of words in `text_batch`.

        Arguments: 
          text_batch: a tensor containing word ids of size (seq_len, batch_size) 
        Returns:
          tag_batch: a tensor containing tag ids of size (seq_len, batch_size)
        """
        logits = self.forward(text_batch.sentence1, text_batch.sentence2)
        #print(logits)
        tag_batch = torch.argmax(logits, axis = -1)
        return tag_batch
    
    def predict_proba(self, text_batch):
        """Returns the most likely sequence of tags for a sequence of words in `text_batch`.

        Arguments: 
          text_batch: a tensor containing word ids of size (seq_len, batch_size) 
        Returns:
          tag_batch: a tensor containing tag ids of size (seq_len, batch_size)
        """
        if (text_batch is list):
            print(":)")
        logits = self.forward(text_batch.sentence1, text_batch.sentence2).detach().cpu().numpy()
        return logits


    def evaluate(self, iterator):
        """Returns the model's performance on a given dataset `iterator`.

        Arguments: 
          iterator
        Returns:
          overall accuracy
        """
        correct = 0
        total = 0
        pad_id = self.text.vocab.stoi[self.tag.pad_token]
        for batch in tqdm(iterator, leave = self.verbose):
            premises = batch.sentence1
            hypothesis = batch.sentence2 
            tags = batch.gold_label
            # Run forward pass and compute loss along the way.
                
            #logits = self.forward(premises, hypothesis)
            #tags = batch.tag
            tags_pred = self.predict(batch)
            mask = tags.ne(pad_id)
            cor = (tags == tags_pred)[mask]
            correct += cor.float().sum().item()
            total += mask.float().sum().item()
        return correct/total


### Building the model

In [14]:
clf = NeuralNetModel(100, pretrained_embeddings, 300, 1, TEXT, LABEL).to(device)

In [15]:
LABEL.vocab.itos

['entailment', 'contradiction', 'neutral']

In [None]:
clf.train_all(train_iter, val_iter, epochs = 10)

100%|██████████| 8584/8584 [01:22<00:00, 104.50it/s]
100%|██████████| 154/154 [00:00<00:00, 245.99it/s]
  0%|          | 0/8584 [00:00<?, ?it/s]

Epoch: 0 Loss: 0.9416 Validation accuracy: 0.6361


100%|██████████| 8584/8584 [01:20<00:00, 106.54it/s]
100%|██████████| 154/154 [00:00<00:00, 285.98it/s]
  0%|          | 0/8584 [00:00<?, ?it/s]

Epoch: 1 Loss: 0.8945 Validation accuracy: 0.5865


100%|██████████| 8584/8584 [01:18<00:00, 109.48it/s]
100%|██████████| 154/154 [00:00<00:00, 289.52it/s]
  0%|          | 0/8584 [00:00<?, ?it/s]

Epoch: 2 Loss: 0.8810 Validation accuracy: 0.6355


100%|██████████| 8584/8584 [01:16<00:00, 111.59it/s]
100%|██████████| 154/154 [00:00<00:00, 289.78it/s]
  0%|          | 0/8584 [00:00<?, ?it/s]

Epoch: 3 Loss: 0.8713 Validation accuracy: 0.6538


 23%|██▎       | 1974/8584 [00:17<00:53, 122.76it/s]

## Instance to explain

In [None]:
def convert_tensor_to_text (t , FIELD):
    if (t.dim() == 0):
        return FIELD.vocab.itos[t.item()]
    return ' '.join([FIELD.vocab.itos[i] for i in t])

In [None]:
batch = next(iter(train_iter))
print('premise to explain: ',convert_tensor_to_text(batch.sentence1[:, 0], TEXT))
print('hypothesis to explain: ',convert_tensor_to_text(batch.sentence2[:, 0], TEXT))
print('Predicted class: ', convert_tensor_to_text(clf.predict(batch)[0], LABEL))
print('True class: ', convert_tensor_to_text(batch.gold_label[0], LABEL))
# x_explain = x_test[1]#"the movie's thesis -- elegant technology for the masses -- is surprisingly refreshing ."
# for batch in train_iter:
#     print('x to explain: ',batch)
#     print('Predicted class: ', clf.predict(batch))
#     print('True class: ', y_test[1])
#     print('Predict probablilities: ', clf.predict_proba(vect_text.transform([x_explain]))[0])


## Building MeLime model:

In [None]:
import nltk
from torch.utils.data import DataLoader
def tokenizer(x):
#     if '*' in x:
#         x = x.split('*')
#         return x[0].split() + x[1].split()
    return x.split()
dl_train = [tokenizer(x) for x in x_train]

In [None]:
from gen_models.word2vec_gen import Word2VecGen, Word2VecEncoder
#The radius is <radius> most similar words
generator = Word2VecGen(encoder = Word2VecEncoder(dl_train), corpus = x_train, radius = RADIUS, tokenizer = tokenizer,
                       tokens_not_to_sample = ['*', '.'])

In [None]:
from interpretable_local_models.statistics_model import StatisticsLocalModel
y_p_explain = max(clf.predict_proba(batch)[0]).item()
print('Probability for the predicted label: ', y_p_explain)
print('The tokenized hypothesis:')

tokenized_x_explain_premise = tokenizer(convert_tensor_to_text(batch.sentence1[:, 0], TEXT))
tokenized_x_explain_hypothesis = tokenizer(convert_tensor_to_text(batch.sentence2[:, 0], TEXT))
fliter_out_tokens = [TEXT.pad_token, TEXT.init_token, TEXT.eos_token]
tokenized_x_explain_hypothesis = list(filter(lambda x: x not in fliter_out_tokens, tokenized_x_explain_hypothesis))
tokenized_x_explain_premise = list(filter(lambda x: x not in fliter_out_tokens, tokenized_x_explain_premise))
print(tokenized_x_explain_hypothesis)
x_explain_hypothesis = ' '.join(tokenized_x_explain_hypothesis)
x_explain_premise = ' '.join(tokenized_x_explain_premise)

x_explain = x_explain_premise + " * " + x_explain_hypothesis
print(x_explain)
explainer_model = StatisticsLocalModel(y_p_explain, len(tokenizer(x_explain)), tokenizer)


In [None]:
from MeLime.model import MeLimeModel
from torch import tensor

class Instance:
    
    def __init__(self, sentence1, sentence2):
        self.sentence1 = sentence1
        self.sentence2 = sentence2

premise_len = batch.sentence1.shape[0]
hypothesis_len = batch.sentence2.shape[0]

def create_tensor_from_sentence(sentence, length):
    pad_id = TEXT.vocab.stoi[TEXT.pad_token]
    init_id = TEXT.vocab.stoi[TEXT.init_token]
    eos_id = TEXT.vocab.stoi[TEXT.eos_token]
    tensor = torch.ones((2,), dtype=torch.int64)
    t = tensor.new_full(size = (length, 1), fill_value  = pad_id, device = device)
    t[0, 0] = init_id
    tokens_idx = torch.LongTensor([TEXT.vocab.stoi[token] for token in tokenizer(sentence)])
    t[1 : len(tokens_idx) + 1, 0] = tokens_idx
    t[len(tokens_idx) + 1, 0] = eos_id
    return t

def transform_func(x):
    splitted_x = x.split('*')
    premise = splitted_x[0]
    hypothesis = splitted_x[1]
    t_premise = create_tensor_from_sentence(premise, premise_len)
    t_hypothesis = create_tensor_from_sentence(hypothesis, hypothesis_len)
#     print(t_premise)
#     print(t_hypothesis)
#     print(premise)
#     print(hypothesis)
    return Instance(t_premise, t_hypothesis)
    #return vect_text.transform([x])

model = MeLimeModel(black_box_model = clf,gen_model =generator, batch_size = BATCH_SIZE, epsilon_c = EPSILON, 
                    sigma = SIGMA, explainer_model = explainer_model, transform_func = transform_func, 
                    max_iters = MAX_ITERS, tokenizer = tokenizer)
        


## Explaining the instance

In [None]:
res, sentences_with_probs = model.forward(x_explain)

## Plotting results

In [None]:
ax = StatisticsLocalModel.plot_explaination(res)


In [None]:
import seaborn as sns
premise_res = []
hypothesis_res = []
did_finish_premise = False
for word, stat in res:
    if word == '*':
        did_finish_premise = True
        continue
    if did_finish_premise:
        hypothesis_res.append((word, stat))
        continue
    premise_res.append((word, stat))
print("Premise plot:")
StatisticsLocalModel.plot_sentence_heatmap(premise_res)


In [None]:
print("Hypothesis plot:")
StatisticsLocalModel.plot_sentence_heatmap(hypothesis_res)

## Plotting most favorable and contrary samples phrases:

Favorable sentence - a generated sentence using Word2VecGen that improves the model's confidence in its 
prediction on the original sentence.

Contrary samples - a generated sentence using Word2VecGen that decrease the model's confidence in its prediction on the original sentence and <b>might even change its prediction on the generated sentence</b>.

### Most contrary samples phrases:



In [None]:
sorted(sentences_with_probs, key = lambda x: x[1])[:5]

### Most favorable samples phrases:

In [None]:
sorted(sentences_with_probs, key = lambda x: x[1], reverse = True)[:5]