In [1]:
# Hyperparameters and dataset file names used throughout the notebook.

# When True, NumPy's global RNG is seeded in the imports cell.
SHOULD_USE_SEED = True
# Passed as `radius` to Word2VecGen; per the note on the generator cell this is
# the number of most similar words considered.
RADIUS = 10000
# The four values below are forwarded to MeLimeModel as `batch_size`,
# `epsilon_c`, `sigma` and `max_iters` respectively.
# NOTE(review): their exact semantics live in MeLime.model — presumably samples
# per iteration, a convergence tolerance, a kernel/noise width and an iteration
# cap; confirm against MeLimeModel before tuning.
BATCH_SIZE = 700
EPSILON = 0.1
SIGMA = 0.0001
MAX_ITERS = 100

# SNLI file names; used as keys into the `data` / `labels` mappings returned by
# data_create_SNLI().
TRAIN_FILE = "snli_1.0_train.txt"
VAL_FILE = "snli_1.0_dev.txt"
TEST_FILE = "snli_1.0_test.txt"


In [2]:
from sklearn import metrics
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

import numpy as np

from data.data_creator import data_create_SNLI

# Seed NumPy's global RNG for repeatable runs when requested in the
# hyperparameter cell. Only NumPy is seeded here; any other library with its
# own RNG is not affected by this call.
if SHOULD_USE_SEED:
    np.random.seed(42)


## Dataset 

In [3]:
# Load SNLI. `data` and `labels` are both indexed by split file name
# (TRAIN_FILE / VAL_FILE / TEST_FILE) and then by instance position.
data, labels = data_create_SNLI()

# Sanity check: show one training instance and the training-set size.
# Judging by the printed example, an instance is a single string holding the
# premise and the hypothesis separated by ", ".
print("Example a premise and hypothesis from the database ", data[TRAIN_FILE][0])
print("Number of instances in the training database: ", len(data[TRAIN_FILE]))

Example a premise and hypothesis from the database  A person on a horse jumps over a broken down airplane., A person is training his horse for a competition.
Number of instances in the training database:  550152


In [5]:
# Use the dataset's own train / dev / test files as the split instead of
# re-splitting: inputs come from `data`, targets from `labels`, both keyed by
# the same file names.

x_train, x_val, x_test = data[TRAIN_FILE], data[VAL_FILE], data[TEST_FILE]
y_train, y_val, y_test = labels[TRAIN_FILE], labels[VAL_FILE], labels[TEST_FILE]

## Black-box model - Multinomial Naive Bayes classifier

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Bag-of-words features: with use_idf=False the vectorizer applies no IDF
# re-weighting, so each row holds term frequencies (normalised according to
# TfidfVectorizer's default `norm` setting).
vect_text = TfidfVectorizer(use_idf = False)
# The vocabulary is learned from the training split only; later cells reuse
# `vect_text.transform(...)` for validation data and for the explained instance.
x_vec_train = vect_text.fit_transform(x_train)

# The black-box model that MeLime will explain.
clf = MultinomialNB().fit(x_vec_train, y_train)

  (0, 21069)	0.4588314677411235
  (0, 19756)	0.22941573387056174
  (0, 14131)	0.4588314677411235
  (0, 15708)	0.22941573387056174
  (0, 20121)	0.22941573387056174
  (0, 3965)	0.22941573387056174
  (0, 8979)	0.22941573387056174
  (0, 919)	0.22941573387056174
  (0, 15271)	0.22941573387056174
  (0, 30230)	0.22941573387056174
  (0, 13865)	0.22941573387056174
  (0, 11437)	0.22941573387056174
  (0, 6345)	0.22941573387056174
  (1, 21069)	0.48507125007266594
  (1, 19756)	0.24253562503633297
  (1, 14131)	0.24253562503633297
  (1, 15708)	0.24253562503633297
  (1, 20121)	0.24253562503633297
  (1, 3965)	0.24253562503633297
  (1, 8979)	0.24253562503633297
  (1, 919)	0.24253562503633297
  (1, 15271)	0.24253562503633297
  (1, 1872)	0.24253562503633297
  (1, 8426)	0.24253562503633297
  (1, 19893)	0.24253562503633297
  :	:
  (550150, 15271)	0.3779644730092272
  (550150, 14698)	0.5669467095138409
  (550150, 17390)	0.3779644730092272
  (550150, 29890)	0.1889822365046136
  (550150, 32105)	0.18898223650461

In [None]:
# Predict labels for the validation split, vectorized with the vocabulary
# fitted on the training split.
preds = clf.predict(vect_text.transform(x_val))

In [None]:
# Fraction of validation instances whose predicted label equals the true label.
print('Val accuracy', metrics.accuracy_score(y_val, preds))

## Instance to explain

In [None]:
# Position of the test-set instance to explain. A single index is used for both
# the input and its label so the two can never drift apart.
EXPLAIN_IDX = 1
x_explain = x_test[EXPLAIN_IDX]

# Vectorize the instance once and reuse the result for both black-box queries.
x_explain_vec = vect_text.transform([x_explain])

print('x to explain: ',x_explain)
print('Predicted class: ', clf.predict(x_explain_vec)[0])
print('True class: ', y_test[EXPLAIN_IDX])
# Class probabilities are listed in the order of `clf.classes_`.
print('Predict probablilities: ', clf.predict_proba(x_explain_vec)[0])


## Building MeLime model:

In [None]:
import nltk
from torch.utils.data import DataLoader
def tokenizer(x):
    """Split a sentence into tokens on runs of whitespace.

    Args:
        x: The sentence to tokenize.

    Returns:
        The list of whitespace-separated tokens; empty when `x` is empty or
        contains only whitespace. Punctuation stays attached to its word.
    """
    tokens = x.split()
    return tokens
# Tokenized training corpus: one token list per training instance. Despite the
# `dl_` prefix this is a plain Python list, not a torch DataLoader; it is handed
# to Word2VecEncoder in the next cell.
dl_train = [tokenizer(x) for x in x_train]

In [None]:
from gen_models.word2vec_gen import Word2VecGen, Word2VecEncoder
# Sentence generator used to sample perturbations around the explained
# instance. `radius` is given as RADIUS: the number of most similar words
# considered (per the original note; confirm the exact meaning in Word2VecGen).
# The encoder is built from the tokenized training corpus, while the raw
# training sentences are passed separately as `corpus`.
generator = Word2VecGen(encoder = Word2VecEncoder(dl_train), corpus = x_train, radius = RADIUS, tokenizer = tokenizer)

In [None]:
from interpretable_local_models.statistics_model import StatisticsLocalModel
# Tokenize with the shared `tokenizer` (the same callable given to the
# generator, the local model and MeLimeModel) rather than an inline split, so
# the token count below always agrees with how the rest of the pipeline
# tokenizes the sentence.
tokenized_x_explain = tokenizer(x_explain)
# Black-box confidence in its top class for the instance: the largest entry of
# the predicted probability vector.
y_p_explain = max(clf.predict_proba(vect_text.transform([x_explain]))[0])
# Local model built from that confidence, the number of tokens in the instance
# and the tokenizer.
explainer_model = StatisticsLocalModel(y_p_explain, len(tokenized_x_explain), tokenizer)
print(tokenized_x_explain)

In [None]:
from MeLime.model import MeLimeModel

def transform_func(x):
    """Vectorize one raw sentence with the fitted `vect_text` vectorizer.

    Args:
        x: A single sentence (not a list of sentences).

    Returns:
        The result of `vect_text.transform` on a one-element batch holding `x`.
    """
    single_sentence_batch = [x]
    return vect_text.transform(single_sentence_batch)

# Assemble the MeLime explainer from the pieces built above: the black-box
# classifier, the sentence generator, the local explainer model, the function
# that turns a raw sentence into classifier input, and the shared tokenizer.
# The numeric settings come from the hyperparameter cell.
model = MeLimeModel(black_box_model = clf,gen_model =generator, batch_size = BATCH_SIZE, epsilon_c = EPSILON, 
                    sigma = SIGMA, explainer_model = explainer_model, transform_func = transform_func, 
                    max_iters = MAX_ITERS, tokenizer = tokenizer)

## Explaining the instance

In [None]:
# Run MeLime on the chosen instance. `res` feeds the plotting helpers below;
# `sentences_with_probs` is a collection that later cells sort by element [1]
# (presumably (generated sentence, model probability) pairs — confirm in
# MeLimeModel.forward).
res, sentences_with_probs = model.forward(x_explain)

## Plotting results

In [None]:
# Plot the explanation. The return value is bound to `ax` (presumably a
# matplotlib Axes — confirm), which also keeps its repr out of the cell output.
# The method name is spelled `plot_explaination` here; keep it in sync with the
# StatisticsLocalModel definition.
ax = StatisticsLocalModel.plot_explaination(res)


In [None]:
import seaborn as sns
# Render the explanation result as a sentence heatmap.
StatisticsLocalModel.plot_sentence_heatmap(res)

## Listing the most favorable and most contrary sample phrases:

Favorable sample - a sentence generated by Word2VecGen that increases the model's confidence in the
prediction it made for the original sentence.

Contrary sample - a sentence generated by Word2VecGen that decreases the model's confidence in the prediction it made for the original sentence and <b>might even change the model's prediction for the generated sentence</b>.

### Most contrary sample phrases:



In [None]:
# Five entries with the smallest value at index 1, in ascending order.
sorted(sentences_with_probs, key=lambda entry: entry[1])[:5]

### Most favorable sample phrases:

In [None]:
# Five entries with the largest value at index 1, in descending order.
sorted(sentences_with_probs, key=lambda entry: entry[1], reverse=True)[:5]