In [1]:
#https://christophm.github.io/interpretable-ml-book/anchors.html
#https://docs.seldon.io/projects/alibi/en/latest/api/alibi.explainers.anchors.anchor_text.html
!pip install anchor-exp
import os
import os.path
import sys
import time

import numpy as np
import sklearn
import sklearn.ensemble
import sklearn.linear_model
import sklearn.model_selection
import spacy
from anchor import anchor_text
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score




In [2]:
import numpy as np
import pandas as pd

# Read the hotel reviews, keep only the text and rating columns, drop rows
# where either is missing, and label ratings of 4 or more as 'pos' (else 'neg').
df = (
    pd.read_csv('/content/sample_data/hotel_reviews.csv')[['reviews.text', 'reviews.rating']]
    .dropna()
    .assign(sentiment=lambda kept: np.where(kept['reviews.rating'] >= 4, 'pos', 'neg'))
)

# Inputs (raw review text) and targets ('pos' / 'neg') for the classifier.
data = df['reviews.text']
labels = df['sentiment']

In [3]:
!pip install torch transformers spacy && python -m spacy download en_core_web_lg

Collecting en-core-web-lg==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl (400.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m400.7/400.7 MB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [4]:
# Load the spaCy model fetched by the download step above; `nlp` is later
# passed to AnchorText when the explainers are built.
nlp = spacy.load('en_core_web_lg')

In [5]:
# Hold out 20% of the data for testing, then 10% of what remains for
# validation. Both splits use the same fixed seed.
train, test, train_labels, test_labels = sklearn.model_selection.train_test_split(
    data,
    labels,
    test_size=.2,
    random_state=42,
)
train, val, train_labels, val_labels = sklearn.model_selection.train_test_split(
    train,
    train_labels,
    test_size=.1,
    random_state=42,
)

# Convert every label split to a NumPy array.
train_labels, test_labels, val_labels = (
    np.array(part) for part in (train_labels, test_labels, val_labels)
)

In [6]:
# Fit the token-count vocabulary on the training texts only, then encode all
# three splits with that same fitted vectorizer.
vectorizer = CountVectorizer(min_df=1).fit(train)

train_vectors, test_vectors, val_vectors = (
    vectorizer.transform(texts) for texts in (train, test, val)
)

In [7]:
# Fit a logistic-regression classifier on the token-count features.
c = sklearn.linear_model.LogisticRegression(solver='liblinear')
# Alternative model, kept for reference:
#c = sklearn.ensemble.RandomForestClassifier(n_estimators=100, n_jobs=-1)
c.fit(train_vectors, train_labels)

# Score on the validation split. accuracy_score is imported by name in the
# imports cell instead of being reached through `sklearn.metrics`, a submodule
# this notebook never imports explicitly.
preds = c.predict(val_vectors)
print('Val accuracy', accuracy_score(val_labels, preds))


def predict_lr(texts):
    """Predict a label for each raw text with the fitted classifier.

    Args:
        texts: a list of strings; they are encoded with the fitted
            `vectorizer` before being passed to the classifier `c`.

    Returns:
        The output of `c.predict` for the encoded texts, one label per text.
    """
    return c.predict(vectorizer.transform(texts))

Val accuracy 0.8319057815845824


### Explaining a prediction
`use_unk_distribution=True` means we perturb examples by replacing words with the `UNK` token.

In [8]:
# The sentence to explain and the classifier's prediction for it.
text = 'I enjoyed this movie, but I think the music could be better.'
pred = predict_lr([text])

# Explainer configured with use_unk_distribution=True.
explainer = anchor_text.AnchorText(
    nlp,
    ['negative', 'positive'],
    use_unk_distribution=True,
)

print('Prediction: %s' % pred)


Prediction: ['pos']


In [9]:

# Explain predict_lr's output on `text`. threshold=0.9 is presumably the
# minimum precision required of the anchor; confirm against the anchor-exp docs.
exp = explainer.explain_instance(text, predict_lr, threshold=0.9)

In [10]:
# The anchor (its words joined with AND) and its precision.
print('Anchor: %s' % (' AND '.join(exp.names())))
print('Precision: %.2f' % exp.precision())
print()
# Examples covered by the anchor for which the model keeps the same prediction.
print('Examples where anchor applies and the model predicts %s:' % pred)
print()
print('\n'.join([x[0] for x in exp.examples(only_same_prediction=True)]))
print()
# Examples covered by the anchor for which the prediction changes.
# No partial_index is passed, so this listing refers to the same (full) anchor
# as the header and the same-prediction listing.
print('Examples where anchor applies and the model predicts the opposite class')
print()
print('\n'.join([x[0] for x in exp.examples(only_different_prediction=True)]))


Anchor: .
Precision: 1.00

Examples where anchor applies and the model predicts ['pos']:

I UNK this UNK , UNK UNK UNK UNK music UNK be UNK .
UNK enjoyed this UNK UNK UNK I think UNK music could UNK UNK .
UNK UNK this UNK UNK UNK I think UNK music UNK be UNK .
UNK UNK UNK UNK , but UNK UNK the UNK UNK be UNK .
UNK enjoyed this UNK , but I think UNK music UNK be UNK .
I enjoyed UNK movie , but UNK UNK UNK music could be better .
I enjoyed UNK UNK , UNK UNK UNK UNK music could be UNK .
UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK could UNK UNK .
UNK enjoyed this UNK , UNK I UNK UNK UNK UNK be better .
I enjoyed this UNK , UNK UNK think UNK UNK could UNK better .

Examples where anchor applies and the model predicts the oppositte class




## Using BERT

The distribution above is a bit naive, and you get a lot of sentences that don't look realistic.  
Let's use BERT to perturb the data:

In [11]:
# Rebuild the explainer with use_unk_distribution=False: per the notes above,
# perturbations are then generated with BERT instead of UNK replacement.
explainer = anchor_text.AnchorText(nlp, ['negative', 'positive'], use_unk_distribution=False)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [12]:

text = 'I enjoyed this movie, but I think the music could be better.'

pred = predict_lr([text])
print('Prediction: %s' % pred)

# Time the explanation with perf_counter(), a monotonic clock meant for
# measuring elapsed intervals; time.time() can jump if the system clock changes.
start = time.perf_counter()
exp = explainer.explain_instance(text, predict_lr, threshold=0.9, verbose=False)
print('Time: %s' % (time.perf_counter() - start))

Prediction: ['pos']


  to_pred = torch.tensor([encoded], device=self.device)


KeyboardInterrupt: 

In [13]:
# Anchor and precision; end='\n\n' produces the blank separator line.
print('Anchor: %s' % (' AND '.join(exp.names())))
print('Precision: %.2f' % exp.precision(), end='\n\n')

# Covered examples where the model keeps its prediction.
print('Examples where anchor applies and model predicts %s:' % pred, end='\n\n')
print('\n'.join(example[0] for example in exp.examples(only_same_prediction=True)), end='\n\n')

# Covered examples where the model's prediction changes.
print('Examples where anchor applies and model predicts the opposite class', end='\n\n')
print('\n'.join(example[0] for example in exp.examples(only_different_prediction=True)))

Anchor: .
Precision: 1.00

Examples where anchor applies and model predicts ['pos']:

I UNK this UNK , UNK UNK UNK UNK music UNK be UNK .
UNK enjoyed this UNK UNK UNK I think UNK music could UNK UNK .
UNK UNK this UNK UNK UNK I think UNK music UNK be UNK .
UNK UNK UNK UNK , but UNK UNK the UNK UNK be UNK .
UNK enjoyed this UNK , but I think UNK music UNK be UNK .
I enjoyed UNK movie , but UNK UNK UNK music could be better .
I enjoyed UNK UNK , UNK UNK UNK UNK music could be UNK .
UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK could UNK UNK .
UNK enjoyed this UNK , UNK I UNK UNK UNK UNK be better .
I enjoyed this UNK , UNK UNK think UNK UNK could UNK better .

Examples where anchor applies and model predicts the opposite class




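Note how the examples are much more realistic than just using UNKs. (This only holds when the BERT explanation cell above ran to completion; if it was interrupted, `exp` still holds the earlier UNK-based explanation and the same UNK examples are printed again.)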

We are sampling from BERT sequentially above (one mask at a time), to get sentences that are more coherent.  
If you want to do a single BERT pass per sample, you can set `onepass=True`. You may lose some coherence, but it will be much faster.

In [14]:
text = 'The movie was not bad, but I think the music could be better .'
pred = predict_lr([text])
print('Prediction: %s' % pred)

# onepass=True requests a single BERT pass per sample (see the note above).
# Elapsed time uses perf_counter(), a monotonic clock meant for measuring
# intervals; time.time() can jump if the system clock changes.
start = time.perf_counter()
exp = explainer.explain_instance(text, predict_lr, threshold=0.9, verbose=False, onepass=True)
print('Time: %s' % (time.perf_counter() - start))

Prediction: ['neg']


KeyboardInterrupt: 

In [15]:
# Anchor and precision; end='\n\n' produces the blank separator line.
print('Anchor: %s' % (' AND '.join(exp.names())))
print('Precision: %.2f' % exp.precision(), end='\n\n')

# Covered examples where the model keeps its prediction.
print('Examples where anchor applies and model predicts %s:' % pred, end='\n\n')
print('\n'.join(example[0] for example in exp.examples(only_same_prediction=True)), end='\n\n')

# Covered examples where the model's prediction changes.
print('Examples where anchor applies and model predicts the opposite class', end='\n\n')
print('\n'.join(example[0] for example in exp.examples(only_different_prediction=True)))

Anchor: .
Precision: 1.00

Examples where anchor applies and model predicts ['neg']:

I UNK this UNK , UNK UNK UNK UNK music UNK be UNK .
UNK enjoyed this UNK UNK UNK I think UNK music could UNK UNK .
UNK UNK this UNK UNK UNK I think UNK music UNK be UNK .
UNK UNK UNK UNK , but UNK UNK the UNK UNK be UNK .
UNK enjoyed this UNK , but I think UNK music UNK be UNK .
I enjoyed UNK movie , but UNK UNK UNK music could be better .
I enjoyed UNK UNK , UNK UNK UNK UNK music could be UNK .
UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK could UNK UNK .
UNK enjoyed this UNK , UNK I UNK UNK UNK UNK be better .
I enjoyed this UNK , UNK UNK think UNK UNK could UNK better .

Examples where anchor applies and model predicts the opposite class




In [16]:
# Partial anchor at index 0 and its precision; end='\n\n' produces the blank
# separator line.
print('Partial anchor: %s' % (' AND '.join(exp.names(0))))
print('Precision: %.2f' % exp.precision(0), end='\n\n')

# Examples for partial anchor 0 where the model keeps its prediction.
print('Examples where anchor applies and model predicts %s:' % pred, end='\n\n')
print(
    '\n'.join(example[0] for example in exp.examples(partial_index=0, only_same_prediction=True)),
    end='\n\n',
)

# Examples for partial anchor 0 where the model's prediction changes.
print('Examples where anchor applies and model predicts the opposite', end='\n\n')
print('\n'.join(example[0] for example in exp.examples(partial_index=0, only_different_prediction=True)))

Partial anchor: .
Precision: 1.00

Examples where anchor applies and model predicts ['neg']:

I UNK this UNK , UNK UNK UNK UNK music UNK be UNK .
UNK enjoyed this UNK UNK UNK I think UNK music could UNK UNK .
UNK UNK this UNK UNK UNK I think UNK music UNK be UNK .
UNK UNK UNK UNK , but UNK UNK the UNK UNK be UNK .
UNK enjoyed this UNK , but I think UNK music UNK be UNK .
I enjoyed UNK movie , but UNK UNK UNK music could be better .
I enjoyed UNK UNK , UNK UNK UNK UNK music could be UNK .
UNK UNK UNK UNK UNK UNK UNK UNK UNK UNK could UNK UNK .
UNK enjoyed this UNK , UNK I UNK UNK UNK UNK be better .
I enjoyed this UNK , UNK UNK think UNK UNK could UNK better .

Examples where anchor applies and model predicts the opposite


