In [1]:
import numpy as np
import lime
import torch
import torch.nn.functional as F
from lime.lime_text import LimeTextExplainer

In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification



In [3]:
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
import numpy as np
import pandas as pd
from typing import List

# Hugging Face Hub checkpoint to explain. Any sequence-classification model
# listed on https://huggingface.co/models can be substituted here, or you
# could train your own.
MODEL = "nlptown/bert-base-multilingual-uncased-sentiment"

# Load the classifier and its matching tokenizer from the same checkpoint.
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
tokenizer = AutoTokenizer.from_pretrained(MODEL)

In [4]:
def model_adapter(texts: List[str], batch_size: int = 64) -> np.ndarray:
    """Return class probabilities for ``texts`` for use by LIME-style explainers.

    The texts are tokenized and scored in mini-batches using the module-level
    ``tokenizer`` and ``model``. The model's raw logits are turned into
    probabilities with a softmax over the class axis.

    Args:
        texts: Strings to classify.
        batch_size: Number of texts per forward pass. Defaults to 64.

    Returns:
        A 2-D array with one row per input text and one column per class.
        For empty input the array has zero rows and
        ``model.config.num_labels`` columns.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # Nothing to score: return an empty, correctly shaped probability matrix
    # instead of a shapeless 1-D array.
    if len(texts) == 0:
        return np.empty((0, model.config.num_labels), dtype=np.float32)

    batch_scores = []

    # Inference only: skip autograd bookkeeping for the (typically thousands
    # of) perturbed samples an explainer sends through this function.
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]

            # Tokenize with the model's own tokenizer, padding to the longest
            # text in the batch and truncating overly long inputs.
            encoded_input = tokenizer(
                batch,
                return_tensors='pt',
                padding=True,
                truncation=True,
                max_length=model.config.max_position_embeddings - 2,
            )

            # Run the model on whatever device it currently lives on.
            output = model(**encoded_input.to(model.device))

            # The first output holds raw logits; softmax over axis 1 turns
            # each row into a probability distribution over the classes.
            scores = output[0].softmax(1).cpu().numpy()
            batch_scores.append(scores)

    return np.concatenate(batch_scores, axis=0)

In [5]:
# Class labels ordered by class id, so that position i names the i-th
# probability column returned by model_adapter (rather than relying on the
# insertion order of the id2label dict).
target_names = [label for _, label in sorted(model.config.id2label.items())]

In [6]:
# Display the class labels (passed to LimeTextExplainer as class_names below).
target_names

['1 star', '2 stars', '3 stars', '4 stars', '5 stars']

In [7]:
# LIME text explainer labelled with the model's class names. The fixed
# random_state makes the perturbation sampling reproducible across runs and
# matches the seed used for the eli5 TextExplainer in this notebook.
explainer = LimeTextExplainer(class_names=target_names, random_state=42)

In [8]:
# Display the explainer object (only its default repr is shown).
explainer

<lime.lime_text.LimeTextExplainer at 0x16b65d839d0>

In [11]:
# str_to_predict = "surprising increase in revenue in spite of decrease in market share"
# exp = explainer.explain_instance(str_to_predict, model_adapter, num_features=5, num_samples=2000)

In [19]:
from eli5.lime import TextExplainer

# Explain a single review with eli5's LIME implementation. The explainer is
# seeded (random_state=42) and draws 5000 perturbed samples, each scored by
# model_adapter.
te = TextExplainer(n_samples=5000, random_state=42)
te.fit("""The restaurant was amazing, the quality of their 
food was exceptional. The waiters were so polite.""", model_adapter)

# Reuse the notebook's target_names so eli5 and LIME label the probability
# columns identically instead of rebuilding the list here.
te.explain_prediction(target_names=target_names)

Contribution?,Feature
-0.468,<BIAS>
-6.366,Highlighted in text (sum)

Contribution?,Feature
-0.4,<BIAS>
-5.709,Highlighted in text (sum)

Contribution?,Feature
-0.397,<BIAS>
-3.822,Highlighted in text (sum)

Contribution?,Feature
-0.539,Highlighted in text (sum)
-0.621,<BIAS>

Contribution?,Feature
2.169,Highlighted in text (sum)
-0.521,<BIAS>


In [8]:
# import numpy as np
# import lime
# import torch
# import torch.nn.functional as F
# from lime.lime_text import LimeTextExplainer

# from transformers import AutoTokenizer, AutoModelForSequenceClassification

# tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
# model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
# class_names = ['positive','negative', 'neutral']

# def predictor(texts):
#     outputs = model(**tokenizer(texts, return_tensors="pt", padding=True))
#     probas = F.softmax(outputs.logits).detach().numpy()
#     return probas

# explainer = LimeTextExplainer(class_names=class_names)

# str_to_predict = "surprising increase in revenue in spite of decrease in market share"
# exp = explainer.explain_instance(str_to_predict, predictor, num_features=20, num_samples=2000)
# exp.show_in_notebook(text=str_to_predict)