In [22]:
import html
import re

import lime
import torch
import torch.nn.functional as F
from IPython.core.display import HTML
from lime.lime_text import LimeTextExplainer
from nltk.corpus import stopwords
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification


In [2]:
# RIASEC category names; passed to the zero-shot classifier as candidate labels.
riasec = [
    'conventional',
    'realistic',
    'investigative',
    'enterprising',
    'social',
    'artistic',
]

In [71]:
# Zero-shot classification pipeline built on the 'facebook/bart-large-mnli' checkpoint.
classifier = pipeline(
    task='zero-shot-classification',
    model='facebook/bart-large-mnli',
)

In [4]:
# Checkpoint name shared by the tokenizer and the sequence-classification model below.
filename_model = 'facebook/bart-large-mnli'

# Raw model and tokenizer; `predictor` calls these directly to obtain class logits.
model = AutoModelForSequenceClassification.from_pretrained(filename_model)
tokenizer = AutoTokenizer.from_pretrained(filename_model)

In [163]:
# Long sample passage (note the leading space). It is not referenced by the other
# cells visible in this notebook export; kept as an alternative input.
text_long = ' She achieves good grades in sciences, Math, and German. She is very interested in science and is a member of the technology and science clubs at her school, where her teachers have told her that she is very talented. Through her membership in these clubs, she has access to a tech lab. She is currently also working on a mini-job basis in an electronics store. Fatma is interested in university studies. Her parents are quite conservative and are uncertain of academic success for their daughter and therefore prefer Fatma starting vocational training after grade 10. She thus faces a conflict between self-fulfillment and satisfying her parents, whom she also does not want to disappoint.'

# `text` is the active input: it is what gets classified and explained further down.
# The commented-out assignments are alternative inputs; enable exactly one of them.
#text = ' She achieves good grades in sciences, Math, and German. She is very interested in science and is a member of the technology and science clubs at her school, where her teachers have told her that she is very talented. Through her membership in these clubs, she has access to a tech lab. '
text = 'I like to draw paintings with oil on a canvas.'
#text = 'I think that AI and robots will take over the world and destroy humanity.'

In [164]:
# Score the active `text` against every RIASEC label, then display the raw
# pipeline output (a dict with 'sequence', 'labels' and 'scores').
result = classifier(text, candidate_labels=riasec)
result

{'sequence': 'I like to draw paintings with oil on a canvas.',
 'labels': ['artistic',
  'enterprising',
  'social',
  'conventional',
  'realistic',
  'investigative'],
 'scores': [0.8289536237716675,
  0.07523660361766815,
  0.0406937301158905,
  0.020833542570471764,
  0.01776549592614174,
  0.01651705801486969]}

In [144]:
# Stop-word lists from the NLTK corpus reader. `english_stopwords` is used to skip
# words when highlighting the explanation; `german_stopwords` is not used by the
# cells visible in this export.
german_stopwords, english_stopwords = (
    stopwords.words(language) for language in ('german', 'english')
)

In [161]:
def predictor(texts, batch_size=16):
    """Return class probabilities for a batch of ``premise</>hypothesis`` strings.

    This is the ``classifier_fn`` handed to LIME: LIME calls it with a list of
    perturbed copies of the explained string and expects one probability row
    per input.

    Args:
        texts: Iterable of strings, each of the form
            ``premise + '</>' + hypothesis``. If a string contains more than
            one ``'</>'``, the first two segments are used.
        batch_size: Number of pairs sent through the model per forward pass.
            LIME passes every perturbed sample in a single call, so batching
            bounds peak memory.

    Returns:
        ``numpy.ndarray`` of shape ``(len(texts), num_classes)`` holding the
        softmax of the model logits. Columns follow the model's own label
        order (the notebook labels them contradiction / neutral / entailment
        -- assumes that matches the checkpoint's config; TODO confirm).

    Raises:
        ValueError: If an input string does not contain the ``'</>'`` separator.
    """
    pairs = []
    for text in texts:
        text_parts = text.split('</>')
        if len(text_parts) < 2:
            raise ValueError(
                "predictor expects 'premise</>hypothesis' but got: %r" % (text,)
            )
        pairs.append((text_parts[0], text_parts[1]))

    # Nothing to score: return an empty (0, num_classes) array instead of
    # failing inside the tokenizer.
    if not pairs:
        return torch.empty((0, model.config.num_labels)).numpy()

    batch_probas = []
    # Inference only: disabling autograd avoids building a graph for every
    # forward pass.
    with torch.no_grad():
        for start in range(0, len(pairs), batch_size):
            batch = pairs[start:start + batch_size]
            encoded = tokenizer(batch, return_tensors="pt", padding=True)
            logits = model(**encoded).logits
            batch_probas.append(F.softmax(logits, dim=1))
    return torch.cat(batch_probas, dim=0).numpy()

In [165]:
# classify
# Zero-shot scores for every RIASEC label; keep the label with the highest score.
result = classifier(text, candidate_labels=riasec)
most_probable_label = result['labels'][result['scores'].index(max(result['scores']))]

# explain
# Class names LIME attaches to the three probability columns returned by `predictor`.
mlni_label = ['contradiction', 'neutral', 'entailment']
print(most_probable_label)
# `predictor` splits on '</>' to recover the (premise, hypothesis) pair.
exp_text = text + '</>' + 'This example is ' + most_probable_label + '.'

# Fixed seed so the sampled perturbations, and therefore the word weights, are
# reproducible across runs.
explainer = LimeTextExplainer(class_names=mlni_label, random_state=42)
exp = explainer.explain_instance(exp_text, predictor, num_features=15, num_samples=100, top_labels=3)


def _highlight_words(source_text, highlights, colors, skip_words):
    """Return `source_text` as escaped HTML with weighted words wrapped in coloured spans.

    Args:
        source_text: Plain text to render.
        highlights: Iterable of ``(word, weight)`` pairs, e.g. from ``exp.as_list``.
        colors: Two ``"r, g, b"`` strings; index 0 is used for weights >= 0,
            index 1 for negative weights.
        skip_words: Lower-case words that must never be highlighted.

    Returns:
        An HTML fragment (without a surrounding ``<p>``).
    """
    styles = {}
    for word, value in highlights:
        # Compare case-insensitively so capitalised stop words are skipped as well.
        if word in styles or word.lower() in skip_words:
            continue
        color = colors[1] if value < 0 else colors[0]
        # Weights are small, so scale them up; clamp because CSS alpha is limited to [0, 1].
        alpha = min(abs(value) * 10, 1.0)
        styles[word] = 'background-color:rgba(' + color + ',' + '{:.3f}'.format(alpha) + ');'

    # Single pass over whole tokens (split on the same r'\W+' LIME uses by default).
    # Matching whole tokens avoids hitting substrings of other words, and building
    # the output piece by piece means already-inserted markup is never rewritten.
    pieces = []
    for token in re.split(r'(\W+)', source_text):
        escaped = html.escape(token)
        if token in styles:
            escaped = '<span style="' + styles[token] + '">' + escaped + '</span>'
        pieces.append(escaped)
    return ''.join(pieces)


# render
base_colors = ["65, 105, 225", "255, 140, 0"]
stopword_set = set(english_stopwords)

html_texts = "<h2>" + html.escape(most_probable_label) + "</h2>"
# Legend: opaque swatches, so plain rgb() with the three colour components.
html_texts += '<span style="background-color:rgb(' + base_colors[0] + ');">Positive</span>'
html_texts += '<span style="background-color:rgb(' + base_colors[1] + ');">Negative</span>'

# One highlighted copy of the input per NLI class (top_labels=3 makes all three available).
for idx, pred_class in enumerate(mlni_label):
    highlighted = _highlight_words(text, exp.as_list(label=idx), base_colors, stopword_set)
    html_texts += "<h3>" + html.escape(pred_class) + "</h3>" + "<p>" + highlighted + "</p>"
HTML(html_texts)


artistic


In [151]:
# LIME (word, weight) pairs for the 'entailment' class, i.e. index 2 of `mlni_label`.
exp.as_list(label=mlni_label.index('entailment'))

[('and', -0.08618430471412966),
 ('she', -0.0859298874292672),
 ('that', -0.06719110759849281),
 ('at', 0.061237579853974926),
 ('clubs', -0.058735758892011894),
 ('in', -0.057071755112204775),
 ('membership', -0.054079517865397414),
 ('access', -0.051213714037788204),
 ('to', 0.0411751789336965),
 ('Through', -0.034003017623415656),
 ('example', 0.03368673514533424),
 ('Math', 0.030179239986825087),
 ('these', 0.025607989542153462),
 ('has', -0.006308368329728049),
 ('This', -0.0007901448201678051)]