In [11]:
import html
import re

import lime
import torch
import torch.nn.functional as F
from IPython.core.display import HTML
from lime.lime_text import LimeTextExplainer
from nltk.corpus import stopwords
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification


In [12]:
# Candidate labels handed to the zero-shot classifier.
riasec = [
    'conventional',
    'realistic',
    'investigative',
    'enterprising',
    'social',
    'artistic',
]

In [13]:
# Zero-shot classification pipeline built on the 'facebook/bart-large-mnli' checkpoint.
classifier = pipeline(
    task='zero-shot-classification',
    model='facebook/bart-large-mnli',
)

In [14]:
# Load the tokenizer and the sequence-classification model from the same
# checkpoint; both are used directly by the LIME prediction function.
filename_model = 'facebook/bart-large-mnli'
tokenizer, model = (
    AutoTokenizer.from_pretrained(filename_model),
    AutoModelForSequenceClassification.from_pretrained(filename_model),
)

In [15]:
# Long sample (note the leading space), written as adjacent literals, one
# sentence per line, which Python joins into a single string.
text_long = (
    ' She achieves good grades in sciences, Math, and German. '
    'She is very interested in science and is a member of the technology and science clubs at her school, where her teachers have told her that she is very talented. '
    'Through her membership in these clubs, she has access to a tech lab. '
    'She is currently also working on a mini-job basis in an electronics store. '
    'Fatma is interested in university studies. '
    'Her parents are quite conservative and are uncertain of academic success for their daughter and therefore prefer Fatma starting vocational training after grade 10. '
    'She thus faces a conflict between self-fulfillment and satisfying her parents, whom she also does not want to disappoint.'
)

# Active input sentence. Alternative inputs tried earlier are kept as comments:
#text = ' She achieves good grades in sciences, Math, and German. She is very interested in science and is a member of the technology and science clubs at her school, where her teachers have told her that she is very talented. Through her membership in these clubs, she has access to a tech lab. '
#text = 'I think that AI and robots will take over the world and destroy humanity.'
text = 'I like to draw paintings with oil on a canvas.'

In [16]:
# Score every candidate label for the sample sentence; the bare name on the
# last line makes the notebook display the returned dictionary.
result = classifier(text, candidate_labels=riasec)
result

{'sequence': 'I like to draw paintings with oil on a canvas.',
 'labels': ['artistic',
  'enterprising',
  'social',
  'conventional',
  'realistic',
  'investigative'],
 'scores': [0.8289536237716675,
  0.07523660361766815,
  0.0406937301158905,
  0.020833542570471764,
  0.01776549592614174,
  0.01651705801486969]}

In [17]:
# NLTK stopword lists for both languages used in this notebook.
german_stopwords, english_stopwords = (
    stopwords.words(language) for language in ('german', 'english')
)

In [18]:
def predictor(texts, batch_size=16):
    """Return class probabilities for a list of ``premise</>hypothesis`` strings.

    This is the ``classifier_fn`` passed to ``LimeTextExplainer.explain_instance``.
    Each string is split at the *last* ``'</>'`` into a (premise, hypothesis)
    pair, so a premise that itself contains ``'</>'`` is kept intact. A string
    with no separator is treated as a premise with an empty hypothesis.

    Args:
        texts: Iterable of strings of the form ``premise + '</>' + hypothesis``.
        batch_size: Number of pairs sent through the model per forward pass.
            Keeps memory bounded when LIME submits all its samples at once.

    Returns:
        numpy array of shape ``(len(texts), n_classes)`` holding the softmax
        of the model logits, one row per input string.
    """
    separator = '</>'
    pairs = []
    for text in texts:
        head, found, tail = text.rpartition(separator)
        pairs.append((head, tail) if found else (text, ''))

    if not pairs:
        # Nothing to score: return an empty array with the right column count.
        return torch.empty((0, model.config.num_labels)).numpy()

    step = max(1, int(batch_size))
    probability_chunks = []
    # Inference only: skip building the autograd graph for every sample.
    with torch.no_grad():
        for start in range(0, len(pairs), step):
            encoded = tokenizer(
                pairs[start:start + step],
                return_tensors="pt",
                padding=True,
                truncation=True,  # over-long inputs are cut instead of crashing the model
            )
            tensor_logits = model(**encoded)[0]
            probability_chunks.append(F.softmax(tensor_logits, dim=1))

    return torch.cat(probability_chunks, dim=0).numpy()

In [19]:
# classify
result = classifier(text, candidate_labels=riasec)
most_probable_label = result['labels'][result['scores'].index(max(result['scores']))]

# explain
# Order is assumed to match the columns returned by `predictor`
# (verify against model.config.id2label if the checkpoint changes).
mlni_label = ['contradiction', 'neutral', 'entailment']
print(most_probable_label)
exp_text = text + '</>' + 'This example is ' + most_probable_label + '.'

# Fixed seed so the LIME perturbation sample, and with it the explanation,
# is the same on every run.
LIME_SEED = 42
explainer = LimeTextExplainer(class_names=mlni_label, random_state=LIME_SEED)
exp = explainer.explain_instance(exp_text, predictor, num_features=15, num_samples=100, top_labels=3)

# render
base_colors = ["65, 105, 225", "255, 140, 0"]  # [positive weight, negative weight]
# Case-insensitive stopword lookup: the stopword list is lowercase, the
# explained words ('I', 'This', ...) are not.
stopword_set = {stopword.lower() for stopword in english_stopwords}


def _highlight_tokens(source_text, word_weights):
    """Return `source_text` as HTML with weighted whole words wrapped in coloured spans.

    The text is split into word / non-word tokens and only tokens that exactly
    equal a key of `word_weights` are wrapped, so substrings of longer words
    and the generated markup itself are never touched. Every token is
    HTML-escaped before being emitted.
    """
    pieces = []
    for token in re.split(r'(\W+)', source_text):
        escaped = html.escape(token)
        weight = word_weights.get(token)
        if weight is None:
            pieces.append(escaped)
            continue
        color = base_colors[1] if weight < 0 else base_colors[0]
        # Weights are scaled by 10 for visibility; alpha is clamped to its valid maximum.
        alpha = min(abs(weight) * 10, 1.0)
        pieces.append('<span style="background-color:rgba(' + color + ',' + str(alpha) + ');">' + escaped + '</span>')
    return ''.join(pieces)


html_texts = "<h2>" + html.escape(most_probable_label) + "</h2>"
# Legend swatches carry no alpha channel, so plain rgb() is used.
html_texts += '<span style="background-color:rgb(' + base_colors[0] + ');">Positive</span>'
html_texts += '<span style="background-color:rgb(' + base_colors[1] + ');">Negative</span>'

for idx, pred_class in enumerate(mlni_label):
    word_weights = {}
    for word, value in exp.as_list(label=idx):
        if word.lower() in stopword_set:
            continue
        # Keep the first weight reported for a word.
        word_weights.setdefault(word, value)
    html_texts += "<h3>" + pred_class + "</h3>" + "<p>" + _highlight_tokens(text, word_weights) + "</p>"

HTML(html_texts)


artistic


In [20]:
# Word weights LIME assigned for the 'entailment' class (index 2 in mlni_label).
exp.as_list(label=mlni_label.index('entailment'))

[('artistic', 0.45064555136099405),
 ('is', -0.13278034409769263),
 ('with', -0.09046589949120915),
 ('paintings', 0.07991711636654875),
 ('draw', 0.07783086334408314),
 ('example', -0.07570553009251148),
 ('This', -0.06660073407272886),
 ('to', -0.0445329141752559),
 ('on', -0.04033300204429178),
 ('I', -0.03642130582782339),
 ('oil', -0.0247765868155197),
 ('canvas', 0.02454206035897591),
 ('like', 0.019951890420947145),
 ('a', 0.01548164410801061)]

In [1]:
from label_classification import ZeroShotLabelClassifier, ZeroShotLabelClassifierExplainer

# German configuration: labels, input sentence, hypothesis template and model.
model_name = 'svalabs/gbert-large-zeroshot-nli'
hypothesis_template = "Dieser Satz ist {}."
riasec = [
    'konventionell',
    'realistisch',
    'investigativ',
    'unternehmerisch',
    'sozial',
    'künstlerisch',
]
text = 'Ich male gerne mit Ölfarben auf Leinwände.'

# Classify the sentence against the candidate labels.
label_classifier = ZeroShotLabelClassifier(
    labels=riasec,
    model_name=model_name,
    hypothesis_template=hypothesis_template,
)
classification_result = label_classifier.classify(text)
print("Classification Result:", classification_result)

# Explain the first label of the classification result.
most_likely_label = classification_result['labels'][0]
explainer = ZeroShotLabelClassifierExplainer(
    model_name=model_name,
    hypothesis_template=hypothesis_template,
)
explanation = explainer.explain(text, label=most_likely_label, num_samples=500)
print("Explanation:", explanation)

  from .autonotebook import tqdm as notebook_tqdm


Classification Result: {'sequence': 'Ich male gerne mit Ölfarben auf Leinwände.', 'labels': ['künstlerisch', 'sozial', 'investigativ', 'konventionell', 'unternehmerisch', 'realistisch'], 'scores': [0.6761776804924011, 0.12924496829509735, 0.06201709061861038, 0.0512198880314827, 0.04103587940335274, 0.04030449688434601]}
Explanation: {'label': 'künstlerisch', 'highlights': [{'key': 'mit', 'value': -0.019699673298812104}, {'key': 'Ölfarben', 'value': 0.01607146873328142}, {'key': 'Ich', 'value': -0.01460975509297848}, {'key': 'auf', 'value': 0.012080694272585452}, {'key': 'male', 'value': -0.0117092584679759}, {'key': 'Leinwände', 'value': 0.010141154599231662}, {'key': 'gerne', 'value': 0.004742624167008458}]}


In [4]:
# Display the object returned by explainer.explain(...) in the previous cell.
explanation

[('künstlerisch', -0.27583054519670597),
 ('Satz', -0.028239034199067908),
 ('Ich', -0.017440056389824005),
 ('gerne', 0.01683523589818306),
 ('Dieser', -0.011961222919886274),
 ('auf', 0.011844058096153754),
 ('male', 0.009472113940166406),
 ('Leinwände', 0.006018754902110311),
 ('Ölfarben', -0.004320230246263173),
 ('mit', 0.0042881870837588),
 ('ist', -0.002133286568133212)]

In [6]:
from label_classification import ZeroShotLabelClassifier, ZeroShotLabelClassifierExplainer

# English configuration: English labels with the classifier's and explainer's
# default settings (no model_name / hypothesis_template passed).
riasec = ['conventional', 'realistic', 'investigative', 'enterprising', 'social', 'artistic']
# The input must be the English sentence: the labels are English and the
# output recorded for this cell was produced from it. The German sentence
# belongs with the German labels/model configuration.
text = 'I like to draw paintings with oil on a canvas.'
# text = 'Ich male gerne mit Ölfarben auf Leinwände.'

label_classifier = ZeroShotLabelClassifier(labels=riasec)
classification_result = label_classifier.classify(text)

print("Classification Result:", classification_result)

# Explain the first label of the classification result.
explainer = ZeroShotLabelClassifierExplainer()
most_likely_label = classification_result['labels'][0]
explanation = explainer.explain(text, label=most_likely_label)

print("Explanation:", explanation)



Classification Result: {'sequence': 'I like to draw paintings with oil on a canvas.', 'labels': ['artistic', 'enterprising', 'social', 'conventional', 'realistic', 'investigative'], 'scores': [0.8289536237716675, 0.07523660361766815, 0.0406937301158905, 0.020833542570471764, 0.01776549592614174, 0.01651705801486969]}
Explanation: [('artistic', 0.5180652546543094), ('is', -0.1172885350094423), ('with', -0.09541382498477957), ('paintings', 0.08335546547578707), ('oil', -0.07009838241002983), ('example', -0.06874800570680951), ('I', -0.050959537567378387), ('on', -0.050236912535283805), ('draw', 0.047615706955168466), ('a', 0.03868497582078177), ('This', -0.0385271696006294), ('to', -0.03514698021738425), ('like', -0.023746628390619695), ('canvas', 0.012904417043864759)]
