### Sentiment Analysis in Italian. Model1

* see: https://huggingface.co/neuraly/bert-base-italian-cased-sentiment

In [1]:
import torch
import numpy as np
from torch import nn  
from transformers import AutoTokenizer, AutoModelForSequenceClassification

### The first two functions download the pretrained model from Internet.


In [2]:
# Globals

THR = 0.3
DEC_DIGITS = 4


In [3]:
def load_model():
    # Load the tokenizer and the model. The first one is better
    MODEL_NAME = "neuraly/bert-base-italian-cased-sentiment"
    
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # Load the model
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    
    return tokenizer, model

In [4]:
def predict(input_sentence, tokenizer, model):
    # encode the sentence and create the input tensor
    input_ids = tokenizer(input_sentence, add_special_tokens=True, padding=True)['input_ids']

    # Create tensor for input
    tensor = torch.tensor(input_ids).long()
    
    # add the batch dimension (not needed if we're scoring on N sentences)
    tensor = tensor.unsqueeze(0)

    # Call the model and get the logits
    logits = model(tensor)['logits']

    # Remove the fake batch dimension
    # I changed from the url this line of code to avoid an exception... This way it works
    # logits = logits.squeeze(0)

    # The model was trained with a Log Likelyhood + Softmax combined loss, hence to extract probabilities we need a softmax on top of the logits tensor
    proba = nn.functional.softmax(logits, dim=1)
    
    # after the softmax to use same dim=1
    proba = proba.squeeze(0)
    
    # proba is (negative, neutral, positive)
    # [0] to remove the added dimension
    return proba

In [76]:
#
# this one has as input a list of sentences
#
def batch_predict(input_sentences, tokenizer, model):
    # encode the sentence and create the input tensor
    input_ids = tokenizer(input_sentences, add_special_tokens=True, padding=True)['input_ids']
    
    # Create tensor for input
    tensor = torch.tensor(input_ids).long()

    # Call the model and get the logits
    logits = model(tensor)
    
    print(logits)
    
    logits = logits['logits']

    # Remove the fake batch dimension
    # I changed from the url this line of code to avoid an exception... This way it works

    # The model was trained with a Log Likelyhood + Softmax combined loss, hence to extract probabilities we need a softmax on top of the logits tensor
    print(logits.size())
    proba = nn.functional.softmax(logits, dim=1)
    
    # proba is (negative, neutral, positive)

    return proba

In [77]:
%%time

# loading model and tokenizer
tokenizer, model = load_model()

CPU times: user 836 ms, sys: 118 ms, total: 954 ms
Wall time: 5.8 s


### scoring

In [14]:
%%time

input_sentences = [
    "La gestione da parte della Regione Lazio della complessa macchina dei vaccini è stata buona?",
    "La gestione da parte della Regione Lazio della complessa macchina dei vaccini è stata buona",
    "La vostra organizzazione offre servizi pessimi",
    "La vostra organizzazione offre servizi non adeguati",
    "Sono molto soddisfatto del tuo lavoro",
    "non sono del tutto sicuro che il lavoro sia adeguato",
    "l'azienda dovrebbe offrire servizi migliori",
    "il supporto offerto dal customer care non è stato adeguato",
    "il risultato è pessimo",
    "il Napoli ha giocato una partita ottima",
    "il lavoro dell'allenatore è stato modesto"
]



labels_score = ["positive", "negative", "neutral"]

print(f"Showing only score greater than {THR}")
print()

for sentence in input_sentences:
        
        negative, neutral, positive = predict(sentence, tokenizer, model)
        
        neg_score = round(negative.item(), DEC_DIGITS)
        pos_score = round(positive.item(), DEC_DIGITS)
        neutr_score = round(neutral.item(), DEC_DIGITS)
        
        list_scores = [pos_score, neg_score, neutr_score]
    
        print("Sentence: ", sentence)
    
        for i, score in enumerate(list_scores):
            if score > THR:
                print(f"{labels_score[i]} score is: {score}")
            
            # formatting
        print()
            
            
# formatting
print()

Showing only score greater than 0.3

Sentence:  La gestione da parte della Regione Lazio della complessa macchina dei vaccini è stata buona?
neutral score is: 0.8939

Sentence:  La gestione da parte della Regione Lazio della complessa macchina dei vaccini è stata buona
positive score is: 0.9509

Sentence:  La vostra organizzazione offre servizi pessimi
negative score is: 0.9852

Sentence:  La vostra organizzazione offre servizi non adeguati
negative score is: 0.9593

Sentence:  Sono molto soddisfatto del tuo lavoro
positive score is: 0.9984

Sentence:  non sono del tutto sicuro che il lavoro sia adeguato
negative score is: 0.6756
neutral score is: 0.3186

Sentence:  l'azienda dovrebbe offrire servizi migliori
positive score is: 0.9908

Sentence:  il supporto offerto dal customer care non è stato adeguato
negative score is: 0.9884

Sentence:  il risultato è pessimo
negative score is: 0.9974

Sentence:  il Napoli ha giocato una partita ottima
positive score is: 0.9865

Sentence:  il lavo

In [78]:
%%time
#
# batch scoring
#

input_sentences = [
    "un lavoro facile",
    "un lavoro molto facile"
]

proba_tensor = batch_predict(input_sentences, tokenizer, model)

SequenceClassifierOutput(loss=None, logits=tensor([[-3.8324,  0.2546,  3.3162],
        [-3.5013, -0.8748,  4.5212]], grad_fn=<AddmmBackward>), hidden_states=None, attentions=None)
torch.Size([2, 3])
CPU times: user 213 ms, sys: 4.03 ms, total: 217 ms
Wall time: 53.9 ms


In [79]:
THR = 0.2
DEC_DIGITS = 4

labels_score = ["positive", "negative", "neutral"]

print(f"Showing only score greater than {THR}")
print()

for i, sentence in enumerate(input_sentences):
    negative, neutral, positive = proba_tensor[i]
    
    neg_score = round(negative.item(), DEC_DIGITS)
    pos_score = round(positive.item(), DEC_DIGITS)
    neutr_score = round(neutral.item(), DEC_DIGITS)
        
    list_scores = [pos_score, neg_score, neutr_score]
    
    print("Sentence: ", sentence)
    
    for i, score in enumerate(list_scores):
        if score > THR:
            print(f"{labels_score[i]} score is: {score}")
            
    # formatting
    print()

Showing only score greater than 0.2

Sentence:  un lavoro facile
positive score is: 0.9546

Sentence:  un lavoro molto facile
positive score is: 0.9952



### Save in the model catalog

### Test the deployed model

In [23]:
THR = 0.2
DEC_DIGITS = 4

labels_score = ["positive", "negative", "neutral"]

print(f"Showing only score greater than {THR}")
print()

for i, sentence in enumerate(input_sentences):
        
        negative, neutral, positive = predict(sentence, tokenizer, model)
        negativeB, neutralB, positiveB = proba_tensor[i]
        
        neg_score = round(negative.item(), DEC_DIGITS)
        pos_score = round(positive.item(), DEC_DIGITS)
        neutr_score = round(neutral.item(), DEC_DIGITS)
        
        neg_scoreB = round(negativeB.item(), DEC_DIGITS)
        pos_scoreB = round(positiveB.item(), DEC_DIGITS)
        neutr_scoreB = round(neutralB.item(), DEC_DIGITS)
        
        list_scores = [pos_score, neg_score, neutr_score]
        list_scoresB = [pos_scoreB, neg_scoreB, neutr_scoreB]
        
        print("Sentence: ", sentence)
    
        for i, score in enumerate(list_scores):
            if score > THR:
                print(f"{labels_score[i]} score is: {score}")
        for i, scoreB in enumerate(list_scoresB):
            if scoreB > THR:
                print(f"{labels_score[i]} score is: {scoreB}")
                
            # formatting
        print()
            
            
# formatting
print()

Showing only score greater than 0.2

Sentence:  La gestione da parte della Regione Lazio della complessa macchina dei vaccini è stata buona?
neutral score is: 0.8939
neutral score is: 0.8939

Sentence:  La gestione da parte della Regione Lazio della complessa macchina dei vaccini è stata buona
positive score is: 0.9509
positive score is: 0.9396

Sentence:  La vostra organizzazione offre servizi pessimi
negative score is: 0.9852
negative score is: 0.7595
neutral score is: 0.2395

Sentence:  La vostra organizzazione offre servizi non adeguati
negative score is: 0.9593
negative score is: 0.3279
neutral score is: 0.6707

Sentence:  Sono molto soddisfatto del tuo lavoro
positive score is: 0.9984
positive score is: 0.9909

Sentence:  non sono del tutto sicuro che il lavoro sia adeguato
negative score is: 0.6756
neutral score is: 0.3186
neutral score is: 0.8476

Sentence:  l'azienda dovrebbe offrire servizi migliori
positive score is: 0.9908
neutral score is: 0.8356

Sentence:  il supporto of