### Sentiment Analysis in Italian. Model1

* see: https://huggingface.co/neuraly/bert-base-italian-cased-sentiment

In [2]:
import torch
from torch import nn  
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# to save in Model Catalog
from ads.common.model_artifact import ModelArtifact
from ads.common.model_export_util import prepare_generic_model
from ads import set_auth

from ads.common.model_export_util import prepare_generic_model
from ads.common.model_metadata import (MetadataCustomCategory,
                                       UseCaseType,
                                       Framework)

In [3]:
# Globals

# Placeholders for the tokenizer and the model; my_load_model() fills them in.
tokenizer, model = None, None

In [4]:
def my_load_model():
    """Fill the notebook-level ``tokenizer`` and ``model`` globals.

    Both objects are created with ``from_pretrained`` from the same
    checkpoint name. Nothing is returned; callers read the globals.
    """
    global tokenizer, model

    checkpoint = "neuraly/bert-base-italian-cased-sentiment"

    # Tokenizer and classifier must come from the same checkpoint
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

In [5]:
def my_predict(input_sentence):
    """Score one sentence with the globally loaded sentiment model.

    Parameters
    ----------
    input_sentence : the text to classify; it is passed as-is to
        ``tokenizer.encode``.

    Returns
    -------
    A 1-D tensor of class probabilities, ordered (negative, neutral, positive).
    The tensor is detached from autograd because the forward pass runs under
    ``torch.no_grad()``.

    Requires ``my_load_model()`` to have populated the ``tokenizer`` and
    ``model`` globals beforehand.
    """
    # Encode the sentence into token ids (with the special tokens added)
    input_ids = tokenizer.encode(input_sentence, add_special_tokens=True)

    # Build the input tensor and add the batch dimension
    # (not needed if we're scoring on N sentences)
    tensor = torch.tensor(input_ids).long().unsqueeze(0)

    # Inference only: skip autograd bookkeeping so no graph is built or retained
    with torch.no_grad():
        output = model(tensor)

    # Remove the fake batch dimension.
    # The logits are read by key here (changed from the model-card snippet,
    # which raised an exception).
    logits = output['logits'].squeeze(0)

    # The model was trained with a log-likelihood + softmax combined loss, hence
    # to extract probabilities we need a softmax on top of the logits tensor
    proba = nn.functional.softmax(logits, dim=0)

    # proba is (negative, neutral, positive)
    return proba

In [6]:
# Populate the global tokenizer and model before any scoring cell runs
my_load_model()

### scoring

In [7]:
%%time

# Sentence used for our tests; an alternative example is kept commented out below.
# sentence = "Beh, l'azienda XXXX dovrebbe provare ad offrire servizi migliori, i servizi attuali non sono adeguati e costano tanto"
sentence = "La gestione da parte della Regione Lazio della complessa macchina dei vaccini è stata adeguata"

# my_predict returns the probabilities in (negative, neutral, positive) order
negative, neutral, positive = my_predict(sentence)

CPU times: user 566 ms, sys: 11.5 ms, total: 577 ms
Wall time: 144 ms


In [8]:
# Show the three class probabilities, one per line, rounded to 4 decimals
print(
    *(
        f"{label} score: {round(prob.item(), 4)}"
        for label, prob in (
            ("Negative", negative),
            ("Neutral", neutral),
            ("Positive", positive),
        )
    ),
    sep="\n",
)

Negative score: 0.0014
Neutral score: 0.9472
Positive score: 0.0514


### Save in the model catalog

In [9]:
# Select resource-principal authentication for the ADS calls that follow
set_auth(auth='resource_principal')

In [12]:
# 1. prepare artifacts directory

PATH_ARTEFACT = "/home/datascience/model-files"

# force_overwrite=True lets this cell be re-run against an existing directory
artifact = prepare_generic_model(
    PATH_ARTEFACT,
    model=model,
    force_overwrite=True,
    data_science_env=True,
    use_case_type=UseCaseType.SENTIMENT_ANALYSIS,
)

HBox(children=(FloatProgress(value=0.0, description='loop1', max=4.0, style=ProgressStyle(description_width='i…

INFO:ads.common.model_artifact:We give you the option to specify a different inference conda environment for model deployment purposes. By default it is assumed to be the same as the conda environment used to train the model. If you wish to specify a different environment for inference purposes, please assign the path of a published or data science conda environment to the optional parameter `inference_conda_env`. 


In [13]:
%%writefile {PATH_ARTEFACT}/score.py

#
# customize and save score.py
#
import torch
from torch import nn  
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import json
import os
import io
import logging 

# Logging configuration (optional): one root format plus two named loggers at INFO level
logging.basicConfig(format='%(name)s - %(levelname)s - %(message)s', level=logging.INFO)
# Logger used by load_model() and predict() for their progress messages
logger_pred = logging.getLogger('model-prediction')
logger_pred.setLevel(logging.INFO)
# NOTE(review): logger_feat is configured but not used by the functions in this cell
logger_feat = logging.getLogger('input-features')
logger_feat.setLevel(logging.INFO)

# Module-level handles, populated by load_model()
model = None
tokenizer = None

# to enable/disable detailed logging
# NOTE(review): DEBUG is not read by load_model() or predict() in this cell
DEBUG = True

"""
   Inference script. This script is used for prediction by scoring server when schema is known.
"""

def load_model():
    """
    Creates the tokenizer and the classifier with ``from_pretrained`` and
    stores them in the module-level ``tokenizer`` and ``model`` globals.

    Returns
    -------
    model:  the sequence-classification model instance (also kept in the
            ``model`` global)
    """
    global tokenizer, model

    checkpoint = "neuraly/bert-base-italian-cased-sentiment"

    # Tokenizer and classifier must come from the same checkpoint
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

    # Report on stdout and through the prediction logger
    loaded_msg = "Loaded model..."
    print(loaded_msg)
    logger_pred.info(loaded_msg)

    return model

def predict(data, model=load_model()) -> dict:
    """
    Returns the sentiment probabilities for one input sentence.

    Parameters
    ----------
    data: The sentence to score; it is passed as-is to ``tokenizer.encode``.
    model: Model instance returned by load_model API. The default is evaluated
        once, when this module is imported, which also populates the
        module-level ``tokenizer``.

    Returns
    -------
    predictions: Output from scoring server
        Format: { 'prediction': [negative, neutral, positive] }
        The probabilities are plain Python floats so the result can be
        serialized to JSON.

    """
    logger_pred.info("In predict...")

    # some check
    assert model is not None, "Model is not loaded"

    logger_pred.info("Invoking model......")

    # Encode the incoming sentence (the `data` argument) into token ids
    input_ids = tokenizer.encode(data, add_special_tokens=True)

    # Build the input tensor and add the batch dimension
    # (not needed if we're scoring on N sentences)
    tensor = torch.tensor(input_ids).long().unsqueeze(0)

    # Inference only: skip autograd bookkeeping so no graph is built or retained
    with torch.no_grad():
        output = model(tensor)

    # Remove the fake batch dimension.
    # The logits are read by key here (changed from the model-card snippet,
    # which raised an exception).
    logits = output['logits'].squeeze(0)

    # The model was trained with a log-likelihood + softmax combined loss, hence
    # to extract probabilities we need a softmax on top of the logits tensor
    proba = nn.functional.softmax(logits, dim=0)

    # proba is (negative, neutral, positive); convert the tensor to a list of
    # floats because a torch.Tensor cannot be JSON-encoded in the response
    return { 'prediction': proba.tolist() }

Overwriting /home/datascience/model-files/score.py


In [14]:
# Save the prepared artifact in the Model Catalog and keep the returned entry
catalog_entry = artifact.save(
    display_name='model-sentiment1',
    description='A model for Sentiment Analysis using Tranformers',
)

INFO:ads.common.model_artifact:{
  "git_branch": "None",
  "git_commit": "None",
  "repository_url": "None",
  "script_dir": "/home/datascience/model-files",
  "training_id": null,
  "training_script": "None"
}
['runtime.yaml', 'score.py']


HBox(children=(FloatProgress(value=0.0, description='loop1', max=5.0, style=ProgressStyle(description_width='i…

artifact:/tmp/saved_model_b40f6664-92df-4528-9e6a-f0382dcffbf3.zip


### Test the saved score.py locally

In [None]:
# Put the artifact directory first on sys.path so the generated score.py is importable
import sys 
sys.path.insert(0, PATH_ARTEFACT)

# Importing score also runs load_model() once: predict's default argument calls it
import score

from score import load_model, predict

In [None]:
# Explicit load through the score module; this rebinds the notebook-level `model`
model = score.load_model()

In [None]:
# Smoke-test predict() from score.py with a single Italian sentence
score.predict("E' un giocatore finito")