### Sentiment Analysis: prepare deployment

* Uses the `MultiSentimentAnalyzer` class defined in `sentiment_analyzers.py`
* Conda environment used: Natural Language Processing for CPU Python 3.7

In [1]:
import torch
from torch import nn

# HuggingFace transformers (available in OCI DS conda nlp env)
# see: https://github.com/huggingface/transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# from my Python file
from sentiment_analyzers import MultiSentimentAnalyzer

import os
from ads import set_auth
from ads.common.model_artifact import ModelArtifact
from ads.common.model_export_util import prepare_generic_model
from ads.common.model_metadata import (MetadataCustomCategory,
                                       UseCaseType)

In [2]:
# Create the deployment (artifact) directory if it does not exist yet.
PATH_ARTEFACT = "./nlptown-checkpoint"

# makedirs(exist_ok=True) is idempotent, so re-running this cell is safe and
# there is no window between an existence check and the actual creation.
os.makedirs(PATH_ARTEFACT, exist_ok=True)

In [3]:
# Copy sentiment_analyzers.py into PATH_ARTEFACT: the score.py written into
# that directory imports MultiSentimentAnalyzer from this module.
!cp sentiment_analyzers.py $PATH_ARTEFACT

In [4]:
# Fetch the pretrained checkpoint from the Hub:
# the PyTorch weights for sequence classification and the matching tokenizer
MODEL_NAME = "nlptown/bert-base-multilingual-uncased-sentiment"

model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [5]:
# Persist the tokenizer and then the model into the artifact directory
LOCAL_DIR = PATH_ARTEFACT

for pretrained in (tokenizer, model):
    pretrained.save_pretrained(LOCAL_DIR)

In [6]:
# Prepare the model artifact inside PATH_ARTEFACT
artifact = prepare_generic_model(
    model=model,
    model_path=PATH_ARTEFACT,
    use_case_type=UseCaseType.SENTIMENT_ANALYSIS,
    data_science_env=True,
    force_overwrite=True,
)

loop1:   0%|          | 0/4 [00:00<?, ?it/s]



In [10]:
%%writefile {PATH_ARTEFACT}/score.py

import pandas as pd
import numpy as np

import json
import os
import pickle

import io
import logging 

# logging configuration - OPTIONAL
# root handler format first, then the two named loggers used by this script
logging.basicConfig(
    level=logging.INFO,
    format='%(name)s - %(levelname)s - %(message)s',
)

logger_pred = logging.getLogger('model-prediction')
logger_feat = logging.getLogger('input sentence')

logger_pred.setLevel(logging.INFO)
logger_feat.setLevel(logging.INFO)

# from my Python file
from sentiment_analyzers import MultiSentimentAnalyzer

# switch for the detailed logging done in predict()
DEBUG = True

# file whose presence in the model directory is checked by load_model()
MODEL_FILE_NAME = "pytorch_model.bin"

# module-level analyzer instance; stays None until load_model() assigns it
sent_analyzer = None

"""
   Inference script. This script is used for prediction by scoring server when schema is known.
"""

def load_model():
    """
    Loads the sentiment analyzer from the directory containing this script.

    The directory must contain MODEL_FILE_NAME; when it does, a
    MultiSentimentAnalyzer is built on that directory, stored in the
    module-level `sent_analyzer` and returned.

    Returns
    -------
    model:  the MultiSentimentAnalyzer instance (also kept in `sent_analyzer`)

    Raises
    ------
    Exception: if MODEL_FILE_NAME is not found in the model directory
    """
    global sent_analyzer

    # the artifact files are expected next to score.py
    model_dir = os.path.dirname(os.path.realpath(__file__))
    contents = os.listdir(model_dir)

    logger_pred.info(model_dir)

    if MODEL_FILE_NAME not in contents:
        # Report the file that is actually missing. LOCAL_DIR only exists in
        # the notebook, not in this script, so formatting it here would raise
        # NameError and hide the real problem.
        raise Exception('{0} is not found in model directory {1}'.format(MODEL_FILE_NAME, model_dir))

    sent_analyzer = MultiSentimentAnalyzer(
        model_dir, labels=["1 star", "2 star", "3 star", "4 star", "5 star"])

    logger_pred.info("Loaded the model and tokenizer !!!")

    return sent_analyzer

def pre_inference(data):
    """
    Pre-processing hook: logs a message and hands the input back untouched.

    Parameters
    ----------
    data: input in the format expected by the analyzer.

    Returns
    -------
    data: the same object that was passed in.

    """
    logger_pred.info("Preprocessing...")

    # no transformation is applied at the moment
    preprocessed = data
    return preprocessed

def post_inference(yhat):
    """
    Post-processing hook: logs a message and hands the result back untouched.

    Parameters
    ----------
    yhat: output produced by the analyzer.

    Returns
    -------
    yhat: the same object that was passed in.

    """
    logger_pred.info("Postprocessing output...")

    # no transformation is applied at the moment
    postprocessed = yhat
    return postprocessed

def predict(data, model=load_model()):
    """
    Scores the input with the sentiment analyzer and returns the result.

    Parameters
    ----------
    data: input passed as-is to the analyzer's score() method
          (the notebook test passes a single sentence as a str).
    model: analyzer instance to use. The default is evaluated once, when this
           function is defined (i.e. at import time), by calling load_model().
           If None is passed explicitly, the module-level `sent_analyzer`
           is used instead.

    Returns
    -------
    scores: whatever the analyzer's score() returns; no wrapping is applied.
        In the notebook test this is a list of {'label': ..., 'score': ...} dicts.

    """
    # Honor an explicitly supplied model instead of silently ignoring it;
    # fall back to the module-level analyzer only when none is given.
    analyzer = model if model is not None else sent_analyzer

    if DEBUG:
        logger_pred.info("In function predict...")
        logger_pred.info("Input data:")
        logger_pred.info(type(data))
        logger_pred.info(data)

    # some check
    assert analyzer is not None, "Model is not loaded"

    scores = analyzer.score(data)

    if DEBUG:
        logger_pred.info(scores)

    # post inference not needed
    return scores

Overwriting ./nlptown-checkpoint/score.py


In [15]:
# Run the artifact introspection checks; the results are shown in the table below
artifact.introspect()

['runtime.yaml', 'vocab.txt', 'config.json', 'special_tokens_map.json', 'sentiment_analyzers.py', 'score.py', 'test_json_output.json', '__pycache__', 'tokenizer_config.json', '.ipynb_checkpoints', 'pytorch_model.bin', 'tokenizer.json']


Unnamed: 0,Test key,Test name,Result,Message
0,runtime_env_path,Check that field MODEL_DEPLOYMENT.INFERENCE_ENV_PATH is set,Passed,
1,runtime_env_python,Check that field MODEL_DEPLOYMENT.INFERENCE_PYTHON_VERSION is set to a value of 3.6 or higher,Passed,
2,runtime_path_exist,Check that the file path in MODEL_DEPLOYMENT.INFERENCE_ENV_PATH is correct.,Passed,
3,runtime_version,Check that field MODEL_ARTIFACT_VERSION is set to 3.0,Passed,
4,runtime_yaml,"Check that the file ""runtime.yaml"" exists and is in the top level directory of the artifact directory",Passed,
5,score_load_model,Check that load_model() is defined,Passed,
6,score_predict,Check that predict() is defined,Passed,
7,score_predict_arg,Check that all other arguments in predict() are optional and have default values,Passed,
8,score_predict_data,"Check that the only required argument for predict() is named ""data""",Passed,
9,score_py,"Check that the file ""score.py"" exists and is in the top level directory of the artifact directory",Passed,


In [16]:
# Saving the model artifact to the model catalog.
# Both OCIDs are read from the environment; a missing variable raises KeyError here.
compartment_id = os.environ['NB_SESSION_COMPARTMENT_OCID']
project_id = os.environ['PROJECT_OCID']

set_auth(auth='resource_principal')

#
# Save to Model Catalog
#
catalog_entry = artifact.save(display_name='ITA2 Sentiment analysis',
                              description='A model for sentiment analysis',
                              # pass the target project/compartment explicitly
                              # instead of leaving the values read above unused
                              project_id=project_id,
                              compartment_id=compartment_id,
                              # avoids having to commit pending changes first (be careful)
                              ignore_pending_changes=True,
                              # needs a longer timeout (the bin file is 600MB)
                              timeout=600)

loop1:   0%|          | 0/5 [00:00<?, ?it/s]

artifact:/tmp/saved_model_99e0737d-8e61-4a74-a8c8-4709e99f451b.zip


### Test score.py locally

In [11]:
# Enable autoreload so that edits to imported modules are picked up automatically.
# (alternative when the extension is already loaded:)
# %reload_ext autoreload
%load_ext autoreload

%autoreload 2

# Put the artifact directory first on sys.path so that `score` can be imported from it

import sys 
sys.path.insert(0, PATH_ARTEFACT)

# NOTE: this import already loads the model once, because the default value of
# predict()'s `model` argument calls load_model() when the function is defined.
from score import load_model, predict

INFO:model-prediction:/home/datascience/italian-sentiment-analysis/nlptown-checkpoint
Loading model...
Model loading completed!
INFO:model-prediction:Loaded the model and tokenizer !!!


In [12]:
# Explicit call to load_model(); the return value is discarded, but the call
# re-assigns the module-level analyzer inside score.py
_ = load_model()

INFO:model-prediction:/home/datascience/italian-sentiment-analysis/nlptown-checkpoint
Loading model...
Model loading completed!
INFO:model-prediction:Loaded the model and tokenizer !!!


In [13]:
# Smoke test with an Italian sentence
# ("I am satisfied with the services offered by your company")
predictions_test = predict("Sono soddisfatto dei servizi offerti dalla vostra azienda")

INFO:model-prediction:In function predict...
INFO:model-prediction:Input data:
INFO:model-prediction:<class 'str'>
INFO:model-prediction:Sono soddisfatto dei servizi offerti dalla vostra azienda
INFO:model-prediction:[{'label': '1 star', 'score': 0.0029}, {'label': '2 star', 'score': 0.0041}, {'label': '3 star', 'score': 0.0824}, {'label': '4 star', 'score': 0.4626}, {'label': '5 star', 'score': 0.448}]


In [14]:
# Display the scores returned by predict()
predictions_test

[{'label': '1 star', 'score': 0.0029},
 {'label': '2 star', 'score': 0.0041},
 {'label': '3 star', 'score': 0.0824},
 {'label': '4 star', 'score': 0.4626},
 {'label': '5 star', 'score': 0.448}]