### Prepare Model Deployment

In [1]:
import os
import ocifs
import pandas as pd

from ads.common.model_artifact import ModelArtifact
from ads.common.model_export_util import prepare_generic_model
from ads import set_auth

from ads.common.model_export_util import prepare_generic_model
from ads.common.model_metadata import (MetadataCustomCategory,
                                       UseCaseType,
                                       Framework)

import json
import pickle

In [2]:
# use resource principal authentication for the ADS calls made in this notebook
set_auth(auth="resource_principal")

In [7]:
# 1. Prepare the artifacts directory.
# The pickled model file has already been saved there by the catboost1 notebook.

MODEL_FILE = 'model.pkl'
PATH_ARTEFACT = "/home/datascience/model-files"

# full path of the serialized model inside the artifacts directory
PATH_NAME = f"{PATH_ARTEFACT}/{MODEL_FILE}"

In [None]:
#
# Information that will be saved with the model in the Model Catalog
#

# conda environment (an Object Storage URI) to be used for inference
my_inf_conda_env = 'oci://conda_envs@fr95jjtqbdhh/conda_environments/gpu/tf26_catboost/1.0/tf26_catboostv1_0'

# build the artifact in PATH_ARTEFACT, overwriting whatever was generated before
artifact = prepare_generic_model(
    PATH_ARTEFACT,
    force_overwrite=True,
    data_science_env=False,
    use_case_type=UseCaseType.BINARY_CLASSIFICATION,
    inference_conda_env=my_inf_conda_env,
)

In [None]:
%%writefile {PATH_ARTEFACT}/score.py

#
# customize and save score.py
#
import pandas as pd
import numpy as np
import time

import catboost as cat

import pickle
import json
import os
import io
import logging 

# Logging setup - OPTIONAL
# every record is rendered as "<logger name> - <level> - <message>"
logging.basicConfig(level=logging.INFO, format='%(name)s - %(levelname)s - %(message)s')

# logger used for model loading, invocation and predictions
logger_pred = logging.getLogger('model-prediction')
logger_pred.setLevel(logging.INFO)

# logger used for the input features
logger_feat = logging.getLogger('input-features')
logger_feat.setLevel(logging.INFO)

# name of the pickled model file; it is read in load_model()
model_file_name = "model.pkl"

# set to False to stop logging the input features in predict()
DEBUG = True

"""
   Inference script. This script is used for prediction by scoring server when schema is known.
"""

def load_model():
    """
    Deserialize the model stored in the same directory as this script.

    Returns
    -------
    model:  the unpickled model instance, on which the predict API can be invoked

    Raises
    ------
    Exception: if the model file is not present in the directory of this script
    """
    model_dir = os.path.dirname(os.path.realpath(__file__))

    # guard clause: stop immediately when the pickle is not in the model directory
    if model_file_name not in os.listdir(model_dir):
        raise Exception('{0} is not found in model directory {1}'.format(model_file_name, model_dir))

    # NOTE: unpickling can execute arbitrary code, only load artifacts you trust
    with open(os.path.join(model_dir, model_file_name), "rb") as model_file:
        model = pickle.load(model_file)

    logger_pred.info("Loaded model...")

    return model

def preprocess_data(x):
    """
    Hook for preprocessing / feature engineering before the model is invoked.

    For now it only logs a message and hands the input back unchanged.
    """
    logger_pred.info("Eventually preprocessing and adding features...")
    return x

def predict(data, model=load_model()) -> dict:
    """
    Returns prediction given the model and data to predict

    Parameters
    ----------
    data: the rows to score. Either JSON text (str, or UTF-8 encoded bytes) or an
        already decoded JSON payload (e.g. a list of rows); a decoded payload is
        serialized back to JSON text so that every input follows the same parsing path.
    model: Model instance returned by load_model API. The default value is evaluated
        once, when this module is imported, so the model is loaded a single time.

    Returns
    -------
    predictions: Output from scoring server
        Format: { 'prediction': list with one value per input row, taken from the
        second column of `model.predict_proba` and rounded to 4 decimals }

    """
    logger_pred.info("In predict...")

    # some check
    assert model is not None, "Model is not loaded"

    # normalize the payload to JSON text: io.StringIO accepts only str, so a payload
    # that arrives as bytes or already decoded would otherwise raise a TypeError
    if isinstance(data, (bytes, bytearray)):
        data = data.decode("utf-8")
    elif not isinstance(data, str):
        data = json.dumps(data)

    x = pd.read_json(io.StringIO(data)).values

    if DEBUG:
        logger_feat.info("Logging features")
        logger_feat.info(x)

    # preprocess data (for example normalize features)
    x = preprocess_data(x)

    logger_pred.info("Invoking model......")

    preds = model.predict_proba(x)

    # keep only the second column (presumably the positive class - confirm
    # against the trained model), rounded
    preds = np.round(preds[:, 1], 4)
    preds = preds.tolist()

    logger_pred.info("Logging predictions")
    logger_pred.info(preds)

    return { 'prediction': preds }

In [None]:
# save the prepared artifact; the returned object is kept as the catalog entry
catalog_entry = artifact.save(
    display_name='model-catboost20',
    description='A model for Employee Attrition using catboost',
)

### Test

In [3]:
def read_from_object_storage(prefix, file_name):
    """
    Read a CSV file from Object Storage into a pandas DataFrame.

    Parameters
    ----------
    prefix: first part of the object URI; it is concatenated as-is with
        file_name, so it must already end with the separator
    file_name: name of the CSV file to read

    Returns
    -------
    the DataFrame produced by pd.read_csv
    """
    # Object Storage seen as a file system;
    # config={} assumes resource_principal auth
    object_storage = ocifs.OCIFileSystem(config={})

    # reading data from Object Storage
    with object_storage.open(prefix + file_name, 'rb') as csv_stream:
        return pd.read_csv(csv_stream)

In [4]:
PREFIX = "oci://data_input@fr95jjtqbdhh/"
FILE_NAME = "orcl_attrition.csv"

# name of the label column
TARGET = 'Attrition'

# load the whole dataset (the reader function is defined in the cell above)
data_orig = read_from_object_storage(prefix=PREFIX, file_name=FILE_NAME)

# some columns are not needed. This is the list of columns that will be used
my_columns = [
    'Age', 'Attrition', 'EnvironmentSatisfaction', 'MaritalStatus',
    'TravelForWork', 'SalaryLevel', 'JobFunction', 'CommuteLength',
    'EducationalLevel', 'EducationField', 'MonthlyIncome', 'OverTime',
    'StockOptionLevel', 'TrainingTimesLastYear', 'YearsSinceLastPromotion',
    'WorkLifeBalance',
]

# dataset filtered by dropping the columns that are not needed
data = data_orig[my_columns]

# derive the feature list automatically: every selected column except the
# target, in alphabetical order (this fixes the column order of the inputs)
all_columns = sorted(my_columns)
features = sorted(set(all_columns) - {TARGET})

x_data = data[features]
y_data = data[TARGET]

In [5]:
# use ten rows (positions 10 to 19) as a small test batch,
# together with their labels
x_input, y_label = x_data.iloc[10:20].values, y_data.iloc[10:20].values

In [8]:
%load_ext autoreload

%autoreload 2

# add the path of score.py to the front of the module search path,
# so that the import below picks up the file written in the artifact directory

import sys 
sys.path.insert(0, PATH_ARTEFACT)

from score import load_model, predict

# Load the model to memory.
# Importing score already calls load_model() once (default argument of predict),
# which is why "Loaded model..." is logged twice.
loaded_model = load_model()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
INFO:model-prediction:Loaded model...
INFO:model-prediction:Loaded model...


In [9]:
# score the test batch locally; the rows are sent to predict() as a JSON string
predictions_test = predict(
    json.dumps(x_input.tolist()),
    loaded_model,
)

INFO:model-prediction:In predict...
INFO:input-features:Logging features
INFO:input-features:[[36 17 'Medical' 'L3' 1 'Software Developer' 'Married' 2426 'No' 3710 1
  5 'infrequent' 3 0]
 [30 16 'Life Sciences' 'L2' 4 'Software Developer' 'Single' 4193 'Yes'
  700 0 3 'infrequent' 3 0]
 [32 27 'Life Sciences' 'L1' 1 'Software Developer' 'Divorced' 2911 'No'
  3072 1 1 'infrequent' 2 4]
 [35 20 'Medical' 'L2' 2 'Software Developer' 'Divorced' 2661 'No' 6172 1
  2 'infrequent' 3 1]
 [29 25 'Life Sciences' 'L3' 3 'Software Developer' 'Single' 2028 'Yes'
  472 0 4 'infrequent' 3 0]
 [30 22 'Life Sciences' 'L4' 2 'Software Developer' 'Divorced' 9980 'No'
  6370 1 1 'infrequent' 3 8]
 [33 6 'Life Sciences' 'L2' 1 'Software Developer' 'Divorced' 3298 'Yes'
  1530 2 5 'infrequent' 2 0]
 [23 17 'Medical' 'L2' 4 'Software Developer' 'Divorced' 2935 'Yes' 5150
  2 2 'none' 2 0]
 [54 3 'Life Sciences' 'L4' 1 'Product Management' 'Married' 15427 'No'
  5590 0 3 'infrequent' 3 3]
 [39 3 'Life Scien

In [14]:
# the last step is to invoke the model deployment:
# the URL is made of the regional host, the OCID of the deployment and /predict
endpoint = (
    "https://modeldeployment.eu-frankfurt-1.oci.customer-oci.com/"
    "ocid1.datasciencemodeldeployment.oc1.eu-frankfurt-1."
    "amaaaaaa7egirmqax67pw7mc37vjfzj76xxmp7eunvipii76qsld7wvyaxyq"
    "/predict"
)

In [15]:
import requests
import oci
from oci.signer import Signer

# again using RP: the signer is passed to requests to sign the HTTP call
rps = oci.auth.signers.get_resource_principals_signer()

# payload goes here
# NOTE: body is already JSON text and it is sent through json=, so it is encoded
# a second time on the wire; this matches predict() in score.py, which parses a
# JSON string
body = json.dumps(x_input.tolist())

# a timeout (in seconds) avoids waiting forever if the endpoint does not answer;
# raise_for_status() turns an HTTP error into an exception instead of trying to
# read an error response as if it contained predictions
response = requests.post(endpoint, json=body, auth=rps, timeout=60)
response.raise_for_status()

print("These are the probs from the deployed model:")
print(response.json())

print()
print("and these are the labels:")
y_label

These are the probs from the deployed model:
{'prediction': [0.215, 0.4864, 0.1949, 0.1424, 0.8115, 0.0973, 0.3988, 0.3068, 0.2332, 0.3079]}

and these are the labels:


array(['No', 'No', 'No', 'No', 'Yes', 'No', 'No', 'No', 'No', 'No'],
      dtype=object)