### Prepare Deploy
* Save the model to the Model Catalog and prepare its deployment as a **REST service**

In [1]:
import os
from ads.common.model_artifact import ModelArtifact
from ads.common.model_export_util import prepare_generic_model
from ads.common.model_metadata import (MetadataCustomCategory,
                                       UseCaseType)
from ads import set_auth

import pandas as pd
import numpy as np

import json
import pickle

In [2]:
 # env and auth
# OCIDs of the compartment and of the project, read from environment variables
# (os.environ[...] raises KeyError when a variable is not set)
compartment_id = os.environ['NB_SESSION_COMPARTMENT_OCID']
project_id = os.environ['PROJECT_OCID']

# select resource principal authentication
# NOTE(review): presumably this applies to every subsequent ADS call - confirm against the ADS docs
set_auth(auth='resource_principal')

In [3]:
# 1. prepare artifacts directory

# Local folder used for the pickled model/scaler and for the generated score.py
PATH_ARTEFACT = "./model-files"

# exist_ok=True keeps the cell idempotent (safe to re-run) and avoids the
# check-then-create race of os.path.exists() followed by os.mkdir()
os.makedirs(PATH_ARTEFACT, exist_ok=True)

In [4]:
MODEL_FILE_NAME = "credit-scoring.pkl"
SCALER_FILE_NAME = "scaler.pkl"

# Load the serialized model and scaler from the artifacts directory.
# The context managers close the file handles deterministically; the bare
# open() calls used before left them open until garbage collection.
# NOTE: pickle.load can execute arbitrary code - only load files you produced yourself.
with open(os.path.join(PATH_ARTEFACT, MODEL_FILE_NAME), 'rb') as model_file:
    model = pickle.load(model_file)

with open(os.path.join(PATH_ARTEFACT, SCALER_FILE_NAME), 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

In [5]:
# 2. prepare deploy to Model Catalog
# location of the published Conda environment (copied from the published env)
INFERENCE_ENV = "oci://conda_ds@frqap2zhtzbe/conda_environments/cpu/General Machine Learning for CPUs on Python 3.7/1.0/generalml_p37_cpu_v1"

# build the model artifact inside PATH_ARTEFACT, declared as a binary classifier
artifact = prepare_generic_model(
    model=model,
    model_path=PATH_ARTEFACT,
    inference_conda_env=INFERENCE_ENV,
    use_case_type=UseCaseType.BINARY_CLASSIFICATION,
    data_science_env=False,
    force_overwrite=True,
)

loop1:   0%|          | 0/4 [00:00<?, ?it/s]



### Customize score.py

Any customization you want to add to the main code of the REST service should be written in the cell below,

before executing `artifact.save()`.

In [19]:
%%writefile {PATH_ARTEFACT}/score.py

import pandas as pd
import numpy as np

from xgboost import XGBClassifier

import json
import os
import pickle

import io
import logging 

# logging setup (OPTIONAL): root format/level plus two named loggers, both at INFO
logging.basicConfig(level=logging.INFO, format='%(name)s - %(levelname)s - %(message)s')

logger_pred = logging.getLogger('model-prediction')
logger_feat = logging.getLogger('input-features')
logger_pred.setLevel(logging.INFO)
logger_feat.setLevel(logging.INFO)

# file names of the pickled artifacts
scaler_name = 'scaler.pkl'
model_name = 'credit-scoring.pkl'

# the scaler lives at module level: load_model() fills it, pre_inference() uses it
scaler = None

# switch for the detailed (feature-level) logging
DEBUG = True

"""
   Inference script. This script is used for prediction by scoring server when schema is known.
"""

def load_model(model_file_name=model_name):
    """
    Loads the model (and its companion scaler) from the serialized format.

    Both pickle files are read from the directory that contains this script.
    As a side effect, the module-level ``scaler`` is set to the unpickled scaler.

    Parameters
    ----------
    model_file_name: name of the pickled model file (defaults to ``model_name``)

    Returns
    -------
    model:  a model instance on which predict API can be invoked

    Raises
    ------
    FileNotFoundError: if the model file or the scaler file is missing from
        the model directory
    """
    global scaler

    # resolve the directory once and build both paths from it
    model_dir = os.path.dirname(os.path.realpath(__file__))
    model_path = os.path.join(model_dir, model_file_name)
    scaler_path = os.path.join(model_dir, scaler_name)

    # Validate both artifacts up front, before anything is unpickled, so that a
    # missing scaler is reported as clearly as a missing model.
    # FileNotFoundError is a subclass of Exception: callers catching Exception keep working.
    for file_name, file_path in ((model_file_name, model_path), (scaler_name, scaler_path)):
        if not os.path.isfile(file_path):
            raise FileNotFoundError('{0} is not found in model directory {1}'.format(file_name, model_dir))

    # NOTE: pickle.load can execute arbitrary code; the artifacts must come from a trusted source
    with open(model_path, "rb") as file:
        model = pickle.load(file)
    logger_pred.info("Loaded the model !!!")

    with open(scaler_path, "rb") as file_sc:
        scaler = pickle.load(file_sc)
    logger_pred.info("Loaded the scaler !!!")

    return model

def pre_inference(data):
    """
    Scale the input features and append two zero-filled columns.

    Parameters
    ----------
    data: feature matrix, in the format accepted by the transform method of the global scaler.

    Returns
    -------
    data: the scaled features with two extra columns of zeros added on the right.

    """
    logger_pred.info("Preprocessing...")

    # step 1: scaling, done with the module-level scaler
    scaled = scaler.transform(data)

    # step 2: nulls are assumed to be absent, so the two additional columns
    # only contain zeros (one padding row for each input row)
    n_rows = scaled.shape[0]
    padding = np.zeros((n_rows, 2))

    return np.concatenate((scaled, padding), axis=1)

def post_inference(yhat):
    """
    Hook for post-processing the model output; at present it is a pass-through.

    Parameters
    ----------
    yhat: the value produced by model.predict.

    Returns
    -------
    yhat: the same value, unchanged (only a log message is emitted).

    """
    logger_pred.info("Postprocessing output...")

    return yhat

def predict(data, model=load_model()):
    """
    Returns prediction given the model and data to predict

    Parameters
    ----------
    model: Model instance returned by load_model API. The default is evaluated
        once, when this module is imported, so model and scaler are loaded a single time.
    data: Features to score. Either a JSON document (str, bytes or bytearray) that
        pandas.read_json can parse, or an already decoded payload accepted by the
        pandas.DataFrame constructor (e.g. a list of lists).

    Returns
    -------
    predictions: Output from scoring server
        Format: {'prediction': output from model.predict method}

    """
    logger_pred.info("In function predict...")

    # some check
    assert model is not None, "Model is not loaded"
    assert scaler is not None, "Scaler is not loaded"

    # raw bytes are decoded first, so they follow the same path as a JSON string
    if isinstance(data, (bytes, bytearray)):
        data = data.decode("utf-8")

    if isinstance(data, str):
        # JSON text: same parsing as before
        x = pd.read_json(io.StringIO(data)).values
    else:
        # payload already decoded by the caller: io.StringIO() would raise
        # TypeError on it, so build the feature matrix directly
        x = pd.DataFrame(data).values

    if DEBUG:
        logger_feat.info("Logging features")
        logger_feat.info(x)

    # preprocess data (for example normalize features)
    x = pre_inference(x)

    logger_pred.info("Invoking model......")

    # compute predictions (binary, from model)
    preds = model.predict(x)

    # to avoid the "not JSON serializable" error (np.array is not serializable)
    preds = preds.tolist()

    # post inference not needed
    return {'prediction': preds}

Overwriting ./model-files/score.py


### Some tests

In [20]:
# Enable automatic module reloading, so that a re-written score.py is picked up
# without restarting the kernel
%reload_ext autoreload
# %load_ext autoreload

%autoreload 2

# make score.py (written into PATH_ARTEFACT) importable by putting its folder
# first on the module search path

import sys 
sys.path.insert(0, PATH_ARTEFACT)

# NOTE: importing score already calls load_model() once, because predict() uses
# load_model() as the default value of its `model` parameter
from score import load_model, predict

In [21]:
# Load the model to memory
# NOTE(review): the result is bound to `_`, a name IPython also uses for the last
# cell output; the test cells below pass `_` to predict(), so a descriptive
# name (changed consistently in those cells) would be safer
_ = load_model()

INFO:model-prediction:Loaded the model !!!
INFO:model-prediction:Loaded the scaler !!!


In [22]:
# smoke test: four identical rows, each holding the values 1..10
x_input = np.tile(np.arange(1, 11), (4, 1))

# predict() receives the rows as a JSON string plus the model loaded above
predictions_test = predict(json.dumps(x_input.tolist()), _)

print("Tests results:", predictions_test, sep="\n")

INFO:model-prediction:In function predict...
INFO:input-features:Logging features
INFO:input-features:[[ 1  2  3  4  5  6  7  8  9 10]
 [ 1  2  3  4  5  6  7  8  9 10]
 [ 1  2  3  4  5  6  7  8  9 10]
 [ 1  2  3  4  5  6  7  8  9 10]]
INFO:model-prediction:Preprocessing...
INFO:model-prediction:Invoking model......
Tests results:
{'prediction': [1, 1, 1, 1]}


In [36]:
# now test with real values taken from train set
file_test = './cs-training.csv'

# rows [START, END) of the file are sent to the model
START = 200
END = 220

TARGET = 'SeriousDlqin2yrs'
features = ['RevolvingUtilizationOfUnsecuredLines', 'age',
            'NumberOfTime30-59DaysPastDueNotWorse', 'DebtRatio', 'MonthlyIncome',
            'NumberOfOpenCreditLinesAndLoans', 'NumberOfTimes90DaysLate',
            'NumberRealEstateLoansOrLines', 'NumberOfTime60-89DaysPastDueNotWorse',
            'NumberOfDependents']

# float_precision='round_trip' avoids spurious extra decimal digits
# when Pandas parses the floats
dati_orig = pd.read_csv(file_test, float_precision='round_trip')

labels = dati_orig[TARGET].values

# fix null values: same replacement values as before (0 and 5400), but fillna
# returns a new frame instead of assigning through .loc into a column subset
# of dati_orig, which raised SettingWithCopyWarning
dati_test = dati_orig[features].fillna({'NumberOfDependents': 0, 'MonthlyIncome': 5400})

# explicit positional slicing of the rows
x_input = dati_test.iloc[START:END].values

predictions_test = predict(json.dumps(x_input.tolist()), _)

print("Tests results:")
print(predictions_test)
print()
print("Expected labels:")
print(labels[START:END])

INFO:model-prediction:In function predict...
INFO:input-features:Logging features
INFO:input-features:[[4.54238640e-02 6.40000000e+01 0.00000000e+00 1.81311286e-01
  5.20000000e+03 4.00000000e+00 0.00000000e+00 1.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [3.11954668e-01 6.10000000e+01 0.00000000e+00 2.69281443e-01
  9.86600000e+03 1.20000000e+01 0.00000000e+00 2.00000000e+00
  0.00000000e+00 2.00000000e+00]
 [7.87389160e-02 4.60000000e+01 0.00000000e+00 1.66215372e-01
  4.80000000e+03 6.00000000e+00 0.00000000e+00 1.00000000e+00
  0.00000000e+00 4.00000000e+00]
 [1.37609462e-01 4.90000000e+01 0.00000000e+00 4.29533892e-01
  7.27200000e+03 1.10000000e+01 0.00000000e+00 2.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [3.01730380e-02 6.40000000e+01 0.00000000e+00 1.73869570e-01
  9.00000000e+03 8.00000000e+00 0.00000000e+00 1.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [2.19783750e-02 4.00000000e+01 0.00000000e+00 4.43755624e-01
  1.00000000e+04 1.20000000e+01 0.00000000e+00 5.00

### Save the model to the Model Catalog

In [24]:
# Save the prepared artifact to the model catalog; the value returned by
# save() is kept in catalog_entry
catalog_entry = artifact.save(
    display_name='credit-scoring',
    description='A model for credit scoring',
)

loop1:   0%|          | 0/5 [00:00<?, ?it/s]

artifact:/tmp/saved_model_fe016e20-4481-4562-8844-b64465619810.zip


### At this point you have to deploy from the Console UI

### Test the deployed model

After the successful creation of the Model Deployment, you can test the endpoint with the following code:

In [26]:
import requests
import oci

In [27]:
# URL used below to send prediction requests to the deployed model
# NOTE(review): environment-specific value (it embeds a region and a model
# deployment OCID) - replace it with the URL of your own deployment
ENDPOINT = "https://modeldeployment.eu-frankfurt-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.eu-frankfurt-1.amaaaaaangencdyaozcafif4lrslx2thb4evzqnjk37uhptqodoa3z5inhaa/predict"

In [32]:
# again using RP
rps = oci.auth.signers.get_resource_principals_signer()

# payload goes here
# body is already a JSON string and json=body encodes it once more;
# presumably the scoring server decodes it back to the string that predict()
# in score.py parses with pd.read_json - keep the two sides consistent
body = json.dumps(x_input.tolist())

# requests has no default timeout: without one, an unresponsive endpoint would
# block this cell forever
response = requests.post(ENDPOINT, json=body, auth=rps, timeout=60)

# fail loudly on HTTP errors instead of printing an error payload as if it
# were a list of predictions
response.raise_for_status()

print("These are the results from the deployed model:")
print(response.json())

print()

These are the results from the deployed model:
{'prediction': [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]}

