# Model Server

## Environment

In [1]:
import nuclio

In [2]:
import os

# Resolve the project folders relative to the notebook's parent directory.
base_path = os.path.abspath('../')
data_path, src_path, streaming_path = (
    os.path.join(base_path, folder) for folder in ('data', 'src', 'streaming')
)

# Publish the configuration through environment variables, which is where
# `init_context` (below) reads its settings from.
os.environ.update({
    # Project layout
    'base_path': base_path,
    'data_path': data_path,
    'src_path': src_path,
    'streaming_path': streaming_path,

    # Input / output locations
    'METRICS_TABLE': data_path,
    'FEATURES_TABLE': base_path + '/features',
    'PREDICTIONS_TABLE': streaming_path + '/predictions',

    # Batch budget (parsed with int() by `init_context`)
    'BATCHES_TO_GENERATE': '20',

    # Model location and metadata
    'model_path': base_path + '/artifacts/model/1/model.pkl',
    'model_name': 'netops_predictor_v1',
    'model_class': 'MLRunModel',

    # Column names used when writing predictions
    'model_col': 'model',
    'model_class_col': 'class',
    'prediction_col': 'predictions',
    'worker_col': 'worker',
    'hostname_col': 'hostname',
    'timestamp_col': 'when',
    'orig_timestamp_col': 'timestamp',
})

## Function

In [3]:
# nuclio: start-code

In [4]:
import os
import pandas as pd
import cloudpickle
import numpy as np
import json
from mlrun import get_or_create_ctx
import socket

In [5]:
def get_data_parquet(context):
    """Load the most recently modified parquet file from the features folder.

    Args:
        context: object exposing `features_table`, the directory to scan.

    Returns:
        pd.DataFrame: contents of the newest file (by modification time)
        whose name ends with 'parquet' or 'pq'.

    Raises:
        IndexError: if the directory holds no matching file.
    """
    folder = context.features_table

    candidates = []
    for entry in os.listdir(folder):
        if entry.endswith(('parquet', 'pq')):
            candidates.append(os.path.join(folder, entry))

    # Newest first; the head of the list is the file to serve.
    candidates.sort(key=os.path.getmtime, reverse=True)
    newest = candidates[0]
    return pd.read_parquet(newest)

In [6]:
def save_to_parquet(context, df: pd.DataFrame):
    """Write a predictions frame to a parquet file named after its time span.

    The original timestamp index level is renamed to `context.timestamp_col`
    and cast to millisecond resolution, and the model / class / worker /
    hostname columns are appended to the index before writing.

    Args:
        context: object exposing `orig_timestamp_col`, `timestamp_col`,
            `model_col`, `model_class_col`, `worker_col`, `hostname_col`
            and `predictions_table` (the output directory).
        df: frame to persist. The code assumes the timestamp is the first
            index level and that the index is multi-level (it reads
            `index[0][0]`) — TODO confirm against the feature files.
    """
    print('Saving features to Parquet')

    source_ts = context.orig_timestamp_col
    target_ts = context.timestamp_col

    # Final index: the existing levels (timestamp renamed) plus server metadata.
    index_levels = [target_ts if level == source_ts else level for level in df.index.names]
    index_levels.extend([
        context.model_col,
        context.model_class_col,
        context.worker_col,
        context.hostname_col,
    ])

    # Flatten, then move the timestamp to its new name at ms resolution
    # (the original comment notes parquet needs ms rather than ns).
    flat = df.reset_index()
    flat[target_ts] = flat.pop(source_ts).astype('datetime64[ms]')
    indexed = flat.set_index(index_levels)

    # File name is "<first timestamp>-<last timestamp>.parquet".
    stamp_format = '%Y%m%dT%H%M%S'
    span_start = indexed.index[0][0].strftime(stamp_format)
    span_end = indexed.index[-1][0].strftime(stamp_format)
    target = os.path.join(context.predictions_table, '{}-{}.parquet'.format(span_start, span_end))

    with open(target, 'wb+') as sink:
        indexed.to_parquet(sink)

In [7]:
def init_context(context):
    """Populate the function context with configuration and the loaded model.

    Settings are read from environment variables (with the defaults shown
    below) and stored as attributes on `context`. The model is unpickled from
    the path given by the required `model_path` variable, and the predictions
    output directory is created if it does not exist yet.

    Args:
        context: object to receive the attributes (the nuclio function
            context when deployed).

    Raises:
        KeyError: if the `model_path` environment variable is not set.
        ValueError: if `BATCHES_TO_GENERATE` is not an integer.
        FileExistsError: if the predictions path exists but is not a directory.
    """
    # How many batches to create? (-1 will run forever)
    context.batches_to_generate = int(os.getenv('BATCHES_TO_GENERATE', 20))
    context.batches_generated = 0

    # Set vars from env
    context.model_name = os.getenv('model_name', 'netops_model')
    context.model_col = os.getenv('model_col', 'model')
    context.model_class_col = os.getenv('model_class_col', 'class')
    context.worker_col = os.getenv('worker_col', 'worker')
    context.hostname_col = os.getenv('hostname_col', 'hostname')
    context.timestamp_col = os.getenv('timestamp_col', 'when')
    context.orig_timestamp_col = os.getenv('orig_timestamp_col', 'timestamp')
    context.features_table = os.getenv('FEATURES_TABLE', 'netops_features')
    context.predictions_table = os.getenv('PREDICTIONS_TABLE', 'netops_predictions')
    context.prediction_col = os.getenv('prediction_col', 'prediction')

    # Load model
    model_path = os.environ['model_path']
    if model_path.startswith('store://'):
        # Resolve an MLRun store URI to a data item and look for model.pkl
        # under its url.
        mlctx = get_or_create_ctx('inference')
        model_item = mlctx.get_dataitem(model_path)
        model_path = os.path.join(model_item.url, 'model.pkl')
    # SECURITY: unpickling executes arbitrary code embedded in the file;
    # only point `model_path` at model artifacts you trust.
    with open(model_path, 'rb') as f:
        model = cloudpickle.load(f)
    context.model = model
    # The class name is taken from the loaded object itself, not from any
    # environment variable.
    context.model_class = type(model).__name__

    # Create saving directory if needed. exist_ok avoids the check-then-create
    # race when several workers initialise at once, and still fails loudly if
    # the path exists but is not a directory.
    os.makedirs(context.predictions_table, exist_ok=True)

In [8]:
def handler(context, event):
    """Serve predictions, either as a scheduled batch or for a direct request.

    * Cron trigger (also the default when the trigger has no `kind`): read the
      newest features file, add predictions and server metadata columns, write
      the result with `save_to_parquet`, and count one generated batch.
    * Any other trigger: parse the JSON body, predict on `body['instances']`
      and return the predictions as a list. These calls do not consume the
      batch budget.

    Once `context.batches_generated` reaches `context.batches_to_generate`
    (unless the budget is -1, meaning unlimited) the handler does nothing and
    returns None, for both kinds of trigger.

    Args:
        context: function context prepared by `init_context`; `worker_id` is
            also read from it (not set in this file — presumably provided by
            the nuclio runtime).
        event: trigger event exposing `trigger` and `body`.

    Returns:
        list | None: predictions for a direct request, otherwise None.
    """
    # Limit the number of generated batches to save cluster resources
    # for people forgetting the demo running.
    # Strict comparison: a budget of N yields exactly N batches (the previous
    # `<=` allowed N + 1).
    unlimited = context.batches_to_generate == -1
    if not unlimited and context.batches_generated >= context.batches_to_generate:
        return None

    if getattr(event.trigger, 'kind', 'cron') != 'cron':
        # Direct request: predict on the posted instances.
        body = json.loads(event.body)
        feats = np.asarray(body['instances'])
        result: np.ndarray = context.model.predict(feats)
        return result.tolist()

    # Get latest parquets
    df = get_data_parquet(context)

    # Predict
    df[context.prediction_col] = context.model.predict(df.values)

    # Add server metadata
    df[context.model_col] = context.model_name
    df[context.model_class_col] = context.model_class
    df[context.worker_col] = context.worker_id
    df[context.hostname_col] = socket.gethostname()

    # Save
    save_to_parquet(context, df)

    # Update batches count
    context.batches_generated += 1

In [9]:
# nuclio: end-code

## Local test

In [10]:
import mlrun
import os 

# Import the sklearn classifier trainer from the function hub and configure
# its credentials, v3io mount and build image.
fn = mlrun.import_function('hub:///sklearn_classifier')
fn.apply(mlrun.platforms.v3io_cred())
fn.apply(mlrun.mount_v3io())
fn.spec.build.base_image = 'mlrun/ml-models'
# NOTE(review): the captured output of the local `init_context` call warns
# that the model was pickled with scikit-learn 0.24.2 but loaded with 0.23.2;
# consider aligning the serving environment with this pin.
fn.spec.build.commands = ['pip install scikit-learn==0.24.2']

# Training parameters
sample_size = -1
test_size = 0.1
train_val_split = 0.75
label_column = 'is_error'

project_root = os.path.abspath('../')
# os.listdir returns entries in arbitrary order, so sort before taking the
# last one to make the choice deterministic (the newest file if the names are
# timestamp-prefixed — TODO confirm).
dataset_file = sorted(os.listdir(project_root + '/features'))[-1]
# NOTE(review): the file name is taken from /features but the path points into
# /data. Kept as in the original; confirm which folder should be trained on.
dataset_path = project_root + '/data' + '/' + dataset_file

task = mlrun.new_task(name='train',
               params={"sample"          : sample_size,
                       "label_column"    : label_column,
                       "test_size"       : test_size,
                       "train_val_split" : train_val_split},
               inputs={"dataset"         : dataset_path},
               hyper_params={'model_pkg_class': ["sklearn.ensemble.RandomForestClassifier", 
                                                "sklearn.linear_model.LogisticRegression",
                                                "sklearn.ensemble.AdaBoostClassifier"]},
               selector='max.accuracy',
               outputs=['model', 'test_set'],
               out_path = project_root + '/artifacts')

In [11]:
# Build the training function's image (the log below shows the
# `pip install scikit-learn==0.24.2` build step running).
fn.deploy()

> 2021-10-03 14:05:06,938 [info] Started building image: .mlrun/func-default-sklearn-classifier:latest
E1003 14:05:49.703079       1 aws_credentials.go:77] while getting AWS credentials NoCredentialProviders: no valid providers in chain. Deprecated.
	For verbose messaging see aws.Config.CredentialsChainVerboseErrors
[36mINFO[0m[0040] Retrieving image manifest mlrun/ml-models:0.7.0-rc7 
[36mINFO[0m[0043] Retrieving image manifest mlrun/ml-models:0.7.0-rc7 
[36mINFO[0m[0045] Built cross stage deps: map[]                
[36mINFO[0m[0045] Retrieving image manifest mlrun/ml-models:0.7.0-rc7 
[36mINFO[0m[0047] Retrieving image manifest mlrun/ml-models:0.7.0-rc7 
[36mINFO[0m[0049] Executing 0 build triggers                   
[36mINFO[0m[0049] Unpacking rootfs as cmd RUN pip install scikit-learn==0.24.2 requires it. 
[36mINFO[0m[0143] RUN pip install scikit-learn==0.24.2         
[36mINFO[0m[0143] Taking snapshot of full filesystem...        
[36mINFO[0m[0167] cmd: /bin/s

True

In [12]:
# Execute the hyper-parameter training task on the deployed function; the run
# result is kept in `models`.
models = fn.run(task)

> 2021-10-03 14:08:23,928 [info] starting run train uid=5c0dfea1f6c14c9f9a05f52fd324ea9e DB=http://mlrun-api:8080
> 2021-10-03 14:08:24,066 [info] Job is running in the background, pod: train-pg6kr
> 2021-10-03 14:08:36,274 [info] best iteration=1, used criteria max.accuracy
> 2021-10-03 14:08:36,573 [info] run executed, status=completed
Converting input from bool to <class 'numpy.uint8'> for compatibility.
Converting input from bool to <class 'numpy.uint8'> for compatibility.
Converting input from bool to <class 'numpy.uint8'> for compatibility.
lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
Converting input from bool to <class 'numpy.uint8'> for compatibility.
C

project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
default,...d324ea9e,0,Oct 03 14:08:31,completed,train,v3io_user=danikind=jobowner=dani,dataset,sample=-1label_column=is_errortest_size=0.1train_val_split=0.75,best_iteration=1accuracy=1.0test-error=0.0rocauc=1.0brier_score=0.0f1-score=1.0precision_score=1.0recall_score=1.0,test_setprobability-calibrationconfusion-matrixfeature-importancesprecision-recall-binaryroc-binarymodeliteration_results





> 2021-10-03 14:08:43,313 [info] run executed, status=completed


In [13]:
# Initialise the handler state locally from the environment variables set in
# the Environment section.
# NOTE(review): `context` is not defined in any visible cell — presumably it
# is injected into the namespace by `import nuclio` (nuclio-jupyter); confirm
# so that Restart & Run All keeps working.
init_context(context)

Trying to unpickle estimator DecisionTreeClassifier from version 0.24.2 when using version 0.23.2. This might lead to breaking code or invalid results. Use at your own risk.
Trying to unpickle estimator RandomForestClassifier from version 0.24.2 when using version 0.23.2. This might lead to breaking code or invalid results. Use at your own risk.


In [14]:
# Simulate a cron trigger so the handler takes its batch branch (read the
# latest features, predict, write parquet). That branch has no return value,
# so `out` is expected to be None.
event = nuclio.Event(body='', trigger={'kind': 'cron'})
out = handler(context, event)
out

Saving features to Parquet


## Test

In [99]:
from mlrun import code_to_function, mount_v3io
import os

# Paths used to configure the deployed function.
base_path = os.path.abspath('../')
data_path = os.path.join(base_path, 'data')
src_path = os.path.join(base_path, 'src')
# NOTE(review): the Environment cell at the top uses base_path/streaming here;
# with this value PREDICTIONS_TABLE resolves to a different folder than in the
# local test — confirm that is intended.
streaming_path = base_path

In [100]:
# Package this notebook's nuclio code section as the 'inference-server'
# nuclio function.
fn = code_to_function('inference-server',
                      kind='nuclio',
                      project='network-operations', image='mlrun/ml-models')

# Same settings `init_context` reads from the environment.
# (A comma was missing after the FEATURES_TABLE entry, which made this cell a
# SyntaxError.)
fn.set_envs({'METRICS_TABLE' : data_path,
             'FEATURES_TABLE' : base_path+'/features',
             'PREDICTIONS_TABLE' : streaming_path+'/predictions',
             'BATCHES_TO_GENERATE' : '20',
             'model_path' : os.path.abspath('../') + '/artifacts/model/1/model.pkl',
             'model_name' : 'netops_predictor_v1',
             'model_class' : 'MLRunModel',
             'model_col' : 'model',
             'model_class_col' : 'class',
             'prediction_col' : 'predictions',
             'worker_col' : 'worker',
             'hostname_col' : 'hostname',
             'timestamp_col' : 'when',
             'orig_timestamp_col' : 'timestamp'})

fn.apply(mount_v3io())
# Invoke the handler's batch branch every minute.
fn.add_trigger('cron', nuclio.triggers.CronTrigger(interval='1m'))

<mlrun.runtimes.function.RemoteRuntime at 0x7f17b8c05790>

In [101]:
# Save the function object, then export its spec to YAML (the log below
# confirms the file written to ../src/inference-server.yaml).
fn.save()
fn.export('../src/inference-server.yaml')

> 2021-10-03 13:19:45,306 [info] function spec saved to path: ../src/inference-server.yaml


<mlrun.runtimes.function.RemoteRuntime at 0x7f17b8c05790>

In [102]:
# Build and deploy the nuclio function into the 'network-operations' project;
# the call returns the function's external invocation URL (shown below).
fn.deploy(project='network-operations')

> 2021-10-03 13:19:45,312 [info] Starting remote function deploy
2021-10-03 13:19:45  (info) Deploying function
2021-10-03 13:19:45  (info) Building
2021-10-03 13:19:45  (info) Staging files and preparing base images
2021-10-03 13:19:45  (info) Building processor image
2021-10-03 13:19:51  (info) Build complete
> 2021-10-03 13:19:59,560 [info] successfully deployed function: {'internal_invocation_urls': ['nuclio-network-operations-inference-server.default-tenant.svc.cluster.local:8080'], 'external_invocation_urls': ['default-tenant.app.dev8.lab.iguazeng.com:30932']}


'http://default-tenant.app.dev8.lab.iguazeng.com:30932'