# Model Server

In [1]:
import nuclio

In [6]:
%nuclio config kind = "nuclio"

%nuclio: setting kind to 'nuclio'


In [7]:
%%nuclio env

FEATURES_TABLE = /User/demo-network-operations/streaming/features
PREDICTIONS_TABLE = /User/demo-network-operations/streaming/inference_pq
prediction_col = predictions
model_path = store://network-operations/train_model#f2ea18dd-504d-4d14-83d2-a326fe5afc37

%nuclio: setting 'FEATURES_TABLE' environment variable
%nuclio: setting 'PREDICTIONS_TABLE' environment variable
%nuclio: setting 'prediction_col' environment variable
%nuclio: setting '# model_path' environment variable
%nuclio: setting 'model_path' environment variable


## Function

In [8]:
# nuclio: start-code

In [55]:
import os
import pandas as pd
import cloudpickle
import numpy as np
import json
from mlrun import get_or_create_ctx

In [10]:
def get_data_parquet(context):
    """Load the most recently modified parquet file from the features directory.

    Args:
        context: Function context; ``context.features_table`` is the path of
            the directory that is listed for feature files.

    Returns:
        pd.DataFrame: Content of the newest file (by modification time) whose
        name ends with ``parquet`` or ``pq``.

    Raises:
        FileNotFoundError: If the directory holds no matching file (or does
            not exist at all).
    """
    features_dir = context.features_table
    candidates = [
        os.path.join(features_dir, name)
        for name in os.listdir(features_dir)
        if name.endswith(('parquet', 'pq'))
    ]
    if not candidates:
        # Without this guard the lookup of the first element fails with an
        # IndexError that says nothing about the actual problem.
        raise FileNotFoundError(f'No parquet files found in {features_dir}')

    # Only the newest file is needed, so one max() pass replaces a full sort.
    latest = max(candidates, key=os.path.getmtime)
    return pd.read_parquet(latest)

In [11]:
def save_to_parquet(context, df: pd.DataFrame):
    """Write ``df`` to a parquet file named after the time range it covers.

    The file is created inside ``context.predictions_table`` and is named
    ``<first>-<last>.parquet``, where both parts are ``%Y%m%dT%H%M%S``
    renderings of the first and last timestamps in row order.

    Args:
        context: Function context providing ``predictions_table`` (a directory
            path).
        df: Frame with a named index. After ``reset_index()`` it must expose a
            ``timestamp`` column; in practice ``timestamp`` is an index level.
            Both a MultiIndex and a plain single-level index are accepted.
    """
    print('Saving features to Parquet')

    # Need to fix timestamps from ns to ms if we write to parquet
    index_names = list(df.index.names)
    df = df.reset_index()
    df['timestamp'] = df['timestamp'].astype('datetime64[ms]')

    # Restore the original index
    df = df.set_index(index_names)

    # Read the timestamps by level name so that the position of the level and
    # the kind of index (MultiIndex or single-level) do not matter. Fall back
    # to the first level when no level carries that name.
    level = 'timestamp' if 'timestamp' in index_names else 0
    stamps = df.index.get_level_values(level)

    # Save parquet
    stamp_format = '%Y%m%dT%H%M%S'
    first_timestamp = stamps[0].strftime(stamp_format)
    last_timestamp = stamps[-1].strftime(stamp_format)
    filename = first_timestamp + '-' + last_timestamp + '.parquet'
    filepath = os.path.join(context.predictions_table, filename)
    with open(filepath, 'wb+') as f:
        df.to_parquet(f)

In [35]:
def init_context(context):
    """Populate ``context`` with settings, the loaded model and an output directory.

    Environment variables read:
        FEATURES_TABLE: stored as ``context.features_table``
            (default ``'netops_features'``).
        PREDICTIONS_TABLE: stored as ``context.predictions_table``
            (default ``'netops_predictions'``); the directory is created if
            it is missing.
        prediction_col: stored as ``context.prediction_col``
            (default ``'prediction'``).
        model_path: required. Either a path to a pickled model file, or a
            ``store://`` URI that is resolved through the MLRun context, in
            which case ``model.pkl`` under the resolved URL is loaded.

    Args:
        context: Function context object that receives the attributes above
            plus ``context.model``.

    Raises:
        KeyError: If ``model_path`` is not set in the environment.
    """
    # Set vars from env
    context.features_table = os.getenv('FEATURES_TABLE', 'netops_features')
    context.predictions_table = os.getenv('PREDICTIONS_TABLE', 'netops_predictions')
    context.prediction_col = os.getenv('prediction_col', 'prediction')

    # Load model
    model_path = os.environ['model_path']
    if model_path.startswith('store://'):
        mlctx = get_or_create_ctx('inference')
        # Kept under its own name so it is not confused with the loaded model.
        model_item = mlctx.get_dataitem(model_path)
        model_path = os.path.join(model_item.url, 'model.pkl')
    # NOTE(security): unpickling executes arbitrary code from the file; only
    # point model_path at trusted artifacts.
    with open(model_path, 'rb') as f:
        context.model = cloudpickle.load(f)

    # Create saving directory if needed; exist_ok avoids the race between an
    # existence check and the creation when several workers start together.
    os.makedirs(context.predictions_table, exist_ok=True)

In [62]:
def handler(context, event):
    """Score the latest feature file on cron events, or the request body otherwise.

    Cron events (also the default when the trigger exposes no ``kind``):
    load the newest features parquet, add the model output as the
    ``context.prediction_col`` column, and save the frame to the predictions
    directory. Nothing is returned.

    Any other event: the body must provide an ``instances`` entry holding the
    rows to score; the predictions are returned as a plain list.

    Args:
        context: Function context prepared by ``init_context`` (``model``,
            ``prediction_col``, table paths).
        event: Incoming event. ``event.trigger`` may be an object with a
            ``kind`` attribute or a plain dict with a ``'kind'`` key;
            ``event.body`` may be a JSON string/bytes or an already decoded
            mapping.

    Returns:
        list | None: Predictions for request events, ``None`` for cron events.
    """
    trigger = event.trigger
    # A dict trigger has no `kind` attribute, so getattr() alone would always
    # fall back to the default; read the key for that case.
    if isinstance(trigger, dict):
        trigger_kind = trigger.get('kind', 'cron')
    else:
        trigger_kind = getattr(trigger, 'kind', 'cron')

    if trigger_kind == 'cron':
        # Get latest parquets
        df = get_data_parquet(context)

        # Run the model on the raw values and attach the result as a column
        df[context.prediction_col] = context.model.predict(df.values)

        # Save
        save_to_parquet(context, df)
        return None

    body = event.body
    # Only raw JSON text needs parsing; a body that arrives already decoded
    # would make json.loads raise a TypeError.
    if isinstance(body, (str, bytes, bytearray)):
        body = json.loads(body)
    feats = np.asarray(body['instances'])
    result = context.model.predict(feats)
    return np.asarray(result).tolist()

In [63]:
# nuclio: end-code

## Local test

In [58]:
# Run the initialisation step locally so `context` carries the settings, model
# and output directory that handler() relies on.
# NOTE(review): `context` is not defined in any visible cell — presumably it is
# injected by the nuclio Jupyter extension; confirm.
init_context(context)

[mlrun] 2020-07-12 13:08:08,964 logging run results to: http://mlrun-api:8080




In [48]:
# Simulate a cron invocation; handler() takes its batch-scoring branch for this event.
event = nuclio.Event(body='', trigger={'kind': 'cron'})
# That branch saves a parquet file and has no return value, so `out` is None.
out = handler(context, event)
out

Saving features to Parquet


## Deploy and test

In [2]:
from mlrun import code_to_function, mount_v3io

In [3]:
# Create a Nuclio function object named 'inference-server' in the
# 'network-operations' project.
# NOTE(review): no source file is passed, so this presumably packages this
# notebook's start-code/end-code section — confirm against the mlrun docs.
fn = code_to_function('inference-server',
                      kind='nuclio',
                      project='network-operations')
# Base image used when the function is built.
fn.spec.base_spec['spec']['build']['baseImage'] = 'mlrun/ml-models'
# Presumably mounts v3io so the /User/... paths from the env cell resolve
# inside the function — verify.
fn.apply(mount_v3io())
# Add a cron trigger with a '1m' interval; handler() treats cron events as
# batch-scoring runs.
fn.add_trigger('cron', nuclio.triggers.CronTrigger(interval='1m'))

<mlrun.runtimes.function.RemoteRuntime at 0x7f1a8a3a7cc0>

In [4]:
# Save the function definition, then write its spec to a YAML file (see the
# log lines in the cell output).
fn.save()
fn.export('../src/inference-server.yaml')

> 2020-07-22 09:36:33,030 [debug] saving function: inference-server, tag: 
> 2020-07-22 09:36:33,076 [info] function spec saved to path: ../src/inference-server.yaml


<mlrun.runtimes.function.RemoteRuntime at 0x7f1a8a3a7cc0>

In [66]:
# Build and deploy the function; the cell output ends with the address of the
# deployed endpoint.
fn.deploy(project='network-operations')

[mlrun] 2020-07-12 13:11:11,410 deploy started
[nuclio] 2020-07-12 13:11:20,608 (info) Build complete
[nuclio] 2020-07-12 13:11:36,801 (info) Function deploy complete
[nuclio] 2020-07-12 13:11:36,807 done updating network-operations-inference-server, function address: 192.168.224.209:31346


'http://192.168.224.209:31346'

In [51]:
# Inspect one file from the predictions directory.
# NOTE(review): file names are derived from the data's timestamps on each run,
# so this exact name will need updating when the notebook is re-run.
pd.read_parquet('../streaming/inference_pq/20200712T104242-20200712T104252.parquet')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,cpu_utilization,latency,packet_loss,throughput,predictions
timestamp,company,data_center,device,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-07-12 10:42:42.074,Jones-Smith,Justin_Ports,1551700666269,68.173121,6.979815,0.0,248.089786,False
2020-07-12 10:42:42.074,Jones-Smith,Justin_Ports,6563505230628,84.395612,0.539184,0.0,248.232249,False
2020-07-12 10:42:42.074,Baker-Johnson,David_Alley,2034261575854,76.958371,0.0,0.0,238.849329,False
2020-07-12 10:42:42.074,Baker-Johnson,David_Alley,2175534833578,66.400887,8.233415,0.0,248.632805,False
2020-07-12 10:42:42.074,Baker-Johnson,Amanda_Club,7190547782118,73.719916,0.09493,0.0,270.105857,False
2020-07-12 10:42:42.074,Baker-Johnson,Amanda_Club,1789940329694,100.0,100.0,50.0,0.0,True
2020-07-12 10:42:47.074,Jones-Smith,Alexander_Forks,2635623113798,68.790027,0.0,0.0,280.462707,False
2020-07-12 10:42:47.074,Jones-Smith,Alexander_Forks,5409875454366,59.250415,0.048837,0.602311,232.570927,False
2020-07-12 10:42:47.074,Jones-Smith,Justin_Ports,1551700666269,82.389219,0.0,0.0,245.247448,False
2020-07-12 10:42:47.074,Jones-Smith,Justin_Ports,6563505230628,63.886912,7.110852,0.0,239.023515,False
