# Nuclio - prediction function

## Set up the environment

In [1]:
# nuclio: ignore
import nuclio

### Set environment variables

In [2]:
# Iguazio access
%nuclio env FRAMESD=${V3IO_FRAMESD}
%nuclio env V3IO_USERNAME=${V3IO_USERNAME}
%nuclio env V3IO_ACCESS_KEY=${V3IO_ACCESS_KEY}

# Model handling
%nuclio env MODEL_FILE=lgb.model
# MODEL_FILEPATH is intentionally defined twice, once per environment:
#   -c value: location of the model inside the function image (the build
#             commands in this notebook download the model to /tmp/mlmodel)
#   -l value: location of the model relative to the notebook's working directory
# NOTE(review): -c / -l are presumably nuclio-jupyter's "function config only" /
# "local only" flags -- confirm against the nuclio-jupyter documentation.
%nuclio env -c MODEL_FILEPATH=/tmp/mlmodel/${MODEL_FILE}
%nuclio env -l MODEL_FILEPATH=models/trained/${MODEL_FILE}

# Function variables
# NUM_OF_PRODUCTS_TO_RETURN is read as an int by init_context (top-k size);
# the two table names are read by init_context as well.
%nuclio env NUM_OF_PRODUCTS_TO_RETURN=4
%nuclio env CUSTOMERS_TABLE=customers
%nuclio env PREDICTIONS_TABLE=predictions

%nuclio: setting 'FRAMESD' environment variable
%nuclio: setting 'V3IO_USERNAME' environment variable
%nuclio: setting 'V3IO_ACCESS_KEY' environment variable
%nuclio: setting 'MODEL_FILE' environment variable
%nuclio: setting 'MODEL_FILEPATH' environment variable
%nuclio: setting 'NUM_OF_PRODUCTS_TO_RETURN' environment variable
%nuclio: setting 'CUSTOMERS_TABLE' environment variable
%nuclio: setting 'PREDICTIONS_TABLE' environment variable


### Base image

In [3]:
%nuclio config spec.build.baseImage = "python:3.6-jessie"

%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'


### Installations

In [4]:
%%nuclio cmd
pip install requests
pip install pandas
pip install lightgbm
pip install v3io_frames

Collecting lightgbm
[?25l  Downloading https://files.pythonhosted.org/packages/77/0f/5157e6b153b3d4a70dc5fbe2ab6f209604197590f387f03177b7a249ac60/lightgbm-2.2.3-py2.py3-none-manylinux1_x86_64.whl (1.2MB)
[K    100% |████████████████████████████████| 1.2MB 25.1MB/s 
Installing collected packages: lightgbm
Successfully installed lightgbm-2.2.3


### Get the model

In [29]:
# nuclio: ignore
# Verify the model is in the shared data directory
os.environ['MODEL_SHARED_FILEPATH'] = '/bigdata/recommendation_demo/models/'
os.environ['FULL_LOCAL_MODEL_PATH'] = f'{os.path.join(os.getcwd(), os.environ["MODEL_FILEPATH"])}'
!mkdir -p /v3io${MODEL_SHARED_FILEPATH}
!cp ${FULL_LOCAL_MODEL_PATH} /v3io${MODEL_SHARED_FILEPATH}

%nuclio: setting 'MODEL_SHARED_FILEPATH' environment variable


In [32]:
%nuclio env MODEL_SHARED_FILEPATH = ${MODEL_SHARED_FILEPATH}

%nuclio: setting 'MODEL_SHARED_FILEPATH' environment variable


In [30]:
%%nuclio cmd -c 
apt-get update && apt-get install -y wget
mkdir -p /tmp/mlmodel
wget -O /tmp/mlmodel/${MODEL_FILE} --header "x-v3io-session-key: ${V3IO_ACCESS_KEY}" http://${V3IO_WEBAPI_SERVICE_HOST}:8081${MODEL_SHARED_FILEPATH}${MODEL_FILE}

### Imports

In [6]:
# Util
import datetime
import io
import json
import os

import requests

# Function
import lightgbm as lgb
import pandas as pd

# DB
import v3io_frames as v3f

## Function code

### Init context

In [45]:
def init_context(context):
    """Prepare the function context: DB client, table names, model and top-k size.

    Attributes set on ``context``:
        client: v3io_frames client used for all reads and writes.
        customers_table: name of the customers table (``CUSTOMERS_TABLE``).
        predictions: name of the predictions TSDB table (``PREDICTIONS_TABLE``).
        model: LightGBM booster loaded from ``MODEL_FILEPATH``.
        k: number of products to keep per request (``NUM_OF_PRODUCTS_TO_RETURN``).

    Side effect: the predictions TSDB table is deleted (if present) and
    re-created, so every initialisation starts from an empty table.

    Raises:
        KeyError: if a required environment variable is not set.
        ValueError: if ``NUM_OF_PRODUCTS_TO_RETURN`` is not an integer.
    """
    # Define DB
    # Honour the FRAMESD address configured for the function; fall back to the
    # previous hard-coded address when it is unset or empty.
    frames_address = os.environ.get('FRAMESD') or 'framesd:8081'
    context.client = v3f.Client(frames_address)

    context.customers_table = os.environ['CUSTOMERS_TABLE']

    # Start from a clean predictions table on every initialisation
    context.predictions = os.environ['PREDICTIONS_TABLE']
    context.client.delete('tsdb', context.predictions, if_missing=1)
    context.client.create('tsdb', context.predictions, attrs={'rate': '1/s'})

    # Define model
    context.model = lgb.Booster(model_file=os.environ['MODEL_FILEPATH'])

    # Vars
    context.k = int(os.environ['NUM_OF_PRODUCTS_TO_RETURN'])

### Format dataframe for prediction

In [8]:
def prepare_df(df):
    """Build the feature DataFrame for a customer from its ``products`` column.

    Args:
        df: DataFrame whose first row holds, in the ``products`` column, a JSON
            document describing the customer's product features.

    Returns:
        DataFrame parsed from that JSON document.

    Raises:
        KeyError: if ``df`` has no ``products`` column.
        IndexError: if ``df`` has no rows.
    """
    # Only the first row's JSON payload is used
    products_json = df['products'].iloc[0]

    # Wrap the payload in a file-like object: a bare string handed to
    # read_json may be interpreted as a path/URL, and literal-JSON string
    # input is deprecated in recent pandas versions.
    return pd.read_json(io.StringIO(products_json))

In [39]:
def handler(context, event):
    """Score products for one customer and store the top results.

    ``event.body`` must provide ``id`` (customer id, convertible to ``int``)
    and ``store``. The customer's features are read from the customers table,
    scored with ``context.model``, filtered to predictions above 0.01, cut to
    the ``context.k`` best and written to the predictions TSDB table indexed by
    (time, store, prediction_num, product_id). Nothing is written when the
    customer has no stored features.

    Returns:
        None.

    Raises:
        KeyError: if ``id`` or ``store`` is missing from the event body.
        ValueError: if ``id`` is not an integer value.
    """
    # Get user
    # Coerce the id to int *before* it is interpolated into the filter
    # expression, so request data cannot inject arbitrary filter syntax.
    customer_id = int(event.body['id'])
    store = event.body['store']
    context.logger.debug(f'Predicting for: {customer_id} in {store}')

    # Get user parameters
    customer_df = context.client.read('kv', context.customers_table, filter=f'id=={customer_id}').reset_index()

    # Do we have features for the user?
    if customer_df.empty:
        return

    # Create features df for the user
    features = prepare_df(customer_df)

    # Predict
    features["prediction"] = context.model.predict(features)

    # Eliminate low confidence results
    scored = features.loc[features.prediction > 0.01, ['prediction', 'product_id']]

    # Keep top products; prediction_num is the 0-based rank of each product
    best_products = (
        scored
        .sort_values(by='prediction', ascending=False)
        .iloc[:context.k]
        .reset_index(drop=True)
        .rename_axis('prediction_num')
        .reset_index()
    )
    best_products['customer_id'] = customer_id
    best_products['store'] = store
    best_products['time'] = datetime.datetime.now()
    best_products = best_products.set_index(['time', 'store', 'prediction_num', 'product_id'])
    context.logger.debug(f'Predicted:\n{best_products}')

    # Save results to DB
    context.client.write('tsdb', context.predictions, best_products)

In [46]:
# nuclio: ignore
init_context(context)

In [40]:
# nuclio: ignore
event = nuclio.Event(body={'id': '1232', 'store': '03311311313011021022'})
handler(context, event)

In [58]:
# nuclio: ignore
context.client.read('tsdb', query='select prediction from predictions group by product_id', step='1m', start="now-1h", end='now',multi_index=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,prediction
time,product_id,Unnamed: 2_level_1
2019-07-08 12:41:50,21903,0.091223
2019-07-08 12:42:50,21903,0.091223
2019-07-08 12:43:50,21903,0.091223
2019-07-08 12:44:50,21903,0.091223
2019-07-08 12:45:50,21903,0.091223
2019-07-08 12:46:50,21903,0.091223
2019-07-08 12:47:50,21903,0.091223
2019-07-08 12:48:50,21903,0.091223
2019-07-08 12:49:50,21903,0.091223
2019-07-08 12:50:50,21903,0.091223


In [48]:
%nuclio deploy -n prediction_server -p recommendation_engine -c

[nuclio.deploy] 2019-07-08 13:40:18,359 (info) Building processor image
[nuclio.deploy] 2019-07-08 13:40:20,380 (info) Pushing image
[nuclio.deploy] 2019-07-08 13:40:20,381 (info) Build complete
[nuclio.deploy] 2019-07-08 13:40:26,436 (info) Function deploy complete
[nuclio.deploy] 2019-07-08 13:40:26,442 done updating prediction-server, function address: 18.197.206.39:31840
%nuclio: function deployed
