In [20]:
import nuclio 
import mlrun
import os

In [88]:
%%nuclio config 
kind = "nuclio"
spec.build.baseImage = "mlrun/mlrun:0.6.5-rc15"

%nuclio: setting kind to 'nuclio'
%nuclio: setting spec.build.baseImage to 'mlrun/mlrun'


In [2]:
# nuclio: start-code

In [19]:
import mlrun.feature_store as fs
import mlrun
import datetime
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import os
import requests
import json
import numpy as np

In [21]:
def modify_data(context,ticker_data):
    """Encode the ticker symbol and rescale the numeric feature columns.

    The work is done on a copy, so the frame passed in by the caller is left
    untouched.

    Parameters
    ----------
    context
        Object exposing ``priceMMS``, ``volumeMMS`` and ``sentimentMMS``; each
        must provide ``fit_transform``.
    ticker_data : pandas.DataFrame
        Frame with a ``symbol`` column plus any number of feature columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns: ``symbol`` replaced by its integer
        code, and every column whose name contains one of the markers below
        replaced by the output of the matching scaler.
    """
    ticker_to_int = {'GOOGL' : 0,'MSFT' : 1,'AMZN' : 2,'AAPL' : 3,'INTC' : 4}
    # (substrings searched for in the column name, scaler attribute on context)
    scaler_for_markers = (
        (("Open", "Close", "High", "Low"), "priceMMS"),
        (("Volume",), "volumeMMS"),
        (("Sentiment",), "sentimentMMS"),
    )

    scaled = ticker_data.copy()
    # Symbols missing from the mapping become None (dict.get default).
    scaled["symbol"] = scaled["symbol"].apply(lambda x: ticker_to_int.get(x))

    for markers, scaler_name in scaler_for_markers:
        matching = [col for col in scaled.columns if any(marker in col for marker in markers)]
        for col in matching:
            # Every column is fitted separately, so once the loop ends the
            # scaler only remembers the last matching column.
            scaled[[col]] = getattr(context, scaler_name).fit_transform(scaled[[col]])

    return scaled

In [22]:
def predict(context,data):
    """Send ``data`` to the model endpoint and return the decoded reply.

    Parameters
    ----------
    context
        Object whose ``endpoint`` attribute holds the URL to call. The URL is
        used as-is; no route suffix is appended.
    data
        JSON-serialisable model input; it is wrapped as ``{'inputs': data}``.

    Returns
    -------
    The response body parsed as JSON.
    """
    # NOTE(review): the payload is serialised here and then once more by the
    # ``json=`` argument, so the server receives a JSON string containing JSON.
    # Presumably the serving side expects that — confirm before changing.
    payload = json.dumps({'inputs': data})
    response = requests.put(context.endpoint, json=payload)
    return json.loads(response.text)

In [39]:
def handler(context,event):
    """Predict a price for every configured ticker and return the results as a table.

    For each symbol in ``context.sym_to_url`` the function selects that
    symbol's rows from the offline feature vector, keeps the first ten rows in
    ascending ``Datetime`` order, preprocesses them with ``modify_data``,
    sends them to the model through ``predict`` and converts the returned
    value back with ``context.priceMMS.inverse_transform``.

    Parameters
    ----------
    context
        Nuclio context carrying ``sym_to_url``, ``priceMMS``, ``logger`` and,
        optionally, ``stocks_vec`` (feature-vector name, default "stocks-vec").
    event
        Incoming event; not used.

    Returns
    -------
    list
        A one-element list holding a dict with the keys ``columns``, ``rows``
        and ``type`` ('table'); every row is ``[ticker, predicted]``.
    """
    # The whole vector is read; no time window is applied.
    vector_name = getattr(context, "stocks_vec", "stocks-vec")
    data = fs.get_offline_features(vector_name).to_dataframe()

    # The deployed run recorded in this notebook failed with KeyError: 'Datetime'.
    # If the timestamp arrives as an index level instead of a column, restore it.
    # NOTE(review): confirm this is the actual cause of that failure.
    if "Datetime" not in data.columns and "Datetime" in data.index.names:
        data = data.reset_index(level="Datetime")

    tickers = list(context.sym_to_url.keys())
    all_ticker_predictions = []
    for ticker in tickers:
        ticker_df = data[data["symbol"] == ticker].fillna(0)
        # NOTE(review): ascending order followed by [:10] keeps the ten OLDEST
        # rows — confirm that this, and not the most recent ten, is intended.
        ticker_df = ticker_df.sort_values(by=["Datetime"],ascending = True)[:10]
        ticker_df = ticker_df.drop(["Datetime","Content","Link"], axis=1)
        modified_ticker = modify_data(context,ticker_df)
        context.logger.info(f"predicting stock price for {ticker}")
        # Model input is shaped (1, rows, columns).
        model_input = modified_ticker.values.reshape(1, *modified_ticker.shape).tolist()
        stock_prediction = predict(context, model_input)
        # "outputs" is decoded a second time because it is itself a JSON string.
        stock_prediction = json.loads(stock_prediction["outputs"])
        all_ticker_predictions.append(context.priceMMS.inverse_transform(stock_prediction)[0][0])

    df = pd.DataFrame({"ticker": tickers, "predicted": all_ticker_predictions})

    columns = [{'text': key, 'type': 'object'} for key in df.columns]
    response = [{'columns': columns,
                'rows': df.values.tolist(),
                'type': 'table'}]
    return response

In [40]:
def init_context(context):
    """Attach the settings and scalers used by the handler to the nuclio context.

    Attributes set on ``context``:

    * ``PROJECT_NAME`` - env ``PROJECT_NAME``, defaulting to "stocks-" plus
      env ``V3IO_USERNAME`` (itself defaulting to "dani"); also passed to
      ``mlrun.set_environment``.
    * ``stocks_vec`` - env ``stocks_vec``, default "stocks-vec".
    * ``sym_to_url`` - mapping of ticker symbol to a URL slug.
    * ``priceMMS`` / ``volumeMMS`` / ``sentimentMMS`` - separate
      ``MinMaxScaler`` instances with feature range (0, 1).
    * ``endpoint`` - env ``endpoint``, default empty string.
    """
    context.logger.info("Initalizing context")

    fallback_project = "stocks-" + os.getenv('V3IO_USERNAME',"dani")
    context.PROJECT_NAME = os.getenv('PROJECT_NAME', fallback_project)
    mlrun.set_environment(project=context.PROJECT_NAME)

    context.stocks_vec = os.getenv('stocks_vec',"stocks-vec")

    context.sym_to_url = {
        'GOOGL': 'google-inc',
        'MSFT': 'microsoft-corp',
        'AMZN': 'amazon-com-inc',
        'AAPL': 'apple-computer-inc',
        'INTC' : 'intel-corp',
    }

    # One independent scaler per feature family.
    for scaler_attr in ("priceMMS", "volumeMMS", "sentimentMMS"):
        setattr(context, scaler_attr, MinMaxScaler(feature_range = (0, 1)))

    context.endpoint = os.getenv('endpoint', "")

In [41]:
# nuclio: end-code

In [None]:
# Local smoke test: select the project, initialise the context and call the handler once.
# NOTE(review): `context` is not defined in any visible cell — presumably it is supplied
# by the nuclio-jupyter environment; confirm on a fresh kernel.
# os.getenv returns None when V3IO_USERNAME is unset, in which case the concatenation
# below raises TypeError.
mlrun.set_environment(project="stocks-" + os.getenv('V3IO_USERNAME'))
init_context(context)
# The event argument is an empty string; the handler does not read it.
data = handler(context,"")

In [79]:
data

[{'columns': [{'text': 'ticker', 'type': 'object'},
   {'text': 'predicted', 'type': 'object'}],
  'rows': [['GOOGL', 2440.325043225952],
   ['MSFT', 270.94796436812067],
   ['AMZN', 3442.4882586227754],
   ['AAPL', 137.11272962329258],
   ['INTC', 56.09725701435514]],
  'type': 'table'}]

In [6]:
# test remote deployment
# Builds the function object, mounts storage and sets the environment variables
# for the deployment; the actual deploy call happens in the next cell.
from mlrun import code_to_function
import os
import mlrun
fn = code_to_function('rnn_model_prediction',
                      handler='handler')
fn.apply(mlrun.auto_mount())
# Set parameters for current deployment
fn.set_envs({'PROJECT_NAME' : "stocks-" + os.getenv('V3IO_USERNAME'),
             # The stream path is "<user>/stocks/stocks_stream" with no space
             # between the user name and the slash.
             'STOCKS_STREAM':  os.getenv('V3IO_USERNAME') + '/stocks/stocks_stream',
             # Read by init_context as the model endpoint URL.
             'endpoint' : "http://default-tenant.app.app-lab-testing.iguazio-cd2.com:31219"})
# Cap the deployment at a single replica.
fn.spec.max_replicas = 1

In [7]:
# Deploy the function into the user's project and keep the returned address in `addr`.
addr = fn.deploy(project="stocks-" + os.getenv('V3IO_USERNAME'))

> 2021-07-28 12:05:47,874 [info] Starting remote function deploy
2021-07-28 12:05:48  (info) Deploying function
2021-07-28 12:05:48  (info) Building
2021-07-28 12:05:48  (info) Staging files and preparing base images
2021-07-28 12:05:48  (info) Building processor image
2021-07-28 12:05:49  (info) Build complete
> 2021-07-28 12:05:55,844 [info] function deployed, address=default-tenant.app.app-lab-testing.iguazio-cd2.com:31616


In [3]:
# Call the deployed function once; `{addr}` is interpolated from the Python variable `addr`.
!curl {addr}

Exception caught in handler - "'Datetime'": Traceback (most recent call last):
  File "/opt/nuclio/_nuclio_wrapper.py", line 114, in serve_requests
    self._handle_event(event)
  File "/opt/nuclio/_nuclio_wrapper.py", line 262, in _handle_event
    entrypoint_output = self._entrypoint(self._context, event)
  File "/opt/nuclio/07-model_prediction.py", line 38, in handler
    ticker_df = ticker_df.sort_values(by=["Datetime"],ascending = True)[:10]
  File "/usr/local/lib/python3.7/site-packages/pandas/util/_decorators.py", line 311, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/pandas/core/frame.py", line 6254, in sort_values
    k = self._get_label_or_level_values(by, axis=axis)
  File "/usr/local/lib/python3.7/site-packages/pandas/core/generic.py", line 1776, in _get_label_or_level_values
    raise KeyError(key)
KeyError: 'Datetime'
