# Stocks Analysis Demo

In [1]:
# Utils
import os
import json
import numpy as np

# MLRun imports
from mlrun import mlconf

# Setup API Endpoint
# Keep an MLRun API address that is already configured for this environment;
# fall back to the in-cluster API service only when none is set.
mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'

## Setup MLRun stocks project

In [2]:
from mlrun import new_project

# Update the directory and repo below to reflect your real locations.
# The remote git repo must already be initialized in GitHub.
# NOTE(review): remote_git is not used by any cell visible here - confirm whether it is still needed.
project_dir = os.path.abspath('./')
remote_git = 'https://github.com/mlrun/demo-stocks.git'

# Create the project (init_git=False is passed, see the note on remote_git above)
project = new_project('stocks', project_dir, init_git=False)

# We can update our project directory to the latest status by running
# project.pull()

In [3]:
# Keep all artifacts in an 'artifacts' folder under the project's context
# directory, and use the same location as MLRun's artifact path
context_dir = os.path.abspath(project.context)
ARTIFACT_PATH = os.path.join(context_dir, 'artifacts')
mlconf.artifact_path = ARTIFACT_PATH

In [4]:
# Functions registered with an explicit kind: (spec file, function name, kind)
project_functions = [
    ('code/03-read-stocks.yaml', 'stocks_reader', 'nuclio'),
    ('code/04-read-news.yaml', 'news_reader', 'nuclio'),
    ('code/05-stream-viewer.yaml', 'stream_viewer', 'nuclio'),
    ('code/06-grafana.yaml', 'grafana', 'job'),
    ('training/bert_sentiment_classification.yaml', 'bert_sentiment_classifier_trainer', 'job'),
]
for spec_file, func_name, func_kind in project_functions:
    project.set_function(spec_file, name=func_name, kind=func_kind)

# The serving function is referenced by its hub:// URL and registered without a kind.
# It stays the last expression of the cell, so its return value is still displayed.
project.set_function('hub://sentiment_analysis_serving', name='sentiment_analysis_server')

<mlrun.runtimes.function.RemoteRuntime at 0x7f0814502e10>

## Create deployment workflow

In [5]:
%%writefile code/workflow.py
from kfp import dsl
from mlrun import mount_v3io, mlconf
import os
from nuclio.triggers import V3IOStreamTrigger

# Project functions used by the pipeline below, looked up by function name.
# NOTE(review): left empty here - presumably filled in by MLRun when the workflow is loaded; confirm.
funcs = {}

# Directories and Paths
projdir = os.path.join('/', 'User', 'demo-stocks')
# Alternative model file name, currently disabled:
# model_filepath = os.path.join(projdir, 'models', 'bert_sentiment_analysis_model.pt')
model_filepath = os.path.join(projdir, 'models', 'model.pt')
reviews_datafile = os.path.join(projdir, 'data', 'reviews.csv')

# Performance limit: value assigned to the sentiment server's spec.max_replicas in init_functions
sentiment_server_max_replicas = 1


def init_functions(functions: dict, project=None, secrets=None):
    """Prepare the project functions before the pipeline is built.

    Every function gets the V3IO mount applied and its image pull policy set
    to 'Always'. The sentiment analysis server additionally gets its model,
    readiness timeout and replica limit.

    :param functions: functions keyed by name; must contain
                      'sentiment_analysis_server'
    :param project:   not used by this function
    :param secrets:   not used by this function
    """
    for fn in functions.values():
        # V3IO mount
        fn.apply(mount_v3io())
        # Always pull images to keep updates
        fn.spec.image_pull_policy = 'Always'

    # Sentiment analysis server: model to serve, readiness timeout and replica limit
    server = functions['sentiment_analysis_server']
    server.add_model('bert_classifier_v1', model_filepath)
    server.spec.readiness_timeout = 500
    server.set_config('readinessTimeoutSeconds', 500)
    server.spec.max_replicas = sentiment_server_max_replicas
                
        
# Deployment pipeline for the stocks demo.
#
# Steps:
#   1. Sentiment analysis server + news reader, in one of two branches:
#        - RUN_TRAINER is True : build the trainer image, train the model and
#          serve the trained model.
#        - RUN_TRAINER is False: serve the existing model file (model_filepath).
#   2. Stocks reader.
#   3. Stream viewer, ordered after the news reader of either branch.
#   4. Grafana dashboard, which receives the stream viewer's endpoint.
#
# The documentation is kept in comments rather than a docstring so that any
# metadata KFP derives from this function stays exactly as it was.
@dsl.pipeline(
    name='Stocks demo deployer',
    description='Up to RT Stocks ingestion and analysis'
)
def kfpipeline(
    # General
    V3IO_CONTAINER = 'bigdata',
    STOCKS_TSDB_TABLE = 'stocks/stocks_tsdb',
    STOCKS_SENTIMENT_TSDB_TABLE = 'stocks/stocks_sentiment_tsdb',
    STOCKS_KV_TABLE = 'stocks/stocks_kv',
    STOCKS_STREAM = 'stocks/stocks_stream',
    RUN_TRAINER = False,
    
    # Trainer
    pretrained_model = 'bert-base-cased',
    reviews_dataset = reviews_datafile,
    models_dir = 'models',
    model_filename = 'bert_sentiment_analysis_model.pt',
    n_classes = 3,
    MAX_LEN = 128,
    BATCH_SIZE = 16,
    EPOCHS =  1,
    random_state = 42,
    
    # stocks reader
    STOCK_LIST = ['GOOGL', 'MSFT', 'AMZN', 'AAPL', 'INTC'],
    EXPRESSION_TEMPLATE = "symbol='{symbol}';price={price};volume={volume};last_updated='{last_updated}'",
    
    # Sentiment analysis server
    model_name = 'bert_classifier_v1',
    model_filepath = model_filepath # if not trained
    
    ):

    def deploy_news_reader(sentiment_server):
        # Single definition of the news reader deployment so that both branches
        # pass the same environment (the trainer branch used to omit
        # STOCKS_SENTIMENT_TSDB_TABLE).
        return funcs['news_reader'].deploy_step(env={'V3IO_CONTAINER': V3IO_CONTAINER,
                                                     'STOCKS_STREAM': STOCKS_STREAM,
                                                     'STOCKS_SENTIMENT_TSDB_TABLE': STOCKS_SENTIMENT_TSDB_TABLE,
                                                     'SENTIMENT_MODEL_ENDPOINT': sentiment_server.outputs['endpoint']})

    # RUN_TRAINER is a pipeline parameter, so the explicit `== True` / `== False`
    # comparisons are what dsl.Condition receives - do not replace them with a
    # plain truthiness test.
    with dsl.Condition(RUN_TRAINER == True):
        
        trainer_image_builder = funcs['bert_sentiment_classifier_trainer'].deploy_step(skip_deployed=True)
        
        # Each parameter is listed once (the original dict repeated 'EPOCHS')
        trainer = funcs['bert_sentiment_classifier_trainer'].as_step(name='bert_sentiment_classifier_trainer',
                                                                     params={'pretrained_model': pretrained_model,
                                                                             'EPOCHS': EPOCHS,
                                                                             'models_dir': models_dir,
                                                                             'model_filename': model_filename,
                                                                             'n_classes': n_classes,
                                                                             'MAX_LEN': MAX_LEN,
                                                                             'BATCH_SIZE': BATCH_SIZE,
                                                                             'random_state': random_state},
                                                                     inputs={'reviews_dataset': reviews_dataset},
                                                                     image=trainer_image_builder.outputs['image'],
                                                                     outputs=['bert_sentiment_analysis_model'])
        
        # Serve the model produced by the trainer step
        trained_sentiment_server = funcs['sentiment_analysis_server'].deploy_step(env={f'SERVING_MODEL_{model_name}': trainer.outputs['bert_sentiment_analysis_model']})
        
        trained_news_reader = deploy_news_reader(trained_sentiment_server)
    
    with dsl.Condition(RUN_TRAINER == False):
        
        # Serve the existing model file
        pretrained_sentiment_server = funcs['sentiment_analysis_server'].deploy_step(env={f'SERVING_MODEL_{model_name}': model_filepath})
        
        pretrained_news_reader = deploy_news_reader(pretrained_sentiment_server)
    
    stocks_reader = funcs['stocks_reader'].deploy_step(env={'STOCK_LIST': STOCK_LIST,
                                                            'V3IO_CONTAINER': V3IO_CONTAINER,
                                                            'STOCKS_TSDB_TABLE': STOCKS_TSDB_TABLE,
                                                            'STOCKS_KV_TABLE': STOCKS_KV_TABLE,
                                                            'EXPRESSION_TEMPLATE': EXPRESSION_TEMPLATE})
    
    # Order the stream viewer after the news reader of BOTH branches. The
    # original reused one variable name, so only the no-trainer news reader
    # was referenced here.
    stream_viewer = funcs['stream_viewer'].deploy_step(env={'V3IO_CONTAINER': V3IO_CONTAINER,
                                                            'STOCKS_STREAM': STOCKS_STREAM})
    stream_viewer.after(trained_news_reader).after(pretrained_news_reader)
    
    grafana_builder = funcs['grafana'].deploy_step(skip_deployed=True)
    
    grafana_dashboard = funcs['grafana'].as_step(name='grafana_deployer',
                                                 params={'streamview_url': stream_viewer.outputs['endpoint'],
                                                         'v3io_container': V3IO_CONTAINER,
                                                         'stocks_kv_table': STOCKS_KV_TABLE,
                                                         'stocks_tsdb_table': STOCKS_TSDB_TABLE,
                                                         'stocks_sentiment_tsdb_table': STOCKS_SENTIMENT_TSDB_TABLE,},
                                                 image=grafana_builder.outputs['image'])
    

Overwriting code/workflow.py


## Add workflow

In [6]:
# Register the workflow file written above as the project's 'main' workflow
workflow_file = os.path.join(os.path.abspath(project.context), 'code', 'workflow.py')
project.set_workflow('main', workflow_file)

## Save Project

In [7]:
# Save the project definition to project.yaml inside the project context directory
project_yaml = os.path.join(project.context, 'project.yaml')
project.save(project_yaml)

## Run workflow

In [8]:
# Run the 'main' workflow registered above, passing ARTIFACT_PATH as the artifact path.
# NOTE(review): dirty=True presumably allows the run while the project repo has uncommitted changes - confirm against the MLRun docs.
# The call is the cell's last expression, so the returned pipeline run id is displayed as output.
project.run('main', artifact_path=ARTIFACT_PATH, dirty=True)

> 2020-09-24 08:11:51,593 [info] using in-cluster config.




> 2020-09-24 08:11:52,256 [info] Pipeline run id=7f811f99-4869-450f-8c3c-893eb1ace4ab, check UI or DB for progress


'7f811f99-4869-450f-8c3c-893eb1ace4ab'