# Stocks Analysis Demo

In [1]:
# Utils
import os
import json
import numpy as np

# MLRun imports
from mlrun import mlconf


# Setup API Endpoint
# Keep an endpoint that is already configured (e.g. via the MLRun environment settings)
# and only fall back to the default in-cluster API service when none is set.
mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'

## Setup MLRun stocks project

In [2]:
from mlrun import new_project

# Update the directory and repo below to reflect your real locations.
# The remote git repo must already be initialized in GitHub.
project_dir = os.path.abspath('./')
remote_git = 'https://github.com/mlrun/demos.git'
# NOTE(review): remote_git is not passed to new_project() in this cell - confirm whether it is still needed.

# Create the 'stocks' project with project_dir as its context (init_git=False)
project = new_project('stocks', project_dir, init_git=False)

# We can update our project directory to the latest status by running
# project.pull()

In [3]:
# Define an artifact path to keep track of where our artifacts are going:
# an `artifacts` directory under the (absolute) project context, also set as
# the artifact path in the MLRun configuration.
ARTIFACT_PATH =  os.path.join(os.path.abspath(project.context), 'artifacts')
mlconf.artifact_path = ARTIFACT_PATH

In [4]:
# Register the demo functions in the project.
# The `name` values are the keys code/workflow.py uses to look the functions up.
# The trainer is registered from its YAML spec; the alternative below registers it from the training notebook instead:
# project.set_function('code/00-train-sentiment-analysis-model.ipynb', name='bert_sentiment_classifier_trainer', kind='job')
project.set_function('code/bert_sentiment_classification.yaml', name='bert_sentiment_classifier_trainer')
# Readers and the stream viewer are registered from their notebooks
project.set_function('code/01-read-stocks.ipynb', name='stocks_reader')
project.set_function('code/02-read-news.ipynb', name='news_reader')
project.set_function('code/03-stream-viewer.ipynb', name='stream_viewer')
# The sentiment-analysis server is referenced through a hub:// URL
project.set_function('hub://sentiment_analysis_serving', name='sentiment_analysis_server')

<mlrun.runtimes.function.RemoteRuntime at 0x7f14ce8cefd0>

## Download a pre-trained model (optional)
Since running the [training](training/bert_sentiment_classification.ipynb) step long enough to achieve good results may take some time, we have already trained a model and uploaded it to a public location.  
You can easily download it by running the following cell:

In [5]:
# Run this to download the pre-trained model to your `models` directory

import os
model_location = 'https://iguazio-sample-data.s3.amazonaws.com/models/model.pt'
saved_models_directory = os.path.join(os.path.abspath('../'), 'models')

# Create paths
os.makedirs(saved_models_directory, exist_ok=1)
model_filepath = os.path.join(saved_models_directory, os.path.basename(model_location))
!wget -nc -P {saved_models_directory} {model_location} 

File ‘/User/models/model.pt’ already there; not retrieving.



## Deploy a Grafana Dashboard  
To track the different stocks on a live dashboard we will use **Grafana**.  <br>
We will use [Grafwiz](https://github.com/v3io/grafwiz) to define and deploy the dashboard directly from this notebook.

In [6]:
from grafwiz import *
import v3io_frames as v3f

def deploy_dashboard(grafana_url:str = 'http://grafana', 
                     streamview_url:str = 'http://nuclio-stocks-stream-viewer:8080',
                     v3io_container:str = 'bigdata',
                     stocks_kv_table:str = 'stocks/stocks_kv',
                     stocks_tsdb_table:str = 'stocks/stocks_tsdb'):
    """Create the Grafana data sources and deploy the "stocks" dashboard.

    The dashboard has three rows: a table with the current stock values (read
    from the KV table), one graph each for price, volume and sentiment (read
    from the TSDB table and filtered by the selected symbol), and an articles
    log table served by the stream-viewer data source.

    Args:
        grafana_url: URL of the Grafana service to deploy to.
        streamview_url: URL used as the frames URL of the 'stream-viewer' data source.
        v3io_container: Container that holds the stocks tables.
        stocks_kv_table: Path of the stocks KV table inside the container.
        stocks_tsdb_table: Path of the stocks TSDB table inside the container.

    Returns:
        Whatever ``Dashboard.deploy`` returns for the deployed dashboard.
    """
    # Create datasources
    DataSource(name='Iguazio').deploy(grafana_url, use_auth=True)
    DataSource(name='stream-viewer', frames_url=streamview_url).deploy(grafana_url, use_auth=False, overwrite=False)

    # Verify the KV table can be shown (runs the KV backend's 'infer' command on it)
    client = v3f.Client('framesd:8081', container=v3io_container)
    client.execute(backend='kv', table=stocks_kv_table, command='infer')

    # Create grafana dashboard
    dash = Dashboard("stocks", start='now-7d', dataSource='Iguazio')

    # Add a symbol combo box (template) with data from the stocks table.
    # The query is built from the function arguments so that a non-default
    # table or container is honoured here as well as in the panels below.
    dash.template(name="SYMBOL", label="Symbol",
                  query=f"fields=symbol;table={stocks_kv_table};backend=kv;container={v3io_container}")

    # First row: a table with the current value of every stock
    tbl = Table('Current Stocks Value', span=12).source(table=stocks_kv_table,fields=['symbol','volume', 'price', 'sentiment', 'last_updated'],container=v3io_container)
    dash.row([tbl])

    # Second row: one chart per metric (price, volume, sentiment) for the selected symbol
    metrics_row = [Graph(metric).series(table=stocks_tsdb_table, fields=[metric], filter='symbol=="$SYMBOL"',container=v3io_container) for metric in ['price','volume']]
    metrics_row.append(Graph('sentiment').series(table=stocks_tsdb_table, fields=['sentiment'], filter='symbol=="$SYMBOL"', container=v3io_container))
    dash.row(metrics_row)

    # Third row: log viewer panel backed by the stream-viewer data source
    log = Table('Articles Log', dataSource='stream-viewer', span=12)
    dash.row([log])

    # Deploy to Grafana
    return dash.deploy(grafana_url)
# Deploy the data sources and the dashboard using the default URLs, container and tables
deploy_dashboard()

Datasource Iguazio already exists
Datasource Iguazio created successfully
Datasource stream-viewer already exists
Datasource stream-viewer created successfully
Dashboard stocks created successfully


## Create deployment workflow

In [7]:
%%writefile code/workflow.py
from kfp import dsl
from mlrun import mount_v3io, mlconf
import os
from nuclio.triggers import V3IOStreamTrigger, CronTrigger

# Registry of project functions used by the pipeline steps below, keyed by function name.
# It is empty here - presumably populated by MLRun when the workflow is loaded (TODO confirm).
funcs = {}

# Directories and Paths
# Project root as seen by the functions (the /User mount)
projdir = os.path.join('/', 'User', 'stock-trading')
model_filepath = os.path.join(projdir, 'models', 'model.pt') # Previously saved model, if it was downloaded
# Reviews CSV used as the default training dataset of the pipeline
reviews_datafile = os.path.join(projdir, 'data', 'reviews.csv')

# Performance limit: max replicas applied to the serving and reader functions
max_replicas = 1

# Readers cron interval (used for the cron triggers of both reader functions)
readers_cron_interval = '300s'

# Training GPU Allocation (number of GPUs requested for the trainer function)
training_gpus = 1


def init_functions(functions: dict, project=None, secrets=None):
    """Configure the project functions before the pipeline is built.

    Every function gets the V3IO mount and an 'Always' image pull policy.
    The sentiment server is then given its model and readiness timeout, both
    readers get a cron trigger, replica limits are applied and GPUs are
    requested for the trainer.

    Args:
        functions: Mapping of function name to function object; must contain
            'sentiment_analysis_server', 'stocks_reader', 'news_reader' and
            'bert_sentiment_classifier_trainer'.
        project: Not used by this implementation.
        secrets: Not used by this implementation.
    """
    for fn in functions.values():
        # Mount V3IO and always pull images so updates are picked up
        fn.apply(mount_v3io())
        fn.spec.image_pull_policy = 'Always'

    # Sentiment server: register the model and extend the readiness timeout
    readiness_timeout_seconds = 500
    server = functions['sentiment_analysis_server']
    server.add_model('bert_classifier_v1', model_filepath)
    server.spec.readiness_timeout = readiness_timeout_seconds
    server.set_config('readinessTimeoutSeconds', readiness_timeout_seconds)

    # Readers: each one is invoked periodically through its own cron trigger
    for reader_key in ('stocks_reader', 'news_reader'):
        functions[reader_key].add_trigger('cron', CronTrigger(readers_cron_interval))

    # Cap the replicas of the long-running functions (resource limits)
    for limited_key in ('sentiment_analysis_server', 'news_reader', 'stocks_reader'):
        functions[limited_key].spec.max_replicas = max_replicas

    # Request GPUs for training
    functions['bert_sentiment_classifier_trainer'].gpus(training_gpus)
        
# Pipeline that deploys the stocks demo.
# Depending on RUN_TRAINER it either trains the sentiment model and serves the
# training output, or serves the model file given by `model_filepath`. In both
# cases the news reader is deployed against the sentiment server endpoint; the
# stocks reader and the stream viewer are deployed unconditionally.
@dsl.pipeline(
    name='Stocks demo deployer',
    description='Up to RT Stocks ingestion and analysis'
)
def kfpipeline(
    # General
    V3IO_CONTAINER = 'bigdata',
    STOCKS_TSDB_TABLE = 'stocks/stocks_tsdb',
    STOCKS_KV_TABLE = 'stocks/stocks_kv',
    STOCKS_STREAM = 'stocks/stocks_stream',
    RUN_TRAINER = False,
    
    # Trainer
    pretrained_model = 'bert-base-cased',
    reviews_dataset = reviews_datafile,
    models_dir = 'models',
    model_filename = 'bert_sentiment_analysis_model.pt',
    n_classes = 3,
    MAX_LEN = 128,
    BATCH_SIZE = 16,
    EPOCHS =  2,
    random_state = 42,
    
    # stocks reader
    STOCK_LIST = ['GOOGL', 'MSFT', 'AMZN', 'AAPL', 'INTC'],
    EXPRESSION_TEMPLATE = "symbol='{symbol}';price={price};volume={volume};last_updated='{last_updated}'",
    
    # Sentiment analysis server
    model_name = 'bert_classifier_v1',
    model_filepath = model_filepath # if not trained
    
    ):
    
    # Environment shared by the news reader in both branches; only the
    # sentiment endpoint differs, since it comes from the server deployed in that branch.
    news_reader_env = {'V3IO_CONTAINER': V3IO_CONTAINER,
                       'STOCKS_STREAM': STOCKS_STREAM,
                       'STOCKS_TSDB_TABLE': STOCKS_TSDB_TABLE}
    
    # NOTE: the explicit `== True` / `== False` comparisons are kept on purpose -
    # dsl.Condition is handed the comparison expression itself (presumably
    # RUN_TRAINER is a pipeline parameter placeholder here; verify against the KFP docs).
    with dsl.Condition(RUN_TRAINER == True):
        
        # Train the model, then serve the trainer's output model
        trainer = funcs['bert_sentiment_classifier_trainer'].as_step(name='bert_sentiment_classifier_trainer',
                                                                     params={'pretrained_model': pretrained_model,
                                                                             'EPOCHS': EPOCHS,
                                                                             'models_dir': models_dir,
                                                                             'model_filename': model_filename,
                                                                             'n_classes': n_classes,
                                                                             'MAX_LEN': MAX_LEN,
                                                                             'BATCH_SIZE': BATCH_SIZE,
                                                                             'random_state': random_state},
                                                                     inputs={'reviews_dataset': reviews_dataset},
                                                                     outputs=['bert_sentiment_analysis_model'])
        
        sentiment_server = funcs['sentiment_analysis_server'].deploy_step(env={f'SERVING_MODEL_{model_name}': trainer.outputs['bert_sentiment_analysis_model']})
        
        news_reader = funcs['news_reader'].deploy_step(env={**news_reader_env,
                                                            'SENTIMENT_MODEL_ENDPOINT': sentiment_server.outputs['endpoint']})
    
    with dsl.Condition(RUN_TRAINER == False):
        
        # Serve the model file given by `model_filepath`
        sentiment_server = funcs['sentiment_analysis_server'].deploy_step(env={f'SERVING_MODEL_{model_name}': model_filepath})
        
        news_reader = funcs['news_reader'].deploy_step(env={**news_reader_env,
                                                            'SENTIMENT_MODEL_ENDPOINT': sentiment_server.outputs['endpoint']})
    
    stocks_reader = funcs['stocks_reader'].deploy_step(env={'STOCK_LIST': STOCK_LIST,
                                                            'V3IO_CONTAINER': V3IO_CONTAINER,
                                                            'STOCKS_TSDB_TABLE': STOCKS_TSDB_TABLE,
                                                            'STOCKS_KV_TABLE': STOCKS_KV_TABLE,
                                                            'EXPRESSION_TEMPLATE': EXPRESSION_TEMPLATE})
    
    # NOTE(review): `news_reader` is bound in both conditional branches, so here it
    # refers to the step created in the RUN_TRAINER == False branch - confirm the
    # ordering is as intended when the trainer branch is the one that runs.
    stream_viewer = funcs['stream_viewer'].deploy_step(env={'V3IO_CONTAINER': V3IO_CONTAINER,
                                                            'STOCKS_STREAM': STOCKS_STREAM}).after(news_reader)

Overwriting code/workflow.py


## Add workflow

In [8]:
# Register code/workflow.py (by its absolute path under the project context) as the project's 'main' workflow
project.set_workflow('main', os.path.join(os.path.abspath(project.context), 'code', 'workflow.py'))

## Save Project

In [9]:
# Save the project definition as project.yaml in the project context directory
project.save(os.path.join(project.context, 'project.yaml'))

## Run workflow

In [10]:
# Run the 'main' workflow with the training branch enabled (RUN_TRAINER=True), using ARTIFACT_PATH as the artifact path.
# NOTE(review): dirty=True presumably allows running with uncommitted git changes - confirm against the MLRun docs.
project.run('main', arguments={'RUN_TRAINER': True}, artifact_path=ARTIFACT_PATH, dirty=True)

> 2020-10-19 13:54:54,985 [info] using in-cluster config.




> 2020-10-19 13:54:55,598 [info] Pipeline run id=5bb26366-a026-4301-afe4-8bdb9c4fc028, check UI or DB for progress


'5bb26366-a026-4301-afe4-8bdb9c4fc028'