# Stocks Analysis Demo

## Setup stocks project

In [1]:
from os import path
import os
import mlrun

# Base name shared by the MLRun environment and the project definition
project_name_base = 'stocks'

# Initialize the MLRun environment; the call returns the resolved project
# name and the artifacts path, both of which are reused by later cells
project_name, artifact_path = mlrun.set_environment(
    project=project_name_base,
    user_project=True,
)

# The notebook's working directory is used as the project context
project_path = path.abspath('./')
project = mlrun.new_project(
    project_name_base,
    context=project_path,
    user_project=True,
)

# Report the resolved project name and artifacts path
print(f'Project name: {project_name}')
print(f'Artifacts path: {artifact_path}')

Project name: stocks-iguazio
Artifacts path: v3io:///projects/{{run.project}}/artifacts


## Declare project functions

In [2]:
from mlrun import mount_v3io, code_to_function
# Optional hub URL override, left disabled:
#mlrun.mlconf.hub_url = 'https://raw.githubusercontent.com/mlrun/functions/development/sentiment_analysis_serving/function.yaml'

# (source, function name) pairs, registered on the project in this order
project_functions = [
    ('code/00-train-sentiment-analysis-model.ipynb', 'bert_sentiment_classifier_trainer'),
    ('code/01-read-stocks.ipynb', 'stocks_reader'),
    ('code/02-read-news.ipynb', 'news_reader'),
    ('code/03-stream-viewer.ipynb', 'stream_viewer'),
    ('hub://sentiment_analysis_serving', 'sentiment_analysis_server'),
    ('code/06-read-vector.ipynb', 'vector_reader'),
    ("code/07-grafana.ipynb", 'grafana_view'),
]
for function_source, function_name in project_functions:
    project.set_function(function_source, name=function_name)

# Apply the V3IO mount to the serving function
project.func('sentiment_analysis_server').apply(mount_v3io())

# Store the resolved project name in the project params for later use
# (this replaces any params previously set on the project spec)
project.spec.params = {"PROJECT_NAME": project_name}

## Download a pre-trained model (optional)
Because running the [training](training/bert_sentiment_classification.ipynb) step long enough to achieve good results may take some time, we have already trained a model and uploaded it to a public location.  
You can easily download it by running the following cell:

If you would like to change the location of the source data, set the `SAMPLE_DATA_SOURCE_URL_PREFIX` environment variable.

For example, set it to `/v3io/projects/demos-data/iguazio/`

In [15]:
# Run this to download the pre-trained model to your `models` directory
url_prefix = os.environ.get('SAMPLE_DATA_SOURCE_URL_PREFIX', 'https://s3.wasabisys.com/iguazio/')

import os
model_location = f'{url_prefix.rstrip("/")}/data/stock-analysis/model.pt'
saved_models_directory = os.path.join(os.path.abspath('./'), 'models')

# Create paths
os.makedirs(saved_models_directory, exist_ok=1)
model_filepath = os.path.join(saved_models_directory, os.path.basename(model_location))

if "http" in model_location:
    ! wget -nc -P {saved_models_directory} {model_location}
else:
    ! cp {model_location} {saved_models_directory}


In [4]:
# Register the model file from `model_filepath` on the serving function as "model1"
project.func('sentiment_analysis_server').add_model(
    "model1",
    class_name='SentimentClassifierServing',
    model_path=model_filepath,
)

<mlrun.serving.states.TaskState at 0x7f2c029acd90>

## Create deployment workflow

In [5]:
%%writefile code/workflow.py
from kfp import dsl
from mlrun import mount_v3io, mlconf, load_project
import os
from nuclio.triggers import V3IOStreamTrigger, CronTrigger
import re 

# Function registry read by `kfpipeline` below. It is empty here;
# presumably the MLRun workflow runner fills it before the pipeline
# is built -- TODO confirm.
funcs = {}

# Directories and paths
# NOTE: the project is loaded as a side effect of importing this module,
# from the current working directory.
projdir = os.path.abspath('./')
project = load_project(projdir)
# Project name stored in the project params; None if the key is missing
project_name = project.spec.params.get("PROJECT_NAME")
model_filepath = os.path.join(projdir, 'models', 'model.pt') # Previously saved model, if it was downloaded
reviews_datafile = os.path.join(projdir, 'data', 'reviews.csv')
# Performance limit: maximum replicas applied to the deployed functions
max_replicas = 1

# Readers cron interval (passed to CronTrigger in init_functions)
readers_cron_interval = '300s'

# Training GPU allocation
# Set to 0 if no GPUs are to be used
training_gpus = 0

def init_functions(functions: dict, project=None, secrets=None):
    """Configure the project functions before the pipeline deploys them.

    Args:
        functions: Mapping of function name to function object. Must contain
            'sentiment_analysis_server', 'bert_sentiment_classifier_trainer',
            'stocks_reader' and 'news_reader'.
        project: Not used by this function.
        secrets: Not used by this function.
    """
    # Common setup: V3IO mount, and always pull images to pick up updates
    for fn in functions.values():
        fn.apply(mount_v3io())
        fn.spec.image_pull_policy = 'Always'

    # Serving function: register the model and set the readiness timeout
    server = functions['sentiment_analysis_server']
    server.add_model('bert_classifier_v1', model_filepath)
    server.spec.readiness_timeout = 500
    server.set_config('readinessTimeoutSeconds', 500)

    # Adapt the images to the CPU variant when no GPU is assigned
    if training_gpus == 0:
        server.spec.base_spec['spec']['build']['baseImage'] = 'mlrun/ml-models'
        functions['bert_sentiment_classifier_trainer'].spec.image = 'mlrun/ml-models'

    # Both readers run on the same cron interval
    for reader_name in ('stocks_reader', 'news_reader'):
        functions[reader_name].add_trigger('cron', CronTrigger(readers_cron_interval))

    # Cap the replicas of the deployed functions (resource limit)
    for scaled_name in ('sentiment_analysis_server', 'news_reader', 'stocks_reader'):
        functions[scaled_name].spec.max_replicas = max_replicas

    # GPU allocation for training
    functions['bert_sentiment_classifier_trainer'].gpus(training_gpus)
        
# Pipeline that deploys the stocks demo: optionally trains the sentiment
# model, deploys the sentiment server and the news/stocks readers, then the
# stream and vector viewers, and finally runs the Grafana setup step.
# Documentation is kept in comments (not a docstring) so that the pipeline
# metadata stays exactly as declared in the decorator.
@dsl.pipeline(
    name='Stocks demo deployer',
    description='Up to RT Stocks ingestion and analysis'
)
def kfpipeline(
    # General
    # NOTE(review): the three defaults below are evaluated when this module is
    # loaded; os.getenv returns None if V3IO_USERNAME is unset, which makes the
    # `+` raise TypeError.
    V3IO_CONTAINER = 'users',
    STOCKS_TSDB_TABLE = os.getenv('V3IO_USERNAME') + '/stocks/stocks_tsdb',
    STOCKS_KV_TABLE = os.getenv('V3IO_USERNAME') + '/stocks/stocks_kv',
    STOCKS_STREAM = os.getenv('V3IO_USERNAME') + '/stocks/stocks_stream',
    RUN_TRAINER: bool = False,
    
    # Trainer
    pretrained_model = 'bert-base-cased',
    reviews_dataset = reviews_datafile,
    models_dir = 'models',
    model_filename = 'bert_sentiment_analysis_model.pt',
    n_classes: int = 3,
    MAX_LEN: int = 128,
    BATCH_SIZE: int = 16,
    EPOCHS: int =  2,
    random_state: int = 42,
    
    # stocks reader
    STOCK_LIST: list = ['GOOGL', 'MSFT', 'AMZN', 'AAPL', 'INTC'],
    EXPRESSION_TEMPLATE = "symbol='{symbol}';price={price};volume={volume};last_updated='{last_updated}'",
    
    # Sentiment analysis server
    model_name = 'bert_classifier_v1',
    model_filepath = model_filepath # if not trained
    
    ):

    def _deploy_news_reader(sentiment_endpoint):
        # Deploy the news reader wired to the given sentiment-server endpoint.
        # Called inside each condition block so the step is created there.
        return funcs['news_reader'].deploy_step(env={'V3IO_CONTAINER': V3IO_CONTAINER,
                                                     'STOCKS_STREAM': STOCKS_STREAM,
                                                     'STOCKS_TSDB_TABLE': STOCKS_TSDB_TABLE,
                                                     'SENTIMENT_MODEL_ENDPOINT': sentiment_endpoint,
                                                     'PROJECT_NAME' : project_name})

    # The comparison expressions are handed to dsl.Condition as-is; keep the
    # explicit `== True` / `== False` form rather than rewriting it as a
    # plain truthiness test.

    # Branch 1: train the model, then serve the freshly trained artifact
    with dsl.Condition(RUN_TRAINER == True):
        
        deployer = funcs['bert_sentiment_classifier_trainer'].deploy_step()
                
        trainer = funcs['bert_sentiment_classifier_trainer'].as_step(name='bert_sentiment_classifier_trainer',
                                                                     handler='train_sentiment_analysis_model',
                                                                     params={'pretrained_model': pretrained_model,
                                                                             'EPOCHS': EPOCHS,
                                                                             'models_dir': models_dir,
                                                                             'model_filename': model_filename,
                                                                             'n_classes': n_classes,
                                                                             'MAX_LEN': MAX_LEN,
                                                                             'BATCH_SIZE': BATCH_SIZE,
                                                                             'random_state': random_state},
                                                                     inputs={'reviews_dataset': reviews_dataset},
                                                                     outputs=['bert_sentiment_analysis_model'],
                                                                     image=deployer.outputs['image'])
        
        sentiment_server = funcs['sentiment_analysis_server'].deploy_step(env={f'SERVING_MODEL_{model_name}': trainer.outputs['bert_sentiment_analysis_model']})
        
        news_reader = _deploy_news_reader(sentiment_server.outputs['endpoint'])
    
    # Branch 2: skip training and serve the model at `model_filepath`
    with dsl.Condition(RUN_TRAINER == False):
        
        sentiment_server = funcs['sentiment_analysis_server'].deploy_step(env={f'SERVING_MODEL_{model_name}': model_filepath})
        
        news_reader = _deploy_news_reader(sentiment_server.outputs['endpoint'])
    
    # NOTE(review): `news_reader` was rebound in the second branch, so the
    # `.after(news_reader)` calls below reference the step created under
    # `RUN_TRAINER == False` -- confirm the ordering is as intended when the
    # trainer branch is the one that runs.
    
    stocks_reader = funcs['stocks_reader'].deploy_step(env={'STOCK_LIST': STOCK_LIST,
                                                            'V3IO_CONTAINER': V3IO_CONTAINER,
                                                            'STOCKS_TSDB_TABLE': STOCKS_TSDB_TABLE,
                                                            'STOCKS_KV_TABLE': STOCKS_KV_TABLE,
                                                            'EXPRESSION_TEMPLATE': EXPRESSION_TEMPLATE,
                                                           'PROJECT_NAME' : project_name})
    
    stream_viewer = funcs['stream_viewer'].deploy_step(env={'V3IO_CONTAINER': V3IO_CONTAINER,
                                                            'STOCKS_STREAM': STOCKS_STREAM}).after(news_reader)
    
    vector_viewer = funcs['vector_reader'].deploy_step(env={'PROJECT_NAME' : project_name}).after(news_reader)
    
    # Deploy the Grafana function first, then run its handler after the deploy step
    grafana_deployer = funcs["grafana_view"].deploy_step()
    
    grafana_viewer = funcs["grafana_view"].as_step(params = {"streamview_url" : stream_viewer.outputs["endpoint"],
                                                             "readvector_url" : vector_viewer.outputs["endpoint"],
                                                             "v3io_container" : V3IO_CONTAINER,
                                                             "stocks_kv" : STOCKS_KV_TABLE,
                                                             "stocks_tsdb" : STOCKS_TSDB_TABLE,
                                                             "grafana_url" : "http://grafana"},
                                                   handler = "handler").after(grafana_deployer)

Overwriting code/workflow.py


## Add workflow

In [6]:
# Register code/workflow.py, resolved against the absolute project context, as the 'main' workflow
workflow_path = os.path.join(os.path.abspath(project.context), 'code', 'workflow.py')
project.set_workflow('main', workflow_path)

## Save Project

In [7]:
# Save the project definition as project.yaml inside the project context
project_yaml_path = os.path.join(project.context, 'project.yaml')
project.save(project_yaml_path)

### Run workflow
In this cell we run the `main` workflow via Kubeflow Pipelines (KFP) on our cluster.  
Running the pipeline may take some time. Because the Jupyter session may time out, it's best to track the pipeline's progress via the KFP or MLRun UI.

In [8]:
# Run the 'main' workflow with the trainer branch disabled; the value
# returned by the call is shown as the cell output
project.run(
    'main',
    arguments={'RUN_TRAINER': False},
    artifact_path=artifact_path,
    dirty=True,
)

> 2021-04-06 10:53:25,324 [info] using in-cluster config.


> 2021-04-10 19:41:57,619 [info] Pipeline run id=e3f4fc99-6793-449e-a2d5-c0a6956e8a1a, check UI or DB for progress


'e3f4fc99-6793-449e-a2d5-c0a6956e8a1a'