# Stocks demo pipeline

## Steps
* [Run notebook ingest_news](01_ingest_news.ipynb)
* [Run notebook ingest_stocks](02_ingest_stocks.ipynb)
* [Create feature vector](#Create-feature-vector)
* [Set up the project](#Set-up-the-project)
* [Write and save workflow](#Write-and-save-workflow)
* [Run the workflow](#Run-the-workflow)

In [1]:
import mlrun
# Load the 'stocks' project from the MLRun DB, creating it if it does not exist yet.
# The notebook's directory is used as the project context.
project = mlrun.get_or_create_project(
    name='stocks',
    user_project=True,
    context="./",
)

> 2023-05-28 14:34:16,320 [info] loaded project stocks from MLRun DB


## Create feature vector

In [2]:
# Feature Store API used below to define and persist the feature vector
import mlrun.feature_store as fstore

# Name the vector is saved under; later cells look it up by this name
fv_name = 'stocks'

# Feature references that make up the vector
features = [
    'stocks.*',
    'news.sentiment',
]

# Build the feature vector object from the name and feature list
transactions_fv = fstore.FeatureVector(
    fv_name,
    features,
    description='stocks information',
)

# Persist the vector definition in the Feature Store
transactions_fv.save()

In [3]:
# Fetch the offline feature vector for the last 59 days and preview it as a dataframe
import datetime

# Capture "now" once so both window bounds are derived from the same instant
now = datetime.datetime.now()
start_time = now - datetime.timedelta(days=59)
end_time = now

# Rows are filtered to [start_time, end_time] using the 'Datetime' timestamp column
fv_data = fstore.get_offline_features(
    fv_name,
    start_time=start_time,
    end_time=end_time,
    entity_timestamp_column='Datetime',
)

# Show only the first rows; the full frame is not persisted by this cell
fv_data.to_dataframe().head()

Unnamed: 0,Open,High,Low,Close,Volume,ticker2onehot_A,ticker2onehot_AAL,ticker2onehot_AAP,ticker2onehot_AAPL,ticker2onehot_ABBV,ticker2onehot_ABC,ticker2onehot_ABT,ticker2onehot_ACGL,ticker2onehot_ACN,ticker2onehot_ADBE,sentiment
0,134.179993,134.225006,133.929993,134.014999,8963,1,0,0,0,0,0,0,0,0,0,
1,98.82,98.839996,98.760002,98.790001,32972,0,0,0,0,0,0,1,0,0,0,
2,67.540001,67.550003,67.445,67.480003,11353,0,0,0,0,0,0,0,1,0,0,
3,14.58,14.59,14.55,14.56,100621,0,1,0,0,0,0,0,0,0,0,
4,158.815002,158.865005,158.654999,158.654999,6280,0,0,0,0,0,1,0,0,0,0,


## Set up the project

In [4]:
import os

# Register the model training code as a 'job' function of the project
project.set_function(
    './src/train_stocks.py',
    name='train_stocks',
    kind='job',
    image='mlrun/ml-models',
)

# Register the model serving code as a 'serving' function of the project
project.set_function(
    './src/serving_stocks.py',
    name='serving_stocks',
    kind='serving',
    image='mlrun/ml-models',
)

<mlrun.runtimes.serving.ServingRuntime at 0x7f352e48ab90>

## Write and save workflow

In [5]:
%%writefile src/workflow.py
import mlrun
from kfp import dsl

@dsl.pipeline(
    name="Stocks Prediction Pipeline",
    description="predicting stock prices using yahoo api with sentiment analysis"
)
def kfpipeline(vector_name:str,
               seq_size:int = 5,
               batch_size:int = 1,
               hidden_dim:int = 2,
               n_layers:int = 1,
               epochs:int = 3,
               start_time:int = 59,
               end_time:int = 0,
               model_filepath = './'
               ):
    """Train the stocks model and deploy the serving graph that uses it.

    The pipeline has two stages:

    1. Run the project's ``train_stocks`` function (handler ``handler``) and
       collect its ``model`` output.
    2. Configure the project's ``serving_stocks`` function as an async flow
       (``preprocess`` -> ``StocksModel`` -> ``postprocess``) that loads the
       trained model, then deploy it.

    All arguments are forwarded unchanged to the training handler as run
    parameters; their exact meaning is defined by ``src/train_stocks.py``.

    :param vector_name:    name of the feature vector to train on
    :param seq_size:       forwarded to the training handler as ``seq_size``
    :param batch_size:     forwarded to the training handler as ``batch_size``
    :param hidden_dim:     forwarded to the training handler as ``hidden_dim``
    :param n_layers:       forwarded to the training handler as ``n_layers``
    :param epochs:         forwarded to the training handler as ``epochs``
    :param start_time:     forwarded to the training handler as ``start_time``
                           (presumably days back from now - confirm in the handler)
    :param end_time:       forwarded to the training handler as ``end_time``
                           (presumably days back from now - confirm in the handler)
    :param model_filepath: forwarded to the training handler as ``model_filepath``
    """
    project = mlrun.get_current_project()

    # Attach the automatic mount to the project's training function before it runs
    project.get_function('train_stocks').apply(mlrun.auto_mount())

    # NOTE: no 'context' entry is passed in params. There is no `context` name
    # in this module, so referencing it raised NameError while building the
    # pipeline. The handler is expected to receive its run context from MLRun
    # itself - verify against the handler signature in src/train_stocks.py.
    train_stocks_run = mlrun.run_function(name='train_stocks',
                                          function='train_stocks',
                                          handler='handler',
                                          params={'hidden_dim':hidden_dim,
                                                  'n_layers':n_layers,
                                                  'epochs':epochs,
                                                  'vector_name':vector_name,
                                                  'seq_size':seq_size,
                                                  'start_time':start_time,
                                                  'end_time':end_time,
                                                  'batch_size':batch_size,
                                                  'model_filepath':model_filepath},
                                          outputs=["model"])

    # deploying serving function
    serving_function = project.get_function("serving_stocks")
    # Mount it:
    serving_function.apply(mlrun.mount_v3io())
    # Set the topology and get the graph object:
    graph = serving_function.set_topology("flow", engine="async")
    # Build the serving graph; the model step loads the 'model' output of the training run:
    graph.to(handler='preprocess', name='reading_data')\
         .to(class_name="StocksModel", model_name='stocks_model', model_path=str(train_stocks_run.outputs['model']))\
         .to(handler='postprocess',name='postprocess').respond()

    # Set the desired requirements:
    serving_function.with_requirements(requirements=['yfinance','yahoo_fin'])
    # Deploy the serving function:
    mlrun.deploy_function("serving_stocks")

Overwriting src/workflow.py


In [6]:
import os

# get source help function
def get_source_path():
    """Return the source URL of ``project.tar.gz`` located in the current directory.

    When the working directory is ``/User`` or a sub-directory of it, the path
    is translated to ``v3io:///users/<V3IO_USERNAME>/<path below /User>/project.tar.gz``.
    This assumes ``/User`` is the running user's home inside the v3io ``users``
    container - confirm against the platform documentation.

    The user name is inserted directly after ``users`` so that sub-directories
    of ``/User`` resolve to the directory the archive is actually in. Only a
    leading ``/User`` path component is translated; ``/Users/...`` or a
    ``/User`` appearing mid-path is left untouched.

    For a working directory outside ``/User`` the plain local path of the
    archive is returned.

    :raises KeyError: if the ``V3IO_USERNAME`` environment variable is not set
    :return: source path string for ``project.tar.gz``
    """
    cwd = os.getcwd()
    user_name = os.environ['V3IO_USERNAME']
    archive_name = 'project.tar.gz'
    mount_root = '/User'

    if cwd == mount_root or cwd.startswith(mount_root + '/'):
        # Part of the path below the mount root, e.g. '' or '/demos/stocks'
        below_root = cwd[len(mount_root):]
        return 'v3io:///users/' + user_name + below_root + '/' + archive_name

    # Not under the user mount: no v3io translation applies
    return os.path.join(cwd, archive_name)

In [7]:
import shutil

# Pack the current directory into project.tar.gz (written to the current directory)
shutil.make_archive(base_name='project', format='gztar', root_dir='./')

# Point the project at the archive; the source is pulled when functions run
source_path = get_source_path()
project.set_source(source_path, pull_at_runtime=True)

# Register the workflow file together with its cron schedule
workflow_name = "stocks-workflow"
project.set_workflow(
    workflow_name,
    "src/workflow.py",
    schedule='0 0 * * */1',
)

# Persist the project definition
project.save()

<mlrun.projects.project.MlrunProject at 0x7f352e4e0210>

## Run the workflow

In [8]:
# Arguments handed to the workflow's pipeline function
workflow_arguments = {
    "vector_name":"stocks",
    "seq_size": 5,
    "batch_size": 1,
    "hidden_dim": 2,
    "n_layers": 1,
    "epochs": 3,
    "start_time":59,
    "end_time":0,
    "model_filepath":'./',
}

# Submit the registered workflow with scheduling enabled and watch its progress
project.run(
    name=workflow_name,
    arguments=workflow_arguments,
    watch=True,
    schedule=True,
)

> 2023-05-28 14:34:44,289 [info] executing workflow scheduling 'workflow-runner-workflow2' remotely with kfp engine
> 2023-05-28 14:34:44,296 [info] Storing function: {'name': 'workflow2', 'uid': 'cc767a810d5841dbba36718d54aabdea', 'db': 'http://mlrun-api:8080'}
> 2023-05-28 14:34:44,640 [info] task schedule created: {'schedule': '0 0 * * */1', 'project': 'stocks-avia', 'name': 'workflow2'}
