# Stocks demo

## **Steps**
> * Run the `ingest_stocks` notebook
> * Run the `ingest_news` notebook
> * Create the feature vector
> * Create the pipeline

In [1]:
# !/User/align_mlrun.sh

In [2]:
import mlrun
# Get (or create) the 'stocks' project, using the current directory as its context
project = mlrun.get_or_create_project(
    name='stocks',
    user_project=True,
    context="./",
)

> 2022-03-30 13:47:00,392 [info] loaded project stocks from MLRun DB


## Create feature vector

In [3]:
# MLRun's Feature Store API
import mlrun.feature_store as fstore

# Name under which the feature vector is saved and later referenced
fv_name = 'stocks'

# Features to include in the vector: everything from 'stocks_data'
# plus the 'sentiment' feature from 'stocks_news'
features = [
    'stocks_data.*',
    'stocks_news.sentiment',
]

# Build the feature vector and persist it in the Feature Store
transactions_fv = fstore.FeatureVector(
    name=fv_name,
    features=features,
    description='stocks information',
)
transactions_fv.save()

In [4]:
# Retrieve the offline feature vector for the last week and preview it as a dataframe.
# (No target is passed to get_offline_features, so nothing is written to parquet here.)
import datetime

# Length of the look-back window, in days
lookback_days = 7

# Take "now" once so both ends of the window are measured from the same instant
end_time = datetime.datetime.now()
start_time = end_time - datetime.timedelta(days=lookback_days)

train_dataset = fstore.get_offline_features(fv_name,
                                            start_time=start_time,
                                            end_time=end_time,
                                            entity_timestamp_column='Datetime')
train_dataset.to_dataframe().head()

Unnamed: 0,Open,High,Low,Close,Volume,ticker2onehot_A,ticker2onehot_AAL,ticker2onehot_AAP,ticker2onehot_AAPL,sentiment
0,133.619995,133.891998,133.520004,133.850006,7680,1,0,0,0,
1,211.074997,211.460007,211.039993,211.460007,5255,0,0,1,0,
2,16.205,16.2465,16.184999,16.2465,210345,0,1,0,0,
3,171.970001,172.350006,171.910004,172.330002,823502,0,0,0,1,
4,211.600006,211.679993,211.490005,211.570007,4962,0,0,1,0,


## Set up the project

In [5]:
import os

# Directory that holds the project's source files
src_dir = os.path.join(os.getcwd(), 'src')

# Registration of a standalone preprocess job is currently disabled:
# project.set_function(os.path.join(src_dir, 'preprocess.py'),
#                      name='preprocess', kind='job', handler='preprocess', image='mlrun/ml-models')

# Register the model training function as a job
project.set_function(
    os.path.join(src_dir, 'train_stocks.py'),
    name='train_stocks',
    kind='job',
    image='mlrun/ml-models',
)

# Register the serving function
project.set_function(
    os.path.join(src_dir, 'serving.py'),
    name='serving',
    kind='serving',
    image='mlrun/ml-models',
)


<mlrun.runtimes.serving.ServingRuntime at 0x7fef079d2c50>

## Write and save workflow

In [6]:
%%writefile workflow.py
import mlrun
from kfp import dsl

@dsl.pipeline(
    name="Stocks Prediction Pipeline",
    description="predicting stock prices using yahoo api with sentiment analysis"
)
def kfpipeline(vector_name:str,
               seq_size:int = 5,
               batch_size:int = 32,
               hidden_dim:int = 10,
               n_layers:int = 4,
               epochs:int = 3,
               start_time:int = 7,
               end_time:int = 0,
               ):
    """Train the stocks model and deploy a serving graph that uses it.

    All arguments are forwarded unchanged as parameters to the
    'train_stocks' handler of the 'train_stocks' project function.

    :param vector_name: name of the feature vector to train on
    :param seq_size:    forwarded to the training handler
    :param batch_size:  forwarded to the training handler
    :param hidden_dim:  forwarded to the training handler
    :param n_layers:    forwarded to the training handler
    :param epochs:      forwarded to the training handler
    :param start_time:  forwarded to the training handler
                        (presumably a days-back offset for the data window - TODO confirm)
    :param end_time:    forwarded to the training handler
                        (presumably a days-back offset for the data window - TODO confirm)
    """
    project = mlrun.get_current_project()

    # Apply the auto-detected mount to the training function before running it
    project.get_function('train_stocks').apply(mlrun.auto_mount())

    # NOTE: 'context' is intentionally NOT passed in params. No such name exists
    # in this module (it raised NameError), and MLRun supplies the run context
    # to the handler on its own.
    train_run = mlrun.run_function(name='train_stocks',
                                   function='train_stocks',
                                   handler='train_stocks',
                                   params={'hidden_dim':hidden_dim,
                                           'n_layers':n_layers,
                                           'epochs':epochs,
                                           'vector_name':vector_name,
                                           'seq_size':seq_size,
                                           'start_time':start_time,
                                           'end_time':end_time,
                                           'batch_size':batch_size},
                                   outputs=["model"])

    # deploying serving function
    serving_function = project.get_function("serving")
    # Mount it:
    serving_function.apply(mlrun.mount_v3io())
    # Set the topology and get the graph object:
    graph = serving_function.set_topology("flow", engine="async")
    # Build the serving graph: a preprocess step followed by the model server,
    # which loads the model produced by the training step above
    graph.to(handler="preprocess", name="preprocess")\
         .to(class_name="mlrun.frameworks.pytorch.PyTorchModelServer", model_name='pytorch_stocks_model', model_path=str(train_run.outputs["model"]))

    # Set the desired requirements:
    serving_function.with_requirements(requirements=['yfinance','yahoo_fin'])
    # Deploy the serving function:
    mlrun.deploy_function("serving")

Overwriting workflow.py


In [7]:
# Name used both to register the workflow and to run it later
workflow_name = "stocks_workflow"

# Attach the workflow file written above to the project under that name
project.set_workflow(name=workflow_name, workflow_path="workflow.py")

# Persist the project
project.save()

In [8]:
# Arguments passed to the registered workflow
workflow_arguments = dict(
    vector_name="stocks",
    seq_size=5,
    batch_size=32,
    hidden_dim=10,
    n_layers=4,
    epochs=3,
    start_time=7,
    end_time=0,
)

# Submit the workflow; the call stays last so its result is displayed by the cell
project.run(name=workflow_name, arguments=workflow_arguments)

> 2022-03-30 13:47:01,810 [info] submitted pipeline stocks-dani-stocks_workflow 2022-03-30 13-47-01 id=14a439a3-6264-481f-bc40-80e9efaf2754
> 2022-03-30 13:47:01,811 [info] Pipeline run id=14a439a3-6264-481f-bc40-80e9efaf2754, check UI for progress


14a439a3-6264-481f-bc40-80e9efaf2754