# Stocks demo

## **Steps**
> * Run the `ingest_stocks` notebook
> * Run the `ingest_news` notebook
> * Create the feature vector
> * Create the pipeline

In [15]:
import mlrun

# Load the 'stocks' project (or create it if it does not exist yet),
# using the current directory as the project context.
project = mlrun.get_or_create_project(
    name='stocks',
    user_project=True,
    context="./",
)

> 2022-05-19 12:45:04,662 [info] loaded project stocks from MLRun DB


## Create feature vector

In [16]:
# MLRun's Feature Store API
import mlrun.feature_store as fstore

# Name under which the feature vector is saved; later cells refer to it by this name
fv_name = 'stocks'

# Features that make up the vector, written as '<feature set>.<feature>':
# every feature of 'stocks_data' plus the 'sentiment' feature of 'stocks_news'
features = [
    'stocks_data.*',
    'stocks_news.sentiment',
]

# Build the feature vector object and persist it in the Feature Store
transactions_fv = fstore.FeatureVector(
    fv_name,
    features,
    description='stocks information',
)
transactions_fv.save()

In [17]:
# Fetch the offline features of the last 7 days and preview them as a dataframe.
# (Nothing is written to parquet here - the result is only materialised in memory.)
import datetime

# Take a single "now" snapshot so that both edges of the window are derived
# from the same instant and the window is exactly 7 days wide.
end_time = datetime.datetime.now()
start_time = end_time - datetime.timedelta(days=7)

train_dataset = fstore.get_offline_features(
    fv_name,
    start_time=start_time,
    end_time=end_time,
    entity_timestamp_column='Datetime',
)

# Show the first rows only; never dump the whole frame into the notebook output
train_dataset.to_dataframe().head()

Unnamed: 0,Open,High,Low,Close,Volume,ticker2onehot_A,ticker2onehot_AAL,ticker2onehot_AAP,ticker2onehot_AAPL,sentiment
0,115.239998,115.43,115.07,115.260002,7618,1.0,0.0,0.0,0.0,
1,209.160294,209.419998,208.960007,209.020004,5910,0.0,0.0,1.0,0.0,
2,15.695,15.73,15.64,15.69,191455,0.0,1.0,0.0,0.0,
3,142.100006,142.264999,141.75,142.069,794298,0.0,0.0,0.0,1.0,
4,142.059998,142.399994,141.634995,141.800003,986422,0.0,0.0,0.0,1.0,


## Set up the project

In [18]:
import os

# Both project functions are built from the same source file
train_stocks_source = os.path.join(os.getcwd(), 'src', 'train_stocks.py')

# Batch job that runs the model training handler
project.set_function(
    train_stocks_source,
    name='train_stocks',
    kind='job',
    image='mlrun/ml-models',
)

# Serving function; kept as the last expression so the cell displays the runtime object
project.set_function(
    train_stocks_source,
    name='serving',
    kind='serving',
    image='mlrun/ml-models',
)

<mlrun.runtimes.serving.ServingRuntime at 0x7f6649a3e210>

## Write and save workflow

In [19]:
%%writefile workflow.py
import mlrun
from kfp import dsl

@dsl.pipeline(
    name="Stocks Prediction Pipeline",
    description="predicting stock prices using yahoo api with sentiment analysis"
)
def kfpipeline(vector_name:str,
               seq_size:int = 5,
               batch_size:int = 32,
               hidden_dim:int = 10,
               n_layers:int = 4,
               epochs:int = 3,
               start_time:int = 7,
               end_time:int = 0,
               model_filepath = './'
               ):
    """Train the stocks model and deploy a serving graph that loads it.

    All arguments are forwarded unchanged as parameters to the
    ``train_stocks`` handler of the ``train_stocks`` project function.
    Their exact meaning is defined by that handler, which is not part of
    this file (NOTE(review): ``start_time`` / ``end_time`` look like
    "days back from now" offsets - confirm against the handler).

    Args:
        vector_name: Name of the feature vector to train on.
        seq_size: Forwarded to the training handler.
        batch_size: Forwarded to the training handler.
        hidden_dim: Forwarded to the training handler.
        n_layers: Forwarded to the training handler.
        epochs: Forwarded to the training handler.
        start_time: Forwarded to the training handler.
        end_time: Forwarded to the training handler.
        model_filepath: Forwarded to the training handler.
    """
    project = mlrun.get_current_project()

    # Mount storage on the training function. The return value is not kept:
    # the run step below refers to the function by its project name.
    project.get_function('train_stocks').apply(mlrun.auto_mount())

    # Training step. The handler's `context` argument is not passed here:
    # no `context` name exists in this module (passing it raised NameError),
    # and MLRun supplies the run context to the handler by itself.
    preprocess_run = mlrun.run_function(name='train_stocks',
                                        function='train_stocks',
                                        handler='train_stocks',
                                        params={'hidden_dim':hidden_dim,
                                                'n_layers':n_layers,
                                                'epochs':epochs,
                                                'vector_name':vector_name,
                                                'seq_size':seq_size,
                                                'start_time':start_time,
                                                'end_time':end_time,
                                                'batch_size':batch_size,
                                                'model_filepath':model_filepath},
                                        outputs=["model"])

    # deploying serving function
    serving_function = project.get_function("serving")
    # Mount it:
    serving_function.apply(mlrun.mount_v3io())
    # Set the topology and get the graph object:
    graph = serving_function.set_topology("flow", engine="async")
    # Build the serving graph: a single PyTorch model server step that loads
    # the "model" output of the training step above.
    graph.to(class_name="mlrun.frameworks.pytorch.PyTorchModelServer",
             model_name='pytorch_stocks_model',
             model_path=str(preprocess_run.outputs["model"]))

    # Set the desired requirements:
    serving_function.with_requirements(requirements=['yfinance','yahoo_fin'])
    # Deploy the serving function:
    mlrun.deploy_function("serving")

Overwriting workflow.py


In [20]:
# Name under which the pipeline is registered in the project
workflow_name = "stocks_workflow"

# Attach the workflow file to the project under that name
project.set_workflow(
    workflow_name,
    "workflow.py",
)

# Persist the updated project definition
project.save()

In [21]:
# Arguments handed to the registered workflow's pipeline function
workflow_arguments = dict(
    vector_name="stocks",
    seq_size=5,
    batch_size=32,
    hidden_dim=10,
    n_layers=4,
    epochs=3,
    start_time=7,
    end_time=0,
    model_filepath=os.path.join(os.getcwd(), 'src'),
)

# Submit the workflow; kept as the last expression so the cell shows the run result
project.run(name=workflow_name, arguments=workflow_arguments)

> 2022-05-19 12:45:12,583 [info] submitted pipeline stocks-avia-stocks_workflow 2022-05-19 12-45-12 id=d1d43b0f-32f9-4852-8ac8-c66b9eebb59a
> 2022-05-19 12:45:12,584 [info] Pipeline run id=d1d43b0f-32f9-4852-8ac8-c66b9eebb59a, check UI for progress


d1d43b0f-32f9-4852-8ac8-c66b9eebb59a