# Training stocks model

Steps
* [Create feature vector](#Create-feature-vector)
* [Train locally using mlrun pytorch framework](#Train-locally-using-mlrun-pytorch-framework)

In [1]:
# Install prerequisites.
# The stocks demo relies on two packages, yfinance and yahoo_fin, for uploading stocks data.
# This cell only checks (and installs, if missing) the packages listed in `packages` below.
import importlib.util
import IPython

def install_missing_packages(packages):
    """Install every package that cannot be imported, then restart the kernel once.

    Args:
        packages: iterable of package names. Each name is used both as the
            import name looked up with ``importlib.util.find_spec`` and as
            the name handed to ``pip install``.

    Returns:
        None. Progress is reported with ``print``.

    Side effects:
        Runs the ``%pip install`` line magic for each missing package. If at
        least one package was installed, calls ``kernel.do_shutdown(True)``
        on the running IPython application after the whole list was handled.
    """
    install_flag = False
    for package in packages:
        if importlib.util.find_spec(package) is None:
            # Equivalent to the `%pip install {package}` line magic, spelled
            # as a regular call so the function body is plain Python.
            IPython.get_ipython().run_line_magic("pip", f"install {package}")
            install_flag = True
        else:
            print("package {} installed".format(package))

    # Restart only after the whole list has been processed: doing it inside
    # the loop would request a shutdown right after the first install and
    # again on every following iteration.
    if install_flag:
        print("restarting kernel due to package install")
        IPython.Application.instance().kernel.do_shutdown(True)
# Packages to verify before running the notebook (torch is listed for illustrative purposes).
packages = ['torch']
install_missing_packages(packages)

package torch installed


In [2]:
import mlrun
# Load the 'stocks' project, or create it if it does not exist yet,
# using the current directory as the project context.
project = mlrun.get_or_create_project(
    name='stocks',
    context="./",
    user_project=True,
)

> 2022-11-08 11:38:26,160 [info] loaded project stocks from MLRun DB


## Create feature vector

In [3]:
# MLRun's Feature Store API
import mlrun.feature_store as fstore

# Features that make up the vector
features = ['stocks.*', 'news.sentiment']

# Name the feature vector is saved under (reused later to fetch it)
fv_name = 'stocks'

# Build the feature vector from the selected features and store it in the Feature Store
transactions_fv = fstore.FeatureVector(
    fv_name,
    features,
    description='stocks information',
)
transactions_fv.save()

In [4]:
# Get the offline feature vector for the last 59 days as a dataframe.
# (Nothing is written to disk here; the result is only loaded into memory.)
import datetime

# Read the clock once so both ends of the window share the same reference
# time and the window is exactly 59 days long.
end_time = datetime.datetime.now()
start_time = end_time - datetime.timedelta(days=59)

# NOTE(review): get_offline_features is called on the class with the vector
# name as first argument — confirm this matches the installed MLRun version.
train_dataset = fstore.FeatureVector.get_offline_features(
    fv_name,
    start_time=start_time,
    end_time=end_time,
    timestamp_for_filtering='Datetime',
)
df = train_dataset.to_dataframe()
df

Unnamed: 0,Open,High,Low,Close,Volume,ticker2onehot_A,ticker2onehot_ABBV,ticker2onehot_ABT,ticker2onehot_ABMD,ticker2onehot_AAPL,ticker2onehot_AAP,ticker2onehot_ACN,ticker2onehot_ABC,ticker2onehot_ACGL,ticker2onehot_AAL,sentiment
0,14.370000,14.575000,14.370000,14.525000,1391313,0,0,0,0,0,0,0,0,0,1,
1,159.634201,160.369995,159.509995,160.330002,4839065,0,0,0,0,1,0,0,0,0,0,
2,137.529999,137.889999,137.229996,137.869995,30482,1,0,0,0,0,0,0,0,0,0,
3,108.779999,109.168999,108.599998,108.889999,98389,0,0,1,0,0,0,0,0,0,0,
4,182.000000,182.587097,181.619995,182.509995,0,0,0,0,0,0,1,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31685,162.009995,162.009995,162.009995,162.009995,0,0,0,0,0,0,0,0,1,0,0,0.0
31686,138.029999,138.029999,138.029999,138.029999,0,1,0,0,0,0,0,0,0,0,0,1.0
31687,138.919998,138.919998,138.919998,138.919998,0,0,0,0,0,1,0,0,0,0,0,
31688,148.100006,148.100006,148.100006,148.100006,0,0,1,0,0,0,0,0,0,0,0,0.5


## Train locally using mlrun pytorch framework 

In [5]:
# Wrap src/train_stocks.py as an MLRun job function whose entry point is `handler`
fn = mlrun.code_to_function(
    'train-stocks',
    filename='src/train_stocks.py',
    handler='handler',
    kind='job',
    image='mlrun/mlrun',
    requirements=['torch', 'tensorboard', 'tqdm'],
)
fn.deploy()  # building the image

In [6]:
import os

# Parameters handed to the training run
params = dict(
    hidden_dim=2,
    n_layers=1,
    epochs=1,
    vector_name='stocks',
    seq_size=5,
    start_time=59,
    end_time=0,
    batch_size=1,
    model_filepath='./',
)

# Submit the run (not local) and watch it until it finishes
fn.run(params=params, watch=True, local=False)

> 2022-11-08 11:38:28,425 [info] starting run train-stocks-model-handler uid=95a1ce4c0c8344268c95a98286da8822 DB=http://mlrun-api:8080
Epoch 1/1:
Training: 100% |██████████| 31630/31630 [00:51<00:00, 619.01Batch/s, MSELoss=7.55e-5, accuracy=0.991] 
Validating: 100% |██████████| 31630/31630 [00:17<00:00, 1793.65Batch/s, MSELoss=8.33e-5, accuracy=0.991] 

Summary:
+----------+-----------------------+
| Metrics  |        Values         |
+----------+-----------------------+
| MSELoss  | 0.0017828106647357345 |
| accuracy |  0.9958229064941406   |
+----------+-----------------------+



project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
stocks-dani,...86da8822,0,Nov 08 11:38:28,completed,train-stocks-model-handler,v3io_user=danikind=owner=danihost=jupyter-dani-746f87df7-gfl9n,,hidden_dim=2n_layers=1epochs=1vector_name=stocksseq_size=5start_time=59end_time=0batch_size=1model_filepath=/User/test/demos/stocks-prediction/src,hidden_dim=2n_layers=1epochs=1vector_name=stocksseq_size=5start_time=59end_time=0batch_size=1model_filepath=/User/test/demos/stocks-prediction/srclr=0.0001training_MSELoss=7.546275446657091e-05training_accuracy=0.9913130700588226validation_MSELoss=0.0017828106647357345validation_accuracy=0.9958229064941406,training_MSELoss.htmltraining_accuracy.htmlvalidation_MSELoss.htmlvalidation_accuracy.htmlMSELoss_summary.htmlaccuracy_summary.htmllr_values.htmlstocks_model_custom_objects_map.jsonstocks_model_custom_objects.zipmodel





> 2022-11-08 11:41:46,173 [info] run executed, status=completed


<mlrun.model.RunObject at 0x7f695ab767d0>