# Training stocks model

Steps
* [Create feature vector](#Create-feature-vector)
* [Train locally using mlrun pytorch framework](#Train-locally-using-mlrun-pytorch-framework)

In [1]:
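# Install prerequisites
# The prerequisites listed for this notebook are two packages, yfinance and yahoo_fin, used for uploading stocks data.
# NOTE: the package list passed to install_missing_packages below contains only torch.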
import importlib.util
import IPython

def install_missing_packages(packages):
    install_flag = False
    for package in packages:
        spec = importlib.util.find_spec(package)
        if spec is None:
            %pip install {package}
            install_flag = True
        else:     
            print("package {} installed".format(package))
        if install_flag:            
            print ("restarting kernerl due to package install")
            IPython.Application.instance().kernel.do_shutdown(True)
# For illustrative purposes: torch is the only package checked (and installed if missing).
packages = ['torch']
install_missing_packages(packages)

In [2]:
import mlrun

# Load the 'stocks' project (creating it if it does not exist yet), using "src/" as its context.
# With user_project=True the project name is per-user (the run output shows "stocks-<username>").
project = mlrun.get_or_create_project(
    name='stocks',
    user_project=True,
    context="src/",
)

> 2022-11-07 07:39:47,614 [info] loaded project stocks from MLRun DB


## Create feature vector

In [3]:
# MLRun's Feature Store API
import mlrun.feature_store as fstore

# Name under which the feature vector is saved and later referenced
fv_name = 'stocks'

# Features that make up the vector: everything from 'stocks' plus 'sentiment' from 'news'
features = [
    'stocks.*',
    'news.sentiment',
]

# Build the feature vector object from the name and feature list
transactions_fv = fstore.FeatureVector(
    fv_name,
    features,
    description='stocks information',
)

# Persist the feature vector in the Feature Store
transactions_fv.save()

In [4]:
# Fetch the feature vector's offline features for the last 59 days and load them as a DataFrame
import datetime

# Take one "now" snapshot so both ends of the window share the same reference time
end_time = datetime.datetime.now()
start_time = end_time - datetime.timedelta(days=59)

train_dataset = fstore.get_offline_features(
    fv_name,
    start_time=start_time,
    end_time=end_time,
    entity_timestamp_column='Datetime',
)
df = train_dataset.to_dataframe()
df

Unnamed: 0,Open,High,Low,Close,Volume,ticker2onehot_AAL,ticker2onehot_ACN,ticker2onehot_ACGL,ticker2onehot_ABMD,ticker2onehot_ABT,ticker2onehot_ABC,ticker2onehot_ABBV,ticker2onehot_AAP,ticker2onehot_A,ticker2onehot_AAPL,sentiment
0,46.520000,46.584999,46.419998,46.490002,0,0,0,1,0,0,0,0,0,0,0,
1,289.399994,290.070007,289.079987,289.679993,40693,0,1,0,0,0,0,0,0,0,0,
2,155.470001,155.580002,154.919998,154.990005,3802356,0,0,0,0,0,0,0,0,0,1,
3,135.979996,136.289993,135.600006,136.085007,48073,0,0,0,0,0,0,0,0,1,0,
4,147.539993,147.996994,147.539993,147.589996,11140,0,0,0,0,0,1,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31132,261.160004,261.160004,261.160004,261.160004,0,0,1,0,0,0,0,0,0,0,0,0.0
31133,161.639999,161.639999,161.639999,161.639999,0,0,0,0,0,0,1,0,0,0,0,1.0
31134,373.989990,373.989990,373.989990,373.989990,0,0,0,0,1,0,0,0,0,0,0,1.0
31135,56.860001,56.860001,56.860001,56.860001,0,0,0,1,0,0,0,0,0,0,0,0.0


## Train locally using mlrun pytorch framework 

In [5]:
# Wrap src/train_stocks.py as an MLRun job function (entry point: `handler`)
# and apply MLRun's auto-mount modifier to it.
fn = (
    mlrun.code_to_function(
        'train_stocks_model',
        kind='job',
        image='mlrun/ml-models',
        handler='handler',
        filename='src/train_stocks.py',
    )
    .apply(mlrun.auto_mount())
)

In [6]:
import os

# Parameters handed to the training handler.
# NOTE(review): start_time/end_time presumably mirror the 59-days-back-to-now window
# used when fetching the offline features above; confirm against src/train_stocks.py.
params = {
    'hidden_dim': 2,
    'n_layers': 1,
    'epochs': 1,
    'vector_name': 'stocks',
    'seq_size': 5,
    'start_time': 59,
    'end_time': 0,
    'batch_size': 1,
    # "src" directory under the current working directory
    'model_filepath': os.path.join(os.getcwd(), 'src'),
}

# Run the function locally with the parameters above
fn.run(params=params, local=True, watch=True)

> 2022-11-07 07:39:50,423 [info] starting run train-stocks-model-handler uid=fffeead39d1048dc80a68ec427b3ca4a DB=http://mlrun-api:8080
Epoch 1/1:
Training: 100% |██████████| 31077/31077 [00:49<00:00, 627.62Batch/s, MSELoss=0.0152, accuracy=0.877]  
Validating: 100% |██████████| 31077/31077 [00:16<00:00, 1834.93Batch/s, MSELoss=0.0005, accuracy=0.978]  

Summary:
+----------+----------------------+
| Metrics  |        Values        |
+----------+----------------------+
| MSELoss  | 0.008913171477615833 |
| accuracy |  0.9918268918991089  |
+----------+----------------------+



project,uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
stocks-dani,...27b3ca4a,0,Nov 07 07:39:50,completed,train-stocks-model-handler,v3io_user=danikind=owner=danihost=jupyter-dani-746f87df7-gfl9n,,hidden_dim=2n_layers=1epochs=1vector_name=stocksseq_size=5start_time=59end_time=0batch_size=1model_filepath=/User/test/demos/stocks-prediction/src,hidden_dim=2n_layers=1epochs=1vector_name=stocksseq_size=5start_time=59end_time=0batch_size=1model_filepath=/User/test/demos/stocks-prediction/srclr=0.0001training_MSELoss=0.01524735614657402training_accuracy=0.8765198141336441validation_MSELoss=0.008913171477615833validation_accuracy=0.9918268918991089,training_MSELoss.htmltraining_accuracy.htmlvalidation_MSELoss.htmlvalidation_accuracy.htmlMSELoss_summary.htmlaccuracy_summary.htmllr_values.htmlstocks_model_custom_objects_map.jsonstocks_model_custom_objects.zipmodel





> 2022-11-07 07:42:54,811 [info] run executed, status=completed


<mlrun.model.RunObject at 0x7f9ab0ada290>