# Training stocks model

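This notebook defines a feature vector over the `stocks` and `news` feature sets and then trains a model on it with a local MLRun run.

Steps: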
* [Create feature vector](#Create-feature-vector)
* [Train locally using mlrun pytorch framework](#Train-locally-using-mlrun-pytorch-framework)

In [1]:
# Uncomment to install or upgrade PyTorch if it is missing from this environment:
# %pip install -U torch

In [2]:
import mlrun

# Load the "stocks" project (or create it if it does not exist yet), using
# "src/" as the project context directory.
# NOTE(review): user_project=True presumably makes the project name
# user-specific — confirm against the MLRun docs.
project = mlrun.get_or_create_project(
    name='stocks',
    context="src/",
    user_project=True,
)

> 2022-09-19 08:37:48,821 [info] loaded project stocks from MLRun DB


## Create feature vector

In [3]:
# MLRun's Feature Store API
import mlrun.feature_store as fstore

# Name under which the feature vector is saved; later cells retrieve it by this name
fv_name = 'stocks'

# Features that make up the vector: everything from `stocks`,
# plus the `sentiment` feature from `news`
features = [
    'stocks.*',
    'news.sentiment',
]

# Build the feature vector object, then persist it in the Feature Store
transactions_fv = fstore.FeatureVector(
    fv_name,
    features,
    description='stocks information',
)
transactions_fv.save()

In [4]:
# Retrieve the offline features of the saved feature vector for a recent
# time window and load them into a dataframe.
import datetime

# Length of the retrieval window, in days.
LOOKBACK_DAYS = 59

# Take a single "now" snapshot so both window bounds derive from the same instant.
end_time = datetime.datetime.now()
start_time = end_time - datetime.timedelta(days=LOOKBACK_DAYS)

# Rows are filtered to [start_time, end_time] using the 'Datetime' column.
train_dataset = fstore.get_offline_features(
    fv_name,
    start_time=start_time,
    end_time=end_time,
    entity_timestamp_column='Datetime',
)

# Materialize the result as a pandas dataframe and display it.
df = train_dataset.to_dataframe()
df

Unnamed: 0,Open,High,Low,Close,Volume,ticker2onehot_A,ticker2onehot_AAL,ticker2onehot_AAP,ticker2onehot_AAPL,ticker2onehot_ABBV,ticker2onehot_ABC,ticker2onehot_ABMD,ticker2onehot_ABT,ticker2onehot_ACN,ticker2onehot_ADBE,sentiment
0,127.410004,127.790001,127.150002,127.470001,44123,1,0,0,0,0,0,0,0,0,0,
1,155.389999,155.889999,155.236404,155.236404,3258876,0,0,0,1,0,0,0,0,0,0,
2,14.105000,14.280000,14.080000,14.080000,2437829,0,1,0,0,0,0,0,0,0,0,
3,148.470001,149.289993,148.470001,148.744995,137841,0,0,0,0,1,0,0,0,0,0,
4,143.470001,143.889999,142.850006,142.850006,24367,0,0,0,0,0,1,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30315,104.000000,104.000000,104.000000,104.000000,0,0,0,0,0,0,0,0,1,0,0,0.5
30316,129.889999,129.889999,129.889999,129.889999,0,1,0,0,0,0,0,0,0,0,0,1.0
30317,150.699997,150.699997,150.699997,150.699997,0,0,0,0,1,0,0,0,0,0,0,0.0
30318,166.800003,166.800003,166.800003,166.800003,0,0,0,1,0,0,0,0,0,0,0,1.0


## Train locally using mlrun pytorch framework 

In [5]:
# Wrap src/train_stocks.py as an MLRun function of kind "job" whose entry
# point is `handler`, using the mlrun/ml-models image.
fn = mlrun.code_to_function(
    'train_stocks_model',
    filename='src/train_stocks.py',
    handler='handler',
    kind='job',
    image='mlrun/ml-models',
)

# Apply MLRun's auto-mount modifier to the function.
fn = fn.apply(mlrun.auto_mount())

In [None]:
import os

# Value passed as `model_filepath`: the `src` directory under the current working directory.
model_dir = os.path.join(os.getcwd(), 'src')

# Parameters handed to the training handler.
# NOTE(review): start_time/end_time look like day offsets matching the
# 59-day window used when fetching the dataset above — confirm in src/train_stocks.py.
params = {
    'hidden_dim': 2,
    'n_layers': 1,
    'epochs': 3,
    'vector_name': 'stocks',
    'seq_size': 5,
    'start_time': 59,
    'end_time': 0,
    'batch_size': 1,
    'model_filepath': model_dir,
}

# Run the function with local=True and watch=True, passing the parameters above.
fn.run(params=params, local=True, watch=True)

> 2022-09-19 08:37:50,702 [info] starting run train-stocks-model-handler uid=2d35074cddd349a1858f561f695f8f2d DB=http://mlrun-api:8080
Epoch 1/3:
Training: 100% |██████████| 30260/30260 [00:59<00:00, 512.67Batch/s, MSELoss=0.011, accuracy=0.895]   
Validating: 100% |██████████| 30260/30260 [00:18<00:00, 1634.15Batch/s, MSELoss=0.000154, accuracy=0.988]

Summary:
+----------+----------------------+
| Metrics  |        Values        |
+----------+----------------------+
| MSELoss  | 0.004261314868927002 |
| accuracy |  0.9059755802154541  |
+----------+----------------------+

Epoch 2/3:
Training: 100% |██████████| 30260/30260 [00:59<00:00, 509.27Batch/s, MSELoss=0.000869, accuracy=0.971]
Validating: 100% |██████████| 30260/30260 [00:18<00:00, 1647.96Batch/s, MSELoss=0.00252, accuracy=0.95]  

Summary:
+----------+-----------------------+
| Metrics  |        Values         |
+----------+-----------------------+
| MSELoss  | 0.0005988021730445325 |
| accuracy |  0.9841306209564209   |
+--