In [1]:
import pandas as pd
import torch as th
from torch import nn
import os
import re
import yfinance as yf
import numpy as np
import warnings
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
warnings.filterwarnings('ignore')
            
import sys; sys.path.insert(0, '..')
from model.models import run_ensemble_strategy, TrainerConfig, Trainer
import data.preprocessing as pp
from utils.utils_analyze import get_price, check_index_dim
from utils.indicators import indicator_list, indicators_stock_stats
from env.BaseEnv import EnvConfig
from env.EnvStock_val import StockEnvValidation
from env.EnvStock_train import StockEnvTrain
from env.EnvStock_trade import StockEnvTrade
from policy.Policies import MlActorCriticPolicy
from config.config import indexes, ticker_list, category_dict, ticker_list_with_dict, tech_tickers
%matplotlib inline
%load_ext autoreload
%autoreload 2

### Do not run these two cells if you do not want to preprocess data from the beginning

In [2]:
# Global run settings.
START_DATE = '2011-01-01'  # not referenced by any other visible cell
SEED = 42069               # the hyper-parameter dicts hard-code the same value under 'seed'
# Correctly spelled normalisation flag.
NORMALIZE = True
# Backward-compatible alias: later cells (trainer.train / trainer.pretrain)
# still reference the original, misspelled name.
NORNAMLIZE = NORMALIZE

### Run these two cells if you do not want to preprocess data from the beginning

In [3]:
# Pick the compute device once. Falls back to the CPU when no CUDA device is
# visible, so this configuration cell also works on machines without a GPU
# (the value was previously hard-coded to "cuda").
DEVICE = "cuda" if th.cuda.is_available() else "cpu"

# Hyper-parameters for the SAC agent (stored under params['SAC']).
SAC_PARAMS = {
    'gamma': 0.995,
    'learning_rate': 0.016241092434986143,
    'batch_size': 64,
    'buffer_size': 1000000,
    'learning_starts': 10000,
    'gradient_steps': 8,
    'ent_coef': 'auto',
    'tau': 0.005,
    'target_entropy': 'auto',
    'policy_kwargs': {'log_std_init': -3.3486909298160947, 'net_arch': [256, 256]},
    'use_sde': False,
    'seed': 42069,
    'device': DEVICE,
}

# Hyper-parameters for the PPO agent (stored under params['PPO']).
PPO_PARAMS = {
    'n_steps': 512,
    'batch_size': 64,
    'gamma': 0.999,
    'learning_rate': 0.03691157097485833,
    'ent_coef': 0.08932887565089782,
    'clip_range': 0.1,
    'n_epochs': 5,
    'gae_lambda': 0.9,
    'max_grad_norm': 0.8,
    'vf_coef': 0.891060944163853,
    'policy_kwargs': {
        'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}],
        'activation_fn': th.nn.ReLU,
        'ortho_init': False,
    },
    'seed': 42069,
    'device': DEVICE,
}

# Settings for the recurrent PPO variant (stored under params['REC_PPO'] and
# handed to TrainerConfig as `policy_kwargs`).
REC_PARAMS = {
    'net_arch': [dict(pi=[256, 64, 64], vf=[256, 128])],
    'batch_size': 32,
    'activation_fn': nn.Hardswish,
    'learning_rate': 0.0369,
    'target_step': 100000,
    'lstm_hidden_size': 256,
    'n_lstm_layers': 2,
    'episode': 2,
    'seed': 31,
    'shared_lstm': True,
}

DATASET_VERSION = 'pretrainedTrial6'  # passed to Trainer as dataset_version
# Spelling kept as-is: the string is passed to Trainer and embedded in
# MODEL_NAME, which shows up in log lines and result file names.
model = 'reccurent_ppo'
MODEL_NAME = f'jimmy_{model}-v1'
POPULATION = 10      # NOTE(review): the Trainer cell passes a literal 10 instead of this constant
TIME_INTERVAL = 5    # not referenced by any other visible cell
START_TRADE = '2011-01-01'  # used as TrainerConfig start_date and by trainer.pretrain
# Stand-alone policy settings; not referenced by any other visible cell.
policy_kwargs = dict(
    activation_fn=th.nn.ReLU,
    net_arch=[dict(pi=[256, 128, 64], vf=[256, 128])],
)
TIME_FRAME = 0       # not referenced by any other visible cell

In [29]:
# Hyper-parameter set handed to TrainerConfig as `hparams`.
winner_params = dict(
    n_steps=8,
    batch_size=8,
    gamma=0.95,
    learning_rate=0.027651081986547765,
    ent_coef=0.009004220378275581,
    clip_range=0.3,
    n_epochs=1,
    gae_lambda=0.95,
    max_grad_norm=0.9,
    vf_coef=0.9311987337638572,
    policy_kwargs=dict(
        net_arch=[dict(pi=[256, 256], vf=[256, 256])],
        activation_fn=th.nn.Tanh,
        ortho_init=False,
    ),
    seed=42069,
)

In [30]:
# Registry of the per-algorithm hyper-parameter dicts, keyed by algorithm tag.
params = dict(
    SAC=SAC_PARAMS,
    PPO=PPO_PARAMS,
    REC_PPO=REC_PARAMS,
)

In [40]:
# Bundle the run settings that the Trainer receives through `config`.
tconfig = TrainerConfig(
    start_date=START_TRADE,
    rebalance_window=126,
    validation_window=126,
    hparams=winner_params,
    timesteps=50000,
    policy_kwargs=params['REC_PPO'],
    index_list=indexes,
)

In [41]:
# Build the trainer from the three environment classes (train / validation /
# trade) and the configuration assembled above.
trainer = Trainer(
    model,
    'MlpLstmPolicy',
    StockEnvTrain,
    StockEnvValidation,
    StockEnvTrade,
    dataset_version=DATASET_VERSION,
    population=10,
    config=tconfig,
    model_name=MODEL_NAME,
    debug=False,
)

{'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'seed': 42069}
Total number of stocks:   469
GPU available


In [None]:
# Start training on the dow50 dataset.
# NOTE(review): model_to_load is the literal string 'model_to_load', not a
# variable; presumably it is ignored while load=False. Confirm.
trainer.train(
    dataset='datasets/dow50.csv',
    timesteps=45550,
    load=False,
    model_to_load='model_to_load',
    normalize=NORNAMLIZE,
)

{'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'seed': 42069}
Training time  jimmy_reccurent_ppo-v1 :  10.91240090529124  minutes
-----------------
Total Reward:  -4.199516787193716
Total Trades:  3343
End total asset for validation 1021915.8873350304
Mean Reward: 0.5318949534761487
STD reward: 0.20680523013041852
-----------------
Reward for the period is -4.199516787193716
Agent #0 has better performance for the training period with total asset: 1021915.8873350304
{'batch_size': 128, 'ent_coef': 0.0722302359713579, 'gae_lambda': 0.98, 'max_grad_norm': 0.7, 'vf_coef': 0.2044074893925364, 'gamma': 0.99, 'learning_rate': 0.02013, 'clip_range': 0.1, 'policy_kwa

2022-10-05 17:55:15.422 ERROR   wandb.jupyter: Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Total reward for the the window is 5.176260463951621


wandb: Currently logged in as: kaanb (use `wandb login --relogin` to force relogin)
wandb: wandb version 0.13.3 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.4834016e+18
end_total_asset,1031074.43994
trade_reward,-0.14298


{'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'seed': 42069}
Training time  jimmy_reccurent_ppo-v1 :  10.938544849554697  minutes
-----------------
Total Reward:  5.326679207442794
Total Trades:  3864
End total asset for validation 1070500.7923188857
Mean Reward: 3.7390797765692696
STD reward: 0.391989511213904
-----------------
Reward for the period is 5.326679207442794
Agent #0 has better performance for the training period with total asset: 1070500.7923188857
{'batch_size': 32, 'ent_coef': 0.0165006032313881, 'gae_lambda': 1.0, 'max_grad_norm': 0.6, 'vf_coef': 0.8714603844285117, 'gamma': 0.99, 'learning_rate': 0.04536, 'clip_range': 0.2, 'policy_kwargs':

Saving to  results/account_value_trade_main_jimmy_reccurent_ppo-v1.csv
previous_total_asset:1031074.4399413111
end_total_asset:1140855.8147733626
total_asset_change:109781.37483205146
Total cash is: 0.07141730242866251$ and total holdings in stocks are 1140855.7433560602$
Buy & Hold strategy with previous total asset:  1156671.0253192375
Total Cost:  3944.9882072992277
Sum of rewards  102040.25338896608
Total trades:  746
Total buy orders are 397 and total sell orders are 349
Total days in turbulance:  0
Sharpe:  0.24287091946467665
Total reward for the the window is 20.87423899251735


wandb: wandb version 0.13.3 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.4992128e+18
end_total_asset,1140855.81477
trade_reward,0.3019


{'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'seed': 42069}
Training time  jimmy_reccurent_ppo-v1 :  11.081592718760172  minutes
-----------------
Total Reward:  12.757907731691375
Total Trades:  4069
End total asset for validation 1124561.241893951
Mean Reward: 6.04609273589449
STD reward: 0.31962057606989597
-----------------
Reward for the period is 12.757907731691375
Agent #0 has better performance for the training period with total asset: 1124561.241893951
{'batch_size': 32, 'ent_coef': 0.0165006032313881, 'gae_lambda': 1.0, 'max_grad_norm': 0.6, 'vf_coef': 0.8714603844285117, 'gamma': 0.99, 'learning_rate': 0.04536, 'clip_range': 0.2, 'policy_kwargs':

Saving to  results/account_value_trade_main_jimmy_reccurent_ppo-v1.csv
previous_total_asset:1140855.8147733626
end_total_asset:1320673.783592855
total_asset_change:179817.9688194925
Total cash is: 0.06833382737538951$ and total holdings in stocks are 1320673.7152590277$
Buy & Hold strategy with previous total asset:  1289590.2623749394
Total Cost:  3699.7725287903622
Sum of rewards  179560.93908675946
Total trades:  664
Total buy orders are 411 and total sell orders are 253
Total days in turbulance:  0
Sharpe:  0.412301730851117
Total reward for the the window is 23.226352156314533


wandb: wandb version 0.13.3 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.5149376e+18
end_total_asset,1320673.78359
trade_reward,0.21985


{'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'seed': 42069}
Training time  jimmy_reccurent_ppo-v1 :  11.276950486501057  minutes
-----------------
Total Reward:  10.973318701784592
Total Trades:  3301
End total asset for validation 1101788.8713999193
Mean Reward: 8.349754737713374
STD reward: 0.07133587739383405
-----------------
Reward for the period is 10.973318701784592
Agent #0 has better performance for the training period with total asset: 1101788.8713999193
{'batch_size': 32, 'ent_coef': 0.0165006032313881, 'gae_lambda': 1.0, 'max_grad_norm': 0.6, 'vf_coef': 0.8714603844285117, 'gamma': 0.99, 'learning_rate': 0.04536, 'clip_range': 0.2, 'policy_kwarg

Model destabilized with params:   Creating new params
Expected parameter loc (Tensor of shape (182, 50)) of distribution Normal(loc: torch.Size([182, 50]), scale: torch.Size([182, 50])) to satisfy the constraint Real(), but found invalid values:
tensor([[nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        ...,
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan]], device='cuda:0',
       grad_fn=<AddmmBackward0>)
{'batch_size': 256, 'ent_coef': 0.056434853773618204, 'gae_lambda': 0.92, 'max_grad_norm': 0.6, 'vf_coef': 0.17052744642968665, 'gamma': 0.995, 'learning_rate': 0.01203, 'clip_range': 0.2, 'policy_kwargs': {'net_arch': [{'pi': [512, 128, 64], 'vf': [256, 128]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'lstm_hidden_size': 64, 'n_lstm_layers': 2, 'shared_lstm': True}, 'seed': 66}
Model destab

wandb: wandb version 0.13.3 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.5307488e+18
end_total_asset,1368366.89836
trade_reward,-0.28976


{'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'seed': 42069}
Training time  jimmy_reccurent_ppo-v1 :  11.292533377806345  minutes
-----------------
Total Reward:  4.571677113533951
Total Trades:  4026
End total asset for validation 961652.4474983832
Mean Reward: -1.2147369057754986
STD reward: 0.036726062113217724
-----------------
Reward for the period is 4.571677113533951
Agent #0 has better performance for the training period with total asset: 961652.4474983832
{'batch_size': 32, 'ent_coef': 0.0165006032313881, 'gae_lambda': 1.0, 'max_grad_norm': 0.6, 'vf_coef': 0.8714603844285117, 'gamma': 0.99, 'learning_rate': 0.04536, 'clip_range': 0.2, 'policy_kwargs

Model destabilized with params:   Creating new params
Expected parameter loc (Tensor of shape (42, 50)) of distribution Normal(loc: torch.Size([42, 50]), scale: torch.Size([42, 50])) to satisfy the constraint Real(), but found invalid values:
tensor([[nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        ...,
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan]], device='cuda:0',
       grad_fn=<AddmmBackward0>)
{'batch_size': 32, 'ent_coef': 0.08949030960724885, 'gae_lambda': 0.99, 'max_grad_norm': 0.8, 'vf_coef': 0.5550723938974348, 'gamma': 0.999, 'learning_rate': 0.01809, 'clip_range': 0.4, 'policy_kwargs': {'net_arch': [{'pi': [256, 64], 'vf': [256, 128]}], 'activation_fn': <class 'torch.nn.modules.activation.ReLU'>, 'lstm_hidden_size': 128, 'n_lstm_layers': 2, 'shared_lstm': False}, 'seed': 66}
Model destabilized wi

wandb: wandb version 0.13.3 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.54656e+18
end_total_asset,1268078.87878
trade_reward,-0.37301


{'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'seed': 42069}
Training time  jimmy_reccurent_ppo-v1 :  11.441183376312257  minutes
-----------------
Total Reward:  3.3066852181218565
Total Trades:  3604
End total asset for validation 946022.0448098617
Mean Reward: -3.436165742389858
STD reward: 0.22017717882002288
-----------------
Reward for the period is 3.3066852181218565
Agent #0 has better performance for the training period with total asset: 946022.0448098617
{'batch_size': 32, 'ent_coef': 0.0165006032313881, 'gae_lambda': 1.0, 'max_grad_norm': 0.6, 'vf_coef': 0.8714603844285117, 'gamma': 0.99, 'learning_rate': 0.04536, 'clip_range': 0.2, 'policy_kwargs

Model destabilized with params:   Creating new params
Expected parameter loc (Tensor of shape (148, 50)) of distribution Normal(loc: torch.Size([148, 50]), scale: torch.Size([148, 50])) to satisfy the constraint Real(), but found invalid values:
tensor([[nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        ...,
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan],
        [nan, nan, nan,  ..., nan, nan, nan]], device='cuda:0',
       grad_fn=<AddmmBackward0>)
Sharpe Ratio:  0
Best params,  {'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'se

wandb: wandb version 0.13.3 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.562544e+18
end_total_asset,1625364.42668
trade_reward,-0.15806


{'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'seed': 42069}
Training time  jimmy_reccurent_ppo-v1 :  11.477150515715282  minutes
-----------------
Total Reward:  10.617439522058703
Total Trades:  2785
End total asset for validation 1086402.6381729594
Mean Reward: 2.9173723931569837
STD reward: 0.21750318506485158
-----------------
Reward for the period is 10.617439522058703
Agent #0 has better performance for the training period with total asset: 1086402.6381729594
{'batch_size': 32, 'ent_coef': 0.0165006032313881, 'gae_lambda': 1.0, 'max_grad_norm': 0.6, 'vf_coef': 0.8714603844285117, 'gamma': 0.99, 'learning_rate': 0.04536, 'clip_range': 0.2, 'policy_kwar

Training time  jimmy_reccurent_ppo-v1 :  9.674183817704519  minutes
-----------------
Total Reward:  11.498931767244358
Total Trades:  5187
End total asset for validation 1101564.5553527582
Mean Reward: 6.491802032117266
STD reward: 0.18869927477539417
-----------------
Reward for the period is 11.498931767244358
{'batch_size': 64, 'ent_coef': 0.08620927113950247, 'gae_lambda': 0.99, 'max_grad_norm': 0.9, 'vf_coef': 0.21291474043605962, 'gamma': 0.999, 'learning_rate': 0.00179, 'clip_range': 0.1, 'policy_kwargs': {'net_arch': [{'pi': [256, 64], 'vf': [256, 128]}], 'activation_fn': <class 'torch.nn.modules.activation.ReLU'>, 'lstm_hidden_size': 256, 'n_lstm_layers': 2, 'shared_lstm': False}, 'seed': 66}
Training time  jimmy_reccurent_ppo-v1 :  9.647233295440675  minutes
-----------------
Total Reward:  12.295317726093344
Total Trades:  5077
End total asset for validation 1125908.65033071
Mean Reward: 6.13210961947334
STD reward: 0.3086292793146059
-----------------
Reward for the period

wandb: wandb version 0.13.4 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.5782688e+18
end_total_asset,1721168.37811
trade_reward,-0.36393


{'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'seed': 42069}
Training time  jimmy_reccurent_ppo-v1 :  12.153349991639455  minutes
-----------------
Total Reward:  3.5776643837307347
Total Trades:  3084
End total asset for validation 1144249.817723965
Mean Reward: 1.8022172576876982
STD reward: 0.23508243341873425
-----------------
Reward for the period is 3.5776643837307347
Agent #0 has better performance for the training period with total asset: 1144249.817723965
{'batch_size': 32, 'ent_coef': 0.0165006032313881, 'gae_lambda': 1.0, 'max_grad_norm': 0.6, 'vf_coef': 0.8714603844285117, 'gamma': 0.99, 'learning_rate': 0.04536, 'clip_range': 0.2, 'policy_kwargs

Training time  jimmy_reccurent_ppo-v1 :  11.669483816623687  minutes
-----------------
Total Reward:  -0.9336072726873681
Total Trades:  4977
End total asset for validation 1032514.4737576311
Mean Reward: 3.1009679287672043
STD reward: 0.4054271369017695
-----------------
Reward for the period is -0.9336072726873681
{'batch_size': 32, 'ent_coef': 0.05407606788494298, 'gae_lambda': 0.9, 'max_grad_norm': 0.8, 'vf_coef': 0.3841585396863446, 'gamma': 0.999, 'learning_rate': 0.00591, 'clip_range': 0.3, 'policy_kwargs': {'net_arch': [{'pi': [256, 64], 'vf': [256, 128]}], 'activation_fn': <class 'torch.nn.modules.activation.Hardswish'>, 'lstm_hidden_size': 256, 'n_lstm_layers': 1, 'shared_lstm': False}, 'seed': 66}
Training time  jimmy_reccurent_ppo-v1 :  11.671817111968995  minutes
-----------------
Total Reward:  -0.9336072726873681
Total Trades:  4977
End total asset for validation 1032514.4737576311
Mean Reward: 1.5427406648057513
STD reward: 0.05425506784112577
-----------------
Reward f

wandb: wandb version 0.13.4 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.59408e+18
end_total_asset,1410461.24991
trade_reward,0.14003


{'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'seed': 42069}
Training time  jimmy_reccurent_ppo-v1 :  12.286200523376465  minutes
-----------------
Total Reward:  -3.9819639660418034
Total Trades:  3742
End total asset for validation 958234.1720387505
Mean Reward: -0.254363774927333
STD reward: 0.13284832050937745
-----------------
Reward for the period is -3.9819639660418034
Agent #0 has better performance for the training period with total asset: 958234.1720387505
{'batch_size': 32, 'ent_coef': 0.0165006032313881, 'gae_lambda': 1.0, 'max_grad_norm': 0.6, 'vf_coef': 0.8714603844285117, 'gamma': 0.99, 'learning_rate': 0.04536, 'clip_range': 0.2, 'policy_kwar

Training time  jimmy_reccurent_ppo-v1 :  11.883566570281982  minutes
-----------------
Total Reward:  -3.0405449941463303
Total Trades:  4964
End total asset for validation 871811.6463035373
Mean Reward: -1.119330525281839
STD reward: 0.04497723332463514
-----------------
Reward for the period is -3.0405449941463303
{'batch_size': 64, 'ent_coef': 0.09734441635468326, 'gae_lambda': 0.99, 'max_grad_norm': 0.3, 'vf_coef': 0.037058848982533865, 'gamma': 0.995, 'learning_rate': 0.04612, 'clip_range': 0.4, 'policy_kwargs': {'net_arch': [{'pi': [256, 64], 'vf': [256, 128]}], 'activation_fn': <class 'torch.nn.modules.activation.Hardswish'>, 'lstm_hidden_size': 64, 'n_lstm_layers': 1, 'shared_lstm': False}, 'seed': 66}
Model destabilized with params:   Creating new params
Expected parameter loc (Tensor of shape (64, 50)) of distribution Normal(loc: torch.Size([64, 50]), scale: torch.Size([64, 50])) to satisfy the constraint Real(), but found invalid values:
tensor([[nan, nan, nan,  ..., nan, na

wandb: wandb version 0.13.4 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.6098048e+18
end_total_asset,1577621.52436
trade_reward,-0.28136


{'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'seed': 42069}
Training time  jimmy_reccurent_ppo-v1 :  12.377450466156006  minutes
-----------------
Total Reward:  5.722559251473285
Total Trades:  3271
End total asset for validation 1148708.8555316483
Mean Reward: 2.396920195611892
STD reward: 0.32857539855802065
-----------------
Reward for the period is 5.722559251473285
Agent #0 has better performance for the training period with total asset: 1148708.8555316483
{'batch_size': 32, 'ent_coef': 0.0165006032313881, 'gae_lambda': 1.0, 'max_grad_norm': 0.6, 'vf_coef': 0.8714603844285117, 'gamma': 0.99, 'learning_rate': 0.04536, 'clip_range': 0.2, 'policy_kwargs'

-----------------
Total Reward:  12.35814853780903
Total Trades:  3159
End total asset for validation 1156534.605535051
Mean Reward: 3.3837635717354715
STD reward: 0.22685002741384594
-----------------
Reward for the period is 12.35814853780903
{'batch_size': 128, 'ent_coef': 0.09012089395864403, 'gae_lambda': 0.98, 'max_grad_norm': 0.5, 'vf_coef': 0.12061656519500952, 'gamma': 0.995, 'learning_rate': 0.0118, 'clip_range': 0.2, 'policy_kwargs': {'net_arch': [{'pi': [256, 128, 64], 'vf': [64]}], 'activation_fn': <class 'torch.nn.modules.activation.ReLU'>, 'lstm_hidden_size': 128, 'n_lstm_layers': 2, 'shared_lstm': False}, 'seed': 66}
Training time  jimmy_reccurent_ppo-v1 :  10.135783243179322  minutes
-----------------
Total Reward:  8.230115237645805
Total Trades:  4994
End total asset for validation 1103057.786502553
Mean Reward: 3.6669952860102057
STD reward: 0.3433416302374492
-----------------
Reward for the period is 8.230115237645805
{'batch_size': 128, 'ent_coef': 0.090120893958

In [20]:

# Load the pre-training dataset and set its `turbulence` column to 0 on every row.
pretrain_set = pd.read_csv('datasets/pretrain_set3.csv').assign(turbulence=0)

In [None]:
# Run the trainer's pre-training step; the returned value is split on '/' in
# the next cell, so it is expected to be a path-like string.
# NOTE(review): `sn` is not defined in any visible cell. Presumably it should
# be the `pretrain_set` frame loaded in the previous cell; confirm before running.
pretrain = trainer.pretrain(sn, '2011-05-01', START_TRADE, NORNAMLIZE)

In [67]:
# Take the second '/'-separated component of the value returned by pretrain.
pretrain_parts = pretrain.split('/')
model_to_load = pretrain_parts[1]

In [None]:
# Invoke the trainer's clustering step with the settings below.
trainer.cluster(
    False,
    period=365,
    number_of_clusters=5,
    stocks_per_cluster=5,
)

In [60]:
# NOTE(review): the path has an empty file stem ('./datasets/.csv'), so this
# looks like an unfinished scratch cell; the recorded TypeError output does
# not appear to come from this statement. Confirm the intended file or remove
# the cell.
m = pd.read_csv('./datasets/.csv')

TypeError: list indices must be integers or slices, not tuple