In [1]:
import pandas as pd
import torch as th
import os
import re
import yfinance as yf
import numpy as np
import warnings
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
warnings.filterwarnings('ignore')
            
import sys; sys.path.insert(0, '..')
from model.models import run_ensemble_strategy, TrainerConfig, Trainer
import data.preprocessing as pp
from utils.utils_analyze import get_price, check_index_dim
from utils.indicators import indicator_list, indicators_stock_stats
from env.BaseEnv import EnvConfig
from env.EnvStock_val import StockEnvValidation
from env.EnvStock_train import StockEnvTrain
from env.EnvStock_trade import StockEnvTrade
from policy.Policies import MlActorCriticPolicy
from config.config import indexes, ticker_list, category_dict, ticker_list_with_dict, tech_tickers
%matplotlib inline
%load_ext autoreload
%autoreload 2

### Do not run these two cells if you do not want to preprocess the data from the beginning

In [16]:
# Run-wide constants.
START_DATE = '2011-01-01'
SEED = 42069
# Correctly spelled normalisation flag. The misspelled `NORNAMLIZE` is kept as
# an alias because later cells still refer to it by that name.
NORMALIZE = True
NORNAMLIZE = NORMALIZE

### Run these two cells if you do not want to preprocess the data from the beginning

In [32]:
# Hyperparameter set for the SAC algorithm (collected into `params` further down).
SAC_PARAMS = {
    'gamma': 0.995,
    'learning_rate': 0.016241092434986143,
    'batch_size': 64,
    'buffer_size': 1000000,
    'learning_starts': 10000,
    'gradient_steps': 8,
    'ent_coef': 'auto',
    'tau': 0.005,
    'target_entropy': 'auto',
    'policy_kwargs': {'log_std_init': -3.3486909298160947, 'net_arch': [256, 256]},
    'use_sde': False,
    'seed': 42069,
    # Request the GPU only when one is actually present, so the notebook
    # still runs on CPU-only machines instead of asking for a missing device.
    'device': 'cuda' if th.cuda.is_available() else 'cpu',
}


# Hyperparameter set for the PPO algorithm (collected into `params` further down).
PPO_PARAMS = {
    'n_steps': 512,
    'batch_size': 64,
    'gamma': 0.999,
    'learning_rate': 0.03691157097485833,
    'ent_coef': 0.08932887565089782,
    'clip_range': 0.1,
    'n_epochs': 5,
    'gae_lambda': 0.9,
    'max_grad_norm': 0.8,
    'vf_coef': 0.891060944163853,
    'policy_kwargs': {
        'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}],
        'activation_fn': th.nn.ReLU,
        'ortho_init': False,
    },
    'seed': 42069,
    # Request the GPU only when one is actually present, so the notebook
    # still runs on CPU-only machines instead of asking for a missing device.
    'device': 'cuda' if th.cuda.is_available() else 'cpu',
}
# Experiment identity: dataset tag, algorithm key and the derived run name.
DATASET_VERSION = 'pretrainedTrial6'
model = 'PPO'
MODEL_NAME = 'jim_' + model + '-v31'

# Miscellaneous run settings.
POPULATION = 10
TIME_INTERVAL = 5
START_TRADE = '2011-01-01'

# Network layout handed to TrainerConfig: separate policy (pi) and value (vf)
# layer stacks, both using ReLU activations.
policy_kwargs = {
    'activation_fn': th.nn.ReLU,
    'net_arch': [{'pi': [256, 128, 64], 'vf': [256, 128]}],
}
TIME_FRAME = 0

In [34]:
# PPO hyperparameters passed to TrainerConfig as `hparams` below.
winner_params = {
    'n_steps': 8,
    'batch_size': 8,
    'gamma': 0.95,
    'learning_rate': 0.027651081986547765,
    'ent_coef': 0.009004220378275581,
    'clip_range': 0.3,
    'n_epochs': 1,
    'gae_lambda': 0.95,
    'max_grad_norm': 0.9,
    'vf_coef': 0.9311987337638572,
    'policy_kwargs': {
        'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}],
        'activation_fn': th.nn.Tanh,
        'ortho_init': False,
    },
    'seed': 42069,
    # Request the GPU only when one is actually present, so the notebook
    # still runs on CPU-only machines instead of asking for a missing device.
    'device': 'cuda' if th.cuda.is_available() else 'cpu',
}

In [35]:
# Lookup table from algorithm key to its hyperparameter set.
params = dict(SAC=SAC_PARAMS, PPO=PPO_PARAMS)

In [36]:
# Trainer configuration: 242-row rebalance and validation windows, the
# `winner_params` hyperparameters, and the shared policy network layout.
tconfig = TrainerConfig(
    start_date=START_TRADE,
    rebalance_window=242,
    validation_window=242,
    hparams=winner_params,
    timesteps=50000,
    policy_kwargs=policy_kwargs,
    index_list=indexes,
)

In [37]:
# Build the trainer for the selected algorithm. The three environment classes
# are passed positionally in the order: train, validation, trade.
stage_envs = (StockEnvTrain, StockEnvValidation, StockEnvTrade)
trainer = Trainer(
    model,
    'MlpPolicy',
    *stage_envs,
    dataset_version=DATASET_VERSION,
    population=1,
    config=tconfig,
    model_name=MODEL_NAME,
    debug=False,
)

Total number of stocks:   469
GPU available


In [38]:
# Launch training on the prepared dataset.
# NOTE(review): 'model_to_load' is passed as a literal string, not the variable
# of the same name that a later cell assigns; presumably ignored while
# load=False -- confirm.
# NOTE(review): timesteps here (50500) differs from the 50000 given to
# TrainerConfig -- confirm which value is meant to apply.
train_options = {
    'dataset': 'datasets/downew30.csv',
    'timesteps': 50500,
    'load': False,
    'model_to_load': 'model_to_load',
    'normalize': NORNAMLIZE,
}
trainer.train(**train_options)

[34m[1mwandb[0m: wandb version 0.12.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

Training time  jim_PPO-v31 :  5.161948092778524  minutes
-----------------
Total Reward:  -1.2527346042916179
Total Trades:  3940
End total asset for validation 1114740.6625499846
Mean Reward: 2.948597278111265
STD reward: 1.7208016180661836
-----------------
Total reward at validation for Reccurent PPO -1.2527346042916179
Sharpe Ratio:  0
Best params,  {'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'seed': 42069, 'device': 'cuda'}
180
previous_total_asset:1000000
end_total_asset:1112969.7936781154
total_asset_change:112969.79367811535
Total cash is: 1.6565347703257167$ and total holdings in stocks are 1112968.137143345$
Buy & Hold strategy with previous tota

[34m[1mwandb[0m: wandb version 0.12.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.5047424e+18
end_total_asset,1112969.79368
trade_reward,0.1765


Training time  jim_PPO-v31 :  5.5763593475023905  minutes
-----------------
Total Reward:  0.08103596698492765
Total Trades:  3996
End total asset for validation 1177260.2103577538
Mean Reward: -8.263389944983647
STD reward: 3.2968650298190685
-----------------
Total reward at validation for Reccurent PPO 0.08103596698492765
Sharpe Ratio:  0
Best params,  {'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'seed': 42069, 'device': 'cuda'}
603
180
Saving to  results/account_value_trade_main_jim_PPO-v31.csv
previous_total_asset:1112969.7936781154
end_total_asset:1299789.7413149024
total_asset_change:186819.94763678708
Total cash is: 24.23409209873111$ and total hold

[34m[1mwandb[0m: wandb version 0.12.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.527552e+18
end_total_asset,1299789.74131
trade_reward,0.07098


Training time  jim_PPO-v31 :  5.553692507743835  minutes
-----------------
Total Reward:  -22.256062127649784
Total Trades:  3750
End total asset for validation 1104740.7340809056
Mean Reward: -4.093841185292694
STD reward: 0.2622352647454694
-----------------
Total reward at validation for Reccurent PPO -22.256062127649784
Sharpe Ratio:  0
Best params,  {'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'seed': 42069, 'device': 'cuda'}
603
180
Saving to  results/account_value_trade_main_jim_PPO-v31.csv
previous_total_asset:1299789.7413149024
end_total_asset:1521288.5137489557
total_asset_change:221498.77243405324
Total cash is: 28.660310263306414$ and total hold

[34m[1mwandb[0m: wandb version 0.12.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.5501888e+18
end_total_asset,1521288.51375
trade_reward,0.27084


Training time  jim_PPO-v31 :  6.459399664402008  minutes
-----------------
Total Reward:  -24.69371535629034
Total Trades:  3454
End total asset for validation 984964.6828098473
Mean Reward: -3.5115389151498677
STD reward: 0.4220369763460703
-----------------
Total reward at validation for Reccurent PPO -24.69371535629034
Sharpe Ratio:  0
Best params,  {'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'seed': 42069, 'device': 'cuda'}
603
180
Saving to  results/account_value_trade_main_jim_PPO-v31.csv
previous_total_asset:1521288.5137489557
end_total_asset:1683155.4660900133
total_asset_change:161866.9523410576
Total cash is: 20.534970446396983$ and total holding

[34m[1mwandb[0m: wandb version 0.12.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.5728256e+18
end_total_asset,1683155.46609
trade_reward,0.10424


Training time  jim_PPO-v31 :  6.446135632197062  minutes
-----------------
Total Reward:  20.322920036502182
Total Trades:  2695
End total asset for validation 1073568.7059230462
Mean Reward: 7.08889589010505
STD reward: 0.620421121933786
-----------------
Total reward at validation for Reccurent PPO 20.322920036502182
Sharpe Ratio:  0
Best params,  {'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'seed': 42069, 'device': 'cuda'}
603
180
Saving to  results/account_value_trade_main_jim_PPO-v31.csv
previous_total_asset:1683155.4660900133
end_total_asset:1792121.7235864156
total_asset_change:108966.25749640237
Total cash is: 10.692164754344049$ and total holdings 

[34m[1mwandb[0m: wandb version 0.12.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.5955488e+18
end_total_asset,1792121.72359
trade_reward,0.10861


Training time  jim_PPO-v31 :  6.667622709274292  minutes
-----------------
Total Reward:  -13.837684423429891
Total Trades:  1889
End total asset for validation 946679.2430480494
Mean Reward: -2.4040049287257714
STD reward: 0.3475872560284754
-----------------
Total reward at validation for Reccurent PPO -13.837684423429891
Sharpe Ratio:  0
Best params,  {'n_steps': 8, 'batch_size': 8, 'gamma': 0.95, 'learning_rate': 0.027651081986547765, 'ent_coef': 0.009004220378275581, 'clip_range': 0.3, 'n_epochs': 1, 'gae_lambda': 0.95, 'max_grad_norm': 0.9, 'vf_coef': 0.9311987337638572, 'policy_kwargs': {'net_arch': [{'pi': [256, 256], 'vf': [256, 256]}], 'activation_fn': <class 'torch.nn.modules.activation.Tanh'>, 'ortho_init': False}, 'seed': 42069, 'device': 'cuda'}
603
180
Saving to  results/account_value_trade_main_jim_PPO-v31.csv
previous_total_asset:1792121.7235864156
end_total_asset:2388473.4223075616
total_asset_change:596351.698721146
Total cash is: 10.239728520964597$ and total holdin

[34m[1mwandb[0m: wandb version 0.12.9 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.6183584e+18
end_total_asset,2388473.42231
trade_reward,0.32416


Ensemble Strategy took:  37.92124247948329  minutes


In [20]:

# Load the pretraining dataset and set its `turbulence` column to 0 in one
# chained expression, so re-running the cell always starts from the CSV.
pretrain_set = pd.read_csv('datasets/pretrain_set3.csv').assign(turbulence=0)

In [None]:
# The original call passed `sn`, a name no visible cell defines, so it raised
# NameError on a fresh kernel. The frame loaded in the preceding cell is used
# instead.
# NOTE(review): presumably `pretrain_set` is the intended input -- confirm
# against Trainer.pretrain's signature.
pretrain = trainer.pretrain(pretrain_set, '2011-05-01', START_TRADE, NORNAMLIZE)

In [67]:
# Take the second '/'-separated component of the value returned by pretrain.
pretrain_parts = pretrain.split('/')
model_to_load = pretrain_parts[1]

In [None]:
# Run the trainer's clustering step: 5 clusters, 5 stocks per cluster, period 365.
# NOTE(review): the meaning of the leading positional `False` is not visible
# from this notebook -- confirm against Trainer.cluster.
trainer.cluster(
    False,
    period=365,
    number_of_clusters=5,
    stocks_per_cluster=5,
)

In [60]:
# NOTE(review): the path has an empty file stem ('.csv'); this looks like an
# unfinished filename -- confirm which dataset was meant.
m = pd.read_csv(filepath_or_buffer='./datasets/.csv')