In [1]:
import pandas as pd
import torch as th
import os
import re
import yfinance as yf
import numpy as np
import warnings
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
warnings.filterwarnings('ignore')
            
import sys; sys.path.insert(0, '..')
from model.models import run_ensemble_strategy, TrainerConfig, Trainer
import data.preprocessing as pp
from utils.utils_analyze import get_price, check_index_dim
from utils.indicators import indicator_list, indicators_stock_stats
from env.BaseEnv import EnvConfig
from env.EnvStock_val import StockEnvValidation
from env.EnvStock_train import StockEnvTrain
from env.EnvStock_trade import StockEnvTrade
from policy.Policies import MlActorCriticPolicy
from config.config import indexes, ticker_list, category_dict, ticker_list_with_dict, tech_tickers
%matplotlib inline
%load_ext autoreload
%autoreload 2

### Do not run these two cells if you do not want to preprocess the data from the beginning

In [2]:
# Settings for the preprocessing stage of this notebook.
# NOTE(review): none of these three names is read by the cells visible below;
# presumably they feed the (removed/skipped) preprocessing cells — confirm.
START_DATE = '2011-01-01'
SEED = 42069
# Correctly spelled flag; prefer this name in new code.
NORMALIZE = True
# Original (misspelled) name kept as an alias so any existing reference still works.
NORNAMLIZE = NORMALIZE

### Run these two cells if you do not want to preprocess the data from the beginning

In [175]:
# ---------------------------------------------------------------------------
# Hyperparameter sets, one per algorithm name.
# NOTE(review): presumably forwarded as keyword arguments to the matching
# Stable-Baselines3 model constructor — confirm in model/models.py.
# ---------------------------------------------------------------------------
SAC_PARAMS = dict(
    gamma=0.995,
    learning_rate=0.016241092434986143,
    batch_size=64,
    buffer_size=1_000_000,
    learning_starts=10_000,
    gradient_steps=8,
    ent_coef='auto',
    tau=0.005,
    target_entropy='auto',
    policy_kwargs=dict(
        log_std_init=-3.3486909298160947,
        net_arch=[256, 256],
    ),
    use_sde=False,
    # NOTE(review): differs from the 42069 seed used by PPO_PARAMS — confirm intentional.
    seed=42099,
    device="cuda",
)

PPO_PARAMS = dict(
    n_steps=512,
    batch_size=64,
    gamma=0.999,
    learning_rate=0.03691157097485833,
    ent_coef=0.08932887565089782,
    clip_range=0.1,
    n_epochs=5,
    gae_lambda=0.9,
    max_grad_norm=0.8,
    vf_coef=0.891060944163853,
    policy_kwargs=dict(
        net_arch=[dict(pi=[256, 256], vf=[256, 256])],
        activation_fn=th.nn.ReLU,
        ortho_init=False,
    ),
    seed=42069,
    device="cuda",
)

# ---------------------------------------------------------------------------
# Run identification and experiment-level settings.
# ---------------------------------------------------------------------------
DATASET_VERSION = 'clusterTrain'
model = 'PPO'  # algorithm name; also embedded in MODEL_NAME below
MODEL_NAME = f'jim_clustered_{model}-v1'
POPULATION = 10
TIME_INTERVAL = 5
START_TRADE = '2011-01-01'
TIME_FRAME = 0

# Standalone network description: separate actor ("pi") and critic ("vf") stacks.
policy_kwargs = {
    'activation_fn': th.nn.ReLU,
    'net_arch': [{'pi': [256, 128, 64], 'vf': [256, 128]}],
}

In [176]:
# Hyperparameter set that is handed to TrainerConfig as `hparams` further down.
# NOTE(review): the name suggests the best result of a tuning run — confirm its origin.
winner_params = dict(
    n_steps=8,
    batch_size=8,
    gamma=0.95,
    learning_rate=0.027651081986547765,
    ent_coef=0.009004220378275581,
    clip_range=0.3,
    n_epochs=1,
    gae_lambda=0.95,
    max_grad_norm=0.9,
    vf_coef=0.9311987337638572,
    policy_kwargs=dict(
        net_arch=[dict(pi=[256, 256], vf=[256, 256])],
        activation_fn=th.nn.Tanh,
        ortho_init=False,
    ),
    seed=42069,
    device='cuda',
)

In [177]:
# Lookup table from algorithm name to its hyperparameter set.
# NOTE(review): not read by any cell visible in this notebook (TrainerConfig
# receives `winner_params` instead) — confirm whether it is still needed.
params = dict(SAC=SAC_PARAMS, PPO=PPO_PARAMS)

In [178]:
# Assemble the trainer configuration.
# NOTE(review): 242 for both windows is presumably a count of trading days
# (about one year) — confirm the unit in TrainerConfig.
# NOTE(review): `winner_params` carries its own 'policy_kwargs' entry while a
# separate `policy_kwargs` is also passed here; which one wins is decided inside
# the trainer code and cannot be told from this notebook — verify.
tconfig = TrainerConfig(
    start_date=START_TRADE,
    rebalance_window=242,
    validation_window=242,
    hparams=winner_params,
    timesteps=50000,
    policy_kwargs=policy_kwargs,
    index_list=indexes,
)

In [179]:
# Build the trainer for the algorithm named by `model`, wiring in the three
# environment classes (train / validation / trade) and the run configuration.
# The population size now comes from the POPULATION constant defined in the
# configuration cell (alongside `model`, DATASET_VERSION and MODEL_NAME) instead
# of a duplicated literal, so the two cannot drift apart.
trainer = Trainer(
    model,
    'MlpPolicy',
    StockEnvTrain,
    StockEnvValidation,
    StockEnvTrade,
    dataset_version=DATASET_VERSION,
    population=POPULATION,
    config=tconfig,
    model_name=MODEL_NAME,
    debug=False,
)

Total number of stocks:   469


In [181]:
# Start the trainer's `cluster` run with normalization switched on.
# NOTE(review): the output recorded below reports roughly 267 minutes for the
# whole run, so expect this cell to be long-running.
trainer.cluster(normalize=True)

The dataset has been made before starting training :)
Stocks trading this period is  ['AAPL' 'BMY' 'BSX' 'CF' 'CFG' 'COTY' 'CRM' 'CVS' 'EBAY' 'GILD' 'KEY' 'KO'
 'MDT' 'MU' 'NKE' 'NVDA' 'STX' 'TGT' 'WMB' 'WY']
Training time  jim_clustered_PPO-v1 :  6.101178658008576  minutes
-----------------
Total Reward:  -2.0010880364570767
Total Trades:  3289
End total asset for validation 1094655.3175395427
Mean Reward: -4.224979552638979
STD reward: 0.7174313488959476
-----------------
Reward for the period is -2.0010880364570767
Agent #0 has better performance for the training period with total reward: 1094655.3175395427
Training time  jim_clustered_PPO-v1 :  10.566005237897237  minutes
-----------------
Total Reward:  -6.360578395891935
Total Trades:  4042
End total asset for validation 1001394.9152931207
Mean Reward: -2.1259300023375545
STD reward: 0.12687430440522768
-----------------
Reward for the period is -6.360578395891935
Training time  jim_clustered_PPO-v1 :  7.224518342812856  minutes


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.512432e+18
end_total_asset,1070815.92599
trade_reward,0.0


Stocks trading this period is  ['ABBV' 'ATVI' 'BABA' 'EW' 'GE' 'GILD' 'GM' 'GPS' 'INTC' 'IPG' 'KR' 'MGM'
 'MOS' 'MRK' 'NVDA' 'NWL' 'PYPL' 'RRC' 'SLB' 'TWX']
Training time  jim_clustered_PPO-v1 :  5.346305000782013  minutes
-----------------
Total Reward:  -24.70571554452181
Total Trades:  3768
End total asset for validation 1155680.146302363
Mean Reward: -9.118631633673795
STD reward: 1.2724390817808864
-----------------
Reward for the period is -24.70571554452181
Agent #0 has better performance for the training period with total reward: 1155680.146302363
Training time  jim_clustered_PPO-v1 :  4.665276336669922  minutes
-----------------
Total Reward:  -24.06323904544115
Total Trades:  4396
End total asset for validation 1010828.4347841125
Mean Reward: -6.3545404399745165
STD reward: 0.7763473957041129
-----------------
Reward for the period is -24.06323904544115
Training time  jim_clustered_PPO-v1 :  4.947942864894867  minutes
-----------------
Total Reward:  -24.639866828918457
Total

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.5427584e+18
end_total_asset,930664.3298
trade_reward,0.0


Stocks trading this period is  ['AAL' 'AMAT' 'AMD' 'BSX' 'CMCSA' 'CSX' 'CTRA' 'FCX' 'GIS' 'HBAN' 'INTC'
 'KEY' 'MRK' 'NEM' 'PCG' 'PFE' 'RF' 'TJX' 'UAA' 'VZ']
Training time  jim_clustered_PPO-v1 :  5.39120211203893  minutes
-----------------
Total Reward:  -27.053942263126373
Total Trades:  3536
End total asset for validation 937803.1027707446
Mean Reward: -6.259399187704548
STD reward: 0.5624339599531597
-----------------
Reward for the period is -27.053942263126373
Agent #0 has better performance for the training period with total reward: 937803.1027707446
Training time  jim_clustered_PPO-v1 :  4.718649808565775  minutes
-----------------
Total Reward:  -28.46925513446331
Total Trades:  2093
End total asset for validation 645385.0143746776
Mean Reward: -7.1044027913128955
STD reward: 0.10351983003486533
-----------------
Reward for the period is -28.46925513446331
Training time  jim_clustered_PPO-v1 :  5.236831486225128  minutes
-----------------
Total Reward:  -26.676388770341873
Tot

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.5731712e+18
end_total_asset,995008.25411
trade_reward,0.0


Stocks trading this period is  ['BMY' 'CSCO' 'CTRA' 'CVS' 'EQT' 'FTI' 'GILD' 'HBI' 'KMI' 'KO' 'KR' 'MDLZ'
 'MDT' 'NWL' 'OXY' 'PG' 'RRC' 'TGT' 'WMT' 'WU']
Training time  jim_clustered_PPO-v1 :  5.45647702217102  minutes
-----------------
Total Reward:  -21.19313907623291
Total Trades:  2769
End total asset for validation 955791.1455459977
Mean Reward: -7.6142420265082364
STD reward: 0.269701867901767
-----------------
Reward for the period is -21.19313907623291
Agent #0 has better performance for the training period with total reward: 955791.1455459977
Training time  jim_clustered_PPO-v1 :  4.845827428499858  minutes
-----------------
Total Reward:  -21.8882935247384
Total Trades:  4542
End total asset for validation 928662.7311466867
Mean Reward: -7.327758980695217
STD reward: 0.06106538758572949
-----------------
Reward for the period is -21.8882935247384
Training time  jim_clustered_PPO-v1 :  4.7462431708971655  minutes
-----------------
Total Reward:  -22.292668115580454
Total Trade

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.6036704e+18
end_total_asset,1079177.542
trade_reward,0.0


Stocks trading this period is  ['AMD' 'EQT' 'FE' 'GILD' 'IVZ' 'JNJ' 'KHC' 'KR' 'LUV' 'MAC' 'MDLZ' 'NEM'
 'NVDA' 'OXY' 'QCOM' 'RF' 'SCHW' 'TSLA' 'V' 'WFC']
Training time  jim_clustered_PPO-v1 :  5.468295458952586  minutes
-----------------
Total Reward:  -31.076597690582275
Total Trades:  3273
End total asset for validation 936679.6548404315
Mean Reward: -6.698402003681986
STD reward: 0.19025128067607458
-----------------
Reward for the period is -31.076597690582275
Agent #0 has better performance for the training period with total reward: 936679.6548404315
Training time  jim_clustered_PPO-v1 :  5.19429524342219  minutes
-----------------
Total Reward:  -27.90204866975546
Total Trades:  4167
End total asset for validation 1569760.6145119462
Mean Reward: -7.389123258541804
STD reward: 0.14028955183538197
-----------------
Reward for the period is -27.90204866975546
Agent #1 has better performance for the training period with total reward: 1569760.6145119462
Training time  jim_clustered_P

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
date,▁
end_total_asset,▁
trade_reward,▁

0,1
date,1.6340832e+18
end_total_asset,1257481.77171
trade_reward,0.0


Ensemble Strategy took:  266.65156384706495  minutes


In [54]:
# Scratch check: count the tickers in a hand-copied list (evaluates to 19).
# NOTE(review): every "Stocks trading this period" list logged by the training
# run above contains 20 tickers — confirm whether one is missing here.
len(['AMD', 'CAT', 'CSCO', 'EW', 'GILD', 'GM', 'HIG', 'INTC', 'LLY', 'MDLZ', 'MNST',
 'MRK', 'MRO' ,'NEE', 'NEM' ,'NFLX', 'TJX', 'WMT' ,'WU'])

19

In [124]:
# Load the processed clusterTrain dataset; the relative path is resolved against
# the kernel's current working directory.
# NOTE(review): the preview further down shows 'Unnamed: 0.1', 'Unnamed: 0' and
# 'level_0' columns, which suggests the CSV was written with its index more than
# once — confirm and consider writing it with index=False.
df = pd.read_csv('datasets/clusterTrain_processed.csv')

In [130]:
# Print the name of every column that contains at least one missing value.
# `df.isna().any()` builds the per-column mask in one vectorised pass instead of
# iterating each Series element by element with the built-in `any`.
has_missing = df.isna().any().to_numpy()
for col in df.columns[has_missing]:
    print(col)

index


In [131]:
# Preview the first rows of the loaded frame (rich display of the last expression).
df.head()

Unnamed: 0.1,Unnamed: 0,level_0,Date,Open,High,Low,Close,adjcp,volume,ticker,...,dx_30,vr,atr,dma,volume_delta,category,turbulence,month,day,index
0,0,168,2012-01-03,23.77,24.5,23.73,24.07,19.94598,4221700.0,AIG,...,9.370646,195.323013,4.461909,0.206669,1955700.0,3.0,0.0,1,3,
1,1,585,2012-01-03,3.116891,3.14622,3.072897,3.094227,1.725874,8963773.0,AIV,...,27.52005,150.817767,1.359578,0.015653,2527109.0,7.0,0.0,1,3,
2,2,1002,2012-01-03,5.75,5.89,5.74,5.8,5.018795,246293200.0,BAC,...,9.254764,170.040108,0.884104,-0.380769,69852200.0,3.0,0.0,1,3,
3,3,1419,2012-01-03,27.229767,27.577404,27.229767,27.452471,23.091703,6193308.0,BAX,...,3.335021,93.284261,4.624437,-0.84131,3463841.0,4.0,0.0,1,3,
4,4,1836,2012-01-03,20.4,20.76,20.4,20.51,16.506168,5953100.0,BK,...,22.240598,148.917496,4.149062,-0.066494,2254200.0,3.0,0.0,1,3,
