In [1]:
%%time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import os
from src.models.frontier import MultiStockEnv, Agent, play_one_episode, maybe_make_dir
from src.config import market_tickers
from src.config.model_config import model_parameters
import tensorflow as tf

CPU times: user 1.02 s, sys: 144 ms, total: 1.16 s
Wall time: 1.43 s


In [2]:
# Investor-preference sweep: every combination of the three penalty weights
# below becomes one training configuration.

#------ small test-sweep ------#
GAMMA_TRADES = [0.1, 1, 10, 100]
GAMMA_RISKS = [0.1, 1, 10, 100, 1000]
GAMMA_HOLDS = [0.1, 1]
#------ extended-boyd-sweep ------#
# GAMMA_TRADES =  [0.1, 0.5, 1, 2, 3, 4, 5, 5.5, 6, 6.5, 7, 7.5, 8, 9, 10, 11, 12, 15, 20, 30, 45, 60, 100, 200]
# GAMMA_RISKS = [0.1, 0.178, 0.316, 0.562, 1, 2, 3, 6, 10, 18, 32, 56, 100, 178, 316, 562, 1000, 2000, 5000, 10000, 20000]
#---------------------------------#

# Cartesian product of the three lists, trade weight varying slowest and hold
# weight varying fastest. Entry format: (gamma_trade, gamma_risk, gamma_hold).
GAMMA_TRIOS = [
    (gamma_trade, gamma_risk, gamma_hold)
    for gamma_trade in GAMMA_TRADES
    for gamma_risk in GAMMA_RISKS
    for gamma_hold in GAMMA_HOLDS
]

print('number of trios: ', len(GAMMA_TRIOS))

number of trios:  40


In [3]:
# papermill parameters

# Random seeds to train with. Only seed 0 is active; the other seeds that were
# listed here (currently disabled) are:
# 1111, 2222, 3333, 4444, 5555, 6666, 7777, 8888, 9999
SEED_LIST = [0]

# Asset universe. Alternative kept for reference:
# TICKERS = market_tickers.DOW_30_TICKER
TICKERS = ['AAPL', 'AMD', 'CSCO', 'F', 'INTC']
MARKET_NAME = 'TEST_5'

# use either | 'RL_CNN' | 'RL_str_fcast' | 'RL_all_inp' |
MODEL_BASE_NAME = 'RL_CNN'

# Training window.
FROM = '2017-01-03'   # start of training set
UNTIL = '2019-01-01'  # end of training set

# Training length and checkpoint cadence.
NB_EPISODES = 200  # number of episodes to train for
# Save model weights every time this many episodes pass
# (100 was used when not training from pretrained weights).
SAVE_EVERY = 200

In [4]:
# other constants and parameters

# --- data ---
TICKERS.sort()  # in-place sort of the ticker list defined in the parameters cell
FILE_PERIOD = '1d'  # weekly='5d', daily='1d'
DATA_DIR = f'../data/{MARKET_NAME}/preprocessed_data/'

# --- episode sampling ---
DAYS_IN_EPISODE = 30  # 365 for one-year long episodes (conditions checked at end of episode)
# 'uniform' or 'geometric': distribution used to select the starting point of
# an episode when it is generated
EPISODE_DRAW_DISTRIBUTION = 'uniform'

# --- transaction cost function ---
HALF_SPREAD = 0.0005/2.0  # 'a' in transaction cost function
NONLIN_COEFF = 1.0  # 'b' in transaction cost function
POWER = 1.5  # power for change in portfolio vector used in transaction cost

# --- portfolio and run mode ---
INIT_PORTFOLIO = 100000000.0  # initial portfolio value
MODE = 'train'  # train or test mode

# --- model inputs ---
# Tune and double-check these before every test. They are read from the
# config entry of the selected base model.
_model_spec = model_parameters[MODEL_BASE_NAME]
USE_FORECASTS = _model_spec['use_forecasts']  # whether to use forecasts as inputs to agent policy net or not
NB_FORECASTS = _model_spec['nb_forecasts']  # number of forecasts to use in state. Set to None for no forecasts
FORECAST_TYPE = _model_spec['forecast_type']  # use 'strong' or 'weak' forecasts (if no forecasts given, this does not matter)
USE_CNN_STATE = _model_spec['use_cnn_state']  # whether to pass log-rets block for CNN part of policy net (set to false for forecast only)

In [5]:
%%time
##########################
### main training loop ###
##########################

# define investor preferences to train for
#------ extended-boyd-sweep ------#
#GAMMA_TRADES = [5.5, 6, 6.5, 7, 7.5, 8, 9, 10, 11, 12, 15, 20, 30]
#GAMMA_TRADES = [4, 5]
#GAMMA_TRADES = [0.1, 0.5, 1, 2, 3, 45, 60, 100, 200]
#GAMMA_RISKS = [0.1, 0.178, 0.316, 0.562, 1, 2, 3, 6, 10, 18, 32, 56, 100, 178, 316, 562, 1000, 2000, 5000, 10000, 20000]
#---------------------------------#
# Train one agent per (seed, gamma trio). For every combination this cell:
#   1. seeds TensorFlow and NumPy,
#   2. builds a fresh environment and agent,
#   3. trains for NB_EPISODES episodes, checkpointing every SAVE_EVERY episodes,
#   4. saves final weights + losses and writes a loss-curve PNG.
# Combinations whose final losses file already exists are skipped, so the cell
# can be re-run to resume an interrupted sweep.

LOOKBACK_WINDOW = 20  # shared by env (state_lookback_window) and agent (lookback_window)
save_fig = True       # write the loss curve of every run to its figures folder

# The counter runs across all seeds, so the denominator must as well.
total_runs = len(SEED_LIST) * len(GAMMA_TRIOS)
counter = 0

for RANDOM_SEED in SEED_LIST:
    model_name = f'{MODEL_BASE_NAME}_seed_{RANDOM_SEED}'

    # folder layout: experiments/<market>/seeded/<base model>/seed_<seed>/<preference>/...
    experiments_folder = f'../experiments/{MARKET_NAME}'
    from_seeded_folder = f'{experiments_folder}/seeded'
    base_model_folder = f'{from_seeded_folder}/{MODEL_BASE_NAME}'
    seed_folder = f'{base_model_folder}/seed_{RANDOM_SEED}'
    maybe_make_dir(seed_folder)

    for (GAMMA_TRADE, GAMMA_RISK, GAMMA_HOLD) in GAMMA_TRIOS:
        # set random generator seed for tf and np
        # (probably redundant because the env is given the seed as well, but
        # kept so every run starts from the same generator state)
        tf.random.set_seed(RANDOM_SEED)
        np.random.seed(RANDOM_SEED)
        counter += 1

        preference_name = f'trade_{GAMMA_TRADE}_risk_{GAMMA_RISK}_hold_{GAMMA_HOLD}'
        preference_folder = f'{seed_folder}/{preference_name}'
        models_folder = f'{preference_folder}/models'   # where model weights are saved
        losses_folder = f'{preference_folder}/losses'   # where losses are saved for plotting training progress
        fig_folder = f'{preference_folder}/figures'
        final_losses_file = f'{losses_folder}/{model_name}_{MODE}_{NB_EPISODES}_losses.npy'

        # Skip only runs that actually finished. The preference folder itself
        # is created before training starts, so its mere existence would also
        # match runs that were interrupted half-way; the final losses file is
        # written only after the last episode.
        if os.path.exists(final_losses_file):
            print(f'\talready exists: {preference_name}')
            print('\tskip to next.')
            continue

        print('#############################################################################')
        print(f'##\t[{counter}/{total_runs}]\tmodel={MODEL_BASE_NAME},\tSEED={RANDOM_SEED},\ttrade={GAMMA_TRADE},\trisk={GAMMA_RISK}\thold={GAMMA_HOLD}\t##')
        print('#############################################################################')

        # create agent and environment
        env = MultiStockEnv(tickers=TICKERS,
                            from_date=FROM,
                            until=UNTIL,
                            cash_key='USDOLLAR',
                            gamma_risk=GAMMA_RISK,
                            gamma_trade=GAMMA_TRADE,
                            gamma_hold=GAMMA_HOLD,
                            half_spread=HALF_SPREAD,
                            nonlin_coef=NONLIN_COEFF,
                            power=POWER,
                            datadir=DATA_DIR,
                            state_lookback_window=LOOKBACK_WINDOW,
                            distribution=EPISODE_DRAW_DISTRIBUTION,
                            days_duration=DAYS_IN_EPISODE,
                            mode=MODE,  # same mode that is passed to play_one_episode below
                            random_seed=RANDOM_SEED,
                            init_portfolio=INIT_PORTFOLIO,
                            period_in_file_name=FILE_PERIOD,
                            nb_forecasts=NB_FORECASTS,
                            forecast_type=FORECAST_TYPE,
                            use_CNN_state=USE_CNN_STATE,
                            verbose=False)

        # "+1" accounts for the cash asset (cash_key) next to the tickers --
        # NOTE(review): inferred from the env arguments; confirm against Agent.
        agent = Agent(alpha=0.001,
                      gamma=0.99,
                      n_assets=len(TICKERS)+1,
                      tau=5,
                      lookback_window=LOOKBACK_WINDOW,
                      n_feature_maps=len(TICKERS)+1,
                      use_forecasts=USE_FORECASTS,
                      use_CNN_state=USE_CNN_STATE,
                      allow_long_short_trades=True)

        # create directories for saving outputs to (if they don't exist already)
        maybe_make_dir(preference_folder)
        maybe_make_dir(models_folder)
        maybe_make_dir(losses_folder)
        maybe_make_dir(fig_folder)

        if MODE == 'train':
            # play the game NB_EPISODES times and update weights according to loss
            losses = []
            for e in range(NB_EPISODES):

                # checkpoint weights and the losses so far every SAVE_EVERY episodes
                if (e % SAVE_EVERY == 0) and (e != 0):
                    agent.save(f'{models_folder}/{model_name}_{e}')
                    np.save(f'{losses_folder}/{model_name}_{MODE}_{e}_losses.npy', np.array(losses))

                t0 = datetime.datetime.now()
                try:
                    loss = play_one_episode(agent, env, MODE)
                except Exception as err:
                    # best effort: report the failure, record NaN for this
                    # episode and keep the sweep going
                    print(f'\t*** error in episode {e}:', err)
                    loss = np.nan
                dt = datetime.datetime.now() - t0
                losses.append(loss)  # append episode loss

                # print progress
                if e % 10 == 0:
                    print(f"episode: {e + 1}/{NB_EPISODES}, loss: {loss:.4f}, duration: {dt}")

            # save the weights and losses when done; the losses file doubles as
            # the "run finished" marker checked at the top of the loop
            agent.save(f'{models_folder}/{model_name}_{NB_EPISODES}')
            np.save(final_losses_file, np.array(losses))

        # plot losses (read back from disk so the figure shows what was saved)
        # NOTE(review): when MODE is not 'train' nothing above writes this
        # file, so it must already exist -- same as before this change.
        loaded_losses = np.load(final_losses_file)

        fig, ax = plt.subplots(figsize=(14, 5))
        ax.plot(loaded_losses)
        ax.set_title(f'{model_name} Training Loss')
        ax.set_xlabel('Episode Number')
        ax.set_ylabel(r'Loss: $-G \approx -\mathbb{E}\left[ \sum_{k=0}^{T} \gamma^k R_t \right]$')
        if save_fig:
            fig.savefig(f'{fig_folder}/{model_name}_losses.png', dpi=150, facecolor=None, edgecolor=None, bbox_inches='tight')
        # close (not just clear) the figure: one figure per run would otherwise
        # stay open for the whole sweep and be echoed as an empty figure
        plt.close(fig)

#############################################################################
##	[1/35]	model=RL_CNN,	SEED=0,	trade=0.1,	risk=0.1	hold=0.1	##
#############################################################################
episode: 1/200, loss: 0.0118, duration: 0:00:00.463857
episode: 11/200, loss: -0.0350, duration: 0:00:00.413086
episode: 21/200, loss: 0.0333, duration: 0:00:00.411152
episode: 31/200, loss: -0.0616, duration: 0:00:00.419885
episode: 41/200, loss: 0.0263, duration: 0:00:00.406845
episode: 51/200, loss: -0.0601, duration: 0:00:00.445481
episode: 61/200, loss: -0.0055, duration: 0:00:00.417842
episode: 71/200, loss: 0.0244, duration: 0:00:00.414129
episode: 81/200, loss: 0.0294, duration: 0:00:00.417419
episode: 91/200, loss: -0.0692, duration: 0:00:00.418981
episode: 101/200, loss: 0.0084, duration: 0:00:00.419074
episode: 111/200, loss: 0.3968, duration: 0:00:00.431906
episode: 121/200, loss: -0.7013, duration: 0:00:00.474263
episode: 131/200, loss: 0.2777, duration: 0:



#############################################################################
##	[22/35]	model=RL_CNN,	SEED=0,	trade=60,	risk=0.1	hold=0.1	##
#############################################################################
episode: 1/200, loss: 0.7367, duration: 0:00:00.428746
episode: 11/200, loss: 0.0464, duration: 0:00:00.393733
episode: 21/200, loss: 0.1512, duration: 0:00:00.403357
episode: 31/200, loss: 0.0168, duration: 0:00:00.366941
episode: 41/200, loss: 0.0598, duration: 0:00:00.375478
episode: 51/200, loss: -0.0220, duration: 0:00:00.391058
episode: 61/200, loss: 0.0142, duration: 0:00:00.379494
episode: 71/200, loss: 0.0486, duration: 0:00:00.372882
episode: 81/200, loss: 0.0852, duration: 0:00:00.410028
episode: 91/200, loss: 0.0146, duration: 0:00:00.402390
episode: 101/200, loss: -0.0095, duration: 0:00:00.391477
episode: 111/200, loss: -0.0421, duration: 0:00:00.408906
episode: 121/200, loss: -0.0483, duration: 0:00:00.406579
episode: 131/200, loss: 0.0200, duration: 0:00



#############################################################################
##	[23/35]	model=RL_CNN,	SEED=0,	trade=60,	risk=1	hold=0.1	##
#############################################################################
episode: 1/200, loss: 0.7405, duration: 0:00:00.417265
episode: 11/200, loss: 0.0473, duration: 0:00:00.370702
episode: 21/200, loss: 0.1510, duration: 0:00:00.440354
episode: 31/200, loss: 0.0220, duration: 0:00:00.361632
episode: 41/200, loss: 0.0604, duration: 0:00:00.423642
episode: 51/200, loss: -0.0198, duration: 0:00:00.366595
episode: 61/200, loss: 0.0139, duration: 0:00:00.388140
episode: 71/200, loss: 0.0490, duration: 0:00:00.393781
episode: 81/200, loss: 0.0806, duration: 0:00:00.367344
episode: 91/200, loss: 0.0168, duration: 0:00:00.372269
episode: 101/200, loss: -0.0062, duration: 0:00:00.405092
episode: 111/200, loss: -0.0335, duration: 0:00:00.434994
episode: 121/200, loss: -0.0441, duration: 0:00:00.380239
episode: 131/200, loss: 0.0203, duration: 0:00:0



#############################################################################
##	[24/35]	model=RL_CNN,	SEED=0,	trade=60,	risk=18	hold=0.1	##
#############################################################################
episode: 1/200, loss: 0.8126, duration: 0:00:00.416789
episode: 11/200, loss: 0.0641, duration: 0:00:00.441630
episode: 21/200, loss: 0.2014, duration: 0:00:00.403266
episode: 31/200, loss: 0.1029, duration: 0:00:00.398852
episode: 41/200, loss: 0.0712, duration: 0:00:00.366705
episode: 51/200, loss: 0.0184, duration: 0:00:00.365389
episode: 61/200, loss: 0.0135, duration: 0:00:00.418770
episode: 71/200, loss: 0.0496, duration: 0:00:00.438983
episode: 81/200, loss: 0.0995, duration: 0:00:00.420164
episode: 91/200, loss: 0.0775, duration: 0:00:00.399828
episode: 101/200, loss: 0.0491, duration: 0:00:00.382989
episode: 111/200, loss: 0.0256, duration: 0:00:00.386252
episode: 121/200, loss: 0.0296, duration: 0:00:00.383998
episode: 131/200, loss: 0.0463, duration: 0:00:00.3



#############################################################################
##	[25/35]	model=RL_CNN,	SEED=0,	trade=60,	risk=56	hold=0.1	##
#############################################################################
episode: 1/200, loss: 0.9738, duration: 0:00:00.439986
episode: 11/200, loss: 0.1045, duration: 0:00:00.413478
episode: 21/200, loss: 0.3151, duration: 0:00:00.419865
episode: 31/200, loss: 0.2151, duration: 0:00:00.395101
episode: 41/200, loss: 0.1003, duration: 0:00:00.392012
episode: 51/200, loss: 0.0738, duration: 0:00:00.384823
episode: 61/200, loss: 0.0339, duration: 0:00:00.412992
episode: 71/200, loss: 0.0760, duration: 0:00:00.369370
episode: 81/200, loss: 0.1570, duration: 0:00:00.392136
episode: 91/200, loss: 0.1277, duration: 0:00:00.374561
episode: 101/200, loss: 0.0986, duration: 0:00:00.398210
episode: 111/200, loss: 0.0640, duration: 0:00:00.366603
episode: 121/200, loss: 0.0763, duration: 0:00:00.375829
episode: 131/200, loss: 0.0733, duration: 0:00:00.4



#############################################################################
##	[26/35]	model=RL_CNN,	SEED=0,	trade=60,	risk=100	hold=0.1	##
#############################################################################
episode: 1/200, loss: 1.1604, duration: 0:00:00.432920
episode: 11/200, loss: 0.1480, duration: 0:00:00.433122
episode: 21/200, loss: 0.4256, duration: 0:00:00.500560
episode: 31/200, loss: 0.3148, duration: 0:00:00.469249
episode: 41/200, loss: 0.1279, duration: 0:00:00.429335
episode: 51/200, loss: 0.1110, duration: 0:00:00.406770
episode: 61/200, loss: 0.0591, duration: 0:00:00.398716
episode: 71/200, loss: 0.1006, duration: 0:00:00.379397
episode: 81/200, loss: 0.2084, duration: 0:00:00.402782
episode: 91/200, loss: 0.1776, duration: 0:00:00.416556
episode: 101/200, loss: 0.1483, duration: 0:00:00.418251
episode: 111/200, loss: 0.0955, duration: 0:00:00.401766
episode: 121/200, loss: 0.1316, duration: 0:00:00.401259
episode: 131/200, loss: 0.0994, duration: 0:00:00.



#############################################################################
##	[27/35]	model=RL_CNN,	SEED=0,	trade=60,	risk=562	hold=0.1	##
#############################################################################
episode: 1/200, loss: 3.1198, duration: 0:00:00.424634
episode: 11/200, loss: 0.5229, duration: 0:00:00.389953
episode: 21/200, loss: 1.4902, duration: 0:00:00.394260
episode: 31/200, loss: 1.1215, duration: 0:00:00.402958
episode: 41/200, loss: 0.4012, duration: 0:00:00.404590
episode: 51/200, loss: 0.3915, duration: 0:00:00.390804
episode: 61/200, loss: 0.2446, duration: 0:00:00.374538
episode: 71/200, loss: 0.3570, duration: 0:00:00.405428
episode: 81/200, loss: 0.7102, duration: 0:00:00.409672
episode: 91/200, loss: 0.4936, duration: 0:00:00.406648
episode: 101/200, loss: 0.4501, duration: 0:00:00.457028
episode: 111/200, loss: 0.3063, duration: 0:00:00.405141
episode: 121/200, loss: 0.3830, duration: 0:00:00.405352
episode: 131/200, loss: 0.2910, duration: 0:00:00.



#############################################################################
##	[28/35]	model=RL_CNN,	SEED=0,	trade=60,	risk=1000	hold=0.1	##
#############################################################################
episode: 1/200, loss: 4.9774, duration: 0:00:00.440945
episode: 11/200, loss: 0.8736, duration: 0:00:00.430212
episode: 21/200, loss: 2.4731, duration: 0:00:00.473977
episode: 31/200, loss: 1.8436, duration: 0:00:00.427131
episode: 41/200, loss: 0.6601, duration: 0:00:00.384906
episode: 51/200, loss: 0.6545, duration: 0:00:00.417054
episode: 61/200, loss: 0.4159, duration: 0:00:00.432723
episode: 71/200, loss: 0.5986, duration: 0:00:00.415381
episode: 81/200, loss: 1.1641, duration: 0:00:00.409536
episode: 91/200, loss: 0.7715, duration: 0:00:00.419263
episode: 101/200, loss: 0.6795, duration: 0:00:00.469135
episode: 111/200, loss: 0.4709, duration: 0:00:00.372479
episode: 121/200, loss: 0.6051, duration: 0:00:00.413164
episode: 131/200, loss: 0.4490, duration: 0:00:00



#############################################################################
##	[29/35]	model=RL_CNN,	SEED=0,	trade=100,	risk=0.1	hold=0.1	##
#############################################################################
episode: 1/200, loss: 1.2207, duration: 0:00:00.401645
episode: 11/200, loss: 0.0976, duration: 0:00:00.397695
episode: 21/200, loss: 0.2150, duration: 0:00:00.407761
episode: 31/200, loss: 0.0591, duration: 0:00:00.400150
episode: 41/200, loss: 0.0761, duration: 0:00:00.435368
episode: 51/200, loss: -0.0060, duration: 0:00:00.404045
episode: 61/200, loss: 0.0210, duration: 0:00:00.387786
episode: 71/200, loss: 0.0589, duration: 0:00:00.398512
episode: 81/200, loss: 0.1101, duration: 0:00:00.394122
episode: 91/200, loss: 0.0326, duration: 0:00:00.409935
episode: 101/200, loss: 0.0008, duration: 0:00:00.417704
episode: 111/200, loss: -0.0333, duration: 0:00:00.447472
episode: 121/200, loss: -0.0358, duration: 0:00:00.401086
episode: 131/200, loss: 0.0235, duration: 0:00



#############################################################################
##	[30/35]	model=RL_CNN,	SEED=0,	trade=100,	risk=1	hold=0.1	##
#############################################################################
episode: 1/200, loss: 1.2246, duration: 0:00:00.436119
episode: 11/200, loss: 0.0984, duration: 0:00:00.531679
episode: 21/200, loss: 0.2140, duration: 0:00:00.379493
episode: 31/200, loss: 0.0645, duration: 0:00:00.363977
episode: 41/200, loss: 0.0744, duration: 0:00:00.367472
episode: 51/200, loss: -0.0005, duration: 0:00:00.380779
episode: 61/200, loss: 0.0187, duration: 0:00:00.465298
episode: 71/200, loss: 0.0596, duration: 0:00:00.443566
episode: 81/200, loss: 0.0984, duration: 0:00:00.464333
episode: 91/200, loss: 0.0326, duration: 0:00:00.392216
episode: 101/200, loss: 0.0030, duration: 0:00:00.374402
episode: 111/200, loss: -0.0193, duration: 0:00:00.365275
episode: 121/200, loss: -0.0318, duration: 0:00:00.379786
episode: 131/200, loss: 0.0277, duration: 0:00:0



#############################################################################
##	[31/35]	model=RL_CNN,	SEED=0,	trade=100,	risk=18	hold=0.1	##
#############################################################################
episode: 1/200, loss: 1.2966, duration: 0:00:00.418368
episode: 11/200, loss: 0.1151, duration: 0:00:00.374952
episode: 21/200, loss: 0.2695, duration: 0:00:00.371062
episode: 31/200, loss: 0.1526, duration: 0:00:00.364136
episode: 41/200, loss: 0.0931, duration: 0:00:00.362688
episode: 51/200, loss: 0.0390, duration: 0:00:00.379973
episode: 61/200, loss: 0.0261, duration: 0:00:00.382690
episode: 71/200, loss: 0.0653, duration: 0:00:00.374394
episode: 81/200, loss: 0.1231, duration: 0:00:00.390788
episode: 91/200, loss: 0.1001, duration: 0:00:00.364272
episode: 101/200, loss: 0.0607, duration: 0:00:00.371832
episode: 111/200, loss: 0.0368, duration: 0:00:00.360699
episode: 121/200, loss: 0.0444, duration: 0:00:00.366071
episode: 131/200, loss: 0.0515, duration: 0:00:00.



#############################################################################
##	[32/35]	model=RL_CNN,	SEED=0,	trade=100,	risk=56	hold=0.1	##
#############################################################################
episode: 1/200, loss: 1.4578, duration: 0:00:00.405744
episode: 11/200, loss: 0.1604, duration: 0:00:00.360799
episode: 21/200, loss: 0.4071, duration: 0:00:00.380974
episode: 31/200, loss: 0.2875, duration: 0:00:00.373615
episode: 41/200, loss: 0.1243, duration: 0:00:00.366847
episode: 51/200, loss: 0.0886, duration: 0:00:00.372247
episode: 61/200, loss: 0.0543, duration: 0:00:00.373431
episode: 71/200, loss: 0.0936, duration: 0:00:00.360675
episode: 81/200, loss: 0.1931, duration: 0:00:00.370501
episode: 91/200, loss: 0.1537, duration: 0:00:00.372982
episode: 101/200, loss: 0.1173, duration: 0:00:00.377362
episode: 111/200, loss: 0.0900, duration: 0:00:00.355507
episode: 121/200, loss: 0.1006, duration: 0:00:00.425966
episode: 131/200, loss: 0.0868, duration: 0:00:00.



#############################################################################
##	[33/35]	model=RL_CNN,	SEED=0,	trade=100,	risk=100	hold=0.1	##
#############################################################################
episode: 1/200, loss: 1.6444, duration: 0:00:00.417567
episode: 11/200, loss: 0.2115, duration: 0:00:00.370058
episode: 21/200, loss: 0.5274, duration: 0:00:00.370079
episode: 31/200, loss: 0.4027, duration: 0:00:00.371186
episode: 41/200, loss: 0.1603, duration: 0:00:00.367300
episode: 51/200, loss: 0.1575, duration: 0:00:00.372802
episode: 61/200, loss: 0.0769, duration: 0:00:00.371224
episode: 71/200, loss: 0.1279, duration: 0:00:00.362212
episode: 81/200, loss: 0.2543, duration: 0:00:00.417010
episode: 91/200, loss: 0.2240, duration: 0:00:00.380588
episode: 101/200, loss: 0.1818, duration: 0:00:00.366003
episode: 111/200, loss: 0.1268, duration: 0:00:00.361713
episode: 121/200, loss: 0.1684, duration: 0:00:00.372488
episode: 131/200, loss: 0.1201, duration: 0:00:00



#############################################################################
##	[34/35]	model=RL_CNN,	SEED=0,	trade=100,	risk=562	hold=0.1	##
#############################################################################
episode: 1/200, loss: 3.6038, duration: 0:00:00.405475
episode: 11/200, loss: 0.5955, duration: 0:00:00.374361
episode: 21/200, loss: 1.6210, duration: 0:00:00.372367
episode: 31/200, loss: 1.2554, duration: 0:00:00.381927
episode: 41/200, loss: 0.4393, duration: 0:00:00.374909
episode: 51/200, loss: 0.4496, duration: 0:00:00.363538
episode: 61/200, loss: 0.2738, duration: 0:00:00.366149
episode: 71/200, loss: 0.3864, duration: 0:00:00.361119
episode: 81/200, loss: 0.7786, duration: 0:00:00.364716
episode: 91/200, loss: 0.5678, duration: 0:00:00.526521
episode: 101/200, loss: 0.5345, duration: 0:00:00.461973
episode: 111/200, loss: 0.3704, duration: 0:00:00.466789
episode: 121/200, loss: 0.4586, duration: 0:00:00.401110
episode: 131/200, loss: 0.3273, duration: 0:00:00



#############################################################################
##	[35/35]	model=RL_CNN,	SEED=0,	trade=100,	risk=1000	hold=0.1	##
#############################################################################
episode: 1/200, loss: 5.4614, duration: 0:00:00.407518
episode: 11/200, loss: 0.9468, duration: 0:00:00.455340
episode: 21/200, loss: 2.6105, duration: 0:00:00.437594
episode: 31/200, loss: 1.9866, duration: 0:00:00.415313
episode: 41/200, loss: 0.6972, duration: 0:00:00.460560
episode: 51/200, loss: 0.7174, duration: 0:00:00.389259
episode: 61/200, loss: 0.4455, duration: 0:00:00.374134
episode: 71/200, loss: 0.6290, duration: 0:00:00.469727
episode: 81/200, loss: 1.2357, duration: 0:00:00.420789
episode: 91/200, loss: 0.8564, duration: 0:00:00.385578
episode: 101/200, loss: 0.7789, duration: 0:00:00.380261
episode: 111/200, loss: 0.5482, duration: 0:00:00.412399
episode: 121/200, loss: 0.6819, duration: 0:00:00.388693
episode: 131/200, loss: 0.5042, duration: 0:00:0



CPU times: user 46min 53s, sys: 17.4 s, total: 47min 10s
Wall time: 46min 39s


<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

<Figure size 1400x500 with 0 Axes>

In [6]:
# double check the end of training
#env.until

In [7]:
# Completion marker: printed once the cells above have run, so the end of the
# run is visible in the notebook output.
print('training done.')

training done.


In [8]:
### Now repeat this for the other RL models.
### Remember to update the gamma trios, the model base name, and the model specs/setup at the beginning of the notebook!