In [1]:
%%time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import os
from src.models.frontier import MultiStockEnv, Agent, play_one_episode, maybe_make_dir
from src.config import market_tickers
from src.config.model_config import model_parameters
import tensorflow as tf

CPU times: user 6.31 s, sys: 3.71 s, total: 10 s
Wall time: 8.15 s


In [2]:
# Investor-preference grid: every gamma_trade value is combined with every
# gamma_risk value, giving one training run per pair.

#------ small test-sweep ------#
GAMMA_TRADES = [0.1, 1, 10, 60, 100]
GAMMA_RISKS = [0.1, 1, 18, 56, 100, 562, 1000]
#------ extended-boyd-sweep ------#
# GAMMA_TRADES =  [0.1, 0.5, 1, 2, 3, 4, 5, 5.5, 6, 6.5, 7, 7.5, 8, 9, 10, 11, 12, 15, 20, 30, 45, 60, 100, 200]
# GAMMA_RISKS = [0.1, 0.178, 0.316, 0.562, 1, 2, 3, 6, 10, 18, 32, 56, 100, 178, 316, 562, 1000, 2000, 5000, 10000, 20000]
#---------------------------------#

# Cartesian product, trade-major order; each entry is (gamma_trade, gamma_risk).
GAMMA_PAIRS = [
    (gamma_trade, gamma_risk)
    for gamma_trade in GAMMA_TRADES
    for gamma_risk in GAMMA_RISKS
]

n_pairs = len(GAMMA_PAIRS)
print('number of pairs: ', n_pairs)

number of pairs:  35


In [3]:
# papermill parameters
# NOTE(review): presumably this cell is tagged `parameters` so papermill can override
# these defaults per run -- confirm against the notebook metadata.

# Random seeds to train with; the training cell runs the full GAMMA_PAIRS sweep once per seed.
# Uncomment further entries to train additional seeds.
SEED_LIST = [
             0,
             #1111,
             #2222, 
             #3333, 
             #4444, 
             #5555,
             #6666,
             #7777,
             #8888,
             #9999
            ]

#TICKERS = market_tickers.DOW_30_TICKER
TICKERS = ['AAPL', 'AMD', 'CSCO', 'F', 'INTC']
MARKET_NAME = 'TEST_5' # used in the data directory and the experiment output paths
MODEL_BASE_NAME = 'RL_CNN' # use either | 'RL_CNN' | 'RL_str_fcast' | 'RL_all_inp' | (key into model_parameters)
FROM = '2017-01-03' # start of training set
UNTIL = '2019-01-01' # end of training set
NB_EPISODES = 200 # number of episodes to train for
SAVE_EVERY = 200 # save model weights every time this many episodes pass (used 100 when not from pretrained)

In [4]:
# other constants and parameters
TICKERS.sort() # in-place sort of the ticker list defined in the parameters cell
FILE_PERIOD = '1d' # weekly='5d', daily='1d'
DAYS_IN_EPISODE = 30 # 365 for one-year long episodes (conditions checked at end of episode)
EPISODE_DRAW_DISTRIBUTION = 'uniform' # 'uniform' or 'geometric'. the starting point of each generated episode is drawn from this distribution
HALF_SPREAD = 0.0005/2.0 # 'a' in transaction cost function
NONLIN_COEFF = 1.0 # 'b' in transaction cost function
POWER = 1.5 # power applied to the change in the portfolio vector in the transaction cost
#GAMMA_RISK, GAMMA_TRADE, GAMMA_HOLD = 18, 6.5, 0.0 # relative importance of risk, trading cost, and holding cost
INIT_PORTFOLIO = 100000000.0 # initial portfolio value
#model_name = f'REINFORCE_CNN' #_seeded_{UNTIL} # give model a name to distinguish saved files
#NB_EPISODES = 300 #2000
MODE = 'train' # train or test mode
#FROM_PRETRAINED = False
DATA_DIR = f'../data/{MARKET_NAME}/preprocessed_data/' # relative to the notebook's working directory

# Tune and double-check these before every test:
# (all four are looked up in model_parameters under the MODEL_BASE_NAME key)
USE_FORECASTS = model_parameters[MODEL_BASE_NAME]['use_forecasts'] # whether to use forecasts as inputs to agent policy net or not
NB_FORECASTS = model_parameters[MODEL_BASE_NAME]['nb_forecasts'] # number of forecasts to use in state. Set to None for no forecasts
FORECAST_TYPE = model_parameters[MODEL_BASE_NAME]['forecast_type'] # use 'strong' or 'weak' forecasts (if no forecasts given, this does not matter)
USE_CNN_STATE = model_parameters[MODEL_BASE_NAME]['use_cnn_state'] # whether to pass log-rets block for CNN part of policy net (set to false for forecast only)

In [5]:
%%time
##########################
### main training loop ###
##########################
# For every seed in SEED_LIST and every (gamma_trade, gamma_risk) pair in
# GAMMA_PAIRS: build a fresh environment and agent, train for NB_EPISODES
# episodes, checkpoint weights and losses every SAVE_EVERY episodes, and write
# a loss-curve figure. A run is skipped only when its *final* losses file is
# already on disk, so an interrupted run is retrained instead of being
# silently skipped because its (empty) output folder exists.

# The env state window and the agent's policy input must use the same lookback.
LOOKBACK_WINDOW = 20
# One slot per ticker plus one extra (presumably the cash account, see cash_key) -- confirm.
N_ASSETS = len(TICKERS) + 1


def _save_loss_plot(losses_file, fig_path, title, save_fig=True):
    """Plot the per-episode losses stored in ``losses_file``.

    Args:
        losses_file: path of a ``.npy`` file written with ``np.save``.
        fig_path: where the PNG is written when ``save_fig`` is true.
        title: figure title.
        save_fig: set to False to build the figure without writing it.

    The figure is closed before returning. ``plt.clf()`` only clears the
    axes and leaves the figure object registered with pyplot, which leaked
    one figure per run and produced an empty figure repr per run in the
    notebook output.
    """
    loaded_losses = np.load(losses_file)

    fig = plt.figure(figsize=(14,5))
    plt.plot(loaded_losses)
    plt.title(title)
    plt.xlabel('Episode Number')
    plt.ylabel(r'Loss: $-G \approx -\mathbb{E}\left[ \sum_{k=0}^{T} \gamma^k R_t \right]$')
    #plt.yscale('symlog')
    if save_fig:
        plt.savefig(fig_path, dpi=150, facecolor=None, edgecolor=None, bbox_inches='tight')
    plt.close(fig)


# Progress is reported over all seeds, not per seed, so the "[i/N]" header
# stays meaningful when SEED_LIST has more than one entry.
n_runs = len(SEED_LIST) * len(GAMMA_PAIRS)
counter = 0

for RANDOM_SEED in SEED_LIST:
    model_name = f'{MODEL_BASE_NAME}_seed_{RANDOM_SEED}'

    # configure folders (built once and reused for the skip check and all outputs)
    experiments_folder = f'../experiments/{MARKET_NAME}'
    from_seeded_folder = f'{experiments_folder}/seeded'
    base_model_folder = f'{from_seeded_folder}/{MODEL_BASE_NAME}'
    seed_folder = f'{base_model_folder}/seed_{RANDOM_SEED}'
    maybe_make_dir(seed_folder)

    for (GAMMA_TRADE, GAMMA_RISK) in GAMMA_PAIRS:
        # set random generator seed for tf and np
        # this is probably unnecessary because it is done when env is initialised but just in case
        tf.random.set_seed(RANDOM_SEED)
        np.random.seed(RANDOM_SEED)
        counter += 1

        preference_folder = f'{seed_folder}/trade_{GAMMA_TRADE}_risk_{GAMMA_RISK}'
        models_folder = f'{preference_folder}/models' # where saved models will be saved
        losses_folder = f'{preference_folder}/losses' # where losses will be saved for plotting training progress
        fig_folder = f'{preference_folder}/figures'
        # written last by a completed run, so its presence marks the run as done
        final_losses_file = f'{losses_folder}/{model_name}_{MODE}_{NB_EPISODES}_losses.npy'

        # check if this run is already done. skip if so
        if os.path.exists(final_losses_file):
            print(f'\talready exists: trade_{GAMMA_TRADE}_risk_{GAMMA_RISK}')
            print('\tskip to next.')
            continue

        print('#############################################################################')
        print(f'##\t[{counter}/{n_runs}]\tmodel={MODEL_BASE_NAME},\tSEED={RANDOM_SEED},\ttrade={GAMMA_TRADE},\trisk={GAMMA_RISK}\t##')
        print('#############################################################################')

        # create agent and environment
        env = MultiStockEnv(tickers=TICKERS,
                            from_date=FROM,
                            until=UNTIL,
                            cash_key='USDOLLAR',
                            gamma_risk=GAMMA_RISK,
                            gamma_trade=GAMMA_TRADE,
                            half_spread=HALF_SPREAD,
                            nonlin_coef=NONLIN_COEFF,
                            power=POWER,
                            datadir=DATA_DIR,
                            state_lookback_window=LOOKBACK_WINDOW,
                            distribution=EPISODE_DRAW_DISTRIBUTION,
                            days_duration=DAYS_IN_EPISODE,
                            mode=MODE, # same mode that play_one_episode receives below
                            random_seed=RANDOM_SEED,
                            init_portfolio=INIT_PORTFOLIO,
                            period_in_file_name=FILE_PERIOD,
                            nb_forecasts=NB_FORECASTS,
                            forecast_type=FORECAST_TYPE,
                            use_CNN_state=USE_CNN_STATE,
                            verbose=False)

        agent = Agent(alpha=0.001,
                      gamma=0.99,
                      n_assets=N_ASSETS,
                      tau=5,
                      lookback_window=LOOKBACK_WINDOW,
                      n_feature_maps=N_ASSETS,
                      use_forecasts=USE_FORECASTS,
                      use_CNN_state=USE_CNN_STATE)

        # create directories for saving outputs to (if they don't exist already)
        maybe_make_dir(preference_folder)
        maybe_make_dir(models_folder)
        maybe_make_dir(losses_folder)
        maybe_make_dir(fig_folder)

        if MODE == 'train':
            # play the game NB_EPISODES times and update weights according to loss
            losses = []
            for e in range(NB_EPISODES):

                # save model weights (and the losses so far) every few episodes
                if (e%SAVE_EVERY == 0) and (e!=0):
                    agent.save(f'{models_folder}/{model_name}_{e}')
                    np.save(f'{losses_folder}/{model_name}_{MODE}_{e}_losses.npy', np.array(losses))

                t0 = datetime.datetime.now()
                try:
                    loss = play_one_episode(agent, env, MODE)
                except Exception as err:
                    # deliberate best-effort: a failed episode is recorded as NaN
                    # and training carries on with the next episode
                    print(f'\t*** error in episode {e}:', err)
                    loss = np.nan
                dt = datetime.datetime.now() - t0
                losses.append(loss) # append episode loss
                # print progress
                if (e%10 == 0):
                    print(f"episode: {e + 1}/{NB_EPISODES}, loss: {loss:.4f}, duration: {dt}")

            # save the weights and losses when done
            agent.save(f'{models_folder}/{model_name}_{NB_EPISODES}')
            np.save(final_losses_file, np.array(losses))

            # plot losses; done only after training because the losses file
            # exists only once a training run has written it
            _save_loss_plot(final_losses_file,
                            f'{fig_folder}/{model_name}_losses.png',
                            f'{model_name} Training Loss')
        else:
            print(f'\tMODE={MODE!r}: nothing to train or plot for this pair.')

#############################################################################
##	[1/35]	model=RL_CNN,	SEED=0,	trade=0.1,	risk=0.1	##
#############################################################################
episode: 1/200, loss: 0.0050, duration: 0:00:00.408424
episode: 11/200, loss: -0.0266, duration: 0:00:00.412944
episode: 21/200, loss: 0.0276, duration: 0:00:00.376945
episode: 31/200, loss: -0.0561, duration: 0:00:00.392636
episode: 41/200, loss: 0.0090, duration: 0:00:00.381066
episode: 51/200, loss: -0.0514, duration: 0:00:00.376535
episode: 61/200, loss: -0.0109, duration: 0:00:00.368045
episode: 71/200, loss: 0.0075, duration: 0:00:00.460880
episode: 81/200, loss: 0.0189, duration: 0:00:00.459529
episode: 91/200, loss: -0.0218, duration: 0:00:00.477249
episode: 101/200, loss: -0.0296, duration: 0:00:00.500449
episode: 111/200, loss: -0.0577, duration: 0:00:00.416677
episode: 121/200, loss: -0.0729, duration: 0:00:00.511209
episode: 131/200, loss: -0.0017, duration: 0:00:00.



#############################################################################
##	[22/35]	model=RL_CNN,	SEED=0,	trade=60,	risk=0.1	##
#############################################################################
episode: 1/200, loss: 0.0661, duration: 0:00:00.356094
episode: 11/200, loss: -0.0057, duration: 0:00:00.325212
episode: 21/200, loss: 0.1172, duration: 0:00:00.332649
episode: 31/200, loss: -0.0080, duration: 0:00:00.320936
episode: 41/200, loss: 0.0242, duration: 0:00:00.395794
episode: 51/200, loss: -0.0407, duration: 0:00:00.338697
episode: 61/200, loss: 0.0000, duration: 0:00:00.348935
episode: 71/200, loss: 0.0155, duration: 0:00:00.330607
episode: 81/200, loss: 0.0819, duration: 0:00:00.378798
episode: 91/200, loss: -0.0098, duration: 0:00:00.332579
episode: 101/200, loss: -0.0212, duration: 0:00:00.334821
episode: 111/200, loss: -0.0480, duration: 0:00:00.327369
episode: 121/200, loss: -0.0547, duration: 0:00:00.330119
episode: 131/200, loss: 0.0095, duration: 0:00:00.33



#############################################################################
##	[23/35]	model=RL_CNN,	SEED=0,	trade=60,	risk=1	##
#############################################################################
episode: 1/200, loss: 0.0679, duration: 0:00:00.362521
episode: 11/200, loss: -0.0052, duration: 0:00:00.385587
episode: 21/200, loss: 0.1203, duration: 0:00:00.322026
episode: 31/200, loss: -0.0041, duration: 0:00:00.326487
episode: 41/200, loss: 0.0253, duration: 0:00:00.330875
episode: 51/200, loss: -0.0384, duration: 0:00:00.328144
episode: 61/200, loss: 0.0015, duration: 0:00:00.326886
episode: 71/200, loss: 0.0176, duration: 0:00:00.335046
episode: 81/200, loss: 0.0987, duration: 0:00:00.323393
episode: 91/200, loss: -0.0036, duration: 0:00:00.326472
episode: 101/200, loss: -0.0142, duration: 0:00:00.326285
episode: 111/200, loss: -0.0433, duration: 0:00:00.345755
episode: 121/200, loss: -0.0477, duration: 0:00:00.326285
episode: 131/200, loss: 0.0099, duration: 0:00:00.3249



#############################################################################
##	[24/35]	model=RL_CNN,	SEED=0,	trade=60,	risk=18	##
#############################################################################
episode: 1/200, loss: 0.1008, duration: 0:00:00.357372
episode: 11/200, loss: 0.0051, duration: 0:00:00.339060
episode: 21/200, loss: 0.1594, duration: 0:00:00.330739
episode: 31/200, loss: 0.0516, duration: 0:00:00.333059
episode: 41/200, loss: 0.0535, duration: 0:00:00.317392
episode: 51/200, loss: -0.0111, duration: 0:00:00.329352
episode: 61/200, loss: 0.0149, duration: 0:00:00.336259
episode: 71/200, loss: 0.0422, duration: 0:00:00.322542
episode: 81/200, loss: 0.1364, duration: 0:00:00.322365
episode: 91/200, loss: 0.0277, duration: 0:00:00.324699
episode: 101/200, loss: 0.0258, duration: 0:00:00.320304
episode: 111/200, loss: -0.0082, duration: 0:00:00.323911
episode: 121/200, loss: -0.0063, duration: 0:00:00.329021
episode: 131/200, loss: 0.0273, duration: 0:00:00.320139




#############################################################################
##	[25/35]	model=RL_CNN,	SEED=0,	trade=60,	risk=56	##
#############################################################################
episode: 1/200, loss: 0.1745, duration: 0:00:00.353860
episode: 11/200, loss: 0.0288, duration: 0:00:00.320651
episode: 21/200, loss: 0.2367, duration: 0:00:00.325383
episode: 31/200, loss: 0.1553, duration: 0:00:00.315506
episode: 41/200, loss: 0.0959, duration: 0:00:00.326587
episode: 51/200, loss: 0.0165, duration: 0:00:00.320525
episode: 61/200, loss: 0.0350, duration: 0:00:00.328745
episode: 71/200, loss: 0.0587, duration: 0:00:00.330103
episode: 81/200, loss: 0.1153, duration: 0:00:00.316701
episode: 91/200, loss: 0.0449, duration: 0:00:00.334585
episode: 101/200, loss: 0.0255, duration: 0:00:00.333353
episode: 111/200, loss: 0.0041, duration: 0:00:00.334566
episode: 121/200, loss: 0.0032, duration: 0:00:00.377374
episode: 131/200, loss: 0.0179, duration: 0:00:00.332110
epi



#############################################################################
##	[26/35]	model=RL_CNN,	SEED=0,	trade=60,	risk=100	##
#############################################################################
episode: 1/200, loss: 0.2597, duration: 0:00:00.366713
episode: 11/200, loss: 0.0514, duration: 0:00:00.326337
episode: 21/200, loss: 0.3231, duration: 0:00:00.321034
episode: 31/200, loss: 0.2549, duration: 0:00:00.315668
episode: 41/200, loss: 0.1397, duration: 0:00:00.334738
episode: 51/200, loss: 0.0412, duration: 0:00:00.318055
episode: 61/200, loss: 0.0505, duration: 0:00:00.318226
episode: 71/200, loss: 0.0647, duration: 0:00:00.330563
episode: 81/200, loss: 0.0984, duration: 0:00:00.322960
episode: 91/200, loss: 0.0485, duration: 0:00:00.326019
episode: 101/200, loss: 0.0264, duration: 0:00:00.331334
episode: 111/200, loss: 0.0019, duration: 0:00:00.373976
episode: 121/200, loss: 0.0023, duration: 0:00:00.318665
episode: 131/200, loss: 0.0103, duration: 0:00:00.325064
ep



#############################################################################
##	[27/35]	model=RL_CNN,	SEED=0,	trade=60,	risk=562	##
#############################################################################
episode: 1/200, loss: 1.1550, duration: 0:00:00.344255
episode: 11/200, loss: 0.2892, duration: 0:00:00.314505
episode: 21/200, loss: 1.2250, duration: 0:00:00.329746
episode: 31/200, loss: 1.1681, duration: 0:00:00.341231
episode: 41/200, loss: 0.5512, duration: 0:00:00.323147
episode: 51/200, loss: 0.2497, duration: 0:00:00.327713
episode: 61/200, loss: 0.2301, duration: 0:00:00.325565
episode: 71/200, loss: 0.2127, duration: 0:00:00.334635
episode: 81/200, loss: 0.2059, duration: 0:00:00.329502
episode: 91/200, loss: 0.1416, duration: 0:00:00.329818
episode: 101/200, loss: 0.0610, duration: 0:00:00.322343
episode: 111/200, loss: 0.0143, duration: 0:00:00.321966
episode: 121/200, loss: 0.0132, duration: 0:00:00.329627
episode: 131/200, loss: 0.0142, duration: 0:00:00.334090
ep



#############################################################################
##	[28/35]	model=RL_CNN,	SEED=0,	trade=60,	risk=1000	##
#############################################################################
episode: 1/200, loss: 2.0038, duration: 0:00:00.344900
episode: 11/200, loss: 0.5157, duration: 0:00:00.318109
episode: 21/200, loss: 2.0620, duration: 0:00:00.318621
episode: 31/200, loss: 2.0234, duration: 0:00:00.327585
episode: 41/200, loss: 0.9340, duration: 0:00:00.326473
episode: 51/200, loss: 0.4416, duration: 0:00:00.322330
episode: 61/200, loss: 0.3974, duration: 0:00:00.329794
episode: 71/200, loss: 0.3544, duration: 0:00:00.325174
episode: 81/200, loss: 0.3169, duration: 0:00:00.335146
episode: 91/200, loss: 0.2311, duration: 0:00:00.319006
episode: 101/200, loss: 0.0976, duration: 0:00:00.331010
episode: 111/200, loss: 0.0272, duration: 0:00:00.325798
episode: 121/200, loss: 0.0241, duration: 0:00:00.335074
episode: 131/200, loss: 0.0199, duration: 0:00:00.336643
e



#############################################################################
##	[29/35]	model=RL_CNN,	SEED=0,	trade=100,	risk=0.1	##
#############################################################################
episode: 1/200, loss: 0.1069, duration: 0:00:00.347131
episode: 11/200, loss: 0.0071, duration: 0:00:00.331381
episode: 21/200, loss: 0.1651, duration: 0:00:00.322221
episode: 31/200, loss: 0.0167, duration: 0:00:00.327530
episode: 41/200, loss: 0.0327, duration: 0:00:00.310755
episode: 51/200, loss: -0.0352, duration: 0:00:00.329436
episode: 61/200, loss: 0.0035, duration: 0:00:00.336863
episode: 71/200, loss: 0.0192, duration: 0:00:00.323207
episode: 81/200, loss: 0.0975, duration: 0:00:00.358886
episode: 91/200, loss: -0.0087, duration: 0:00:00.333891
episode: 101/200, loss: -0.0186, duration: 0:00:00.322576
episode: 111/200, loss: -0.0435, duration: 0:00:00.324786
episode: 121/200, loss: -0.0490, duration: 0:00:00.307255
episode: 131/200, loss: 0.0127, duration: 0:00:00.317



#############################################################################
##	[30/35]	model=RL_CNN,	SEED=0,	trade=100,	risk=1	##
#############################################################################
episode: 1/200, loss: 0.1087, duration: 0:00:00.349821
episode: 11/200, loss: 0.0074, duration: 0:00:00.328176
episode: 21/200, loss: 0.1675, duration: 0:01:23.365283
episode: 31/200, loss: 0.0215, duration: 0:00:00.326622
episode: 41/200, loss: 0.0338, duration: 0:00:00.324028
episode: 51/200, loss: -0.0331, duration: 0:00:00.434478
episode: 61/200, loss: 0.0061, duration: 0:00:00.317655
episode: 71/200, loss: 0.0204, duration: 0:00:00.316035
episode: 81/200, loss: 0.1058, duration: 0:00:00.324497
episode: 91/200, loss: -0.0009, duration: 0:00:00.317500
episode: 101/200, loss: -0.0152, duration: 0:00:00.325252
episode: 111/200, loss: -0.0415, duration: 0:00:00.325416
episode: 121/200, loss: -0.0432, duration: 0:00:00.342264
episode: 131/200, loss: 0.0129, duration: 0:00:00.32998



#############################################################################
##	[31/35]	model=RL_CNN,	SEED=0,	trade=100,	risk=18	##
#############################################################################
episode: 1/200, loss: 0.1416, duration: 0:00:00.433052
episode: 11/200, loss: 0.0176, duration: 0:00:00.328962
episode: 21/200, loss: 0.2170, duration: 0:00:00.327246
episode: 31/200, loss: 0.0797, duration: 0:00:00.316994
episode: 41/200, loss: 0.0674, duration: 0:00:00.341505
episode: 51/200, loss: -0.0014, duration: 0:00:00.330749
episode: 61/200, loss: 0.0234, duration: 0:00:00.322026
episode: 71/200, loss: 0.0524, duration: 0:00:00.320179
episode: 81/200, loss: 0.1800, duration: 0:00:00.326932
episode: 91/200, loss: 0.0345, duration: 0:00:00.326316
episode: 101/200, loss: 0.0306, duration: 0:00:00.316096
episode: 111/200, loss: -0.0018, duration: 0:00:00.328845
episode: 121/200, loss: -0.0023, duration: 0:00:00.319156
episode: 131/200, loss: 0.0338, duration: 0:00:00.316432



#############################################################################
##	[32/35]	model=RL_CNN,	SEED=0,	trade=100,	risk=56	##
#############################################################################
episode: 1/200, loss: 0.2153, duration: 0:00:00.346235
episode: 11/200, loss: 0.0435, duration: 0:00:00.316483
episode: 21/200, loss: 0.2975, duration: 0:00:00.327037
episode: 31/200, loss: 0.1820, duration: 0:00:00.320750
episode: 41/200, loss: 0.1143, duration: 0:00:00.315253
episode: 51/200, loss: 0.0329, duration: 0:00:00.338352
episode: 61/200, loss: 0.0514, duration: 0:00:00.313724
episode: 71/200, loss: 0.0875, duration: 0:00:00.330824
episode: 81/200, loss: 0.2185, duration: 0:00:00.358605
episode: 91/200, loss: 0.0682, duration: 0:00:00.313259
episode: 101/200, loss: 0.0523, duration: 0:00:00.334155
episode: 111/200, loss: 0.0148, duration: 0:00:00.385491
episode: 121/200, loss: 0.0174, duration: 0:00:00.338663
episode: 131/200, loss: 0.0402, duration: 0:00:00.328524
ep



#############################################################################
##	[33/35]	model=RL_CNN,	SEED=0,	trade=100,	risk=100	##
#############################################################################
episode: 1/200, loss: 0.3005, duration: 0:00:00.360608
episode: 11/200, loss: 0.0664, duration: 0:00:00.377808
episode: 21/200, loss: 0.3881, duration: 0:00:00.313817
episode: 31/200, loss: 0.2963, duration: 0:00:00.319660
episode: 41/200, loss: 0.1640, duration: 0:00:00.326289
episode: 51/200, loss: 0.0650, duration: 0:00:00.334636
episode: 61/200, loss: 0.0765, duration: 0:00:00.329825
episode: 71/200, loss: 0.1101, duration: 0:00:00.336314
episode: 81/200, loss: 0.2238, duration: 0:00:00.381668
episode: 91/200, loss: 0.0889, duration: 0:00:00.378696
episode: 101/200, loss: 0.0569, duration: 0:00:00.512864
episode: 111/200, loss: 0.0136, duration: 0:00:00.397610
episode: 121/200, loss: 0.0132, duration: 0:00:00.379214
episode: 131/200, loss: 0.0197, duration: 0:00:00.376588
e



#############################################################################
##	[34/35]	model=RL_CNN,	SEED=0,	trade=100,	risk=562	##
#############################################################################
episode: 1/200, loss: 1.1958, duration: 0:00:00.436754
episode: 11/200, loss: 0.3062, duration: 0:00:00.461668
episode: 21/200, loss: 1.2981, duration: 0:00:00.375970
episode: 31/200, loss: 1.2307, duration: 0:00:00.390956
episode: 41/200, loss: 0.5801, duration: 0:00:00.405437
episode: 51/200, loss: 0.2779, duration: 0:00:00.374926
episode: 61/200, loss: 0.2500, duration: 0:00:00.390741
episode: 71/200, loss: 0.2385, duration: 0:00:00.427483
episode: 81/200, loss: 0.2449, duration: 0:00:00.381299
episode: 91/200, loss: 0.1602, duration: 0:00:00.460896
episode: 101/200, loss: 0.0715, duration: 0:00:01.257667
episode: 111/200, loss: 0.0191, duration: 0:00:00.513092
episode: 121/200, loss: 0.0174, duration: 0:00:00.395279
episode: 131/200, loss: 0.0180, duration: 0:00:00.383241
e



#############################################################################
##	[35/35]	model=RL_CNN,	SEED=0,	trade=100,	risk=1000	##
#############################################################################
episode: 1/200, loss: 2.0446, duration: 0:00:00.427362
episode: 11/200, loss: 0.5324, duration: 0:00:00.354763
episode: 21/200, loss: 2.1442, duration: 0:00:00.382988
episode: 31/200, loss: 2.0973, duration: 0:00:00.482131
episode: 41/200, loss: 0.9695, duration: 0:00:00.376934
episode: 51/200, loss: 0.4704, duration: 0:00:00.419601
episode: 61/200, loss: 0.4181, duration: 0:00:00.407451
episode: 71/200, loss: 0.3737, duration: 0:00:00.394271
episode: 81/200, loss: 0.3509, duration: 0:00:00.375362
episode: 91/200, loss: 0.2475, duration: 0:00:00.395805
episode: 101/200, loss: 0.1066, duration: 0:00:00.382047
episode: 111/200, loss: 0.0311, duration: 0:00:00.400436
episode: 121/200, loss: 0.0279, duration: 0:00:00.476819
episode: 131/200, loss: 0.0232, duration: 0:00:00.439869




CPU times: user 46min 12s, sys: 24.3 s, total: 46min 36s
Wall time: 46min 19s


<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

In [6]:
# double check the end of training
#env.until

In [7]:
# Prints a completion message; it appears in the output once this cell has been executed.
print('training done.')

training done.


In [8]:
### Now repeat this for the different RL models.
### remember to update the gamma-pairs, model base name, and model specs/setup in beginning of notebook!