In [1]:
%%time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import os
from src.models.frontier import MultiStockEnv, Agent, play_one_episode, maybe_make_dir
from src.config import market_tickers
from src.config.model_config import model_parameters
import tensorflow as tf

CPU times: user 4.28 s, sys: 2.04 s, total: 6.32 s
Wall time: 4.66 s


In [2]:
# NOTE(review): model_parameters is already imported in the first cell; the import
# is repeated here, presumably so this cell can be re-run on its own.
from src.config.model_config import model_parameters
# bare last expression: displays the configuration dict of every available model variant
model_parameters

{'RL_CNN': {'use_forecasts': False,
  'nb_forecasts': None,
  'forecast_type': 'strong',
  'use_cnn_state': True},
 'RL_str_fcast': {'use_forecasts': True,
  'nb_forecasts': 2,
  'forecast_type': 'strong',
  'use_cnn_state': False},
 'RL_all_inp': {'use_forecasts': True,
  'nb_forecasts': 2,
  'forecast_type': 'strong',
  'use_cnn_state': True}}

In [3]:
# papermill parameters
# Default values used for a small local run. One assignment per line so that each
# parameter can be overridden individually.

# --- reproducibility ---
# seeds to train with (further seeds kept for full runs: 1111, 2222, 3333, 4444, 5555, 6666, 7777, 8888, 9999)
SEED_LIST = [0]

# --- market ---
#TICKERS = market_tickers.DOW_30_TICKER
TICKERS = ['AAPL', 'AMD', 'CSCO', 'F', 'INTC']
MARKET_NAME = 'TEST_5'

# --- model ---
MODEL_BASE_NAME = 'RL_CNN'  # one of: 'RL_CNN' | 'RL_str_fcast' | 'RL_all_inp'

# --- training window and schedule ---
FROM = '2017-01-03'   # start of training set
UNTIL = '2019-01-01'  # end of training set
NB_EPISODES = 200     # number of episodes to train for
SAVE_EVERY = 200      # save model weights every time this many episodes pass (100 was used when not starting from pretrained)

# --- investor preferences ---
GAMMA_TRADES = [0.1, 1, 10, 100]
GAMMA_RISKS = [0.1, 1, 10, 100, 1000]
GAMMA_HOLDS = [0.1, 1]

In [4]:
# Parameters
# NOTE(review): this cell looks like the set of values injected by papermill for this
# particular run; because it executes after the defaults cell above, every assignment
# here overrides the default of the same name.
SEED_LIST = [0]
# NOTE(review): 24 tickers are listed although the market is named "LA_40" - confirm
# that this is the intended universe.
TICKERS = [
    "VALE",
    "ITUB",
    "PBR-A",
    "BBD",
    "PBR",
    "AMXL.MX",
    "WALMEX.MX",
    "FEMSAUBD.MX",
    "GFNORTEO.MX",
    "WEGE3.SA",
    "BBAS3.SA",
    "BAP",
    "CEMEXCPO.MX",
    "SQM",
    "SCCO",
    "GGB",
    "CHILE.SN",
    "ENIA",
    "CIB",
    "TLEVISACPO.MX",
    "EC",
    "BRFS",
    "BSAC",
    "CCRO3.SA",
]
MARKET_NAME = "LA_40"
MODEL_BASE_NAME = "RL_CNN"
FROM = "2010-03-01"
UNTIL = "2014-12-01"
NB_EPISODES = 200
SAVE_EVERY = 200
# 9 trade x 1 risk x 6 hold values -> 54 preference trios in the sweep below
GAMMA_TRADES = [1, 7, 8, 9, 10, 11, 12, 50, 100]
GAMMA_RISKS = [10000]
GAMMA_HOLDS = [0.1, 1, 10, 100, 1000, 10000]


In [5]:
# Reference sweeps that can be copied into the parameters when needed:
#------ small test-sweep ------#
# GAMMA_TRADES = [0.1, 1, 10, 100]
# GAMMA_RISKS = [0.1, 1, 10, 100, 1000]
# GAMMA_HOLDS = [0.1, 1]
#------ extended-boyd-sweep ------#
# GAMMA_TRADES =  [0.1, 0.5, 1, 2, 3, 4, 5, 5.5, 6, 6.5, 7, 7.5, 8, 9, 10, 11, 12, 15, 20, 30, 45, 60, 100, 200]
# GAMMA_RISKS = [0.1, 0.178, 0.316, 0.562, 1, 2, 3, 6, 10, 18, 32, 56, 100, 178, 316, 562, 1000, 2000, 5000, 10000, 20000]
#---------------------------------#

# Full grid of investor preferences, one tuple per training run.
# Tuple format: (gamma_trade, gamma_risk, gamma_hold); trade varies slowest, hold fastest.
GAMMA_TRIOS = [
    (gamma_trade, gamma_risk, gamma_hold)
    for gamma_trade in GAMMA_TRADES
    for gamma_risk in GAMMA_RISKS
    for gamma_hold in GAMMA_HOLDS
]

print('number of trios: ', len(GAMMA_TRIOS))

number of trios:  54


In [6]:
# other constants and parameters

# --- data ---
TICKERS.sort() # in-place sort of the ticker list
FILE_PERIOD = '1d' # weekly='5d', daily='1d'
DATA_DIR = f'../data/{MARKET_NAME}/preprocessed_data/'

# --- episodes ---
MODE = 'train' # train or test mode
DAYS_IN_EPISODE = 30 # 365 for one-year long episodes (conditions checked at end of episode)
EPISODE_DRAW_DISTRIBUTION = 'uniform' # 'uniform' or 'geometric': distribution used to select the starting point of an episode when it is generated
INIT_PORTFOLIO = 100_000_000.0 # initial portfolio value

# --- transaction cost function ---
HALF_SPREAD = 0.0005 / 2.0 # 'a' in transaction cost function
NONLIN_COEFF = 1.0 # 'b' in transaction cost function
POWER = 1.5 # power for change in portfolio vector used in transaction cost

# --- model setup ---
# Tune and double-check these before every test: they are read from the entry of
# model_parameters selected by MODEL_BASE_NAME.
_model_spec = model_parameters[MODEL_BASE_NAME]
USE_FORECASTS = _model_spec['use_forecasts'] # whether to use forecasts as inputs to agent policy net or not
NB_FORECASTS = _model_spec['nb_forecasts'] # number of forecasts to use in state. Set to None for no forecasts
FORECAST_TYPE = _model_spec['forecast_type'] # use 'strong' or 'weak' forecasts (if no forecasts given, this does not matter)
USE_CNN_STATE = _model_spec['use_cnn_state'] # whether to pass log-rets block for CNN part of policy net (set to false for forecast only)

In [7]:
%%time
##########################
### main training loop ###
##########################
# For every seed in SEED_LIST and every (gamma_trade, gamma_risk, gamma_hold) trio in
# GAMMA_TRIOS: build a fresh environment and agent, train for NB_EPISODES episodes,
# save weights and losses, then save a plot of the per-episode training loss.
# A combination is skipped when its final losses file is already on disk, so the
# cell can be re-run to resume an interrupted sweep.

LOOKBACK_WINDOW = 20 # passed to both the env state window and the agent (assumed to have to match - TODO confirm)
TOTAL_RUNS = len(SEED_LIST) * len(GAMMA_TRIOS) # counter below runs across all seeds
save_fig = True # whether to write the loss plot to the figures folder
counter = 0

for RANDOM_SEED in SEED_LIST:
    model_name = f'{MODEL_BASE_NAME}_seed_{RANDOM_SEED}'

    # folders shared by every preference trio of this seed
    experiments_folder = f'../experiments/{MARKET_NAME}'
    from_seeded_folder = f'{experiments_folder}/seeded'
    base_model_folder = f'{from_seeded_folder}/{MODEL_BASE_NAME}'
    seed_folder = f'{base_model_folder}/seed_{RANDOM_SEED}'
    maybe_make_dir(seed_folder)

    for (GAMMA_TRADE, GAMMA_RISK, GAMMA_HOLD) in GAMMA_TRIOS:
        # set random generator seed for tf and np
        # this is probably unnecessary because it is done when env is initialised but just in case
        tf.random.set_seed(RANDOM_SEED)
        np.random.seed(RANDOM_SEED)
        counter += 1

        # folders and files of this preference trio
        preference_name = f'trade_{GAMMA_TRADE}_risk_{GAMMA_RISK}_hold_{GAMMA_HOLD}'
        preference_folder = f'{seed_folder}/{preference_name}'
        models_folder = f'{preference_folder}/models' # where saved models will be saved
        losses_folder = f'{preference_folder}/losses' # where losses will be saved for plotting training progress
        fig_folder = f'{preference_folder}/figures'
        final_losses_file = f'{losses_folder}/{model_name}_{MODE}_{NB_EPISODES}_losses.npy'

        # Skip runs that already finished. The final losses file is written only after
        # the last episode, whereas the preference folder is created before training
        # starts; checking the folder alone would skip interrupted runs forever.
        if os.path.exists(final_losses_file):
            print(f'\talready exists: {preference_name}')
            print('\tskip to next.')
            continue

        print('#############################################################################')
        print(f'##\t[{counter}/{TOTAL_RUNS}]\tmodel={MODEL_BASE_NAME},\tSEED={RANDOM_SEED},\ttrade={GAMMA_TRADE},\trisk={GAMMA_RISK}\thold={GAMMA_HOLD}\t##')
        print('#############################################################################')

        # create agent and environment
        env = MultiStockEnv(tickers=TICKERS,
                            from_date=FROM,
                            until=UNTIL,
                            cash_key='USDOLLAR',
                            gamma_risk=GAMMA_RISK,
                            gamma_trade=GAMMA_TRADE,
                            gamma_hold=GAMMA_HOLD,
                            half_spread=HALF_SPREAD,
                            nonlin_coef=NONLIN_COEFF,
                            power=POWER,
                            datadir=DATA_DIR,
                            state_lookback_window=LOOKBACK_WINDOW,
                            distribution=EPISODE_DRAW_DISTRIBUTION,
                            days_duration=DAYS_IN_EPISODE,
                            mode=MODE, # same mode that is passed to play_one_episode below
                            random_seed=RANDOM_SEED,
                            init_portfolio=INIT_PORTFOLIO,
                            period_in_file_name=FILE_PERIOD,
                            nb_forecasts=NB_FORECASTS,
                            forecast_type=FORECAST_TYPE,
                            use_CNN_state=USE_CNN_STATE,
                            verbose=False)

        # one extra asset / feature map on top of the tickers (presumably the cash account - TODO confirm)
        agent = Agent(alpha=0.001,
                      gamma=0.99,
                      n_assets=len(TICKERS)+1,
                      tau=5,
                      lookback_window=LOOKBACK_WINDOW,
                      n_feature_maps=len(TICKERS)+1,
                      use_forecasts=USE_FORECASTS,
                      use_CNN_state=USE_CNN_STATE,
                      allow_long_short_trades=True)

        # create directories for saving outputs to (if they don't exist already)
        maybe_make_dir(preference_folder)
        maybe_make_dir(models_folder)
        maybe_make_dir(losses_folder)
        maybe_make_dir(fig_folder)

        if MODE == 'train':
            # play the game NB_EPISODES times and update weights according to loss
            losses = []
            for e in range(NB_EPISODES):

                # checkpoint: save model weights and the losses so far every few episodes
                if (e%SAVE_EVERY == 0) and (e!=0):
                    agent.save(f'{models_folder}/{model_name}_{e}')
                    np.save(f'{losses_folder}/{model_name}_{MODE}_{e}_losses.npy', np.array(losses))

                t0 = datetime.datetime.now()
                try:
                    loss = play_one_episode(agent, env, MODE)
                except Exception as err:
                    # deliberate best-effort: a failed episode is reported and recorded as
                    # NaN so that one bad episode does not abort the whole sweep
                    print(f'\t*** error in episode {e}:', err)
                    loss = np.nan
                dt = datetime.datetime.now() - t0
                losses.append(loss) # append episode loss
                # print progress
                if (e%10 == 0):
                    print(f"episode: {e + 1}/{NB_EPISODES}, loss: {loss:.4f}, duration: {dt}")

            # save the weights and losses when done
            agent.save(f'{models_folder}/{model_name}_{NB_EPISODES}')
            np.save(final_losses_file, np.array(losses))

        # plot losses (re-loaded from disk, so the figure shows exactly what was saved)
        loaded_losses = np.load(final_losses_file)

        fig, ax = plt.subplots(figsize=(14,5))
        ax.plot(loaded_losses)
        ax.set_title(f'{model_name} Training Loss')
        ax.set_xlabel('Episode Number')
        ax.set_ylabel(r'Loss: $-G \approx -\mathbb{E}\left[ \sum_{k=0}^{T} \gamma^k R_t \right]$')
        if save_fig:
            fig.savefig(f'{fig_folder}/{model_name}_losses.png', dpi=150, facecolor=None, edgecolor=None, bbox_inches='tight')
        # close (not just clear) the figure: otherwise one figure per run stays open and
        # the notebook ends up displaying a list of empty figures
        plt.close(fig)

	already exists: trade_1_risk_10000_hold_0.1
	skip to next.
	already exists: trade_1_risk_10000_hold_1
	skip to next.
	already exists: trade_1_risk_10000_hold_10
	skip to next.
	already exists: trade_1_risk_10000_hold_100
	skip to next.
	already exists: trade_1_risk_10000_hold_1000
	skip to next.
	already exists: trade_1_risk_10000_hold_10000
	skip to next.
	already exists: trade_7_risk_10000_hold_0.1
	skip to next.
	already exists: trade_7_risk_10000_hold_1
	skip to next.
	already exists: trade_7_risk_10000_hold_10
	skip to next.
	already exists: trade_7_risk_10000_hold_100
	skip to next.
	already exists: trade_7_risk_10000_hold_1000
	skip to next.
	already exists: trade_7_risk_10000_hold_10000
	skip to next.
	already exists: trade_8_risk_10000_hold_0.1
	skip to next.
	already exists: trade_8_risk_10000_hold_1
	skip to next.
	already exists: trade_8_risk_10000_hold_10
	skip to next.
	already exists: trade_8_risk_10000_hold_100
	skip to next.
	already exists: trade_8_risk_10000_hold_10

episode: 1/200, loss: 11.1494, duration: 0:00:01.160296


episode: 11/200, loss: 5.3769, duration: 0:00:01.144258


episode: 21/200, loss: 2.0395, duration: 0:00:01.438983


episode: 31/200, loss: 1.6253, duration: 0:00:01.224376


episode: 41/200, loss: 5.3484, duration: 0:00:01.266068


episode: 51/200, loss: 6.0320, duration: 0:00:01.174265


episode: 61/200, loss: 4.7736, duration: 0:00:01.419973


episode: 71/200, loss: 9.5278, duration: 0:00:01.327720


episode: 81/200, loss: 2.6070, duration: 0:00:01.409507


episode: 91/200, loss: 4.5308, duration: 0:00:01.285141


episode: 101/200, loss: 4.2362, duration: 0:00:01.134107


episode: 111/200, loss: 4.3640, duration: 0:00:01.887444


episode: 121/200, loss: 2.8870, duration: 0:00:01.136865


episode: 131/200, loss: 4.1565, duration: 0:00:01.383552


episode: 141/200, loss: 6.1746, duration: 0:00:01.151163


episode: 151/200, loss: 2.1267, duration: 0:00:01.267960


episode: 161/200, loss: 2.6678, duration: 0:00:01.309485


episode: 171/200, loss: 4.8275, duration: 0:00:01.373773


episode: 181/200, loss: 4.8910, duration: 0:00:01.142681


episode: 191/200, loss: 3.1305, duration: 0:00:01.246171


#############################################################################
##	[50/54]	model=RL_CNN,	SEED=0,	trade=100,	risk=10000	hold=1	##
#############################################################################


episode: 1/200, loss: 11.1501, duration: 0:00:01.423719


episode: 11/200, loss: 5.3775, duration: 0:00:01.216199


episode: 21/200, loss: 2.0399, duration: 0:00:01.141745


episode: 31/200, loss: 1.6257, duration: 0:00:01.122507


episode: 41/200, loss: 5.3486, duration: 0:00:01.140536


episode: 51/200, loss: 6.0325, duration: 0:00:01.207537


episode: 61/200, loss: 4.7734, duration: 0:00:01.206501


episode: 71/200, loss: 9.5278, duration: 0:00:01.115136


episode: 81/200, loss: 2.6072, duration: 0:00:01.154301


episode: 91/200, loss: 4.5309, duration: 0:00:01.284250


episode: 101/200, loss: 4.2337, duration: 0:00:01.282034


episode: 111/200, loss: 4.3638, duration: 0:00:01.197519


episode: 121/200, loss: 2.8878, duration: 0:00:01.213617


episode: 131/200, loss: 4.1558, duration: 0:00:01.112250


episode: 141/200, loss: 6.1685, duration: 0:00:01.112842


episode: 151/200, loss: 2.1266, duration: 0:00:01.672087


episode: 161/200, loss: 2.6677, duration: 0:00:01.148613


episode: 171/200, loss: 4.8250, duration: 0:00:01.150472


episode: 181/200, loss: 4.8938, duration: 0:00:01.111495


episode: 191/200, loss: 3.1305, duration: 0:00:01.226232


#############################################################################
##	[51/54]	model=RL_CNN,	SEED=0,	trade=100,	risk=10000	hold=10	##
#############################################################################


episode: 1/200, loss: 11.1571, duration: 0:00:01.133613


episode: 11/200, loss: 5.3841, duration: 0:00:01.140555


episode: 21/200, loss: 2.0454, duration: 0:00:01.435847


episode: 31/200, loss: 1.6281, duration: 0:00:01.858283


episode: 41/200, loss: 5.3540, duration: 0:00:01.229297


episode: 51/200, loss: 6.0418, duration: 0:00:01.307376


episode: 61/200, loss: 4.7707, duration: 0:00:01.352170


episode: 71/200, loss: 9.5343, duration: 0:00:01.309162


episode: 81/200, loss: 2.6111, duration: 0:00:01.387205


episode: 91/200, loss: 4.5338, duration: 0:00:01.317273


episode: 101/200, loss: 4.2440, duration: 0:00:01.189454


episode: 111/200, loss: 4.3598, duration: 0:00:01.115353


episode: 121/200, loss: 2.8892, duration: 0:00:01.144309


episode: 131/200, loss: 4.1559, duration: 0:00:01.104842


episode: 141/200, loss: 6.1465, duration: 0:00:01.114668


episode: 151/200, loss: 2.1144, duration: 0:00:01.239526


episode: 161/200, loss: 2.6493, duration: 0:00:01.239085


episode: 171/200, loss: 4.7766, duration: 0:00:01.254597


episode: 181/200, loss: 4.9026, duration: 0:00:01.663659


episode: 191/200, loss: 3.1522, duration: 0:00:01.111588


#############################################################################
##	[52/54]	model=RL_CNN,	SEED=0,	trade=100,	risk=10000	hold=100	##
#############################################################################


episode: 1/200, loss: 11.2271, duration: 0:00:01.184348


episode: 11/200, loss: 5.4440, duration: 0:00:01.259868


episode: 21/200, loss: 2.0901, duration: 0:00:01.161886


episode: 31/200, loss: 1.6650, duration: 0:00:01.815575


episode: 41/200, loss: 5.3912, duration: 0:00:01.584997


episode: 51/200, loss: 6.0839, duration: 0:00:01.119294


episode: 61/200, loss: 4.7984, duration: 0:00:01.203554


episode: 71/200, loss: 9.5478, duration: 0:00:01.152649


episode: 81/200, loss: 2.6458, duration: 0:00:01.155331


episode: 91/200, loss: 4.6149, duration: 0:00:01.171083


episode: 101/200, loss: 4.2789, duration: 0:00:01.118930


episode: 111/200, loss: 4.3819, duration: 0:00:01.733004


episode: 121/200, loss: 2.9128, duration: 0:00:01.159570


episode: 131/200, loss: 4.1982, duration: 0:00:01.106398


episode: 141/200, loss: 6.0725, duration: 0:00:01.272971


episode: 151/200, loss: 2.1118, duration: 0:00:01.579514


episode: 161/200, loss: 2.6740, duration: 0:00:01.269290


episode: 171/200, loss: 4.8747, duration: 0:00:01.723506


episode: 181/200, loss: 4.9442, duration: 0:00:01.120008


episode: 191/200, loss: 3.1310, duration: 0:00:01.357873


#############################################################################
##	[53/54]	model=RL_CNN,	SEED=0,	trade=100,	risk=10000	hold=1000	##
#############################################################################


episode: 1/200, loss: 11.9277, duration: 0:00:01.123809


episode: 11/200, loss: 6.0317, duration: 0:00:01.143312


episode: 21/200, loss: 2.5163, duration: 0:00:01.644659


episode: 31/200, loss: 2.0067, duration: 0:00:01.141441


episode: 41/200, loss: 5.6485, duration: 0:00:01.169411


episode: 51/200, loss: 6.5415, duration: 0:00:01.136236


episode: 61/200, loss: 4.9567, duration: 0:00:01.087108


episode: 71/200, loss: 9.7148, duration: 0:00:01.224781


episode: 81/200, loss: 2.9287, duration: 0:00:01.137596


episode: 91/200, loss: 4.9916, duration: 0:00:01.116799


episode: 101/200, loss: 4.5751, duration: 0:00:01.088854


episode: 111/200, loss: 4.4220, duration: 0:00:01.202934


episode: 121/200, loss: 3.0294, duration: 0:00:01.426866


episode: 131/200, loss: 4.2627, duration: 0:00:01.105602


episode: 141/200, loss: 5.8705, duration: 0:00:01.098630


episode: 151/200, loss: 2.1197, duration: 0:00:01.086100


episode: 161/200, loss: 2.8295, duration: 0:00:01.111708


episode: 171/200, loss: 5.9311, duration: 0:00:01.306808


episode: 181/200, loss: 5.2899, duration: 0:00:01.115725


episode: 191/200, loss: 3.0281, duration: 0:00:01.193613


#############################################################################
##	[54/54]	model=RL_CNN,	SEED=0,	trade=100,	risk=10000	hold=10000	##
#############################################################################


episode: 1/200, loss: 18.9333, duration: 0:00:01.112113


episode: 11/200, loss: 9.3074, duration: 0:00:01.085614


episode: 21/200, loss: 4.9351, duration: 0:00:01.136290


episode: 31/200, loss: 3.3781, duration: 0:00:01.138834


episode: 41/200, loss: 5.8824, duration: 0:00:01.096924


episode: 51/200, loss: 7.4887, duration: 0:00:01.101395


episode: 61/200, loss: 5.2010, duration: 0:00:01.073385


episode: 71/200, loss: 10.8095, duration: 0:00:01.129723


episode: 81/200, loss: 3.2117, duration: 0:00:01.087291


episode: 91/200, loss: 8.1862, duration: 0:00:01.089277


episode: 101/200, loss: 5.4363, duration: 0:00:01.168186


episode: 111/200, loss: 4.3122, duration: 0:00:01.164098


episode: 121/200, loss: 2.7601, duration: 0:00:01.232582


episode: 131/200, loss: 4.4738, duration: 0:00:01.100694


episode: 141/200, loss: 5.1909, duration: 0:00:01.113422


episode: 151/200, loss: 2.3990, duration: 0:00:01.093399


episode: 161/200, loss: 3.6421, duration: 0:00:01.118452


episode: 171/200, loss: 8.2467, duration: 0:00:01.192177


episode: 181/200, loss: 6.4698, duration: 0:00:01.032822


episode: 191/200, loss: 2.9955, duration: 0:00:01.020393


CPU times: user 24min 53s, sys: 11.9 s, total: 25min 5s
Wall time: 24min 42s


<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

<Figure size 1008x360 with 0 Axes>

In [8]:
# double check the end of training
#env.until

In [9]:
# final marker: prints a message so the end of the notebook run is visible in the output
print('training done.')

training done.


In [10]:
### Now repeat this for the other RL models.
### Remember to update the gamma trios, the model base name, and the model specs/setup at the beginning of the notebook!