In [14]:
from utils import *
import importlib

# Agent implementation to train. The value is used to import `agents.<model_name>`,
# so it must name a module in the `agents` package (DQN, DDPG or DDQN).
model_name = "DQN"
# CSV file with the price history, relative to the notebook's working directory.
data_path = "../../data/SP500_all_time_more_data.csv"
# Window length (in days) passed to generate_combined_state when building each state.
window_size = 10
# Number of training episodes, i.e. full passes over the trading period.
num_episode = 10
# Starting cash balance handed to the agent.
initial_balance = 1000000

In [4]:
import pandas as pd

# Load the price history in one chained expression: read the CSV, lowercase
# every column name, then parse the `date` column into datetimes.
df = (
    pd.read_csv(data_path)
    .rename(columns=str.lower)
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)
df.head()

Unnamed: 0,date,open,high,low,close,adj close,volume,dff,dtb3,dgs10,dfii10,dgs1,dgs2,dgs5,dfii5,bamlh0a0hym2
0,2003-01-02,879.820007,909.030029,879.820007,909.030029,909.030029,1229200000,1.3,1.2,4.07,2.43,1.42,1.8,3.05,1.75,8.65
1,2003-01-03,909.030029,911.25,903.070007,908.590027,908.590027,1130800000,1.12,1.2,4.05,2.43,1.41,1.79,3.03,1.75,8.57
2,2003-01-06,908.590027,931.77002,908.590027,929.01001,929.01001,1435900000,1.22,1.19,4.09,2.46,1.44,1.84,3.1,1.79,8.41
3,2003-01-07,929.01001,930.809998,919.929993,922.929993,922.929993,1545200000,1.2,1.17,4.04,2.42,1.4,1.77,3.04,1.76,8.26
4,2003-01-08,922.929993,922.929993,908.320007,909.929993,909.929993,1467600000,1.29,1.17,4.0,2.29,1.36,1.71,3.01,1.68,8.18


In [7]:
# Closing prices as a plain Python list; the training loop indexes it by step `t`.
stock_prices = df['close'].to_list()
# Last valid price index. The training loop uses index 0 for the initial state
# and then steps t = 1 .. trading_period.
trading_period = len(stock_prices) - 1
# One entry per finished episode (the value returned by evaluate_portfolio_performance).
returns_across_episodes = []
# Running count of experience-replay calls across all episodes; used as the
# batch index when reporting to the agent's tensorboard callback.
num_experience_replay = 0
# Maps the agent's action index to a human-readable label for printing.
action_dict = {0: 'Hold', 1: 'Buy', 2: 'Sell'}

In [10]:
# Import the agent module selected by `model_name` from the `agents` package.
model = importlib.import_module(f'agents.{model_name}')
# NOTE(review): state_dim is window_size + 3; presumably the 3 extra entries are the
# non-price features that generate_combined_state appends -- confirm against utils.
agent = model.Agent(state_dim=window_size + 3, balance=initial_balance)







In [11]:
def hold(actions):
    """Handle a 'Hold' decision, opportunistically selling for profit and liquidity.

    Relies on notebook-level globals: `agent`, `stock_prices`, the current step
    `t`, and the `sell` function.

    If the element at position 1 of the ascending argsort of `actions` is
    'Sell' (index 2), the inventory is non-empty, and the current price is
    above the cheapest price held, one share is sold via `sell(t)` and the
    sell entry of `actions` is overwritten in place with 1.

    Returns:
        ('Hold', actions) when such a sale was triggered, otherwise None.
        The training loop treats None as "nothing executed".
    """
    runner_up = np.argsort(actions)[1]
    may_liquidate = runner_up == 2 and len(agent.inventory) > 0
    if not may_liquidate:
        return None

    cheapest_lot = min(agent.inventory)
    if stock_prices[t] - cheapest_lot > 0:
        sell(t)
        actions[runner_up] = 1  # reset this action's value to the highest
        return 'Hold', actions
    return None

def buy(t):
    """Buy one share at the price of step `t` if the balance strictly exceeds it.

    Uses the notebook-level `agent` and `stock_prices`. On success the price
    is deducted from `agent.balance` and appended to `agent.inventory`.

    Returns:
        A 'Buy: $<price>' message on success, otherwise None.
    """
    price = stock_prices[t]
    if not agent.balance > price:
        return None
    agent.balance -= price
    agent.inventory.append(price)
    return 'Buy: ${:.2f}'.format(price)

def sell(t):
    """Sell the oldest held share at the price of step `t`.

    Uses the notebook-level `agent` and `stock_prices`. The sale price is
    added to `agent.balance`, the first (oldest) purchase price is removed
    from `agent.inventory`, and the global `reward` is overwritten with the
    realised profit.

    Returns:
        A 'Sell: $<price> | Profit: $<profit>' message, or None when the
        inventory is empty.
    """
    global reward
    if len(agent.inventory) == 0:
        return None

    price = stock_prices[t]
    agent.balance += price
    profit = price - agent.inventory.pop(0)
    reward = profit
    return 'Sell: ${:.2f} | Profit: ${:.2f}'.format(price, profit)

In [15]:
import logging
import os

stock_name = "SP500"

# configure logging
# The file handler opened by basicConfig does not create missing parent
# directories, so make sure the log folder exists before opening the file.
log_file = f'logs/{model_name}_training_{stock_name}.log'
os.makedirs(os.path.dirname(log_file), exist_ok=True)
logging.basicConfig(filename=log_file, filemode='w',
                    format='[%(asctime)s.%(msecs)03d %(filename)s:%(lineno)3s] %(message)s',
                    datefmt='%m/%d/%Y %H:%M:%S', level=logging.INFO)

# Record the run configuration at the top of the log file.
logging.info(f'Trading Object:           {stock_name}')
logging.info(f'Trading Period:           {trading_period} days')
logging.info(f'Window Size:              {window_size} days')
logging.info(f'Training Episode:         {num_episode}')
logging.info(f'Model Name:               {model_name}')
logging.info('Initial Portfolio Value: ${:,}'.format(initial_balance))

In [17]:
import os
import time

# Main training loop: for every episode, walk through the whole price series,
# let the agent act at each step, store the transition and run experience
# replay once the agent's memory exceeds its buffer size.
start_time = time.time()

for e in range(1, num_episode + 1):
    print(f'\nEpisode: {e}/{num_episode}')
    agent.reset()  # reset to initial balance and hyperparameters
    state = generate_combined_state(0, window_size, stock_prices, agent.balance, len(agent.inventory))

    for t in range(1, trading_period + 1):
        if t % 100 == 0:
            print(f'\n-------------------Period: {t}/{trading_period}-------------------')

        # `reward` is a notebook-level global: sell() overwrites it with the realised profit.
        reward = 0
        next_state = generate_combined_state(t, window_size, stock_prices, agent.balance, len(agent.inventory))
        # Value the position carried into this step at the previous price.
        # Trades below execute at stock_prices[t] and only swap cash for stock
        # at that same price, so valuing both snapshots at stock_prices[t]
        # made every recorded return rate exactly 0.
        previous_portfolio_value = len(agent.inventory) * stock_prices[t - 1] + agent.balance

        if model_name == 'DDPG':
            actions = agent.act(state, t)
            action = np.argmax(actions)
        else:
            actions = agent.model.predict(state)[0]
            action = agent.act(state)

        # execute position
        print('Step: {}\tHold signal: {:.4} \tBuy signal: {:.4} \tSell signal: {:.4}'.format(t, actions[0], actions[1], actions[2]))

        if action != np.argmax(actions):
            print(f"\t\t'{action_dict[action]}' is an exploration.")

        # Exactly one branch must run; an unknown index would otherwise reuse
        # the previous step's execution_result.
        if action == 0:  # hold
            execution_result = hold(actions)
        elif action == 1:  # buy
            execution_result = buy(t)
        elif action == 2:  # sell
            execution_result = sell(t)
        else:
            raise ValueError(f'Unexpected action index: {action!r}')

        # check execution result
        if execution_result is None:
            reward -= treasury_bond_daily_return_rate() * agent.balance  # missing opportunity
        else:
            if isinstance(execution_result, tuple):  # if execution_result is 'Hold'
                actions = execution_result[1]
                execution_result = execution_result[0]
            print(execution_result)

        # calculate reward
        current_portfolio_value = len(agent.inventory) * stock_prices[t] + agent.balance
        unrealized_profit = current_portfolio_value - agent.initial_portfolio_value
        reward += unrealized_profit
        agent.portfolio_values.append(current_portfolio_value)
        agent.return_rates.append((current_portfolio_value - previous_portfolio_value) / previous_portfolio_value)

        done = t == trading_period
        agent.remember(state, actions, reward, next_state, done)
        # update state
        state = next_state

        # experience replay
        if len(agent.memory) > agent.buffer_size:
            num_experience_replay += 1
            loss = agent.experience_replay()
            print('Episode: {}\tLoss: {:.2f}\tAction: {}\tReward: {:.2f}\tBalance: {:.2f}\tNumber of Stocks: {}'.format(
                e, loss, action_dict[action], reward, agent.balance, len(agent.inventory)))
            agent.tensorboard.on_batch_end(num_experience_replay, {'loss': loss, 'portfolio value': current_portfolio_value})

    # `done` is True here whenever the inner loop ran through the last step.
    if done:
        portfolio_return = evaluate_portfolio_performance(agent, print)
        returns_across_episodes.append(portfolio_return)

    # save models periodically
    if e % 5 == 0:
        # Keras does not create the target folder for us.
        os.makedirs('saved_models', exist_ok=True)
        if model_name == 'DDPG':
            agent.actor.model.save_weights('saved_models/DDPG_ep{}_actor.h5'.format(str(e)))
            agent.critic.model.save_weights('saved_models/DDPG_ep{}_critic.h5'.format(str(e)))
        else:
            # Every non-DDPG agent exposes `agent.model` (it is used for
            # prediction above), so DDQN is saved the same way as DQN instead
            # of being silently skipped. The DQN file name is unchanged.
            agent.model.save('saved_models/' + model_name + '_ep' + str(e) + '.h5')
        print('model saved')

print('total training time: {0:.2f} min'.format((time.time() - start_time) / 60))
plot_portfolio_returns_across_episodes(model_name, returns_across_episodes)


Episode: 1/10
Step: 1	Hold signal: 4.275e-07 	Buy signal: 8.865e-11 	Sell signal: 1.0
		'Buy' is an exploration.
Buy: $908.59
Episode: 1	Loss: 0.00	Action: Buy	Reward: 0.00	Balance: 999091.41	Number of Stocks: 1
Step: 2	Hold signal: 4.033e-07 	Buy signal: 8.456e-11 	Sell signal: 1.0
Sell: $929.01 | Profit: $20.42
Episode: 1	Loss: 554.61	Action: Sell	Reward: 40.84	Balance: 1000020.42	Number of Stocks: 0
Step: 3	Hold signal: 3.771e-06 	Buy signal: 5.466e-09 	Sell signal: 1.0
		'Buy' is an exploration.
Buy: $922.93
Episode: 1	Loss: 138.31	Action: Buy	Reward: 20.42	Balance: 999097.49	Number of Stocks: 1
Step: 4	Hold signal: 3.495e-07 	Buy signal: 7.115e-11 	Sell signal: 1.0
Sell: $909.93 | Profit: $-13.00
Episode: 1	Loss: 10.57	Action: Sell	Reward: -5.58	Balance: 1000007.42	Number of Stocks: 0
Step: 5	Hold signal: 3.922e-06 	Buy signal: 8.192e-09 	Sell signal: 1.0
		'Hold' is an exploration.
Episode: 1	Loss: 1494.48	Action: Hold	Reward: -66.91	Balance: 1000007.42	Number of Stocks: 0
Step:

KeyboardInterrupt: 