## 使用 Tensorflow 訓練交易員

本篇的原理是透過既有市場資料訓練出 Agent，讓它執行對應的策略並持續優化；訓練完成的模型可用於交易評估。必須注意的是市場中充滿黑天鵝，不同市場間的模型無法通用，比如：使用 SPY 訓練的 Agent 無法應用在 FB、AMZN，反之亦然。大家可以下載已經訓練好的 SPY Agent (DQN_ep10.h5) 來執行。

> 目前本算法只支持 [買入、賣出、持平] 信號，可以 [做多long、做空short] 以及整合 backtrade 的算法還需要一段時間
>
> 本次算法參考了 [TA 指標](https://github.com/bukosabino/ta)、[Agent](https://github.com/Albert-Z-Guo/Deep-Reinforcement-Stock-Trading/blob/master/train.py)

In [None]:
from shutil import copyfile

# Copy the requirements list and the two saved DQN model files from the
# input dataset folder into ../working so later cells can find them.
for _src, _dst in (
    ("../input/quantitative-trading/keras-rl/requirements.txt", "../working/requirements.txt"),
    ("../input/quantitative-trading/keras-rl/saved_models/DQN_ep5.h5", "../working/DQN_ep5.h5"),
    ("../input/quantitative-trading/keras-rl/saved_models/DQN_ep10.h5", "../working/DQN_ep10.h5"),
):
    copyfile(src=_src, dst=_dst)

### 安裝相依套件

In [None]:
!pip install -r requirements.txt

In [None]:
import time
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from empyrical import sharpe_ratio
from scipy.signal import argrelextrema
from statsmodels.nonparametric.kernel_regression import KernelReg
from collections import deque,defaultdict
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam

%matplotlib inline

### 所有會用到的工具，之後解釋

In [None]:
class Portfolio:
    """Bookkeeping for a simple cash-plus-shares trading account.

    Attributes are plain public fields that the trading code mutates directly.
    """

    def __init__(self, balance=50000):
        """Create a portfolio holding ``balance`` in cash and no shares."""
        self.initial_portfolio_value = balance
        self.balance = balance
        self.inventory = []  # purchase price of every share currently held
        self.return_rates = []  # per-step return rates
        self.portfolio_values = [balance]  # value history, seeded with the start value
        self.buy_dates = []  # step indices at which shares were bought
        self.sell_dates = []  # step indices at which shares were sold

    def reset_portfolio(self):
        """Restore the portfolio to the state it had right after construction."""
        self.balance = self.initial_portfolio_value
        self.inventory = []
        self.return_rates = []
        self.portfolio_values = [self.initial_portfolio_value]
        # Also drop recorded trade markers so a fresh run does not plot
        # buys/sells left over from the previous one.
        self.buy_dates = []
        self.sell_dates = []

        
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)); element-wise for arrays."""
    decay = np.exp(-x)
    return 1 / (1 + decay)


def softmax(x):
    """Return the softmax of ``x`` taken over all of its elements.

    Args:
        x: A scalar or array-like of numbers.

    Returns:
        An array of the same shape whose entries are ``exp(x_i) / sum(exp(x))``
        (a numpy scalar for scalar input). Empty input yields an empty array.
    """
    values = np.asarray(x)
    if values.size == 0:
        return np.exp(values)
    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps exp() from overflowing to inf (and the ratio from becoming NaN).
    exponentials = np.exp(values - np.max(values))
    return exponentials / np.sum(exponentials)


def stock_close_prices(key, data_dir="../input/quantitative-trading/"):
    """Read the fifth column (index 4) of ``<data_dir>/<key>.csv`` as floats.

    Args:
        key: File name without the ``.csv`` extension.
        data_dir: Directory containing the CSV; defaults to the dataset folder
            this notebook has always read from.

    Returns:
        A list with one float per data row. The first line is treated as a
        header and skipped; blank lines are ignored.
    """
    import os

    csv_path = os.path.join(data_dir, key + ".csv")
    # The context manager closes the handle, which the bare open() never did.
    with open(csv_path, "r") as csv_file:
        rows = csv_file.read().splitlines()
    return [float(row.split(",")[4]) for row in rows[1:] if row.strip()]


def generate_price_state(stock_prices, end_index, window_size):
    """Return sigmoid-squashed price differences for the window ending at ``end_index``.

    The window spans ``window_size + 1`` prices, so the result holds
    ``window_size`` differences. ``stock_prices`` must be a list, because the
    padding branch uses list concatenation.
    """
    missing = window_size - end_index
    if missing > 0:
        # Not enough history yet: repeat the very first price to fill the window.
        window = [stock_prices[0]] * missing + stock_prices[:end_index + 1]
    else:
        window = stock_prices[end_index - window_size:end_index + 1]
    return sigmoid(np.diff(window))


def generate_portfolio_state(stock_price, balance, num_holding):
    """Return ``[log(price), log(balance), log(holdings + 1e-6)]`` as a list.

    The small offset keeps the logarithm finite when no shares are held.
    """
    raw_features = (stock_price, balance, num_holding + 1e-6)
    return [np.log(feature) for feature in raw_features]


def generate_combined_state(end_index, window_size, stock_prices, balance, num_holding):
    """Build one model input row: price-window features followed by portfolio features.

    Returns a 2-D array with a single row containing the output of
    ``generate_price_state`` followed by that of ``generate_portfolio_state``.
    """
    price_state = generate_price_state(stock_prices, end_index, window_size)
    portfolio_state = generate_portfolio_state(stock_prices[end_index], balance, num_holding)
    flat_features = np.concatenate((price_state, portfolio_state), axis=None)
    return flat_features.reshape(1, -1)


def treasury_bond_daily_return_rate():
    """Return the daily rate that compounds over 365 days to a 2.75% annual rate."""
    annual_rate = 2.75 / 100  # approximate annual U.S. Treasury bond return rate
    days_per_year = 365
    yearly_growth = 1 + annual_rate
    return yearly_growth ** (1 / days_per_year) - 1


def maximum_drawdown(portfolio_values):
    """Return the largest peak-to-trough decline as a (non-positive) fraction of the peak.

    Returns 0 when the largest gap below the running maximum occurs at index 0,
    i.e. the series never falls below an earlier high.
    """
    running_peak = np.maximum.accumulate(portfolio_values)
    trough_index = np.argmax(running_peak - portfolio_values)
    if trough_index == 0:
        return 0
    peak_index = np.argmax(portfolio_values[:trough_index])
    peak_value = portfolio_values[peak_index]
    return (portfolio_values[trough_index] - peak_value) / peak_value


def evaluate_portfolio_performance(agent):
    """Print a performance summary for ``agent`` and return its total profit.

    Args:
        agent: Object exposing ``portfolio_values``, ``initial_portfolio_value``,
            ``balance``, ``inventory`` and ``return_rates``.

    Returns:
        The final portfolio value minus the initial portfolio value.
    """
    portfolio_return = agent.portfolio_values[-1] - agent.initial_portfolio_value
    # The risk-free rate must be an argument of sharpe_ratio(); it used to be
    # passed to str.format() by mistake, where it was silently ignored.
    sharpe = sharpe_ratio(np.array(agent.return_rates), risk_free=treasury_bond_daily_return_rate())
    print("--------------------------------")
    # total portfolio value
    print('投資組合總價值：         ${:.2f}'.format(agent.portfolio_values[-1]))
    # cash balance
    print('投資組合現金餘額：       ${:.2f}'.format(agent.balance))
    # number of shares held
    print('投資組合持股數量：        {}'.format(len(agent.inventory)))
    # total profit
    print('總收益:                ${:.2f}'.format(portfolio_return))
    # mean per-step return rate, in percent
    print('平均/每日投报率:         {:.3f}%'.format(np.mean(agent.return_rates) * 100))
    # Sharpe ratio relative to the daily treasury-bond rate
    print('調整後價格夏普比率：      {:.3f}'.format(sharpe))
    # maximum drawdown, in percent
    print('最大回撤：              {:.3f}%'.format(maximum_drawdown(agent.portfolio_values) * 100))
    print("--------------------------------")
    return portfolio_return


def plot_portfolio_transaction_history(stock_name, agent):
    """Plot the closing-price curve of ``stock_name`` with the agent's buys and sells marked.

    Reads ``../output/<stock_name>.csv`` and looks up trade prices in column
    index 4 at the row positions stored in ``agent.buy_dates`` / ``agent.sell_dates``.
    """
    total_return = agent.portfolio_values[-1] - agent.initial_portfolio_value
    history = pd.read_csv('../output/{}.csv'.format(stock_name))
    bought_at = [history.iloc[day, 4] for day in agent.buy_dates]
    sold_at = [history.iloc[day, 4] for day in agent.sell_dates]

    plt.figure(figsize=(15, 5), dpi=100)
    plt.title('{} 總回報率 {}： ${:.2f}'.format(agent.model_type, stock_name, total_return))
    plt.plot(history['Date'], history['Close'], color='black', label=stock_name)
    plt.scatter(agent.buy_dates, bought_at, c='green', alpha=0.5, label='buy')
    plt.scatter(agent.sell_dates, sold_at, c='red', alpha=0.5, label='sell')
    # Ten evenly spaced tick positions across the whole series.
    tick_positions = np.linspace(0, len(history), 10)
    plt.xticks(tick_positions)
    plt.ylabel('Price')
    plt.legend()
    plt.grid()
    plt.show()


def buy_and_hold_benchmark(stock_name, agent):
    """Simulate buying as many whole shares as possible on the first row and holding them.

    Reads ``./data/<stock_name>.csv``; the entry price is column index 4 of the
    first row and the value series is computed from the ``Close`` column.

    Returns:
        ``(dates, portfolio_values, total_return)`` where the first two are
        pandas Series and the last is the final value minus the starting cash.
    """
    frame = pd.read_csv('./data/{}.csv'.format(stock_name))
    date_column = frame['Date']
    starting_cash = agent.initial_portfolio_value
    entry_price = frame.iloc[0, 4]
    shares_bought = starting_cash // entry_price
    leftover_cash = starting_cash % entry_price
    value_series = frame['Close'] * shares_bought + leftover_cash
    total_return = value_series.iloc[-1] - starting_cash
    return date_column, value_series, total_return


def plot_portfolio_performance_comparison(stock_name, agent):
    """Plot the agent's portfolio value against buy-and-hold benchmarks.

    Always draws the agent curve and a buy-and-hold curve for ``stock_name``;
    unless ``stock_name`` contains ``'^GSPC'``, a buy-and-hold curve for
    ``'^GSPC_2018'`` is added as well.
    """
    x_dates, hodl_values, hodl_return = buy_and_hold_benchmark(stock_name, agent)
    agent_return = agent.portfolio_values[-1] - agent.initial_portfolio_value

    plt.figure(figsize=(15, 5), dpi=100)
    plt.title('{} vs. HODL'.format(agent.model_type))
    agent_label = '{} 總收益： ${:.2f}'.format(agent.model_type, agent_return)
    plt.plot(x_dates, agent.portfolio_values, color='green', label=agent_label)
    hodl_label = '{} HODL 總收益： ${:.2f}'.format(stock_name, hodl_return)
    plt.plot(x_dates, hodl_values, color='blue', label=hodl_label)

    # compare with S&P 500 performance in 2018
    if '^GSPC' not in stock_name:
        # x_dates is deliberately rebound here: the tick spacing below is
        # derived from whichever date series was plotted last.
        x_dates, gspc_values, gspc_return = buy_and_hold_benchmark('^GSPC_2018', agent)
        gspc_label = 'S&P 500 2018 HODL 總收益： ${:.2f}'.format(gspc_return)
        plt.plot(x_dates, gspc_values, color='red', label=gspc_label)

    plt.xticks(np.linspace(0, len(x_dates), 10))
    plt.ylabel('資產價值 ($)')
    plt.legend()
    plt.grid()
    plt.show()


def plot_all(stock_name, agent):
    """Draw the trade-history and benchmark-comparison charts in one figure and save it.

    The top panel shows the closing prices from ``../output/<stock_name>.csv``
    with buy/sell markers; the bottom panel compares the agent's portfolio
    value with buy-and-hold curves. The figure is written to
    ``../output/<stock_name>_trading_history.png`` before being shown.
    """
    fig, (history_ax, compare_ax) = plt.subplots(2, 1, figsize=(16,8), dpi=100)

    # --- top panel: price curve with executed trades ---
    total_return = agent.portfolio_values[-1] - agent.initial_portfolio_value
    history = pd.read_csv('../output/{}.csv'.format(stock_name))
    bought_at = [history.iloc[day, 4] for day in agent.buy_dates]
    sold_at = [history.iloc[day, 4] for day in agent.sell_dates]
    history_ax.set_title('{} 總收益 {}: ${:.2f}'.format(agent.model_type, stock_name, total_return))
    history_ax.plot(history['Date'], history['Close'], color='black', label=stock_name)
    history_ax.scatter(agent.buy_dates, bought_at, c='green', alpha=0.5, label='buy')
    history_ax.scatter(agent.sell_dates, sold_at, c='red', alpha=0.5, label='sell')
    history_ax.set_ylabel('Price')
    history_ax.set_xticks(np.linspace(0, len(history), 10))
    history_ax.legend()
    history_ax.grid()

    # --- bottom panel: agent versus buy-and-hold ---
    x_dates, hodl_values, hodl_return = buy_and_hold_benchmark(stock_name, agent)
    agent_return = agent.portfolio_values[-1] - agent.initial_portfolio_value
    compare_ax.set_title('{} vs. HODL'.format(agent.model_type))
    agent_label = '{} 總收益： ${:.2f}'.format(agent.model_type, agent_return)
    compare_ax.plot(x_dates, agent.portfolio_values, color='green', label=agent_label)
    hodl_label = '{} HODL 總收益： ${:.2f}'.format(stock_name, hodl_return)
    compare_ax.plot(x_dates, hodl_values, color='blue', label=hodl_label)
    # compare with S&P 500 performance in 2018 if stock is not S&P 500
    if '^GSPC' not in stock_name:
        x_dates, gspc_values, gspc_return = buy_and_hold_benchmark('^GSPC_2018', agent)
        gspc_label = 'S&P 500 2018 HODL 總收益： ${:.2f}'.format(gspc_return)
        compare_ax.plot(x_dates, gspc_values, color='red', label=gspc_label)
    compare_ax.set_ylabel('資產價值 ($)')
    compare_ax.set_xticks(np.linspace(0, len(history), 10))
    compare_ax.legend()
    compare_ax.grid()

    plt.subplots_adjust(hspace=0.5)
    plt.savefig('../output/{}_trading_history.png'.format(stock_name))
    plt.show()


def plot_portfolio_returns_across_epochs(model_name, returns_across_epochs):
    """Plot one return value per epoch, save the chart, then show it.

    The image is written to ``../output/<model_name>_returns_ep<N>.png`` where
    ``N`` is the number of entries in ``returns_across_epochs``.
    """
    epoch_count = len(returns_across_epochs)
    output_path = '../output/{}_returns_ep{}.png'.format(model_name, epoch_count)

    plt.figure(figsize=(15, 5), dpi=100)
    plt.title('投資組合收益')
    plt.plot(returns_across_epochs, color='black')
    plt.xlabel('Epoch')
    plt.ylabel('Return Value')
    plt.grid()
    plt.savefig(output_path)
    plt.show()


In [None]:
class Agent(Portfolio):
    """Trading agent that pairs a ``Portfolio`` with a small dense Keras network.

    The network maps a state vector of length ``state_dim`` to three outputs,
    one per action (0 = hold, 1 = buy, 2 = sell).
    """

    def __init__(self, state_dim, balance, is_eval=False, model_name=""):
        """Set up hyper-parameters and either build or load the network.

        Args:
            state_dim: Length of the state vector fed to the network.
            balance: Starting cash of the underlying portfolio.
            is_eval: When true, load ``../<model_name>.h5`` instead of building
                a fresh network, and never explore in ``act``.
            model_name: File stem of the saved model; only read when ``is_eval``.
        """
        super().__init__(balance=balance)
        self.model_type = 'DQN'
        self.state_dim = state_dim
        self.action_dim = 3  # hold, buy, sell
        self.memory = deque(maxlen=100)
        self.buffer_size = 60

        self.gamma = 0.95
        self.epsilon = 1.0  # initial exploration rate
        self.epsilon_min = 0.01  # minimum exploration rate
        self.epsilon_decay = 0.995 # decrease exploration rate as the agent becomes good at trading
        self.is_eval = is_eval
        # `self.model()` still resolves to the builder method here because the
        # instance attribute does not exist yet; after this assignment the
        # attribute `model` (the Keras network) shadows the method.
        self.model = load_model('../{}.h5'.format(model_name)) if is_eval else self.model()

        self.tensorboard = TensorBoard(log_dir='./logs/DQN_tensorboard', update_freq=90)
        self.tensorboard.set_model(self.model)

    def model(self):
        """Build and compile the 64-32-8 dense network with a softmax output layer."""
        model = Sequential()
        model.add(Dense(units=64, input_dim=self.state_dim, activation='relu'))
        model.add(Dense(units=32, activation='relu'))
        model.add(Dense(units=8, activation='relu'))
        model.add(Dense(self.action_dim, activation='softmax'))
        # `lr` is a deprecated alias that newer Keras releases reject;
        # `learning_rate` is the supported keyword.
        model.compile(loss='mse', optimizer=Adam(learning_rate=0.01))
        return model

    def reset(self):
        """Reset the portfolio and restore the exploration rate to 1.0."""
        self.reset_portfolio()
        self.epsilon = 1.0 # reset exploration rate

    def remember(self, state, actions, reward, next_state, done):
        """Append one transition tuple to the bounded replay memory."""
        self.memory.append((state, actions, reward, next_state, done))

    def act(self, state):
        """Return an action index: random with probability epsilon (training only), else greedy."""
        if not self.is_eval and np.random.rand() <= self.epsilon:
            return random.randrange(self.action_dim)
        options = self.model.predict(state)
        return np.argmax(options[0])

    def experience_replay(self):
        """Fit the network on the most recent transitions and decay epsilon.

        The batch is the last ``buffer_size - 1`` transitions in memory (or all
        of memory when it holds fewer). For each one, the output slot selected
        by ``argmax`` of the stored action values is replaced with the
        bootstrapped target before a single-epoch fit.

        Returns:
            The loss reported by the last fit.

        Raises:
            ValueError: If there is no transition to replay.
        """
        # Clamp at 0 so a short memory is replayed once instead of wrapping
        # around through negative indices.
        first = max(len(self.memory) - self.buffer_size + 1, 0)
        mini_batch = list(self.memory)[first:]
        if not mini_batch:
            raise ValueError('experience_replay() needs at least one transition in the replay window')

        for state, actions, reward, next_state, done in mini_batch:
            if not done:
                Q_target_value = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            else:
                Q_target_value = reward
            next_actions = self.model.predict(state)
            next_actions[0][np.argmax(actions)] = Q_target_value
            history = self.model.fit(state, next_actions, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        return history.history['loss'][0]

In [None]:
# Label printed in the run summary and used in the file name of the returns plot.
model_name = "DQN"
# Overly large datasets hurt accuracy; prefer splitting the data by year or quarter.
stock_name = "SPY_2018"
window_size = 10  # number of price differences in each state
num_epoch = 5  # number of passes over the price series
initial_balance = 50000  # starting cash handed to the agent

In [None]:
stock_prices = stock_close_prices(stock_name)  # price series read from the stock's CSV
trading_period = len(stock_prices) - 1  # last valid index into stock_prices
returns_across_epochs = []  # one total-return entry per finished epoch
num_experience_replay = 0  # running count of replay calls, used as the TensorBoard batch index
action_dict = {0: 'Hold', 1: 'Buy', 2: 'Sell'}  # action index -> display name

In [None]:
# The state vector holds window_size price differences plus 3 portfolio features.
agent = Agent(state_dim=window_size + 3, balance=initial_balance)

def hold(actions):
    """Handle a 'hold' decision, selling one share if that looks profitable.

    Relies on the module-level ``agent``, ``stock_prices`` and current step ``t``.
    If the action ranked second-lowest by ``np.argsort`` is 'sell' (index 2),
    shares are held, and the current price exceeds the cheapest purchase price
    in inventory, one share is sold via ``sell(t)``, that action's entry in
    ``actions`` is overwritten in place with 1, and ``('HODL', actions)`` is
    returned. In every other case the function returns ``None``.
    """
    # encourage selling for profit and liquidity
    runner_up = np.argsort(actions)[1]
    if runner_up != 2 or not len(agent.inventory) > 0:
        return None
    best_possible_profit = stock_prices[t] - min(agent.inventory)
    if not best_possible_profit > 0:
        return None
    sell(t)
    actions[runner_up] = 1  # reset this action's value to the highest
    return 'HODL', actions

def buy(t):
    """Buy one share at ``stock_prices[t]`` if the cash balance strictly exceeds the price.

    Uses the module-level ``agent`` and ``stock_prices``. Returns a message
    describing the purchase, or ``None`` when no purchase was made.
    """
    price = stock_prices[t]
    if not agent.balance > price:
        return None
    agent.balance -= price
    agent.inventory.append(price)
    return '購買: ${:.2f}'.format(price)

def sell(t):
    """Sell the oldest held share at ``stock_prices[t]``.

    Uses the module-level ``agent`` and ``stock_prices``, and overwrites the
    module-level ``reward`` with the realised profit. Returns a message
    describing the sale, or ``None`` when no shares are held.
    """
    global reward
    if not len(agent.inventory) > 0:
        return None
    price = stock_prices[t]
    agent.balance += price
    # Shares leave the inventory in first-in, first-out order.
    profit = price - agent.inventory.pop(0)
    reward = profit
    return '賣出: ${:.2f} | 獲利： ${:.2f}'.format(price, profit)

In [None]:
# Print the run configuration: market, number of trading days, window size,
# number of epochs, model label and starting cash.
print(f'交易市場：     {stock_name}')
print(f'資料長度：     {trading_period} 天')
print(f'均線維度：     {window_size} 天')
print(f'訓練次數：     {num_epoch}')
print(f'使用模型：     {model_name}')
print('初始持有資金：  ${:,}'.format(initial_balance))

In [None]:
# Training driver: for every epoch, walk the price series once, let the agent
# act at each step, store the transition, and replay recent memory.
import os  # only needed here, to create the checkpoint folder before saving

start_time = time.time()
for e in range(1, num_epoch + 1):
    print(f'\n訓練迭代： {e}/{num_epoch}')

    agent.reset() # reset to initial balance and hyperparameters
    state = generate_combined_state(0, window_size, stock_prices, agent.balance, len(agent.inventory))

    for t in range(1, trading_period + 1):
        if t % 100 == 0:
            print(f'\n-------------------期數： {t}/{trading_period}-------------------')

        reward = 0
        # NOTE(review): next_state is built before the trade below executes, so
        # its balance/holdings features are the pre-trade values - confirm intent.
        next_state = generate_combined_state(t, window_size, stock_prices, agent.balance, len(agent.inventory))
        # Use the value recorded at the end of the previous step. Pricing the
        # pre-trade holdings at stock_prices[t] made it equal to the post-trade
        # value (trades are value-neutral), so every return rate came out as 0.
        previous_portfolio_value = agent.portfolio_values[-1]

        actions = agent.model.predict(state)[0]
        action = agent.act(state)

        # execute position
        print('批次： {}\tHODL 訊號： {:.4} \t買入 訊號： {:.4} \t賣出 訊號： {:.4}'.format(t, actions[0], actions[1], actions[2]))
        if action != np.argmax(actions): print(f"\t\t'{action_dict[action]}' is an exploration.")
        if action == 0: # hold
            execution_result = hold(actions)
        elif action == 1: # buy
            execution_result = buy(t)
        elif action == 2: # sell
            execution_result = sell(t)

        # check execution result
        if execution_result is None:
            reward -= treasury_bond_daily_return_rate() * agent.balance  # missing opportunity
        else:
            if isinstance(execution_result, tuple): # if execution_result is 'Hold'
                actions = execution_result[1]
                execution_result = execution_result[0]
            print(execution_result)

        # calculate reward
        current_portfolio_value = len(agent.inventory) * stock_prices[t] + agent.balance
        unrealized_profit = current_portfolio_value - agent.initial_portfolio_value
        reward += unrealized_profit

        agent.portfolio_values.append(current_portfolio_value)
        agent.return_rates.append((current_portfolio_value - previous_portfolio_value) / previous_portfolio_value)

        done = t == trading_period
        # NOTE(review): the predicted action values are stored, and
        # experience_replay() takes their argmax as the action index; on
        # exploration steps that is not the action actually executed.
        agent.remember(state, actions, reward, next_state, done)

        # update state
        state = next_state

        # experience replay
        if len(agent.memory) > agent.buffer_size:
            num_experience_replay += 1
            loss = agent.experience_replay()
            print('迭代： {}\t損失： {:.2f}\t執行動作： {}\t獎勵： {:.2f}\t現金餘額： {:.2f}\t持有股數： {}'.format(e, loss, action_dict[action], reward, agent.balance, len(agent.inventory)))
            agent.tensorboard.on_batch_end(num_experience_replay, {'loss': loss, 'portfolio value': current_portfolio_value})

        if done:
            portfolio_return = evaluate_portfolio_performance(agent)
            returns_across_epochs.append(portfolio_return)

    # save models periodically
    if e % 5 == 0:
        # Make sure the target folder exists before writing the checkpoint.
        os.makedirs('../output/saved_models', exist_ok=True)
        agent.model.save('../output/saved_models/DQN_ep' + str(e) + '.h5')
        print('model saved')

print('總訓練時間： {0:.2f} 分鐘'.format((time.time() - start_time)/60))
plot_portfolio_returns_across_epochs(model_name, returns_across_epochs)