In [None]:
!pip install empyrical

In [None]:

import logging
import time
import numpy as np
import pandas as pd
import pandas
from empyrical import sharpe_ratio
from matplotlib import pyplot as plt
from turtle import pd
import argparse
import importlib
import sys
import time
from pathlib import Path
from datetime import datetime
import random
from collections import deque
import numpy as np
import tensorflow as tf
from tensorflow import keras

#------------------------------------------------------------------------
def RSI(data, n=14):
    """Compute the Relative Strength Index of a price series.

    Gains and losses are smoothed with an exponentially weighted mean of
    span ``n`` and combined as ``100 - 100 / (1 + avg_gain / avg_loss)``.

    Args:
        data: Sequence of prices (anything ``pandas.DataFrame`` accepts).
            Only the first column is returned.
        n: Span of the exponentially weighted means.

    Returns:
        A list with one RSI value per input price. Values that are
        undefined (no gain and no loss observed, i.e. 0/0) are reported as 0.
    """
    price = pandas.DataFrame(data)
    delta = price.diff()
    # The first difference is always NaN, so it is back-filled from the second
    # row. A positional ``.iloc`` write is used instead of the chained
    # ``delta[0][0] = ...`` form, which does not write through when pandas
    # copy-on-write is active and fails on inputs with a single row.
    if len(delta) > 1:
        delta.iloc[0, 0] = delta.iloc[1, 0]

    gains = delta.clip(lower=0)
    losses = delta.clip(upper=0)
    avg_gain = gains.ewm(span=n).mean()
    avg_loss = losses.ewm(span=n).mean().abs()

    # Division by a zero average loss yields inf, which maps to an RSI of 100.
    relative_strength = avg_gain / avg_loss
    rsi = 100.0 - (100.0 / (1.0 + relative_strength))
    rows = rsi.fillna(0).to_numpy().tolist()
    return [row[0] for row in rows]

def MACD(data, fast=12, slow=26, signal=9):
    """Compute the MACD histogram (MACD line minus its signal line).

    Args:
        data: Sequence of prices (anything ``pandas.DataFrame`` accepts).
            Only the first column is returned.
        fast: Span of the fast exponential moving average.
        slow: Span of the slow exponential moving average.
        signal: Span of the moving average applied to the MACD line.

    Returns:
        A list with one histogram value per input price.
    """
    # ``pandas`` is referenced by its full name on purpose: the ``pd`` alias
    # is rebound by a later ``from turtle import pd`` in this file.
    price = pandas.DataFrame(data)
    fast_ema = price.ewm(span=fast, adjust=False).mean()
    slow_ema = price.ewm(span=slow, adjust=False).mean()
    macd_line = fast_ema - slow_ema
    signal_line = macd_line.ewm(span=signal, adjust=False).mean()
    histogram = macd_line - signal_line
    return [row[0] for row in histogram.to_numpy()]

def SMA(data, window=20):
    """Return the simple moving average of ``data`` over ``window`` samples.

    ``data`` must provide a pandas-style ``rolling`` method; positions with
    fewer than ``window`` preceding samples come back as NaN.
    """
    rolling_view = data.rolling(window=window)
    return rolling_view.mean()

def BB(data, window=20, num_std=2):
    """Compute the Bollinger %B of a price series.

    %B is the position of the price inside its Bollinger band:
    ``(price - lower) / (upper - lower)``, where the bands lie ``num_std``
    rolling standard deviations around the rolling mean.

    Reference:
    https://medium.com/codex/algorithmic-trading-with-bollinger-bands-in-python-1b0a00c9ef99

    Args:
        data: Sequence of prices (anything ``pandas.DataFrame`` accepts).
            Only the first column is returned.
        window: Number of samples in the rolling window.
        num_std: Band half-width in standard deviations.

    Returns:
        A list with one %B value per input price. Entries are NaN until a
        full window is available and wherever the band has zero width.
    """
    # ``pandas`` is referenced by its full name on purpose: the ``pd`` alias
    # is rebound by a later ``from turtle import pd`` in this file.
    price = pandas.DataFrame(data)
    rolling = price.rolling(window=window)
    middle = rolling.mean()
    spread = rolling.std() * num_std
    upper_bb = middle + spread
    lower_bb = middle - spread
    percent_b = (price - lower_bb) / (upper_bb - lower_bb)
    return [row[0] for row in percent_b.to_numpy()]

def OBV(data1,data2):
    """Compute the On-Balance Volume indicator.

    Each step's volume is added when the price rose, subtracted when it fell
    and ignored when it was unchanged; the result is the running total.

    Reference:
    https://medium.com/wwblog/implement-the-on-balance-volume-obv-indicator-in-python-10ac889efe72

    Args:
        data1: Sequence of prices.
        data2: Sequence of volumes, aligned with ``data1``.

    Returns:
        A list with one OBV value per input price; the first entry is 0.
    """
    # ``pandas`` is referenced by its full name on purpose: the ``pd`` alias
    # is rebound by a later ``from turtle import pd`` in this file.
    price = pandas.DataFrame(data1)
    volume = pandas.DataFrame(data2)
    signed_volume = np.sign(price.diff()) * volume
    # The first difference is NaN; treat it as "no change".
    obv = signed_volume.fillna(0).cumsum()
    return [row[0] for row in obv.to_numpy()]
#----------------------------------------------------------------------------------------------

def log_txt(file,str):
    """Append one timestamped line to a text log.

    The line has the form ``DD/MM/YYYY HH:MM:SS<TAB>message<NEWLINE>``.

    Args:
        file: Path of the log file; it is created when missing.
        str: Message text to record.
    """
    with open(file, 'a') as log_file:  # append mode keeps earlier entries
        timestamp = datetime.now().strftime("%d/%m/%Y %H:%M:%S")
        log_file.write(timestamp + "\t" + str + "\n")

#////////////////////////////////////////////////////////////////////////////////////////////////
class Portfolio:
    """Cash balance, open positions and value history of a trading account."""

    def __init__(self, balance=50000):
        """Start with ``balance`` in cash and no open positions."""
        # The starting cash is remembered so reset_portfolio() can restore it.
        self.initial_portfolio_value = self.balance = balance
        self.inventory, self.return_rates = [], []
        self.portfolio_values = [balance]
        self.buy_dates, self.sell_dates = [], []

    def reset_portfolio(self):
        """Restore cash, inventory, return rates and value history.

        ``buy_dates`` and ``sell_dates`` are left as they are.
        """
        starting_value = self.initial_portfolio_value
        self.balance = starting_value
        self.inventory, self.return_rates = [], []
        self.portfolio_values = [starting_value]

        
def sigmoid(x):
    """Return the logistic function ``1 / (1 + e^-x)`` of ``x``, element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator


def softmax(x):
    """Return the softmax of ``x`` taken over all of its elements.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but keeps ``exp`` from overflowing (and the
    output from turning into NaN) for large inputs.

    Args:
        x: Scalar or array-like of scores.

    Returns:
        Values of the same shape as ``x`` that sum to 1.
    """
    scores = np.asarray(x, dtype=float)
    if scores.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(scores)
    exps = np.exp(scores - np.max(scores))
    return exps / np.sum(exps)


def stock_close_prices(key):
    '''Return the close prices stored in ``<key>.csv``.

    The first line of the file is treated as a header and skipped; the close
    price is read from the fifth comma-separated field of every other line.
    Blank lines are ignored.

    Args:
        key: File path without the ``.csv`` extension.

    Returns:
        A list of floats, one per data row.
    '''
    prices = []
    # The context manager closes the file even if a row fails to parse.
    with open(key + ".csv", "r") as csv_file:
        next(csv_file, None)  # skip the header row (if any)
        for line in csv_file:
            if not line.strip():
                continue
            prices.append(float(line.split(",")[4]))
    return prices


def generate_price_state(stock_prices, end_index, window_size,indicator=""):
    '''
    Return the price part of the state for the window ending at end_index.

    A period holds window_size + 1 prices (start_index..end_index inclusive).
    When fewer prices are available, the period is left-padded with the first
    price and the plain price-difference state is returned regardless of
    indicator.

    Args:
        stock_prices: list of prices.
        end_index: index of the last price in the window.
        window_size: number of steps in the state.
        indicator: "" for sigmoid of adjacent price differences, "RSI" for
            sigmoid of the RSI values scaled by their maximum, "MACD" for the
            latest MACD histogram value.

    Returns:
        An array of length window_size for "" and "RSI"; a scalar for "MACD".

    Raises:
        ValueError: if indicator is not one of the supported names.
    '''
    start_index = end_index - window_size
    if start_index < 0:
        # if end_index cannot suffice window_size, pad with prices on start_index
        period = -start_index * [stock_prices[0]] + stock_prices[0:end_index+1]
        return sigmoid(np.diff(period))

    period = stock_prices[start_index:end_index+1]
    if indicator == "":
        return sigmoid(np.diff(period))

    if indicator == "RSI":
        # Drop the first RSI value so the state has window_size entries.
        rsi_values = np.array(RSI(period)[1:])
        peak = rsi_values.max()
        if peak == 0:
            # Every RSI value is 0; scaling by the maximum would be 0/0.
            return sigmoid(np.zeros_like(rsi_values))
        return sigmoid(rsi_values / peak)

    if indicator == "MACD":
        macd = MACD(period)
        # NOTE(review): this branch returns a scalar while the other branches
        # return window_size values, and the caller takes len() of the result.
        # Confirm the intended state shape.
        if max(macd) == 0:
            return sigmoid(0)
        return macd[-1]

    # Previously an unknown indicator fell through and returned None, which
    # only surfaced later as an unrelated error in the caller.
    raise ValueError(f"unsupported indicator: {indicator!r}")


def generate_portfolio_state(stock_price, balance, num_holding):
    '''Return [log(stock_price), log(balance), log(num_holding + 1e-6)].

    The small offset keeps the logarithm finite when nothing is held.
    '''
    raw_values = (stock_price, balance, num_holding + 1e-6)
    return [np.log(value) for value in raw_values]


def generate_combined_state(end_index, window_size, stock_prices, balance, num_holding,indicator=""):
    '''
    Build the full state for the step at end_index.

    The price part comes from generate_price_state (window ending at
    end_index, using the given indicator); the portfolio part comes from
    generate_portfolio_state (log price at end_index, log balance, log
    holdings). Both are flattened into one vector and returned as a
    single-row 2-D array. The lengths of both parts are printed.
    '''
    price_part = generate_price_state(stock_prices, end_index, window_size, indicator)
    portfolio_part = generate_portfolio_state(stock_prices[end_index], balance, num_holding)
    print('prince_state len:',len(price_part),'portfolio_state len :', len(portfolio_part))
    flat_state = np.concatenate((price_part, portfolio_part), axis=None)
    return np.array([flat_state])


def treasury_bond_daily_return_rate():
    """Return the daily rate that compounds to 2.75% over 365 days.

    2.75% is used as an approximate annual U.S. Treasury bond return rate.
    """
    annual_rate = 2.75 / 100
    daily_growth = (1 + annual_rate) ** (1 / 365)
    return daily_growth - 1


def maximum_drawdown(portfolio_values):
    """Return the largest peak-to-trough decline as a (non-positive) fraction.

    The trough is the point furthest below the running maximum; the peak is
    the highest value before it. Returns 0 when the trough is the first
    element, i.e. the series never falls below its running maximum.
    """
    shortfall = np.maximum.accumulate(portfolio_values) - portfolio_values
    trough = np.argmax(shortfall)
    if trough == 0:
        return 0
    peak = np.argmax(portfolio_values[:trough])
    peak_value = portfolio_values[peak]
    return (portfolio_values[trough] - peak_value) / peak_value


def evaluate_portfolio_performance(agent, filename):
    """Return the agent's profit: latest portfolio value minus the initial one.

    ``filename`` is accepted but not used.
    """
    final_value = agent.portfolio_values[-1]
    return final_value - agent.initial_portfolio_value


def plot_portfolio_transaction_history(stock_name, agent):
    """Plot the close price of ``<stock_name>.csv`` with the agent's trades.

    Buy and sell points are taken from ``agent.buy_dates`` /
    ``agent.sell_dates`` (row positions) and priced from the fifth CSV column.

    Args:
        stock_name: CSV path without the ``.csv`` extension.
        agent: Object exposing ``portfolio_values``,
            ``initial_portfolio_value``, ``model_type``, ``buy_dates`` and
            ``sell_dates``.
    """
    portfolio_return = agent.portfolio_values[-1] - agent.initial_portfolio_value
    # ``pandas`` is referenced by its full name on purpose: the ``pd`` alias
    # is rebound by a later ``from turtle import pd`` in this file.
    df = pandas.read_csv('{}.csv'.format(stock_name))
    # The other plotting helpers in this file read 'time'/'close' columns;
    # accept either spelling so the same CSV works everywhere.
    date_column = 'Date' if 'Date' in df.columns else 'time'
    close_column = 'Close' if 'Close' in df.columns else 'close'
    buy_prices = [df.iloc[t, 4] for t in agent.buy_dates]
    sell_prices = [df.iloc[t, 4] for t in agent.sell_dates]
    plt.figure(figsize=(15, 5), dpi=100)
    plt.title('{} Total Return on {}: ${:.2f}'.format(agent.model_type, stock_name, portfolio_return))
    plt.plot(df[date_column], df[close_column], color='black', label=stock_name)
    plt.scatter(agent.buy_dates, buy_prices, c='green', alpha=0.5, label='buy')
    plt.scatter(agent.sell_dates, sell_prices,c='red', alpha=0.5, label='sell')
    plt.xticks(np.linspace(0, len(df), 10))
    plt.ylabel('Price')
    plt.legend()
    plt.grid()
    plt.show()


def buy_and_hold_benchmark(stock_name, agent):
    """Simulate buying as many units as possible at the first price and holding.

    Args:
        stock_name: CSV path without the ``.csv`` extension. The file needs
            ``time`` and ``close`` columns, and the fifth column is used as
            the purchase price.
        agent: Object exposing ``initial_portfolio_value``.

    Returns:
        A tuple ``(dates, portfolio_values, total_return)``: the ``time``
        column, the value of the held position plus leftover cash at each
        row, and the final value minus the initial portfolio value.
    """
    # ``pandas`` is referenced by its full name on purpose: the ``pd`` alias
    # is rebound by a later ``from turtle import pd`` in this file.
    df = pandas.read_csv('{}.csv'.format(stock_name))
    dates = df['time']
    entry_price = df.iloc[0, 4]
    num_holding = agent.initial_portfolio_value // entry_price
    balance_left = agent.initial_portfolio_value % entry_price
    buy_and_hold_portfolio_values = df['close']*num_holding + balance_left
    buy_and_hold_return = buy_and_hold_portfolio_values.iloc[-1] - agent.initial_portfolio_value
    return dates, buy_and_hold_portfolio_values, buy_and_hold_return


def plot_portfolio_performance_comparison(stock_name, agent):
    """Plot the agent's portfolio value next to the buy-and-hold benchmark.

    Both curves are drawn over the dates returned by
    ``buy_and_hold_benchmark``; each legend entry shows the total return.
    """
    dates, benchmark_values, benchmark_return = buy_and_hold_benchmark(stock_name, agent)
    agent_return = agent.portfolio_values[-1] - agent.initial_portfolio_value

    plt.figure(figsize=(15, 5), dpi=100)
    plt.title('{} vs. Buy and Hold'.format(agent.model_type))

    agent_label = '{} Total Return: ${:.2f}'.format(agent.model_type, agent_return)
    plt.plot(dates, agent.portfolio_values, color='green', label=agent_label)

    benchmark_label = '{} Buy and Hold Total Return: ${:.2f}'.format(stock_name, benchmark_return)
    plt.plot(dates, benchmark_values, color='blue', label=benchmark_label)

    tick_positions = np.linspace(0, len(dates), 10)
    plt.xticks(tick_positions)
    plt.ylabel('Portfolio Value ($)')
    plt.legend()
    plt.grid()
    plt.show()

def plot_all(stock_name, agent):
    '''Draw the transaction history and the buy-and-hold comparison together.

    The upper panel shows the close price from ``<stock_name>.csv`` with the
    agent's buy and sell points; the lower panel shows the agent's portfolio
    value against the buy-and-hold benchmark. Titles and legends use the part
    of stock_name before the first underscore.
    '''
    fig, ax = plt.subplots(2, 1, figsize=(16,8), dpi=100)
    symbol = stock_name.split('_')[0]

    portfolio_return = agent.portfolio_values[-1] - agent.initial_portfolio_value
    # ``pandas`` is referenced by its full name on purpose: the ``pd`` alias
    # is rebound by a later ``from turtle import pd`` in this file.
    df = pandas.read_csv('{}.csv'.format(stock_name))
    buy_prices = [df.iloc[t, 4] for t in agent.buy_dates]
    sell_prices = [df.iloc[t, 4] for t in agent.sell_dates]
    ax[0].set_title('{} Total Return on {}: ${:.2f} |'.format(agent.model_type, symbol, portfolio_return))
    ax[0].plot(df['time'], df['close'], color='black', label=symbol)
    ax[0].scatter(agent.buy_dates, buy_prices, c='green', alpha=0.5, label='buy')
    ax[0].scatter(agent.sell_dates, sell_prices,c='red', alpha=0.5, label='sell')
    ax[0].set_ylabel('Price')
    ax[0].set_xticks(np.linspace(0, len(df), 10))
    ax[0].legend()
    ax[0].grid()

    dates, buy_and_hold_portfolio_values, buy_and_hold_return = buy_and_hold_benchmark(stock_name, agent)
    agent_return = agent.portfolio_values[-1] - agent.initial_portfolio_value
    ax[1].set_title('{} vs. Buy and Hold'.format(agent.model_type))
    ax[1].plot(dates, agent.portfolio_values, color='green', label='{} Total Return: ${:.2f}'.format(agent.model_type, agent_return))
    ax[1].plot(dates, buy_and_hold_portfolio_values, color='blue', label='{} Buy and Hold Total Return: ${:.2f}'.format(symbol, buy_and_hold_return))

    ax[1].set_ylabel('Portfolio Value ($)')
    ax[1].set_xticks(np.linspace(0, len(df), 10))
    ax[1].legend()
    ax[1].grid()

    plt.subplots_adjust(hspace=0.5)
    plt.show()


def plot_portfolio_returns_across_episodes(model_name, returns_across_episodes,coinname,inducator):
    """Plot the return of every episode, save the figure and show it briefly.

    The figure is written to
    ``./<coinname>_<model_name>_<inducator>_returns_ep<N>.png`` (N = number of
    episodes), shown without blocking, kept open for 10 seconds and closed.
    The returns are also printed.
    """
    episode_count = len(returns_across_episodes)
    print('returns_across_episodes:',returns_across_episodes)

    plt.figure(figsize=(15, 5), dpi=100)
    plt.title(coinname +': Portfolio Returns')
    episode_numbers = np.arange(1, episode_count+1)
    plt.plot(episode_numbers, returns_across_episodes, color='black')
    plt.xlabel('Episode')
    plt.ylabel('Return Value')
    plt.grid()

    output_path = './{}_{}_{}_returns_ep{}.png'.format(coinname,model_name, inducator, episode_count)
    plt.savefig(output_path)
    plt.show(block=False)
    plt.pause(10)
    plt.close()
#////////////////////////////////////////////////////////////////////////////////////////////////



#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
class Agent(Portfolio):
    """Q-learning trading agent whose value network is a small SimpleRNN.

    Extends ``Portfolio`` with a bounded replay memory, an epsilon-greedy
    policy over three actions (hold, buy, sell) and a TensorBoard callback.
    """

    def __init__(self, state_dim, balance, is_eval=False, model_name=""):
        """Create the agent.

        Args:
            state_dim: Length of the flat state vector fed to the network.
            balance: Starting cash, forwarded to ``Portfolio``.
            is_eval: When true, load the saved network from ``RNN_ep10.h5``
                and never explore; otherwise build a fresh network.
            model_name: Accepted but not used by this class.
        """
        super().__init__(balance=balance)
        self.model_type = 'RNN'
        self.state_dim = state_dim
        self.action_dim = 3  # hold, buy, sell
        self.memory = deque(maxlen=100)
        self.buffer_size = 60

        self.gamma = 0.95  # weight of the best next-state value in the Q target
        self.epsilon = 1.0  # initial exploration rate
        self.epsilon_min = 0.01  # minimum exploration rate
        self.epsilon_decay = 0.995 # decrease exploration rate as the agent becomes good at trading
        self.is_eval = is_eval
        # ``self.model`` names the builder method until this assignment, which
        # replaces it on the instance with the Keras model itself.
        self.model = tf.keras.models.load_model('RNN_ep10.h5') if is_eval else self.model()

        self.tensorboard = tf.keras.callbacks.TensorBoard(log_dir='./logs/RNN_tensorboard', update_freq=90)
        self.tensorboard.set_model(self.model)

    def model(self):
        """Build and compile the network: two SimpleRNN layers and three Dense layers.

        The input has shape ``(state_dim, 1)`` and the output has one value
        per action. The summary is printed as a side effect.
        """
        model = tf.keras.models.Sequential()
        model.add(tf.keras.layers.Reshape((self.state_dim,1), input_shape=(self.state_dim,1)))
        model.add(tf.keras.layers.SimpleRNN(32, return_sequences=True))
        model.add(tf.keras.layers.SimpleRNN(16, return_sequences=False))
        model.add(tf.keras.layers.Dense(units=16, activation='relu'))
        model.add(tf.keras.layers.Dense(units=8, activation='relu'))
        model.add(tf.keras.layers.Dense(self.action_dim, activation='sigmoid'))
        # ``learning_rate`` replaces the deprecated ``lr`` keyword.
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=0.01))
        model.summary()
        return model

    def reset(self):
        """Reset the portfolio and restore the initial exploration rate."""
        self.reset_portfolio()
        self.epsilon = 1.0 # reset exploration rate

    def remember(self, state, actions, reward, next_state, done):
        """Append one transition to the replay memory (oldest entries drop out)."""
        self.memory.append((state, actions, reward, next_state, done))

    def act(self, state):
        """Return an action index for ``state``.

        While training, a random action is chosen with probability
        ``epsilon``; otherwise the action with the highest predicted value.
        """
        if not self.is_eval and np.random.rand() <= self.epsilon:
            return random.randrange(self.action_dim)
        options = self.model.predict(state)
        return np.argmax(options[0])

    def experience_replay(self):
        """Fit the network on the most recent transitions, one at a time.

        For each transition, the predicted value of the highest-valued stored
        action is replaced by ``reward + gamma * max Q(next_state)`` (or just
        ``reward`` when the transition ended the episode). ``epsilon`` is
        decayed afterwards while it is above ``epsilon_min``.

        Returns:
            The loss of the last fit.
        """
        # NOTE(review): this range yields buffer_size - 1 transitions, not
        # buffer_size; confirm which was intended.
        mini_batch = [self.memory[i] for i in range(len(self.memory) - self.buffer_size + 1, len(self.memory))]

        for state, actions, reward, next_state, done in mini_batch:
            if not done:
                Q_target_value = reward + self.gamma * np.amax(self.model.predict(next_state.reshape(1, next_state.shape[1], 1))[0])
            else:
                Q_target_value = reward
            next_actions = self.model.predict(state.reshape(1, state.shape[1], 1))
            next_actions[0][np.argmax(actions)] = Q_target_value
            history = self.model.fit(state.reshape(1, state.shape[1], 1), next_actions, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        return history.history['loss'][0]
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

# --- experiment configuration ---
model_name, stock_name = 'RNN', 'ETHUSDT_1h'
window_size, num_episode = 30, 10
initial_balance = 50000
indicator = "RSI"   # "", RSI, MACD BB, OBV

# --- price data and run-wide bookkeeping ---
stock_prices = stock_close_prices(stock_name)
trading_period = len(stock_prices) - 1
returns_across_episodes = []
num_experience_replay = 0
action_dict = {
    0: 'Hold',
    1: 'Buy',
    2: 'Sell',
}

filename = f'{model_name}_{indicator}_training_{stock_name}.txt'

# Record the run parameters at the top of the training log.
for header_line in (
    f'Trading Object:           {stock_name}',
    f'Window Size:              {window_size} step',
    f'Training Episode:         {num_episode}',
    f'Model Name:               {model_name}',
    'Initial Portfolio Value: ${:,}'.format(initial_balance),
):
    log_txt(filename, header_line)

# The state is window_size price features plus three portfolio features.
agent = Agent(state_dim=window_size + 3, balance=initial_balance)


def hold(actions):
    """Handle a 'hold' decision, selling when that is the runner-up choice.

    Encourages selling for profit and liquidity: if the action with the
    second-smallest value is 'sell' (index 2), something is held and the
    current price beats the cheapest held purchase, one unit is sold and that
    action's value is set to 1.

    Uses the module-level ``agent``, ``stock_prices`` and ``t``.

    Returns:
        ``('Hold', actions)`` when a sale was made, otherwise ``None``.
    """
    runner_up = np.argsort(actions)[1]
    if runner_up != 2 or len(agent.inventory) == 0:
        return None

    best_case_profit = stock_prices[t] - min(agent.inventory)
    if best_case_profit > 0:
        sell(t)
        actions[runner_up] = 1  # reset this action's value to the highest
        return 'Hold', actions
    return None

def buy(t):
    """Buy one unit at step ``t`` if the balance exceeds the price.

    Uses the module-level ``agent`` and ``stock_prices``.

    Returns:
        A ``'Buy: $<price>'`` message, or ``None`` when nothing was bought.
    """
    price = stock_prices[t]
    if not (agent.balance > price):
        return None
    agent.balance -= price
    agent.inventory.append(price)
    return 'Buy: ${:.2f}'.format(price)

def sell(t):
    """Sell the oldest held unit at step ``t`` if anything is held.

    Uses the module-level ``agent`` and ``stock_prices``, and stores the
    realised profit in the module-level ``reward``.

    Returns:
        A ``'Sell: $<price> | Profit: $<profit>'`` message, or ``None`` when
        the inventory is empty.
    """
    global reward
    if len(agent.inventory) == 0:
        return None
    price = stock_prices[t]
    agent.balance += price
    purchase_price = agent.inventory.pop(0)
    profit = price - purchase_price
    reward = profit
    return 'Sell: ${:.2f} | Profit: ${:.2f}'.format(price, profit)


# Training driver: run num_episode episodes, each walking once through the
# whole price series, and save the model after every episode.
start_time = time.time()
for e in range(1, num_episode + 1):
    print(f'\nEpisode: {e}/{num_episode}')
    #logging.info(f'\nEpisode: {e}/{num_episode}')
    log_txt(filename,f'\nEpisode: {e}/{num_episode}')

    agent.reset() # reset to initial balance and hyperparameters
    state = generate_combined_state(0, window_size, stock_prices, agent.balance, len(agent.inventory),indicator)

    for t in range(1, trading_period + 1):
        # Progress marker every 100 steps.
        if t % 100 == 0:
            print(f'\n-------------------Period: {t}/{trading_period}-------------------')
            #logging.info(f'\n-------------------Period: {t}/{trading_period}-------------------')
            log_txt(filename,f'\n-------------------Period: {t}/{trading_period}-------------------')

        # `reward` is module-level here; sell() overwrites it with the realised
        # profit through its `global reward` statement.
        reward = 0
        # The next state is built before the action is executed, so it uses
        # the balance and inventory as they are at the start of the step.
        next_state = generate_combined_state(t, window_size, stock_prices, agent.balance, len(agent.inventory),indicator)
        #previous_portfolio_value = len(agent.inventory) * stock_prices[t] + agent.balance

        print('predict state:',(state).shape)
        #print('predict state:',(state))
        #print('agent.inventory:',agent.inventory)

        # `actions` holds the network's value per action; `action` is the
        # index actually chosen (may be random while exploring).
        if model_name == 'DDPG':
            actions = agent.act(state, t)
            action = np.argmax(actions)
        elif model_name in ['ConvLSTM','ConvGRU','LSTM','GRU','RNN']:
            actions = agent.model.predict(state.reshape(1,state.shape[1], 1))[0]
            action = agent.act(state.reshape(1, state.shape[1], 1))
        elif model_name == 'MLP':
            actions = agent.model.predict(state.reshape(1,state.shape[1], 1))[0][-1]
            action = agent.act(state.reshape(1, state.shape[1], 1))
        #elif model_name == 'ConvGRU':
        #    actions = agent.model.predict(state.reshape(1,state.shape[1], 1))[0]
        #    action = agent.act(state.reshape(1, state.shape[1], 1))
        else:
            actions = agent.model.predict(state)[0]
            action = agent.act(state)
        
        # execute position
        #print(actions)
        #print(action)
        print('Step: {}\tHold signal: {:.4} \tBuy signal: {:.4} \tSell signal: {:.4}'.format(t, actions[0], actions[1], actions[2]))
        #logging.info('Step: {}\tHold signal: {:.4} \tBuy signal: {:.4} \tSell signal: {:.4}'.format(t, actions[0], actions[1], actions[2]))
        log_txt(filename,'Step: {}\tHold signal: {:.4} \tBuy signal: {:.4} \tSell signal: {:.4}'.format(t, actions[0], actions[1], actions[2]))
        # The chosen action differs from the greedy one only when act() explored.
        if action != np.argmax(actions): logging.info(f"\t\t'{action_dict[action]}' is an exploration.")
        if action == 0: # hold
            execution_result = hold(actions)
        if action == 1: # buy
            execution_result = buy(t)      
        if action == 2: # sell
            execution_result = sell(t)        
        
        # NOTE: this line is printed on every step, whether or not the action
        # was an exploration.
        print('\t\t',action_dict[action],' is an exploration.\t',stock_name.split("_")[0],model_name,indicator)
        # check execution result
        if execution_result is None:
            # Nothing was traded: charge one day of treasury-bond interest on
            # the idle balance.
            reward -= treasury_bond_daily_return_rate() * agent.balance  # missing opportunity
        else:
            if isinstance(execution_result, tuple): # if execution_result is 'Hold'
                actions = execution_result[1]
                execution_result = execution_result[0]
            #logging.info(execution_result)     
            log_txt(filename,execution_result)             
        
        print('execution_result:',execution_result)
        # calculate reward
        current_portfolio_value = len(agent.inventory) * stock_prices[t] + agent.balance
        unrealized_profit = current_portfolio_value - agent.initial_portfolio_value
        reward += unrealized_profit

        agent.portfolio_values.append(current_portfolio_value)
        #agent.return_rates.append((current_portfolio_value - previous_portfolio_value) / previous_portfolio_value)
        #print('current_portfolio_value:',current_portfolio_value)
        #print('previous_portfolio_value:',previous_portfolio_value)
        #print('return_rates:',agent.return_rates)
        #print('reward:',reward)
        #print('agent.portfolio_values:',agent.portfolio_values)
        # Step-over-step relative change of the portfolio value.
        if len(agent.portfolio_values)>1:
            agent.return_rates.append((agent.portfolio_values[-1] - agent.portfolio_values[-2]) / agent.portfolio_values[-2])

        #print('return_rates:',agent.return_rates)

        done = True if t == trading_period else False
        agent.remember(state, actions, reward, next_state, done)

        # update state
        state = next_state

        # experience replay
        if len(agent.memory) > agent.buffer_size:
            num_experience_replay += 1
            loss = agent.experience_replay()
            aa = agent.balance
            bb = agent.inventory
            print('Episode: {}\tLoss: {:.2f}\tAction: {}\tReward: {:.2f}\tBalance: {:.2f}\tNumber of Stocks: {}\tindicator: {}'.format(e, loss, action_dict[action], reward, aa, len(bb),indicator))
            #logging.info('Episode: {}\tLoss: {:.2f}\tAction: {}\tReward: {:.2f}\tBalance: {:.2f}\tNumber of Stocks: {}\tindicator: {}'.format(e, loss, action_dict[action], reward, aa, len(bb),indicator))
            log_txt(filename,'Episode: {}\tLoss: {:.2f}\tAction: {}\tReward: {:.2f}\tBalance: {:.2f}\tNumber of Stocks: {}\tindicator: {}'.format(e, loss, action_dict[action], reward, aa, len(bb),indicator))
            agent.tensorboard.on_batch_end(num_experience_replay, {'loss': loss, 'portfolio value': current_portfolio_value})

        # At the last step, record the episode's total return.
        if done:
            portfolio_return = evaluate_portfolio_performance(agent, filename)
            returns_across_episodes.append(portfolio_return)

    # save models periodically
    coinname=stock_name.split("_")[0]
    if model_name == 'DDPG':
        agent.actor.model.save_weights('./{}/{}_DDPG_w{}_{}_ep{}_actor.h5'.format(model_name,coinname,window_size,indicator,str(e)))
        agent.critic.model.save_weights('./{}/{}_DDPG_w{}_{}_ep{}_critic.h5'.format(model_name,coinname,window_size,indicator,str(e)))
    else:
        agent.model.save('./models/'+model_name+'/'+ coinname + '_' + model_name +'_w' + str(window_size) +'_'+ indicator +'_ep' + str(e) + '.h5')
    #logging.info('model saved')
    log_txt(filename,'model saved')
    
#logging.info('total training time: {0:.2f} min'.format((time.time() - start_time)/60))
log_txt(filename,'total training time: {0:.2f} min'.format((time.time() - start_time)/60))
# NOTE(review): `coinname` is only bound inside the episode loop, so this call
# fails with NameError when num_episode is 0.
plot_portfolio_returns_across_episodes(model_name, returns_across_episodes,coinname,indicator)