In [None]:
import pandas as pd
import numpy as np

# Universe of ETF tickers; one feature CSV per ticker is read from featureDir.
ETFs = ["159920.SZ","510900.SH","518880.SH","511010.SH","159915.SZ","510050.SH","510300.SH","510500.SH"]

# Look-back configuration. modelInitialLookbackWindowSize is later aliased as
# pastDays (length of the rolling training window, counted in unique dates).
modelInitialLookbackWindowSize = 400
lookbackNumWindowsForSignals = 20

# NOTE(review): absolute, machine-specific paths. Both must keep their trailing
# separator because file names are appended by plain string concatenation.
outputDir = "/Users/kuen/Desktop/RL/Output/"
featureDir = "/Users/kuen/Desktop/RL/Features/"

# Columns to load. The ORDER is significant: the training loop picks the
# feature columns by position (iloc[:, 4:9]), so zt0..zt4 must be columns 4..8.
readInCols = ['Date', 'vwap', 'todayOpen', 'zt', 'zt0', 'zt1', 'zt2', 'zt3', 'zt4']

# ticker -> DataFrame holding exactly readInCols, in that order.
features = {}

for ticker in ETFs:
    # read_csv ignores the order of `usecols` and returns the columns in file
    # order; re-select with readInCols so positional slicing downstream is
    # correct regardless of how the CSV happens to be laid out.
    features[ticker] = pd.read_csv(featureDir + 'features_' + ticker + '.csv', usecols=readInCols)[readInCols]

In [None]:
# Build models for individual ETFs with TensorFlow.
# Select the compute device. These environment variables are set before
# tensorflow is imported so that they are in place when it initialises, which
# is why the imports in this cell are interleaved with the assignments.
import os
# Enumerate GPUs by PCI bus id. The key was previously misspelled as
# 'CUDA_DEVIDE_ORDER', so the setting was silently ignored.
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '0' # only use gpu 0
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # quietest setting; use 0, 1 or 2 for more verbose native logs

import tensorflow as tf
# Print the devices TensorFlow can see to check that the GPU is picked up.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

In [None]:
# Reproducibility helper: start from an empty graph with fixed random seeds.
def reset_graph(seed=42):
    """Clear the default TensorFlow graph and re-seed TensorFlow and NumPy.

    Args:
        seed: value passed to both ``tf.set_random_seed`` and
            ``np.random.seed`` (defaults to 42).
    """
    # NumPy seeding is independent of the graph, so do it up front.
    np.random.seed(seed)
    # Reset first, then seed, so the seed is applied after the old graph is gone.
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    
# Number of consecutive unique dates in each rolling training window; the date
# immediately after the window (date_index[i + pastDays]) is selected as the
# "next day" data in the main loop.
pastDays = modelInitialLookbackWindowSize

In [None]:
# ---- hyper-parameters ----------------------------------------------------
# Sequence lengths (T) for the recurrent part; the main loop runs once per value.
n_stepsArray = [20, 200]

# Network dimensions.
n_inputs = 5                             # input features per row: zt0 .. zt4
n_hidden1 = n_hidden2 = n_hidden3 = 128  # widths of the first three dense layers
n_hidden4 = 16                           # width of the last dense layer (reshaped and fed to the RNN)
n_neurons = 1                            # units in the recurrent cell
n_outputs = 1                            # output dimension

# Training schedule.
n_epochs = 200
batch_size = 200
learning_rate = 1e-3

# Trading cost.
c = 2e-4  # transaction fee level

In [None]:
 # iterate through all ETF tickers
for ticker in ETFs:
    featureData = features[ticker]
    # iterate through days
    dates = featureData.Date
    date_index = np.unique(dates)
    date_num = date_index.size
    
    for n_steps in n_stepsArray:
        cols = ['ticker','date','delta','nextCloseToOpen','nextDayOpen','TR','SR','nDCtoCPnL','nDCtoCPnLMinusTC','nDCtoCCumuPnLMinusTC']
        outputFile = pd.DataFrame(columns = cols)
        print('date_num='+str(date_num))
        print('pastDays='+str(pastDays))
        cumuPnL = 0
        cumuPnLMinusTC = 0
        for i in range(date_num - pastDays - 1): # from 0 to data_num - pastDays - 1
            trainDates = [date_index[i]] # i = 0, the first trading day.
            for j in range(i+1, i+pastDays): # [this for loop is unnecessary, change it in the next version]
                trainDates.append(date_index[j])
            
            dataPd = featureData[featureData.Date.isin(trainDates)]
            dataNextD = featureData[featureData.Date == date_index[i+pastDays]]
            
            # prepare data
            startIdx = 4
            F_train_all = dataPd.iloc[:, startIdx:(n_inputs+startIdx)].values
            z_train_all = dataPd.zt.values 
            p_train_all = dataPd.todayOpen.values
            
            F_train = F_train_all[:int(len(dataPd)/n_steps)*n_steps, :]
            z_train = z_train_all[:int(len(dataPd)/n_steps)*n_steps]
            p_train = p_train_all[:int(len(dataPd)/n_steps)]
            
            # build computation graph
            reset_graph()
            
            f = tf.placeholder(tf.float32, shape=(None, n_inputs),name='input')
            z = tf.placeholder(tf.float32, [None, n_steps, n_outputs], name='z')
            p = tf.placeholder(tf.float32, [None, n_steps, n_outputs], name='p')
            
            with tf.name_scope('DNN'):
                hidden1 = tf.layers.dense(f, n_hidden1, activation=tf.nn.selu,name='hidden1')
                hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.selu, name='hidden2')
                hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.selu, name='hideen3')
                hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.selu, name='hidden4')
            
            F = tf.reshape(hidden4, [-1, n_steps, n_hidden4])
            
            cell = tf.contrib.rnn.OutputProjectionWrapper(tf.nn.rnn_cell.BasicRNNCell(num_units=n_neurons, activation=None,name='rnn'), output_size=n_outputs)
            
            deltaTemp, states = tf.nn.dynamic_rnn(cell, F, dtype=tf.float32)
            delta = tf.nn.relu(deltaTemp, name='deltaCalc')
            
            R = tf.pad(delta[:, :(n_steps-1), :] * z[:,1:(n_steps), :] - 
                       tf.abs(delta[:, 1:n_steps,:] - delta[:,:(n_steps-1),:])*
                       p[:,1:n_steps] * c, 
                       paddings = [[0,0],
                                   [1,0],
                                   [0,0]])
            
            U = tf.reduce_mean(tf.reduce_sum(R, axis=1))
            
            optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)
            
            grad = optimizer.compute_gradients(-U)
            apply_grad = optimizer.apply_gradients(grad)
            
            # Objective function
            TR = tf.reduce_sum(R)
            SR = tf.reduce_sum(R)/ (tf.sqrt(tf.nn.moments(tf.reshape(R,[-1]),
                                                         axes=0)[1])+1e-10)*np.sqrt(252)