In [1]:
import os
import json
import numpy as np
import io
from datetime import datetime
from data_generator import DataGenerator
from trade_env import TraderEnv
from rnn.model_keras import NeuralNetwork
import numpy
import matplotlib.pyplot as plt
from pandas import read_csv
import math
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM
from sklearn.preprocessing import MinMaxScaler

from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.layers import Bidirectional
from keras.models import Sequential

#for logging
import time

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Create the shared DataGenerator instance used by the cells below, then call
# rewind() on it before any snapshots are read.
# NOTE(review): `random=False, first_index=10` presumably selects a fixed,
# sequential starting position — confirm against data_generator.DataGenerator.
dt = DataGenerator(random=False, first_index=10)
dt.rewind()

In [3]:
# Price and timestamp of the previously processed snapshot. 0 means "no
# previous snapshot yet"; get_state() overwrites both on every call.
last_price = 0
last_time = 0
# How many generator positions ahead get_state() looks to build the label.
steps = 100

def get_state(raw_state):
    """Turn one raw market snapshot into a flat feature list ending in its label.

    Layout of the returned list, in order:
      * one value per bid (first five bids): order[0] / price
      * one value per ask (first five asks): -(order[0] / price)
      * price / previous price (0 on the first call)
      * timestamp / previous timestamp (0 on the first call)
      * a three-element one-hot label: class 0 if the price `steps` positions
        ahead of `dt.index` is higher than the current price, class 2 if it is
        lower, class 1 if it is equal.

    Arguments:
    raw_state -- mapping with the keys "price", "bids", "asks" and "timestamp".
                 Each bid/ask is indexable; element 0 is converted with float()
                 (presumably the order price — confirm against DataGenerator).

    Returns:
    A new list of numbers as described above. Note that the label is part of
    the list; load_data() slices the last three columns back out as targets.

    Side effects:
    Updates the module-level `last_price` and `last_time`, and reads the
    module-level `dt`, `steps` and `onehot_encoded`.
    """
    global last_price
    global last_time

    price = raw_state["price"]
    # Read the timestamp from the argument itself rather than from whatever
    # happens to be bound to a module-level `state` in the calling cell.
    timestamp = int(raw_state["timestamp"])

    def relative_prices(orders, sign):
        # Order amounts (order[1]) are currently not used as features.
        return [(float(order[0]) / price) * sign for order in orders]

    features = []
    features.extend(relative_prices(raw_state["bids"][:5], 1))
    features.extend(relative_prices(raw_state["asks"][:5], -1))

    # Ratios against the previous snapshot; 0 is the placeholder when there is
    # no previous snapshot.
    features.append(price / last_price if last_price != 0 else 0)
    features.append(timestamp / last_time if last_time != 0 else 0)

    last_time = timestamp
    last_price = price

    # Label: direction of the price `steps` positions ahead of the generator's
    # current index, relative to the current price.
    future_price = dt.get_from_index(dt.index + steps)["price"]
    if future_price > price:
        label = onehot_encoded(0)
    elif future_price < price:
        label = onehot_encoded(2)
    else:
        label = onehot_encoded(1)

    features.extend(label)
    return features


In [4]:
# one-hot encode an integer class label
def onehot_encoded(integer_encoded, char_to_int=3):
    """Return a one-hot list for an integer class index.

    Arguments:
    integer_encoded -- position that receives the 1 (standard list indexing,
                       so an out-of-range position raises IndexError)
    char_to_int -- length of the returned list, 3 by default

    Returns:
    A new list of `char_to_int` zeros with a single 1 at `integer_encoded`.
    """
    vector = [0] * char_to_int
    vector[integer_encoded] = 1
    return vector

In [5]:
def load_data(data, sequence_length, seed=None):
    """Slice `data` into overlapping windows and return shuffled training tensors.

    Every window holds `sequence_length` consecutive rows of `data`. The first
    90% of the windows (in original order) are kept, then shuffled along the
    window axis; the remaining 10% are discarded, not returned. No
    normalisation is applied.

    Arguments:
    data -- sequence of equally long feature rows; the last three values of a
            row are treated as its target
    sequence_length -- number of consecutive rows per window
    seed -- optional seed for the shuffle. When None (the default) the global
            NumPy random state is used, exactly as before; pass an integer to
            make the shuffle reproducible.

    Returns:
    X_train -- array of shape (windows, sequence_length - 1, features): every
               row of each window except the last
    Y_train -- array of shape (windows, 3): the last three values of each
               window's final row
    window_size -- sequence_length - 1

    Raises:
    ValueError -- if `data` has too few rows to form a single window
    """
    windows = [data[start: start + sequence_length]
               for start in range(len(data) - sequence_length)]
    if not windows:
        raise ValueError(
            "load_data needs more than sequence_length=%d rows, got %d"
            % (sequence_length, len(data))
        )

    # 3D array: (number of windows, window length, features per row)
    dr = np.array(windows)

    # Keep only the first 90% of the windows for training
    split_line = round(0.9 * dr.shape[0])
    training_data = dr[:int(split_line), :]

    # Shuffle whole windows; rows inside a window keep their order
    if seed is None:
        np.random.shuffle(training_data)
    else:
        np.random.RandomState(seed).shuffle(training_data)

    # Inputs are all rows but the last; targets come from the last row
    X_train = training_data[:, :-1]
    Y_train = training_data[:, -1][:, -3:]

    window_size = sequence_length - 1

    return X_train, Y_train, window_size

In [6]:
# Collect one get_state() feature list per snapshot until 1000 have been gathered.
data = []
while len(data) < 1000: #(dt.max_steps() - steps*3):
    # `state` first holds the raw snapshot from the generator ...
    state = dt.next()
    # ... and is then rebound to the flat feature list built from it.
    state = get_state(state)
    data.append(state)
# Call rewind() on the generator once the pass is complete.
dt.rewind()

In [7]:
# Preview of the windowing that load_data() performs: slice `data` into
# overlapping windows of 5 consecutive rows and show the resulting array shape.
result = []
sequence_length = 5
print (len(data))
for index in range(len(data) - sequence_length):
    result.append(data[index: index + sequence_length])
# Last expression of the cell, so its value is displayed as the cell output.
np.array(result).shape

1000


(995, 5, 15)

In [8]:
# Build the training tensors from 30-row windows: X_train holds the first 29
# rows of each window, Y_train the last three values of each window's final
# row, and window_size is 29 (sequence_length - 1).
X_train, Y_train, window_size = load_data(data, 30)

In [9]:
def initialize_model(window_size, dropout_value, activation_function, loss_function, optimizer,
                     n_features=None):
    """
    Builds and compiles a three-layer bidirectional LSTM classifier

    Arguments:
    window_size -- Number of time steps per input sample; also the unit count of the
                   first and third LSTM layers (the second uses twice as many)
    dropout_value -- Dropout rate applied after the first and second recurrent layers
    activation_function -- Activation applied to the 3-unit output layer
    loss_function -- Loss passed to model.compile
    optimizer -- Optimizer passed to model.compile
    n_features -- Number of features per time step. When None (the default) it is
                  read from the module-level X_train, which must then already exist

    Returns:
    model -- The compiled Sequential model, reporting 'mae' and 'acc' as metrics
    """
    #Fall back to the notebook-level training tensor only when the caller
    #did not state the feature count explicitly
    if n_features is None:
        n_features = X_train.shape[-1]

    #Create a Sequential model using Keras
    model = Sequential()

    #First recurrent layer with dropout
    model.add(Bidirectional(LSTM(window_size, return_sequences=True),
                            input_shape=(window_size, n_features)))
    model.add(Dropout(dropout_value))

    #Second recurrent layer with dropout
    model.add(Bidirectional(LSTM(window_size * 2, return_sequences=True)))
    model.add(Dropout(dropout_value))

    #Third recurrent layer (returns only the final time step)
    model.add(Bidirectional(LSTM(window_size, return_sequences=False)))

    #Output layer: one unit per class, followed by the requested activation
    model.add(Dense(units=3))
    model.add(Activation(activation_function))

    #Set loss function and optimizer
    model.compile(loss=loss_function, optimizer=optimizer, metrics=['mae', 'acc'])

    return model

In [10]:
def fit_model(model, X_train, Y_train, batch_num, num_epoch, val_split):
    """
    Fits the model to the training data and times the run
    
    Arguments:
    model -- A compiled model exposing a Keras-style fit() method
    X_train -- The training inputs, passed to model.fit unchanged
    Y_train -- The training targets, passed to model.fit unchanged
    batch_num -- An integer representing the batch size to be used
    num_epoch -- An integer defining the number of epochs to be run
    val_split -- A decimal representing the proportion of training data to be used as validation data
    
    Returns:
    model -- The same model object after fitting
    training_time -- An integer representing the amount of time (in whole seconds) that the model was training
    """
    #Record the time the model starts training
    start = time.time()

    #Train the model on X_train and Y_train
    #(`epochs` is the Keras 2 name of the former `nb_epoch` argument)
    model.fit(X_train, Y_train, batch_size=batch_num, epochs=num_epoch, validation_split=val_split)

    #Get the time it took to train the model (in seconds, rounded down)
    training_time = int(math.floor(time.time() - start))
    return model, training_time

In [None]:
# Build the network with dropout 0.2, a softmax output activation, 'mse' loss
# and the 'adam' optimizer.
# NOTE(review): the targets are one-hot vectors fed through a softmax;
# 'categorical_crossentropy' is the usual loss for that setup — confirm that
# 'mse' is intentional.
model = initialize_model(window_size, 0.2, 'softmax', 'mse', 'adam')
#print (model.summary())

In [None]:
# Train with batch size 1024 for 100 epochs, using 5% of the training data as
# validation data.
# NOTE(review): the recorded output shows 829 training samples, so a batch
# size of 1024 means a single gradient update per epoch — confirm intended.
model, training_time = fit_model(model, X_train, Y_train, 1024, 100, .05)

#Print the training time
print ("Training time", training_time, "seconds")



Train on 829 samples, validate on 44 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100


Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
