In [1]:
# Mount Google Drive into the Colab runtime; the dataset files read later in
# this notebook are addressed through paths under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [12]:
# load packages
import pandas as pd
import gc
import tensorflow as tf
import pickle
import numpy as np
import keras
from keras import backend as K
from keras.models import load_model, Model
from keras.layers import Flatten, Dense, Dropout, Activation, Input, LSTM, Reshape, Conv2D, MaxPooling2D
from keras.optimizers import Adam
from keras.layers.advanced_activations import LeakyReLU

from keras.utils import np_utils
import matplotlib.pyplot as plt

# set random seeds: fix NumPy's legacy global generator and TensorFlow's
# global seed so repeated runs start from the same random state
np.random.seed(1)
tf.random.set_seed(2)


# Data preparation


In [13]:
def prepare_x(data):
    """Return the feature part of a raw data array, one row per observation.

    The first 40 rows of ``data`` are kept and transposed, so an input indexed
    as ``[row, observation]`` becomes an array indexed as
    ``[observation, row]``.

    Args:
        data: 2-D NumPy array.

    Returns:
        A new NumPy array (a copy, not a view) holding ``data[:40, :]``
        transposed.
    """
    feature_rows = data[:40, :]
    return np.array(feature_rows.T)

def get_label(data):
    """Return the last five rows of ``data``, transposed.

    Args:
        data: 2-D NumPy array indexed as ``[row, observation]``.

    Returns:
        ``data[-5:, :]`` transposed, i.e. one row per observation and one
        column per selected input row. For a NumPy array this is a view on
        ``data``; no copy is made here.
    """
    tail_rows = data[-5:, :]
    return tail_rows.T

def data_classification(X, Y, T):
    """Cut a feature matrix into overlapping windows of ``T`` consecutive rows.

    Window ``k`` holds rows ``k .. k + T - 1`` of ``X`` and is paired with
    ``Y[k + T - 1]``, i.e. the label belonging to the window's last row.

    Args:
        X: 2-D array-like of shape ``(N, D)``, one row per observation.
        Y: array-like whose first axis has at least ``N`` entries, aligned
            with the rows of ``X``.
        T: window length, with ``1 <= T <= N``.

    Returns:
        Tuple ``(dataX, dataY)``. ``dataX`` is a float64 array of shape
        ``(N - T + 1, T, D, 1)`` (a trailing channel axis is appended) and
        ``dataY`` is ``Y[T - 1:N]`` taken from a copy of ``Y``.

    Raises:
        ValueError: if ``X`` is not 2-D, if ``T`` is outside ``[1, N]``, or
            if ``Y`` has fewer than ``N`` entries along its first axis.
    """
    features = np.asarray(X)
    if features.ndim != 2:
        raise ValueError(
            "X must be 2-D (observations x features), got %d dimension(s)" % features.ndim
        )
    n_rows, n_cols = features.shape

    # T < 1 used to yield empty windows with misaligned labels and T > N died
    # inside np.zeros with "negative dimensions"; reject both up front.
    if not 1 <= T <= n_rows:
        raise ValueError(
            "T must satisfy 1 <= T <= number of rows (%d), got %r" % (n_rows, T)
        )

    # Copy Y so the returned labels never alias the caller's array.
    labels = np.array(Y)
    if labels.ndim == 0 or labels.shape[0] < n_rows:
        raise ValueError(
            "Y must provide at least one label per row of X (%d rows)" % n_rows
        )

    n_windows = n_rows - T + 1
    windows = np.empty((n_windows, T, n_cols), dtype=np.float64)
    # Fill one time step of every window per iteration: T slice copies instead
    # of one Python-level copy per window.
    for offset in range(T):
        windows[:, offset, :] = features[offset:offset + n_windows]

    return windows.reshape(windows.shape + (1,)), labels[T - 1:n_rows]

In [14]:
# Location of the FI-2010 "NoAuction / z-score" text files on the mounted Drive.
DATA_DIR = '/content/drive/MyDrive/College/6th sem/DA/DA Mini Project/1.NoAuction_Zscore'
TRAIN_FILE = DATA_DIR + '/NoAuction_Zscore_Training/Train_Dst_NoAuction_ZScore_CF_7.txt'
TEST_FILE_TEMPLATE = DATA_DIR + '/NoAuction_Zscore_Testing/Test_Dst_NoAuction_ZScore_CF_{}.txt'
TEST_FOLDS = (7, 8, 9)

WINDOW = 10       # number of past observations fed to the model per sample
LABEL_COLUMN = 3  # which of the five label columns (prediction horizons) is used
NUM_CLASSES = 3   # label values are shifted down by one and one-hot encoded into 3 classes

dec_train = np.loadtxt(TRAIN_FILE)

# The three test files are joined along the column axis; the per-file arrays
# are dropped right away to release memory.
test_parts = [np.loadtxt(TEST_FILE_TEMPLATE.format(fold)) for fold in TEST_FOLDS]
dec_test = np.hstack(test_parts)
del test_parts
gc.collect()

# extract limit order book data from the FI-2010 dataset
train_lob = prepare_x(dec_train)
test_lob = prepare_x(dec_test)

# extract label from the FI-2010 dataset
train_label = get_label(dec_train)
test_label = get_label(dec_test)


def _windows_and_targets(lob, label):
    """Build model inputs and one-hot targets from aligned features and labels."""
    samples, targets = data_classification(lob, label, T=WINDOW)
    # Keep the chosen label column and shift its values down by one so they
    # can be one-hot encoded as class ids 0 .. NUM_CLASSES - 1.
    targets = np_utils.to_categorical(targets[:, LABEL_COLUMN] - 1, NUM_CLASSES)
    return samples, targets


# prepare training data. We feed the past WINDOW (= 10) observations into the
# model and choose the prediction horizon through LABEL_COLUMN.
trainX_CNN, trainY_CNN = _windows_and_targets(train_lob, train_label)

# prepare test data.
testX_CNN, testY_CNN = _windows_and_targets(test_lob, test_label)

# Model Architecture


In [20]:
def create_deeplob(T, NF, number_of_lstm):
    """Build and compile the DeepLOB-style CNN + inception + LSTM classifier.

    Args:
        T: number of time steps in each input sample.
        NF: number of features per time step.
        number_of_lstm: number of units in the LSTM layer.

    Returns:
        A compiled Keras ``Model`` that maps inputs of shape ``(T, NF, 1)`` to
        a 3-way softmax, trained with categorical cross-entropy and Adam and
        reporting accuracy.
    """

    def conv_lrelu(tensor, filters, kernel_size, **conv_kwargs):
        # One Conv2D followed by LeakyReLU(alpha=0.05): the unit every stage
        # of the network is assembled from.
        tensor = Conv2D(filters, kernel_size, **conv_kwargs)(tensor)
        return keras.layers.LeakyReLU(alpha=0.05)(tensor)

    input_lmd = Input(shape=(T, NF, 1))

    # build the convolutional block
    # Two identical stages: a (1, 2) convolution with stride 2 halves the
    # feature axis, then two (4, 1) convolutions run along the time axis.
    features = input_lmd
    for _ in range(2):
        features = conv_lrelu(features, 32, (1, 2), strides=(1, 2))
        features = conv_lrelu(features, 32, (4, 1), padding='same')
        features = conv_lrelu(features, 32, (4, 1), padding='same')

    # Collapse whatever is left of the feature axis to width 1. The kernel
    # width is read from the tensor (10 when NF == 40) instead of being
    # hard-coded, so the Reshape below also works for other values of NF.
    remaining_width = int(features.shape[2])
    features = conv_lrelu(features, 32, (1, remaining_width))
    features = conv_lrelu(features, 32, (4, 1), padding='same')
    features = conv_lrelu(features, 32, (4, 1), padding='same')

    # build the inception module: three parallel branches over the same input
    branch_3 = conv_lrelu(features, 64, (1, 1), padding='same')
    branch_3 = conv_lrelu(branch_3, 64, (3, 1), padding='same')

    branch_5 = conv_lrelu(features, 64, (1, 1), padding='same')
    branch_5 = conv_lrelu(branch_5, 64, (5, 1), padding='same')

    branch_pool = MaxPooling2D((3, 1), strides=(1, 1), padding='same')(features)
    branch_pool = conv_lrelu(branch_pool, 64, (1, 1), padding='same')

    convsecond_output = keras.layers.concatenate([branch_3, branch_5, branch_pool], axis=3)
    # Drop the width-1 feature axis: (time, 1, channels) -> (time, channels).
    conv_reshape = Reshape(
        (int(convsecond_output.shape[1]), int(convsecond_output.shape[3]))
    )(convsecond_output)

    # build the last LSTM layer
    conv_lstm = LSTM(number_of_lstm)(conv_reshape)

    # build the output layer
    out = Dense(3, activation='softmax')(conv_lstm)
    model = Model(inputs=input_lmd, outputs=out)

    # `learning_rate` is the current name of the argument formerly spelled `lr`.
    # NOTE(review): epsilon=1 is far above the Keras default (1e-7); it is kept
    # exactly as in the original code - confirm it is intentional.
    adam = keras.optimizers.Adam(learning_rate=0.005, beta_1=0.9, beta_2=0.999, epsilon=1)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

    return model

# Instantiate the model for windows of 10 time steps x 40 features, with a
# 64-unit LSTM.
deeplob = create_deeplob(T=10, NF=40, number_of_lstm=64)


# Model Training and Testing

In [21]:
# Train the model and keep the returned History object so the per-epoch
# metrics stay available after training instead of only appearing as a repr.
# NOTE(review): validation_data is the test split, so the val_* numbers logged
# each epoch are test-set metrics, not scores on a separate validation set.
history = deeplob.fit(
    trainX_CNN, trainY_CNN,
    epochs=100, batch_size=64, verbose=2,
    validation_data=(testX_CNN, testY_CNN),
)

Epoch 1/100
3981/3981 - 36s - loss: 1.0969 - accuracy: 0.3452 - val_loss: 1.1064 - val_accuracy: 0.3762
Epoch 2/100
3981/3981 - 33s - loss: 1.0852 - accuracy: 0.3796 - val_loss: 1.0529 - val_accuracy: 0.4926
Epoch 3/100
3981/3981 - 33s - loss: 0.9753 - accuracy: 0.4711 - val_loss: 0.8091 - val_accuracy: 0.6003
Epoch 4/100
3981/3981 - 33s - loss: 0.8944 - accuracy: 0.5172 - val_loss: 0.7826 - val_accuracy: 0.6072
Epoch 5/100
3981/3981 - 33s - loss: 0.8795 - accuracy: 0.5271 - val_loss: 0.7658 - val_accuracy: 0.6100
Epoch 6/100
3981/3981 - 33s - loss: 0.8696 - accuracy: 0.5318 - val_loss: 0.7716 - val_accuracy: 0.6066
Epoch 7/100
3981/3981 - 34s - loss: 0.8641 - accuracy: 0.5352 - val_loss: 0.7583 - val_accuracy: 0.6149
Epoch 8/100
3981/3981 - 34s - loss: 0.8592 - accuracy: 0.5389 - val_loss: 0.7701 - val_accuracy: 0.6215
Epoch 9/100
3981/3981 - 33s - loss: 0.8562 - accuracy: 0.5428 - val_loss: 0.7759 - val_accuracy: 0.6059
Epoch 10/100
3981/3981 - 33s - loss: 0.8540 - accuracy: 0.5462 -

<tensorflow.python.keras.callbacks.History at 0x7f8025e93a90>

In [18]:
from sklearn.metrics import classification_report

# Predicted class scores for every test window, then the highest-scoring
# class index per row.
y_pred = deeplob.predict(testX_CNN, batch_size=64, verbose=2)
y_pred_bool = y_pred.argmax(axis=1)

# Recover integer class ids from the one-hot encoded test targets.
round_testy = testY_CNN.argmax(axis=1)

# Per-class precision / recall / F1 on the test windows.
report = classification_report(round_testy, y_pred_bool)
print(report)

2181/2181 - 6s
              precision    recall  f1-score   support

           0       0.76      0.67      0.71     38464
           1       0.79      0.89      0.84     66002
           2       0.74      0.66      0.70     35112

    accuracy                           0.77    139578
   macro avg       0.77      0.74      0.75    139578
weighted avg       0.77      0.77      0.77    139578

