In [1]:
import pandas as pd
import numpy as np
import itertools
#from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error
import scipy.stats
from hyperopt import hp, tpe, fmin, Trials, STATUS_OK
from hyperopt.pyll.base import scope
import seaborn as sns
from matplotlib import cm, pyplot as plt
from scipy import stats as st
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from torch import nn, optim
import torch.nn.functional as F
import torch
import brevitas.nn as qnn
from brevitas.quant import Int8WeightPerTensorFixedPoint as WeightQuant
from brevitas.quant import Int8ActPerTensorFixedPoint as ActQuant
from brevitas.quant import Int8BiasPerTensorFixedPointInternalScaling as BiasQuant
from brevitas.export import PyXIRManager
from preProcessing import *


## ORDER BOOK DATA
Getting the Data
Let’s download the data samples from LOBSTER. This service provides Google, Apple, Amazon, Intel, Microsoft assets as an examples with 3 levels as market depth (1, 5, 10 levels).

In [3]:

RANDOM_SEED = 42
## reading csv files and format the data adding column names
## set Asset and level 
asset = 'AAPL'
level = 1     # Can only be 1, 5 and 10

data1 = loadDataOrderBook('../../Data/{0}_2012-06-21_34200000_57600000_orderbook_{1}.csv', asset, level)
data2 = loadDataMessage('../../Data/{0}_2012-06-21_34200000_57600000_message_{1}.csv', asset, level)

# displaying input files
print("ORDER BOOK")
print("data1.shape = ", data1.shape)
print(data1.head())

print("MESSAGES")
print("data2.shape = ", data1.shape)
print(data2.head())



[1]
ORDER BOOK
data1.shape =  (118497, 4)
     ask_1  volume_ask_1    bid_1  volume_bid_1
0  5859400           200  5853300            18
1  5859100            18  5853300            18
2  5859200            18  5853300            18
3  5859300           100  5853300            18
4  5859300           100  5853600            18
MESSAGES
data2.shape =  (118497, 4)
         Time_1  Type_1  OrderID_1  Size_1  Price_1  Direction_1
0  34200.004241       1   16113575      18  5853300            1
1  34200.025552       1   16120456      18  5859100           -1
2  34200.201743       3   16120456      18  5859100           -1
3  34200.201781       3   16120480      18  5859200           -1
4  34200.205573       1   16167159      18  5853600            1


## Merging of the order Book and the Messages

In [36]:
# using merge function by setting how='outer'
result = data1.merge(data2, left_index=True, right_index=True, how='left')
result['Direction_1'].replace({-1 : 0.1, 1 : 0.9}, inplace=True)
print("RESULTS")  
# displaying result
print("Shape of the result matrix is = ",result.shape)
print(result.head())

print("number of rows =",result.shape[0])
print("number of columns =",result.shape[1])
print("Number of inputs for each categories")
#result.Type_1.value_counts()/result.shape[0]

RESULTS
Shape of the result matrix is =  (118497, 10)
     ask_1  volume_ask_1    bid_1  volume_bid_1        Time_1  Type_1  \
0  5859400           200  5853300            18  34200.004241       1   
1  5859100            18  5853300            18  34200.025552       1   
2  5859200            18  5853300            18  34200.201743       3   
3  5859300           100  5853300            18  34200.201781       3   
4  5859300           100  5853600            18  34200.205573       1   

   OrderID_1  Size_1  Price_1  Direction_1  
0   16113575      18  5853300          0.9  
1   16120456      18  5859100          0.1  
2   16120456      18  5859100          0.1  
3   16120480      18  5859200          0.1  
4   16167159      18  5853600          0.9  
number of rows = 118497
number of columns = 10
Number of inputs for each categories


## Preparing the Network Inputs

In [37]:
#Creating the 7 Inputs for the DNN 
X = result[["ask_1", "volume_ask_1", "bid_1", "volume_bid_1", "Type_1", "Size_1", "Price_1"]]
print("Shape of the matrix X is:\t",X.shape)
#print(X.head())

#Creating the 1 output for the DNN to train the network with results 
Y = result[["Direction_1"]]
print("Shape of the matrix Y is:\t",Y.shape,"\n")

##pre-processing
print("INPUT VALUES TO FEED THE CNN")
print(X.head(1))
print("OUTPUT VALUES FOR TRAINING")
print(Y.head(1))

#Normalise the INPUT value to improve the learning
X=preprocess(3)(X)



Shape of the matrix X is:	 (118497, 7)
Shape of the matrix Y is:	 (118497, 1) 

INPUT VALUES TO FEED THE CNN
     ask_1  volume_ask_1    bid_1  volume_bid_1  Type_1  Size_1  Price_1
0  5859400           200  5853300            18       1      18  5853300
OUTPUT VALUES FOR TRAINING
   Direction_1
0          0.9


### SPLIT DATA FOR TRAINING AND TESTING

In [38]:
#Split the data between trianing 80% and test 20% no shuffling as it is a time series
#0.2 = 20% of the data for test
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_SEED)
X_train_np, X_test_np, Y_train_np, Y_test_np = train_test_split(X, Y, test_size=0.2, shuffle = False, stratify = None)
print("Shape of the matrix X_train_np is:\t",X_train_np.shape)
print("Shape of the matrix Y_train_np is:\t",Y_train_np.shape)
print("Shape of the matrix X_test_np is:\t",X_test_np.shape,"\n")
print("Shape of the matrix Y_test_np is:\t",Y_test_np.shape,"\n")

#checking the X_test_np data
print("INPUT",X_test_np.head(1))

#checking the Y_test_np data
print("OUTPUT",Y_test_np.head(1))
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

print('Convert Numpy array to Torch\n')
#_T for Torch arrays
X_train_T = torch.from_numpy(X_train_np.to_numpy()).float()

print("Dimension of input tensor:", X_train_T.dim())
print("Input tensor Size:\n",X_train_T.size())


#remove a dimension
Y_train_T = torch.squeeze(torch.from_numpy(Y_train_np.to_numpy()).float())
X_test_T = torch.from_numpy(X_test_np.to_numpy()).float()
Y_test_T = torch.squeeze(torch.from_numpy(Y_test_np.to_numpy()).float())

print("Dimension of input tensor:", Y_train_T.dim())
print("Input tensor Size:\n",Y_train_T.size())
print("Shape of the matrix X_train_T is:\t",X_train_T.shape)
print("Shape of the matrix X_test_T is:\t",X_test_T.shape,"\n")
print("Shape of the matrix Y_train_T is:\t",Y_train_T.shape)
print("Shape of the matrix Y_test_T is:\t",Y_test_T.shape,"\n")

Shape of the matrix X_train_np is:	 (94797, 7)
Shape of the matrix Y_train_np is:	 (94797, 1)
Shape of the matrix X_test_np is:	 (23700, 7) 

Shape of the matrix Y_test_np is:	 (23700, 1) 

INPUT           ask_1  volume_ask_1     bid_1  volume_bid_1  Type_1    Size_1  \
94797  0.985329      0.011901  0.985225      0.008991     0.2  0.013333   

        Price_1  
94797  0.985108  
OUTPUT        Direction_1
94797          0.9
Convert Numpy array to Torch

Dimension of input tensor: 2
Input tensor Size:
 torch.Size([94797, 7])
Dimension of input tensor: 1
Input tensor Size:
 torch.Size([94797])
Shape of the matrix X_train_T is:	 torch.Size([94797, 7])
Shape of the matrix X_test_T is:	 torch.Size([23700, 7]) 

Shape of the matrix Y_train_T is:	 torch.Size([94797])
Shape of the matrix Y_test_T is:	 torch.Size([23700]) 



### Building of the Pytorch Model

In [39]:
#base network
class Net(nn.Module):
  def __init__(self, n_features):
    super(Net, self).__init__()
    self.fc1 = nn.Linear(n_features, 7)
    self.fc2 = nn.Linear(7, 20)
    self.fc3 = nn.Linear(20, 1)
  def forward(self, x):
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    return torch.sigmoid(self.fc3(x))*0.8+0.1

net = Net(X_train_T.shape[1])
print(X_train_T.shape[1])
print("NN structure is: ",net)
print(net.parameters)

#augmented network
class Net1(nn.Module):
  def __init__(self, n_features):
    super(Net1, self).__init__()
    self.fc1 = nn.Linear(n_features, 7)
    self.fc2 = nn.Linear(7, 20)
    self.fc4 = nn.Linear(20, 20)
    self.fc5 = nn.Linear(20, 20)
    self.fc3 = nn.Linear(20, 1)
  def forward(self, x):
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = F.relu(self.fc4(x))
    #x = F.relu(self.fc5(x))
    return (torch.sigmoid(self.fc3(x))*0.8+0.1)

net1 = Net1(X_train_T.shape[1])
print(X_train_T.shape[1])
print("NN structure is: ",net1)
print(net1.parameters)


7
NN structure is:  Net(
  (fc1): Linear(in_features=7, out_features=7, bias=True)
  (fc2): Linear(in_features=7, out_features=20, bias=True)
  (fc3): Linear(in_features=20, out_features=1, bias=True)
)
<bound method Module.parameters of Net(
  (fc1): Linear(in_features=7, out_features=7, bias=True)
  (fc2): Linear(in_features=7, out_features=20, bias=True)
  (fc3): Linear(in_features=20, out_features=1, bias=True)
)>
7
NN structure is:  Net1(
  (fc1): Linear(in_features=7, out_features=7, bias=True)
  (fc2): Linear(in_features=7, out_features=20, bias=True)
  (fc4): Linear(in_features=20, out_features=20, bias=True)
  (fc5): Linear(in_features=20, out_features=20, bias=True)
  (fc3): Linear(in_features=20, out_features=1, bias=True)
)
<bound method Module.parameters of Net1(
  (fc1): Linear(in_features=7, out_features=7, bias=True)
  (fc2): Linear(in_features=7, out_features=20, bias=True)
  (fc4): Linear(in_features=20, out_features=20, bias=True)
  (fc5): Linear(in_features=20, out_

## Quantized the Network using Brevitas

In [40]:

#quantized network
class QuantWeightNet(nn.Module):
    def __init__(self, n_features):
        super(QuantWeightNet, self).__init__()
        self.fc1   = qnn.QuantLinear(n_features, 7, bias=True, weight_bit_width=8)
        self.relu1 = qnn.QuantReLU()
        self.fc2   = qnn.QuantLinear(7, 20, bias=True, weight_bit_width=8)
        self.relu2 = qnn.QuantReLU()
        self.fc4   = qnn.QuantLinear(20, 20, bias=True, weight_bit_width=8)
        self.relu3 = qnn.QuantReLU()
        self.fc5   = qnn.QuantLinear(20, 20, bias=True, weight_bit_width=8)
        self.relu4 = qnn.QuantReLU()
        self.fc3   = qnn.QuantLinear(20, 1, bias=False, weight_bit_width=8)
        

    def forward(self, x):
        out = self.relu1(self.fc1(x))
        out = self.relu2(self.fc2(out))
        out = self.relu3(self.fc4(out))
        #out = self.relu4(self.fc5(out))
        return (torch.sigmoid(self.fc3(out))*0.8+0.1)

Net_q = QuantWeightNet(X_train_T.shape[1])
print("NN structure is: ",Net_q)
print(Net_q.parameters)

#Creates a criterion that measures the Binary Cross Entropy between the target and the input probabilities
criterion = nn.BCELoss(weight=None, size_average=None, reduce=None, reduction='mean')


net = Net(X_train_T.shape[1])
net1 = Net1(X_train_T.shape[1])
net_q= QuantWeightNet(X_train_T.shape[1])
#optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.Adam(net.parameters(), lr=0.001)
optimizer1 = optim.Adam(net1.parameters(), lr=0.0003)
optimizer_q = optim.Adam(net_q.parameters(), lr=0.0003)



device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#transfer data to the GPU
X_train = X_train_T.to(device)
Y_train = Y_train_T.to(device)
X_test = X_test_T.to(device)
Y_test = Y_test_T.to(device)
net = net.to(device)
criterion = criterion.to(device)

net1 = net1.to(device)
net_q = net_q.to(device)

device
#print(list(net.parameters()))
#print(optimizer.param_groups)

print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

#Input Normalization

#print(X_train[1,:])
#X_test[:,[0,2,6]]=X_test[:,[0,2,6]]/X_train[0,0]
#X_train[:,[0,2,6]]=X_train[:,[0,2,6]]/X_train[0,0]
#X_test[:,[1,3,5]]=X_test[:,[1,3,5]]/X_train[0,1]
#X_train[:,[1,3,5]]=X_train[:,[1,3,5]]/X_train[0,1]
#X_test[:,[1,3,5]]=X_test[:,[1,3,5]]/X_train[0,1]
#X_test[:,4]=X_test[:,4]/X_train[0,4]
#X_train[:,4]=X_train[:,4]/X_train[0,4]
#print(X_test[:,[0,2,6]])
#print(X_train[:,[0,2,6]])
#print(X_test)
#print(X_train)




#print(net.fc1.weight)

#print(net.fc2.weight)

#print(net.fc3.weight)
#print(Y_pred)
#print(Y_train)

print(optimizer1.param_groups)
print(optimizer_q.param_groups)

NN structure is:  QuantWeightNet(
  (fc1): QuantLinear(
    in_features=7, out_features=7, bias=True
    (input_quant): ActQuantProxyFromInjector(
      (_zero_hw_sentinel): StatelessBuffer()
    )
    (output_quant): ActQuantProxyFromInjector(
      (_zero_hw_sentinel): StatelessBuffer()
    )
    (weight_quant): WeightQuantProxyFromInjector(
      (_zero_hw_sentinel): StatelessBuffer()
      (tensor_quant): RescalingIntQuant(
        (int_quant): IntQuant(
          (float_to_int_impl): RoundSte()
          (tensor_clamp_impl): TensorClampSte()
          (delay_wrapper): DelayWrapper(
            (delay_impl): _NoDelay()
          )
        )
        (scaling_impl): StatsFromParameterScaling(
          (parameter_list_stats): _ParameterListStats(
            (first_tracked_param): _ViewParameterWrapper(
              (view_shape_impl): OverTensorView()
            )
            (stats): _Stats(
              (stats_impl): AbsMax()
            )
          )
          (stats_scaling_im

### Forcasting

In [51]:
for epoch in range(1000):
      optimizer1.zero_grad() #reset Gradient descent
      optimizer_q.zero_grad()      
      Y_pred = net1(X_train)
      Y_pred_q = net_q(X_train)
      Y_pred = torch.squeeze(Y_pred)
      Y_pred_q = torch.squeeze(Y_pred_q)
      criterion=nn.BCELoss()
      train_loss = criterion(Y_pred, Y_train)
      train_loss_q = criterion(Y_pred_q, Y_train)
      train_loss.backward() #propagate the error currently making     
      optimizer1.step()      #optimise 
      train_loss_q.backward() #propagate the error currently making     
      optimizer_q.step()

      if epoch % 100==0:
        train_acc = calculate_accuracy(Y_train, Y_pred)
        train_acc_q = calculate_accuracy(Y_train, Y_pred)
        Y_test_pred = net1(X_test)
        Y_test_pred = torch.squeeze(Y_test_pred)
        test_loss = criterion(Y_test_pred, Y_test)
        test_acc = calculate_accuracy(Y_test, Y_test_pred)
        print(
  f'''epoch {epoch}
  Train set - loss: {round_tensor(train_loss)}, accuracy: {round_tensor(train_acc)}
  Test  set - loss: {round_tensor(test_loss)}, accuracy: {round_tensor(test_acc)}
  ''')
        Y_test_pred_q = net_q(X_test)
        Y_test_pred_q = torch.squeeze(Y_test_pred_q)
        test_loss_q = criterion(Y_test_pred_q, Y_test)
        test_acc_q = calculate_accuracy(Y_test, Y_test_pred_q)
        print(
  f'''epoch {epoch}
  QTrain set - loss: {round_tensor(train_loss_q)}, accuracy: {round_tensor(train_acc_q)}
  QTest  set - loss: {round_tensor(test_loss_q)}, accuracy: {round_tensor(test_acc_q)}
  ''')


epoch 0
  Train set - loss: 0.68971, accuracy: 0.47511
  Test  set - loss: 0.69558, accuracy: 0.38532
  
epoch 0
  QTrain set - loss: 0.69035, accuracy: 0.47511
  QTest  set - loss: 0.69698, accuracy: 0.41376
  
epoch 100
  Train set - loss: 0.68911, accuracy: 0.47347
  Test  set - loss: 0.69462, accuracy: 0.37342
  
epoch 100
  QTrain set - loss: 0.68995, accuracy: 0.47347
  QTest  set - loss: 0.69646, accuracy: 0.40177
  
epoch 200
  Train set - loss: 0.68868, accuracy: 0.47151
  Test  set - loss: 0.69402, accuracy: 0.36333
  
epoch 200
  QTrain set - loss: 0.68951, accuracy: 0.47151
  QTest  set - loss: 0.69642, accuracy: 0.3984
  
epoch 300
  Train set - loss: 0.68843, accuracy: 0.47512
  Test  set - loss: 0.69356, accuracy: 0.34717
  
epoch 300
  QTrain set - loss: 0.68895, accuracy: 0.47512
  QTest  set - loss: 0.69604, accuracy: 0.38882
  
epoch 400
  Train set - loss: 0.68831, accuracy: 0.47619
  Test  set - loss: 0.6933, accuracy: 0.33025
  
epoch 400
  QTrain set - loss: 0.68