In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
class DeepLOB(nn.Module):
    """DeepLOB-style network: conv stack -> inception module -> BiLSTM -> MLP head.

    The convolutional stack only ever convolves along the feature axis, the
    inception module only along the time axis (with "same" padding), so the
    number of time steps is preserved all the way to the LSTM.
    """

    def __init__(self, sequence_length, n_features, n_levels, n_classes):
        """
        Args:
            sequence_length (int): Number of time steps in the input. Stored for
                reference only; the layers themselves accept any window length.
            n_features (int): Number of features (e.g., bid/ask prices, volumes).
                With the kernel sizes used below the convolutional stack needs
                at least 100 features to leave a non-empty output.
            n_levels (int): Depth of the data channels (e.g., 1 for univariate
                time series) - 10 was used in the paper. This can be changed
                based on further discussion and the data processing output.
            n_classes (int): Number of output classes (for classification) or
                1 for regression - Buy/Sell/Hold.

        Raises:
            ValueError: If ``n_features`` is too small to survive the
                convolutional stack.
        """
        super().__init__()
        self.sequence_length = sequence_length

        # 1. Convolutional layers (all kernels act on the feature axis).
        # NOTE(review): the reference DeepLOB architecture appears to apply its
        # 4-tap kernels along the time axis, i.e. (4, 1); the (1, 4) orientation
        # of the original code is kept here - confirm which one is intended.
        ## 1st:
        self.conv11 = nn.Conv2d(in_channels=n_levels, out_channels=16, kernel_size=(1, 2), stride=(1, 2), padding=0)
        self.conv12 = nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(1, 4), padding=0)
        self.conv13 = nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(1, 4), padding=0)

        ## 2nd:
        self.conv21 = nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(1, 2), stride=(1, 2), padding=0)
        self.conv22 = nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(1, 4), padding=0)
        self.conv23 = nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(1, 4), padding=0)

        ## 3rd:
        self.conv31 = nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(1, 10), padding=0)
        self.conv32 = nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(1, 4), padding=0)
        self.conv33 = nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(1, 4), padding=0)

        # 2. Inception layer: six parallel branches over the time axis.
        ## Originally there are three 1x1 convolutions for dimensionality
        ## reduction. Different kernel sizes are used here to give the window
        ## filters more variety. -> Further discussion.
        ## Every branch uses "same" padding along time so that all outputs
        ## share one shape and can be concatenated on the channel axis.
        conv_channels = 16
        branch_channels = 32
        self.conv1x1 = nn.Conv2d(in_channels=conv_channels, out_channels=branch_channels, kernel_size=(1, 1), padding=0)
        self.conv3x1 = nn.Conv2d(in_channels=conv_channels, out_channels=branch_channels, kernel_size=(3, 1), padding=(1, 0))
        self.conv5x1 = nn.Conv2d(in_channels=conv_channels, out_channels=branch_channels, kernel_size=(5, 1), padding=(2, 0))
        self.conv7x1 = nn.Conv2d(in_channels=conv_channels, out_channels=branch_channels, kernel_size=(7, 1), padding=(3, 0))
        self.conv9x1 = nn.Conv2d(in_channels=conv_channels, out_channels=branch_channels, kernel_size=(9, 1), padding=(4, 0))

        ## Max pooling followed by a 1x1 convolution
        self.maxpool = nn.MaxPool2d(kernel_size=(3, 1), stride=1, padding=(1, 0))
        self.conv_pool1x1 = nn.Conv2d(in_channels=conv_channels, out_channels=branch_channels, kernel_size=(1, 1), padding=0)
        n_branches = 6

        # 3. LSTM layers
        # Per time step the LSTM sees every inception channel at every
        # remaining feature position, flattened into one vector.
        reduced_width = self._reduced_feature_width(n_features)
        self.lstm_input_size = branch_channels * n_branches * reduced_width
        self.lstm = nn.LSTM(input_size=self.lstm_input_size, hidden_size=64, num_layers=2,
                            batch_first=True, bidirectional=True)

        # 4. Fully connected layers
        self.fc1 = nn.Linear(64 * 2, 64)  # Bi-directional LSTM doubles hidden size
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(64, n_classes)

    def _feature_convs(self):
        """Return the convolutional stack in the order it is applied."""
        return (
            self.conv11, self.conv12, self.conv13,
            self.conv21, self.conv22, self.conv23,
            self.conv31, self.conv32, self.conv33,
        )

    def _reduced_feature_width(self, n_features):
        """Return the feature-axis width left after the convolutional stack.

        The width is derived from the layers' own kernel/stride/padding so it
        cannot drift out of sync with the layer definitions.

        Raises:
            ValueError: If the width drops below a kernel's size on the way.
        """
        width = n_features
        for conv in self._feature_convs():
            kernel = conv.kernel_size[1]
            stride = conv.stride[1]
            pad = conv.padding[1]
            if width + 2 * pad < kernel:
                raise ValueError(
                    f"n_features={n_features} is too small for the convolutional "
                    f"stack: feature width {width} is narrower than a kernel of "
                    f"width {kernel}"
                )
            # Standard convolution output-size formula (dilation is 1 here).
            width = (width + 2 * pad - kernel) // stride + 1
        return width

    def forward(self, x):
        """
        Forward pass through the DeepLOB model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, n_levels, sequence_length, n_features).

        Returns:
            torch.Tensor: Output predictions of shape (batch_size, n_classes).
        """
        # Convolutional layers, each followed by a leaky ReLU
        for conv in self._feature_convs():
            x = F.leaky_relu(conv(x))

        # Inception layers: all branches keep (time, width), so they can be
        # stacked along the channel axis.
        branches = [
            self.conv1x1(x),
            self.conv3x1(x),
            self.conv5x1(x),
            self.conv7x1(x),
            self.conv9x1(x),
            self.conv_pool1x1(self.maxpool(x)),
        ]
        x = torch.cat(branches, dim=1)

        # Reshape for LSTM layers
        x = x.permute(0, 2, 1, 3)  # (batch_size, time, channels, width)
        x = x.reshape(x.size(0), x.size(1), -1)  # (batch_size, time, lstm_input_size)

        # LSTM layers
        x, _ = self.lstm(x)

        # Take the output of the last time step
        x = x[:, -1, :]  # (batch_size, hidden_size * 2)

        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        return x