<a href="https://colab.research.google.com/github/thegallier/timeseries/blob/main/timeseries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
import torch.nn.functional as F

# Parameters
num_timesteps = 50000  # Reduced for practical purposes
num_securities = 10
num_features_per_security = 4
num_classes = 3
num_features = num_securities * num_features_per_security

# Generate timestamps
timestamps = np.arange(num_timesteps)

# Generate random data for the primary dataset
X_data = np.random.rand(num_timesteps, num_features).astype(np.float32)
y_data = np.random.randint(0, num_classes, size=(num_timesteps, num_securities)).astype(np.int64)

# Generate the second dataset
# 3 strings and 2 floats per timestamp
str_columns = ['str1', 'str2', 'str3']
float_columns = ['float1', 'float2']
second_dataset = {
    'timestamp': timestamps,
    'str1': np.random.choice(['A', 'B', 'C'], num_timesteps),
    'str2': np.random.choice(['D', 'E', 'F'], num_timesteps),
    'str3': np.random.choice(['G', 'H', 'I'], num_timesteps),
    'float1': np.random.rand(num_timesteps),
    'float2': np.random.rand(num_timesteps),
}

# Encode string columns
label_encoders = {}
for col in str_columns:
    le = LabelEncoder()
    second_dataset[col] = le.fit_transform(second_dataset[col])
    label_encoders[col] = le

# Combine all features from the second dataset
second_X_data = np.column_stack([second_dataset[col] for col in str_columns + float_columns]).astype(np.float32)

# Min-max scaling for both datasets
scaler_X = MinMaxScaler()
X_data = scaler_X.fit_transform(X_data)

scaler_second_X = MinMaxScaler()
second_X_data = scaler_second_X.fit_transform(second_X_data)

# Add positional encoding options
def add_positional_encoding(X, timestamps, option='shared'):
    if option == 'shared':
        pe = np.sin(timestamps[:, None] / 10000 ** (np.arange(X.shape[1]) / X.shape[1]))
        X_pe = X + pe.astype(np.float32)
    elif option == 'per_security':
        pe_list = []
        for i in range(num_securities):
            pe = np.sin(timestamps[:, None] / 10000 ** (np.arange(num_features_per_security) / num_features_per_security))
            pe_list.append(pe)
        pe_concat = np.hstack(pe_list)
        X_pe = X + pe_concat.astype(np.float32)
    else:
        X_pe = X  # No positional encoding
    return X_pe

# Apply positional encoding
positional_encoding_option = 'shared'  # 'shared' or 'per_security'
X_data = add_positional_encoding(X_data, timestamps, positional_encoding_option)

# Function to create windows (updated to handle second dataset)
def create_windows(X1, X2, y, window_size, horizon):
    X1_windows = []
    X2_windows = []
    y_windows = []
    for i in range(len(X1) - window_size - horizon + 1):
        X1_windows.append(X1[i:i+window_size])
        X2_windows.append(X2[i:i+window_size])
        y_windows.append(y[i+window_size+horizon-1])
    return np.array(X1_windows), np.array(X2_windows), np.array(y_windows)

# Define window sizes
window_size = 20
horizon = 1

# Create windows
X1_windows, X2_windows, y_windows = create_windows(X_data, second_X_data, y_data, window_size, horizon)

# Split into train and test sets
train_ratio = 0.8
train_size = int(len(X1_windows) * train_ratio)

X1_train = X1_windows[:train_size]
X2_train = X2_windows[:train_size]
y_train = y_windows[:train_size]

X1_test = X1_windows[train_size:]
X2_test = X2_windows[train_size:]
y_test = y_windows[train_size:]

# Convert to PyTorch tensors
X1_train_tensor = torch.tensor(X1_train)
X2_train_tensor = torch.tensor(X2_train)
y_train_tensor = torch.tensor(y_train)

X1_test_tensor = torch.tensor(X1_test)
X2_test_tensor = torch.tensor(X2_test)
y_test_tensor = torch.tensor(y_test)

# Custom Dataset (updated for two datasets)
class TimeSeriesDataset(Dataset):
    def __init__(self, X1, X2, y):
        self.X1 = X1.float()
        self.X2 = X2.float()
        self.y = y.long()
    def __len__(self):
        return len(self.X1)
    def __getitem__(self, idx):
        return self.X1[idx], self.X2[idx], self.y[idx]

# DataLoaders
batch_size = 64

train_dataset = TimeSeriesDataset(X1_train_tensor, X2_train_tensor, y_train_tensor)
test_dataset = TimeSeriesDataset(X1_test_tensor, X2_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Matrix Regression Model (Y = A * X * B)
class MatrixRegressionModel(nn.Module):
    def __init__(self, num_securities, num_features):
        super(MatrixRegressionModel, self).__init__()
        self.A = nn.Parameter(torch.randn(1, num_securities))
        self.B = nn.Parameter(torch.randn(num_features, num_classes))
    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten over time
        out = self.A @ x @ self.B  # Shape: (batch_size, num_classes)
        out = out.view(-1, num_securities, num_classes)
        return out

class LogisticRegressionModel(nn.Module):
    def __init__(self, input_size, num_securities, num_classes):
        super(LogisticRegressionModel, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        self.linear = nn.Linear(input_size, num_securities * num_classes)
    def forward(self, x):
        # x shape: (batch_size, window_size, num_features)
        x = x.view(x.size(0), -1)
        out = self.linear(x)
        out = out.view(-1, self.num_securities, self.num_classes)
        return out

# CNN Model
class CNNModel(nn.Module):
    def __init__(self, num_features, num_securities, num_classes):
        super(CNNModel, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        self.conv1 = nn.Conv1d(in_channels=num_features, out_channels=64, kernel_size=3)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(128, num_securities * num_classes)
    def forward(self, x):
        x = x.permute(0, 2, 1)  # (batch_size, num_features, window_size)
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = torch.mean(x, dim=2)  # Global average pooling
        x = self.fc(x)
        out = x.view(-1, self.num_securities, self.num_classes)
        return out

# LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_securities, num_classes):
        super(LSTMModel, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_securities * num_classes)
    def forward(self, x):
        h0 = torch.zeros(self.lstm.num_layers, x.size(0), self.lstm.hidden_size).to(x.device)
        c0 = torch.zeros(self.lstm.num_layers, x.size(0), self.lstm.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = out[:, -1, :]  # Last time step
        out = self.fc(out)
        out = out.view(-1, self.num_securities, self.num_classes)
        return out

# Graph Neural Network Model
class GNNModel(nn.Module):
    def __init__(self, num_securities, num_features_per_security, num_classes):
        super(GNNModel, self).__init__()
        self.num_securities = num_securities
        self.gcn_layers = nn.ModuleList([nn.Linear(num_features_per_security, 64) for _ in range(num_securities)])
        self.attention = nn.MultiheadAttention(embed_dim=64, num_heads=4)
        self.fc = nn.Linear(64, num_classes)
    def forward(self, x):
        # x shape: (batch_size, window_size, num_features)
        batch_size = x.size(0)
        x = x[:, -1, :]  # Use the last time step
        x = x.view(batch_size, self.num_securities, -1)  # (batch_size, num_securities, num_features_per_security)
        node_embeddings = []
        for i in range(self.num_securities):
            h = self.gcn_layers[i](x[:, i, :])
            node_embeddings.append(h)
        h = torch.stack(node_embeddings, dim=0)  # (num_securities, batch_size, 64)
        attn_output, _ = self.attention(h, h, h)
        attn_output = attn_output.permute(1, 0, 2)  # (batch_size, num_securities, 64)
        out = self.fc(attn_output)  # (batch_size, num_securities, num_classes)
        return out

# Transformer Model
class TransformerModel(nn.Module):
    def __init__(self, num_features, num_securities, num_classes):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Linear(num_features, 128)
        self.pos_encoder = PositionalEncoding(128)
        encoder_layers = nn.TransformerEncoderLayer(d_model=128, nhead=8)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=2)
        self.decoder = nn.Linear(128, num_securities * num_classes)
    def forward(self, x):
        x = x.view(x.size(0), x.size(1), -1)  # (batch_size, window_size, num_features)
        x = self.embedding(x)
        x = self.pos_encoder(x)
        x = x.permute(1, 0, 2)  # Transformer expects (sequence_length, batch_size, embedding_dim)
        output = self.transformer_encoder(x)
        output = output[-1, :, :]  # Use the last output
        output = self.decoder(output)
        output = output.view(-1, num_securities, num_classes)
        return output

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        if d_model % 2 == 1:
            # If odd, last dimension is sine
            pe[:, 1::2] = torch.sin(position * div_term)
        else:
            pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(1)
        self.register_buffer('pe', pe)
    def forward(self, x):
        x = x + self.pe[:x.size(0)]
        return x

# Mamba Model (Assuming Mamba refers to a model combining multiple methods)
class MambaModel(nn.Module):
    def __init__(self, num_features, num_securities, num_classes):
        super(MambaModel, self).__init__()
        # Combine CNN and LSTM
        self.cnn = nn.Conv1d(in_channels=num_features, out_channels=64, kernel_size=3)
        self.lstm = nn.LSTM(input_size=64, hidden_size=128, num_layers=2, batch_first=True)
        self.fc = nn.Linear(128, num_securities * num_classes)
    def forward(self, x):
        x = x.permute(0, 2, 1)  # (batch_size, num_features, window_size)
        x = F.relu(self.cnn(x))
        x = x.permute(0, 2, 1)  # (batch_size, window_size, channels)
        h0 = torch.zeros(2, x.size(0), 128).to(x.device)
        c0 = torch.zeros(2, x.size(0), 128).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = out[:, -1, :]
        out = self.fc(out)
        out = out.view(-1, num_securities, num_classes)
        return out

# Liquid Neural Network Model
class LiquidNetModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_securities, num_classes):
        super(LiquidNetModel, self).__init__()
        self.rnn_cell = nn.RNNCell(input_size, hidden_size, nonlinearity='relu')
        self.fc = nn.Linear(hidden_size, num_securities * num_classes)
    def forward(self, x):
        h_t = torch.zeros(x.size(0), self.rnn_cell.hidden_size).to(x.device)
        for t in range(x.size(1)):
            h_t = self.rnn_cell(x[:, t, :], h_t)
        out = self.fc(h_t)
        out = out.view(-1, num_securities, num_classes)
        return out

# Hidden Markov Model (simplified implementation)
class HiddenMarkovModel(nn.Module):
    def __init__(self, num_states, num_securities, num_classes):
        super(HiddenMarkovModel, self).__init__()
        self.num_states = num_states
        self.start_prob = nn.Parameter(torch.randn(num_states))
        self.transition_prob = nn.Parameter(torch.randn(num_states, num_states))
        self.emission_prob = nn.Parameter(torch.randn(num_states, num_classes))
    def forward(self, x):
        batch_size = x.size(0)
        # Simplified; in practice, you'd implement the forward algorithm
        out = torch.softmax(self.emission_prob, dim=1)
        out = out.unsqueeze(0).repeat(batch_size, self.num_securities, 1)
        return out

# Common training loop (updated for two datasets and model combination)
def train_model(model1, model2, train_loader, criterion, optimizer, num_epochs, device):
    model1 = model1.to(device)
    model2 = model2.to(device)
    for epoch in range(num_epochs):
        model1.train()
        model2.train()
        total_loss = 0
        for X1_batch, X2_batch, y_batch in train_loader:
            X1_batch = X1_batch.to(device)
            X2_batch = X2_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            outputs1 = model1(X1_batch)
            outputs2 = model2(X2_batch)
            # Combine outputs
            outputs = torch.cat((outputs1, outputs2), dim=2)  # Concatenate over feature dimension
            # Final linear layer to predict y
            final_output = outputs.mean(dim=2)
            loss = 0
            for i in range(num_securities):
                loss += criterion(final_output[:, i], y_batch[:, i])
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_loader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

# Common evaluation function
def evaluate_model(model1, model2, test_loader, device):
    model1 = model1.to(device)
    model2 = model2.to(device)
    model1.eval()
    model2.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for X1_batch, X2_batch, y_batch in test_loader:
            X1_batch = X1_batch.to(device)
            X2_batch = X2_batch.to(device)
            y_batch = y_batch.to(device)
            outputs1 = model1(X1_batch)
            outputs2 = model2(X2_batch)
            outputs = torch.cat((outputs1, outputs2), dim=2)
            final_output = outputs.mean(dim=2)
            _, predicted = torch.max(final_output.data, 2)
            total += y_batch.numel()
            correct += (predicted == y_batch).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy on test set: {accuracy:.2f}%')

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
input_size = num_features
second_input_size = second_X_data.shape[1]
num_epochs = 5
learning_rate = 0.001

# Instantiate models for primary and second datasets
print("Training Combined Model with Transformer and LSTM...")

# Primary model
primary_model = TransformerModel(num_features=num_features, num_securities=num_securities, num_classes=num_classes)
# Secondary model
secondary_model = LSTMModel(input_size=second_input_size, hidden_size=128, num_layers=2, num_securities=num_securities, num_classes=num_classes)

# Combine the parameters of both models
params = list(primary_model.parameters()) + list(secondary_model.parameters())
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params, lr=learning_rate)

train_model(primary_model, secondary_model, train_loader, criterion, optimizer, num_epochs, device)
evaluate_model(primary_model, secondary_model, test_loader, device)

# Similarly, you can instantiate and train other combinations

Training Combined Model with Transformer and LSTM...




RuntimeError: Expected floating point type for target with class probabilities, got Long

In [3]:
# ... [Previous code remains the same up to model definitions]

# Instantiate models for primary and second datasets
print("Training Combined Model with Transformer and LSTM...")

# Primary model
primary_model = TransformerModel(num_features=num_features, num_securities=num_securities, num_classes=num_classes)
# Secondary model
secondary_model = LSTMModel(input_size=second_input_size, hidden_size=128, num_layers=2, num_securities=num_securities, num_classes=num_classes)

# Define final linear layer
class FinalModel(nn.Module):
    def __init__(self, num_classes, num_securities):
        super(FinalModel, self).__init__()
        self.fc = nn.Linear(num_classes * 2, num_classes)
        self.num_securities = num_securities
    def forward(self, outputs1, outputs2):
        # Concatenate over class dimension
        outputs = torch.cat((outputs1, outputs2), dim=2)
        # Pass through linear layer
        batch_size = outputs.size(0)
        outputs = outputs.view(-1, outputs.size(2))
        final_output = self.fc(outputs)
        final_output = final_output.view(batch_size, self.num_securities, -1)
        return final_output

final_model = FinalModel(num_classes=num_classes, num_securities=num_securities)

# Combine the parameters of all models
params = list(primary_model.parameters()) + list(secondary_model.parameters()) + list(final_model.parameters())
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params, lr=learning_rate)

# Updated training function
def train_model(model1, model2, final_model, train_loader, criterion, optimizer, num_epochs, device):
    model1 = model1.to(device)
    model2 = model2.to(device)
    final_model = final_model.to(device)
    for epoch in range(num_epochs):
        model1.train()
        model2.train()
        final_model.train()
        total_loss = 0
        for X1_batch, X2_batch, y_batch in train_loader:
            X1_batch = X1_batch.to(device)
            X2_batch = X2_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            outputs1 = model1(X1_batch)
            outputs2 = model2(X2_batch)
            final_output = final_model(outputs1, outputs2)
            loss = 0
            for i in range(num_securities):
                loss += criterion(final_output[:, i, :], y_batch[:, i])
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_loader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

# Updated evaluation function
def evaluate_model(model1, model2, final_model, test_loader, device):
    model1 = model1.to(device)
    model2 = model2.to(device)
    final_model = final_model.to(device)
    model1.eval()
    model2.eval()
    final_model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for X1_batch, X2_batch, y_batch in test_loader:
            X1_batch = X1_batch.to(device)
            X2_batch = X2_batch.to(device)
            y_batch = y_batch.to(device)
            outputs1 = model1(X1_batch)
            outputs2 = model2(X2_batch)
            final_output = final_model(outputs1, outputs2)
            _, predicted = torch.max(final_output.data, 2)
            total += y_batch.numel()
            correct += (predicted == y_batch).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy on test set: {accuracy:.2f}%')

# Start training
train_model(primary_model, secondary_model, final_model, train_loader, criterion, optimizer, num_epochs, device)
evaluate_model(primary_model, secondary_model, final_model, test_loader, device)

Training Combined Model with Transformer and LSTM...




Epoch [1/5], Loss: 11.0111
Epoch [2/5], Loss: 10.9914
Epoch [3/5], Loss: 10.9887


KeyboardInterrupt: 

In [4]:
!pip install mamba-ssm[dev]

Collecting mamba-ssm[dev]
  Downloading mamba_ssm-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.4/85.4 kB[0m [31m959.6 kB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting ninja (from mamba-ssm[dev])
  Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl.metadata (5.3 kB)
Collecting triton (from mamba-ssm[dev])
  Downloading triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB)
Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.2/307.2 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (209.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.4/209.4 MB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for

In [22]:
import torch.nn.functional as F

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch
import torch.nn as nn

class HiddenMarkovModel(nn.Module):
    def __init__(self, num_states, num_securities, num_classes, num_features_per_security):
        super(HiddenMarkovModel, self).__init__()
        self.num_states = num_states
        self.num_securities = num_securities
        self.num_classes = num_classes
        self.num_features_per_security = num_features_per_security

        # Define parameters for start, transition, and emission probabilities
        self.start_logits = nn.Parameter(torch.randn(num_securities, num_states))
        self.trans_logits = nn.Parameter(torch.randn(num_securities, num_states, num_states))
        self.emission_logits = nn.Parameter(torch.randn(num_securities, num_states, num_classes))

    def forward(self, x):
        # x shape: (batch_size, window_size, num_features)
        batch_size, window_size, num_features = x.size()
        device = x.device

        # Reshape x to separate securities
        x = x.view(batch_size, window_size, self.num_securities, self.num_features_per_security)
        # For simplicity, we'll use the last time step
        x_last = x[:, -1, :, :]  # Shape: (batch_size, num_securities, num_features_per_security)

        # We'll compute the logits for each class per security
        # For this simplified HMM, we'll combine start and emission logits to produce class logits
        outputs = []
        for s in range(self.num_securities):
            # Compute the log probabilities for each class
            # We use log-sum-exp over the states to compute the logits for each class
            # start_logits[s]: (num_states,)
            # emission_logits[s]: (num_states, num_classes)
            start_log_probs = self.start_logits[s]  # (num_states,)
            emission_log_probs = self.emission_logits[s]  # (num_states, num_classes)

            # Compute logits for classes
            logits = torch.logsumexp(start_log_probs.unsqueeze(1) + emission_log_probs, dim=0)  # (num_classes,)
            outputs.append(logits.unsqueeze(0))  # (1, num_classes)

        # Stack outputs over securities and expand to batch size
        outputs = torch.cat(outputs, dim=0)  # (num_securities, num_classes)
        outputs = outputs.unsqueeze(0).expand(batch_size, -1, -1)  # (batch_size, num_securities, num_classes)

        return outputs  # Shape: (batch_size, num_securities, num_classes)


# Adjusted training loop for HMM with continuous emissions
def train_hmm_model(model, train_loader, criterion, optimizer, num_epochs, device):
    model = model.to(device)
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for _, _, y_batch in train_loader:
            y_batch = y_batch.to(device)  # Shape: (batch_size, num_securities)
            y_batch = y_batch.unsqueeze(1).expand(-1, window_size, -1)  # (batch_size, window_size, num_securities)

            optimizer.zero_grad()
            outputs = model(y_batch)  # outputs shape: (batch_size, window_size, num_securities)
            loss = criterion(outputs.view(-1), torch.zeros_like(outputs.view(-1)))  # Dummy target
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
        avg_loss = total_loss / len(train_loader)
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}')
# Mamba Model Integration
# Since you will install the Mamba package, we'll import and use it directly
try:
    from mamba_ssm.modules.mamba2 import Mamba2
except ImportError:
    raise ImportError("Please install the mamba_ssm package to use the MambaModel.")

class MambaModel(nn.Module):
    def __init__(self, num_features, num_securities, num_classes):
        super(MambaModel, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        self.mamba = Mamba2(
            d_model=num_features,
            d_state=64,
            d_conv=4,
            expand=2,
        )

        # Final linear layer to map to the desired output size
        self.fc = nn.Linear(num_features, num_securities * num_classes)

    def forward(self, x):
        # x shape: (batch_size, window_size, num_features)
        batch_size = x.size(0)
        x = x.view(batch_size, -1, x.size(-1))  # Ensure proper shape
        y = self.mamba(x)
        y = self.fc(y[:, -1, :])  # Use the last time step
        y = y.view(batch_size, self.num_securities, self.num_classes)
        return y



In [26]:
def train_single_model(model, train_loader, criterion, optimizer, num_epochs, device):
    model = model.to(device)
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for X_batch, _, y_batch in train_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = 0
            for i in range(num_securities):
                loss += criterion(outputs[:, i, :], y_batch[:, i])
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_loader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

def evaluate_single_model(model, test_loader, device):
    model = model.to(device)
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for X_batch, _, y_batch in test_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            outputs = model(X_batch)
            _, predicted = torch.max(outputs.data, 2)
            total += y_batch.numel()
            correct += (predicted == y_batch).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy on test set: {accuracy:.2f}%')

In [30]:
# Instantiate and train Hidden Markov Model
print("\nTraining Hidden Markov Model...")
num_states = 5  # Number of hidden states

hmm_model = HiddenMarkovModel(
    num_states=num_states,
    num_securities=num_securities,
    num_classes=num_classes,
    num_features_per_security=num_features//num_securities,
)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(hmm_model.parameters(), lr=learning_rate)

train_single_model(hmm_model, train_loader, criterion, optimizer, num_epochs, device)
evaluate_single_model(hmm_model, test_loader, device)


Training Hidden Markov Model...
Epoch [1/5], Loss: 11.9516
Epoch [2/5], Loss: 11.0472
Epoch [3/5], Loss: 10.9873
Epoch [4/5], Loss: 10.9865
Epoch [5/5], Loss: 10.9865
Accuracy on test set: 33.44%


In [37]:
class LSTMModelData2(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_securities, num_classes):
        super(LSTMModelData2, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_securities * num_classes)

    def forward(self, x):
        batch_size = x.size(0)
        h0 = torch.zeros(self.lstm.num_layers, batch_size, self.lstm.hidden_size).to(x.device)
        c0 = torch.zeros(self.lstm.num_layers, batch_size, self.lstm.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])  # Use the last time step
        out = out.view(batch_size, self.num_securities, self.num_classes)
        return out

In [35]:
class TimeSeriesDataset(Dataset):
    def __init__(self, X1, X2, y):
        self.X1 = X1.float()
        self.X2 = X2.float()
        self.y = y.long()
    def __len__(self):
        return len(self.X1)
    def __getitem__(self, idx):
        return self.X1[idx], self.X2[idx], self.y[idx]

# DataLoaders
batch_size = 64

train_dataset = TimeSeriesDataset(X1_train_tensor, X2_train_tensor, y_train_tensor)
test_dataset = TimeSeriesDataset(X1_test_tensor, X2_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [39]:
class CombinedModel(nn.Module):
    def __init__(self, model1, model2):
        super(CombinedModel, self).__init__()
        self.model1 = model1  # HMM for data1
        self.model2 = model2  # LSTM for data2
        self.num_securities = model1.num_securities
        self.num_classes = model1.num_classes
        # Final linear layer to combine the outputs
        self.fc = nn.Linear(self.num_classes * 2, self.num_classes)

    def forward(self, x1, x2):
        # x1: Input for data1 (HMM)
        # x2: Input for data2 (LSTM)
        outputs1 = self.model1(x1)  # (batch_size, num_securities, num_classes)
        outputs2 = self.model2(x2)  # (batch_size, num_securities, num_classes)
        # Concatenate the outputs over the class dimension
        combined_outputs = torch.cat((outputs1, outputs2), dim=2)  # (batch_size, num_securities, num_classes * 2)
        # Pass through the final linear layer
        final_outputs = self.fc(combined_outputs)  # (batch_size, num_securities, num_classes)
        return final_outputs

In [45]:
def train_combined_model(model, train_loader, criterion, optimizer, num_epochs, device):
    model = model.to(device)
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for X1_batch, X2_batch, y_batch in train_loader:
            X1_batch = X1_batch.to(device)
            X2_batch = X2_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X1_batch, X2_batch)
            loss = 0
            for i in range(model.num_securities):
                loss += criterion(outputs[:, i, :], y_batch[:, i])
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_loader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')
def evaluate_combined_model(model, test_loader, device):
    model = model.to(device)
    model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for X1_batch, X2_batch, y_batch in test_loader:
            X1_batch = X1_batch.to(device)
            X2_batch = X2_batch.to(device)
            y_batch = y_batch.to(device)
            outputs = model(X1_batch, X2_batch)
            _, predicted = torch.max(outputs.data, 2)
            total += y_batch.numel()
            correct += (predicted == y_batch).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy on test set: {accuracy:.2f}%')

In [46]:
num_states = 5  # Number of hidden states
hmm_model = HiddenMarkovModel(
    num_states=num_states,
    num_securities=num_securities,
    num_classes=num_classes,
    num_features_per_security=num_features_per_security,
)

# LSTM for data2
input_size_data2 = X2_train_tensor.size(2)  # Number of features for data2
hidden_size = 128
num_layers = 2
lstm_model_data2 = LSTMModelData2(
    input_size=input_size_data2,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_securities=num_securities,
    num_classes=num_classes,
)

# Combined model
combined_model = CombinedModel(hmm_model, lstm_model_data2)

# Combine parameters for optimization
params = list(combined_model.parameters())
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params, lr=learning_rate)

# Train the combined model
train_combined_model(combined_model, train_loader, criterion, optimizer, num_epochs, device)
evaluate_combined_model(combined_model, test_loader, device)

Accuracy on test set: 33.37%
