<a href="https://colab.research.google.com/github/thegallier/timeseries/blob/main/timeseries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
import torch.nn.functional as F
from sklearn.metrics import cohen_kappa_score
from torch.utils.tensorboard import SummaryWriter
from itertools import product
import time

# Parameters
num_timesteps = 5000  # Reduced for practical purposes
num_securities = 10  # Number of different securities
num_features_per_security = 4  # Features per security
num_classes = 3  # Number of target classes
num_features = num_securities * num_features_per_security  # Total number of features

# Generate timestamps
timestamps = np.arange(num_timesteps)

# Generate random data for the primary dataset (X_data) and labels (y_data)
X_data = np.random.rand(num_timesteps, num_features).astype(np.float32)  # Random features
y_data = np.random.randint(0, num_classes, size=(num_timesteps, num_securities)).astype(np.int64)  # Random labels

# Generate the second dataset with 3 categorical strings and 2 continuous floats per timestamp
str_columns = ['str1', 'str2', 'str3']
float_columns = ['float1', 'float2']
second_dataset = {
    'timestamp': timestamps,
    'str1': np.random.choice(['A', 'B', 'C'], num_timesteps),
    'str2': np.random.choice(['D', 'E', 'F'], num_timesteps),
    'str3': np.random.choice(['G', 'H', 'I'], num_timesteps),
    'float1': np.random.rand(num_timesteps),
    'float2': np.random.rand(num_timesteps),
}

# Encode categorical string columns using LabelEncoder
label_encoders = {}
for col in str_columns:
    le = LabelEncoder()
    second_dataset[col] = le.fit_transform(second_dataset[col])
    label_encoders[col] = le

# Combine all features from the second dataset into a single array
second_X_data = np.column_stack([second_dataset[col] for col in str_columns + float_columns]).astype(np.float32)

# Apply Min-Max scaling to both datasets
scaler_X = MinMaxScaler()
X_data = scaler_X.fit_transform(X_data)

scaler_second_X = MinMaxScaler()
second_X_data = scaler_second_X.fit_transform(second_X_data)

def add_positional_encoding(X, timestamps, option='shared'):
    """
    Add positional encoding to the feature matrix.

    Parameters:
    - X (ndarray): Feature matrix.
    - timestamps (ndarray): Array of timestamps.
    - option (str): 'shared' or 'per_security'.

    Returns:
    - X_pe (ndarray): X with positional encoding added.
    """
    if option == 'shared':
        # Shared positional encoding across all features
        pe = np.sin(timestamps[:, None] / 10000 ** (np.arange(X.shape[1]) / X.shape[1]))
        X_pe = X + pe.astype(np.float32)
    elif option == 'per_security':
        # Separate positional encoding for each security
        pe_list = []
        for i in range(num_securities):
            pe = np.sin(timestamps[:, None] / 10000 ** (np.arange(num_features_per_security) / num_features_per_security))
            pe_list.append(pe)
        pe_concat = np.hstack(pe_list)
        X_pe = X + pe_concat.astype(np.float32)
    else:
        X_pe = X  # No positional encoding
    return X_pe

# Apply positional encoding
positional_encoding_option = 'shared'  # 'shared' or 'per_security'
X_data = add_positional_encoding(X_data, timestamps, positional_encoding_option)

def create_windows(X1, X2, y, window_size, horizon):
    """
    Create sliding windows for time series data.

    Parameters:
    - X1 (ndarray): Primary dataset features.
    - X2 (ndarray): Secondary dataset features.
    - y (ndarray): Target variable.
    - window_size (int): Size of the window.
    - horizon (int): Prediction horizon.

    Returns:
    - Tuple[np.ndarray, np.ndarray, np.ndarray]: Tuples of windows for X1, X2, and y.
    """
    X1_windows = []
    X2_windows = []
    y_windows = []
    for i in range(len(X1) - window_size - horizon + 1):
        X1_windows.append(X1[i:i+window_size])
        X2_windows.append(X2[i:i+window_size])
        y_windows.append(y[i+window_size+horizon-1])
    return np.array(X1_windows), np.array(X2_windows), np.array(y_windows)

# Define window sizes
window_size = 20
horizon = 1

# Create windows
X1_windows, X2_windows, y_windows = create_windows(X_data, second_X_data, y_data, window_size, horizon)

# Split into train and test sets
train_ratio = 0.8
train_size = int(len(X1_windows) * train_ratio)

X1_train = X1_windows[:train_size]
X2_train = X2_windows[:train_size]
y_train = y_windows[:train_size]

X1_test = X1_windows[train_size:]
X2_test = X2_windows[train_size:]
y_test = y_windows[train_size:]

# Convert to PyTorch tensors
X1_train_tensor = torch.tensor(X1_train)
X2_train_tensor = torch.tensor(X2_train)
y_train_tensor = torch.tensor(y_train)

X1_test_tensor = torch.tensor(X1_test)
X2_test_tensor = torch.tensor(X2_test)
y_test_tensor = torch.tensor(y_test)

class TimeSeriesDataset(Dataset):
    """
    Custom Dataset for time series data with two feature sets.

    Parameters:
    - X1 (torch.Tensor): Primary dataset features.
    - X2 (torch.Tensor): Secondary dataset features.
    - y (torch.Tensor): Target variable.
    """
    def __init__(self, X1, X2, y):
        self.X1 = X1.float()
        self.X2 = X2.float()
        self.y = y.long()
    def __len__(self):
        return len(self.X1)
    def __getitem__(self, idx):
        return self.X1[idx], self.X2[idx], self.y[idx]

# DataLoaders
batch_size = 64

train_dataset = TimeSeriesDataset(X1_train_tensor, X2_train_tensor, y_train_tensor)
test_dataset = TimeSeriesDataset(X1_test_tensor, X2_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

class MatrixRegressionModel(nn.Module):
    """
    Matrix Regression Model: Y = A * X * B

    Advantages:
    - Simple and interpretable.
    - Fast training.

    Disadvantages:
    - Limited in capturing complex patterns.
    """
    def __init__(self, num_securities, num_features):
        super(MatrixRegressionModel, self).__init__()
        self.A = nn.Parameter(torch.randn(1, num_securities))
        self.B = nn.Parameter(torch.randn(num_features, num_classes))
    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten over time
        out = self.A @ x @ self.B  # Shape: (batch_size, num_classes)
        out = out.view(-1, num_securities, num_classes)
        return out

class LogisticRegressionModel(nn.Module):
    """
    Logistic Regression Model.

    Advantages:
    - Simple and interpretable.
    - Good baseline model.

    Disadvantages:
    - Assumes linear relationship.
    - May underfit complex data.
    """
    def __init__(self, input_size, num_securities, num_classes):
        super(LogisticRegressionModel, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        self.linear = nn.Linear(input_size, num_securities * num_classes)
    def forward(self, x):
        x = x.view(x.size(0), -1)
        out = self.linear(x)
        out = out.view(-1, self.num_securities, self.num_classes)
        return out

class CNNModel(nn.Module):
    """
    Convolutional Neural Network Model.

    Advantages:
    - Captures local temporal patterns.
    - Efficient computation.

    Disadvantages:
    - Limited in capturing long-term dependencies.
    """
    def __init__(self, num_features, num_securities, num_classes):
        super(CNNModel, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        self.conv1 = nn.Conv1d(in_channels=num_features, out_channels=64, kernel_size=3)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(128, num_securities * num_classes)
    def forward(self, x):
        x = x.permute(0, 2, 1)  # (batch_size, num_features, window_size)
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = torch.mean(x, dim=2)  # Global average pooling
        x = self.fc(x)
        out = x.view(-1, self.num_securities, self.num_classes)
        return out

class LSTMModel(nn.Module):
    """
    Long Short-Term Memory Model.

    Advantages:
    - Captures long-term dependencies.
    - Suitable for sequential data.

    Disadvantages:
    - Computationally intensive.
    - Prone to overfitting.
    """
    def __init__(self, input_size, hidden_size, num_layers, num_securities, num_classes):
        super(LSTMModel, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_securities * num_classes)
    def forward(self, x):
        h0 = torch.zeros(self.lstm.num_layers, x.size(0), self.lstm.hidden_size).to(x.device)
        c0 = torch.zeros(self.lstm.num_layers, x.size(0), self.lstm.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = out[:, -1, :]  # Last time step
        out = self.fc(out)
        out = out.view(-1, self.num_securities, self.num_classes)
        return out

class TransformerModel(nn.Module):
    """
    Transformer Model.

    Advantages:
    - Captures global dependencies.
    - Parallel computation.

    Disadvantages:
    - Requires large datasets.
    - Computationally intensive.
    """
    def __init__(self, num_features, num_securities, num_classes):
        super(TransformerModel, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        self.embedding = nn.Linear(num_features, 128)
        self.pos_encoder = PositionalEncoding(128)
        encoder_layers = nn.TransformerEncoderLayer(d_model=128, nhead=8)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=2)
        self.decoder = nn.Linear(128, num_securities * num_classes)
    def forward(self, x):
        x = x.view(x.size(0), x.size(1), -1)  # (batch_size, window_size, num_features)
        x = self.embedding(x)
        x = self.pos_encoder(x)
        x = x.permute(1, 0, 2)  # (sequence_length, batch_size, embedding_dim)
        output = self.transformer_encoder(x)
        output = output[-1, :, :]  # Last output
        output = self.decoder(output)
        output = output.view(-1, self.num_securities, self.num_classes)
        return output

class PositionalEncoding(nn.Module):
    """
    Positional Encoding for Transformer.

    Adds positional information to the embeddings.
    """
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        if d_model % 2 == 1:
            pe[:, 1::2] = torch.sin(position * div_term)
        else:
            pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(1)
        self.register_buffer('pe', pe)
    def forward(self, x):
        x = x + self.pe[:x.size(0)]
        return x

class SimpleRNNModel(nn.Module):
    """
    Simple Recurrent Neural Network Model.

    Advantages:
    - Captures sequential dependencies.
    - Simpler than LSTM.

    Disadvantages:
    - Struggles with long-term dependencies.
    - May suffer from vanishing gradients.
    """
    def __init__(self, input_size, hidden_size, num_securities, num_classes):
        super(SimpleRNNModel, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_securities * num_classes)
    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), self.rnn.hidden_size).to(x.device)
        out, _ = self.rnn(x, h0)
        out = out[:, -1, :]  # Last time step
        out = self.fc(out)
        out = out.view(-1, self.num_securities, self.num_classes)
        return out

class CombinedModel(nn.Module):
    """
    Combined Model: Merges outputs from two models.

    Advantages:
    - Leverages multiple data sources.
    - Potentially better performance.

    Disadvantages:
    - More complex.
    - Computationally intensive.
    """
    def __init__(self, model1, model2, num_classes, num_securities):
        super(CombinedModel, self).__init__()
        self.model1 = model1
        self.model2 = model2
        self.fc = nn.Linear(num_classes * 2, num_classes)
        self.num_securities = num_securities
        self.num_classes = num_classes
    def forward(self, x1, x2):
        outputs1 = self.model1(x1)
        outputs2 = self.model2(x2)
        outputs = torch.cat((outputs1, outputs2), dim=2)
        batch_size = outputs.size(0)
        outputs = outputs.view(-1, outputs.size(2))
        final_output = self.fc(outputs)
        final_output = final_output.view(batch_size, self.num_securities, -1)
        return final_output

def train_model(model, train_loader, criterion, optimizer, num_epochs, device, writer):
    """
    Training loop for the model.

    Parameters:
    - model (nn.Module): The model to train.
    - train_loader (DataLoader): DataLoader for training data.
    - criterion (nn.Module): Loss function.
    - optimizer (torch.optim.Optimizer): Optimizer.
    - num_epochs (int): Number of epochs.
    - device (torch.device): Computation device.
    - writer (SummaryWriter): TensorBoard SummaryWriter.
    """
    model = model.to(device)
    global_step = 0
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for X1_batch, X2_batch, y_batch in train_loader:
            X1_batch = X1_batch.to(device)
            X2_batch = X2_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X1_batch, X2_batch)
            loss = 0
            for i in range(num_securities):
                loss += criterion(outputs[:, i, :], y_batch[:, i])
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            writer.add_scalar('Training Loss', loss.item(), global_step)
            global_step += 1
        avg_loss = total_loss / len(train_loader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')
        # Save checkpoint
        torch.save(model.state_dict(), f'checkpoint_epoch_{epoch+1}.pth')

def evaluate_model(model, test_loader, device):
    """
    Evaluate the model on the test set.

    Parameters:
    - model (nn.Module): The model to evaluate.
    - test_loader (DataLoader): DataLoader for test data.
    - device (torch.device): Computation device.

    Returns:
    - float: Cohen's kappa score.
    """
    model = model.to(device)
    model.eval()
    total = 0
    correct = 0
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for X1_batch, X2_batch, y_batch in test_loader:
            X1_batch = X1_batch.to(device)
            X2_batch = X2_batch.to(device)
            y_batch = y_batch.to(device)
            outputs = model(X1_batch, X2_batch)
            _, predicted = torch.max(outputs.data, 2)
            total += y_batch.numel()
            correct += (predicted == y_batch).sum().item()
            all_preds.extend(predicted.cpu().numpy().flatten())
            all_targets.extend(y_batch.cpu().numpy().flatten())
    accuracy = 100 * correct / total
    kappa = cohen_kappa_score(all_targets, all_preds)
    print(f'Accuracy on test set: {accuracy:.2f}%')
    print(f"Cohen's Kappa: {kappa:.4f}")
    return kappa

def hyperparameter_tuning(hyperparams, train_loader, test_loader, device):
    """
    Perform hyperparameter tuning.

    Parameters:
    - hyperparams (dict): Dictionary of hyperparameters to try.
    - train_loader (DataLoader): DataLoader for training data.
    - test_loader (DataLoader): DataLoader for test data.
    - device (torch.device): Computation device.
    """
    best_kappa = -1
    best_params = None
    for params in product(*hyperparams.values()):
        param_dict = dict(zip(hyperparams.keys(), params))
        print(f"Trying hyperparameters: {param_dict}")
        model1 = TransformerModel(num_features=num_features, num_securities=num_securities, num_classes=num_classes)
        model2 = LSTMModel(input_size=second_X_data.shape[1], hidden_size=param_dict['hidden_size'],
                           num_layers=param_dict['num_layers'], num_securities=num_securities, num_classes=num_classes)
        combined_model = CombinedModel(model1, model2, num_classes, num_securities)
        optimizer = torch.optim.Adam(combined_model.parameters(), lr=param_dict['learning_rate'])
        criterion = nn.CrossEntropyLoss()
        writer = SummaryWriter()
        train_model(combined_model, train_loader, criterion, optimizer, param_dict['num_epochs'], device, writer)
        kappa = evaluate_model(combined_model, test_loader, device)
        writer.close()
        if kappa > best_kappa:
            best_kappa = kappa
            best_params = param_dict
    print(f"Best Cohen's Kappa: {best_kappa:.4f} with parameters: {best_params}")

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters for tuning
hyperparams = {
    'hidden_size': [64, 128],
    'num_layers': [1, 2],
    'learning_rate': [0.001, 0.0001],
    'num_epochs': [5]
}

# Start hyperparameter tuning
hyperparameter_tuning(hyperparams, train_loader, test_loader, device)

# Example 1: Combine TransformerModel and LSTMModel
print("\nTraining Combined Model 1: TransformerModel + LSTMModel")
model1 = TransformerModel(num_features=num_features, num_securities=num_securities, num_classes=num_classes)
model2 = LSTMModel(input_size=second_X_data.shape[1], hidden_size=128, num_layers=2, num_securities=num_securities, num_classes=num_classes)
combined_model = CombinedModel(model1, model2, num_classes, num_securities)
optimizer = torch.optim.Adam(combined_model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter()
train_model(combined_model, train_loader, criterion, optimizer, num_epochs=5, device=device, writer=writer)
evaluate_model(combined_model, test_loader, device)
writer.close()

# Example 2: Combine CNNModel and SimpleRNNModel
print("\nTraining Combined Model 2: CNNModel + SimpleRNNModel")
model1 = CNNModel(num_features=num_features, num_securities=num_securities, num_classes=num_classes)
model2 = SimpleRNNModel(input_size=second_X_data.shape[1], hidden_size=64, num_securities=num_securities, num_classes=num_classes)
combined_model = CombinedModel(model1, model2, num_classes, num_securities)
optimizer = torch.optim.Adam(combined_model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter()
train_model(combined_model, train_loader, criterion, optimizer, num_epochs=5, device=device, writer=writer)
evaluate_model(combined_model, test_loader, device)
writer.close()

# Example 3: Combine LogisticRegressionModel and LSTMModel
print("\nTraining Combined Model 3: LogisticRegressionModel + LSTMModel")
model1 = LogisticRegressionModel(input_size=num_features*window_size, num_securities=num_securities, num_classes=num_classes)
model2 = LSTMModel(input_size=second_X_data.shape[1], hidden_size=128, num_layers=1, num_securities=num_securities, num_classes=num_classes)
combined_model = CombinedModel(model1, model2, num_classes, num_securities)
optimizer = torch.optim.Adam(combined_model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter()
train_model(combined_model, train_loader, criterion, optimizer, num_epochs=5, device=device, writer=writer)
evaluate_model(combined_model, test_loader, device)
writer.close()

Trying hyperparameters: {'hidden_size': 64, 'num_layers': 1, 'learning_rate': 0.001, 'num_epochs': 5}




KeyboardInterrupt: 

In [None]:
"""
Financial Time Series Forecasting Package
---------------------------------------
A comprehensive package for financial time series forecasting using various deep learning models.

This package includes:
- Multiple model architectures (AutoRegressive, LSTM, TCN, ProphetLike)
- Data validation and preprocessing
- Model checkpointing and logging
- Configuration management
- Comprehensive testing suite

Author: Assistant
Date: 2024-10-20
Version: 1.0.0
"""

import logging
import yaml
import torch
import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pathlib import Path
from dataclasses import dataclass
from typing import Optional, List, Dict, Any, Tuple, Union
from datetime import datetime, timedelta
from pytorch_lightning.callbacks import EarlyStopping
from torch.utils.data import DataLoader
from pytorch_forecasting import TimeSeriesDataSet
from pytorch_forecasting.metrics import RMSE, QuantileLoss
from scipy import stats

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

@dataclass
class ModelConfig:
    """Configuration class for model parameters.

    Attributes:
        model_name (str): Name of the model to use
        max_epochs (int): Maximum number of training epochs
        batch_size (int): Batch size for training
        learning_rate (float): Learning rate for optimization
        hidden_size (int): Number of hidden units in layers
        dropout (float): Dropout rate for regularization
        max_encoder_length (int): Maximum length of encoder sequence
        max_prediction_length (int): Maximum length of prediction sequence
    """
    model_name: str
    max_epochs: int = 30
    batch_size: int = 64
    learning_rate: float = 0.03
    hidden_size: int = 16
    dropout: float = 0.1
    max_encoder_length: int = 30
    max_prediction_length: int = 7

    @classmethod
    def from_yaml(cls, yaml_path: str) -> 'ModelConfig':
        """Load configuration from YAML file.

        Args:
            yaml_path: Path to YAML configuration file

        Returns:
            ModelConfig instance
        """
        with open(yaml_path, 'r') as f:
            config_dict = yaml.safe_load(f)
        return cls(**config_dict)

    def to_yaml(self, yaml_path: str) -> None:
        """Save configuration to YAML file.

        Args:
            yaml_path: Path to save configuration
        """
        with open(yaml_path, 'w') as f:
            yaml.dump(self.__dict__, f)

class DataValidator:
    """Data validation utilities."""

    @staticmethod
    def validate_dataframe(df: pd.DataFrame, required_columns: List[str]) -> bool:
        """Validate DataFrame structure and content.

        Args:
            df: Input DataFrame
            required_columns: List of required column names

        Returns:
            bool: True if validation passes

        Raises:
            ValueError: If validation fails
        """
        # Check for required columns
        missing_cols = set(required_columns) - set(df.columns)
        if missing_cols:
            raise ValueError(f"Missing required columns: {missing_cols}")

        # Check for null values
        null_cols = df.columns[df.isnull().any()].tolist()
        if null_cols:
            raise ValueError(f"Null values found in columns: {null_cols}")

        # Check for infinite values
        inf_cols = df.columns[np.isinf(df.select_dtypes(include=np.number)).any()].tolist()
        if inf_cols:
            raise ValueError(f"Infinite values found in columns: {inf_cols}")

        return True

class ModelCheckpointer:
    """Model checkpointing utilities."""

    def __init__(self, checkpoint_dir: str):
        """Initialize checkpointer.

        Args:
            checkpoint_dir: Directory to store checkpoints
        """
        self.checkpoint_dir = Path(checkpoint_dir)
        self.checkpoint_dir.mkdir(exist_ok=True)

    def save_checkpoint(self, model: torch.nn.Module, epoch: int,
                       optimizer: torch.optim.Optimizer, loss: float) -> str:
        """Save model checkpoint.

        Args:
            model: PyTorch model
            epoch: Current epoch number
            optimizer: PyTorch optimizer
            loss: Current loss value

        Returns:
            str: Path to saved checkpoint
        """
        checkpoint_path = self.checkpoint_dir / f"checkpoint_epoch_{epoch}.pt"
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
        }, checkpoint_path)
        return str(checkpoint_path)

    def load_checkpoint(self, model: torch.nn.Module,
                       optimizer: torch.optim.Optimizer,
                       checkpoint_path: str) -> Tuple[int, float]:
        """Load model checkpoint.

        Args:
            model: PyTorch model
            optimizer: PyTorch optimizer
            checkpoint_path: Path to checkpoint file

        Returns:
            Tuple containing (epoch_number, loss_value)
        """
        checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        return checkpoint['epoch'], checkpoint['loss']

class AutoRegressiveModel(pl.LightningModule):
    """Simple Autoregressive model using a Linear layer."""

    def __init__(self, input_size: int, output_size: int = 1, **kwargs):
        """Initialize AR model.

        Args:
            input_size: Number of input features
            output_size: Number of output features
            **kwargs: Additional arguments
        """
        super().__init__()
        self.linear = torch.nn.Linear(input_size, output_size)
        self.save_hyperparameters()

    def forward(self, x: Dict[str, torch.Tensor]) -> torch.Tensor:
        """Forward pass.

        Args:
            x: Dictionary containing input tensors

        Returns:
            Model predictions
        """
        encoder_output = x["encoder_cont"][:, -1, :]  # Use last time step
        prediction = self.linear(encoder_output)
        return prediction

class LSTMForecaster(pl.LightningModule):
    """LSTM-based Forecaster."""

    def __init__(self, input_size: int, hidden_size: int, num_layers: int,
                 dropout: float = 0.0, **kwargs):
        """Initialize LSTM model.

        Args:
            input_size: Number of input features
            hidden_size: Number of hidden units
            num_layers: Number of LSTM layers
            dropout: Dropout rate
            **kwargs: Additional arguments
        """
        super().__init__()
        self.lstm = torch.nn.LSTM(
            input_size, hidden_size, num_layers,
            batch_first=True, dropout=dropout
        )
        self.output_layer = torch.nn.Linear(hidden_size, 1)
        self.save_hyperparameters()

    def forward(self, x: Dict[str, torch.Tensor]) -> torch.Tensor:
        """Forward pass."""
        encoder_input = x["encoder_cont"]
        output, (hidden, _) = self.lstm(encoder_input)
        prediction = self.output_layer(hidden[-1])
        return prediction

class TCNBlock(torch.nn.Module):
    """Temporal Convolutional Network block."""

    def __init__(self, n_inputs: int, n_outputs: int, kernel_size: int,
                 stride: int, dilation: int, padding: int, dropout: float = 0.2):
        """Initialize TCN block."""
        super().__init__()
        self.conv1 = torch.nn.Conv1d(
            n_inputs, n_outputs, kernel_size,
            stride=stride, padding=padding, dilation=dilation
        )
        self.chomp1 = torch.nn.functional.pad  # Remove future timesteps
        self.relu1 = torch.nn.ReLU()
        self.dropout1 = torch.nn.Dropout(dropout)

        self.net = torch.nn.Sequential(
            self.conv1,
            self.relu1,
            self.dropout1
        )

        self.downsample = torch.nn.Conv1d(n_inputs, n_outputs, 1) \
            if n_inputs != n_outputs else None
        self.relu = torch.nn.ReLU()
        self.init_weights()

    def init_weights(self):
        """Initialize network weights."""
        self.conv1.weight.data.normal_(0, 0.01)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 0.01)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward pass."""
        out = self.net(x)
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)

class TCNForecaster(pl.LightningModule):
    """Temporal Convolutional Network Forecaster."""

    def __init__(self, input_size: int, num_channels: List[int],
                 kernel_size: int = 2, dropout: float = 0.2, **kwargs):
        """Initialize TCN model."""
        super().__init__()
        layers = []
        num_levels = len(num_channels)
        for i in range(num_levels):
            dilation_size = 2 ** i
            in_channels = input_size if i == 0 else num_channels[i-1]
            out_channels = num_channels[i]
            layers += [TCNBlock(
                in_channels, out_channels, kernel_size, stride=1,
                dilation=dilation_size,
                padding=(kernel_size-1) * dilation_size,
                dropout=dropout
            )]

        self.network = torch.nn.Sequential(*layers)
        self.output_layer = torch.nn.Linear(num_channels[-1], 1)
        self.save_hyperparameters()

    def forward(self, x: Dict[str, torch.Tensor]) -> torch.Tensor:
        """Forward pass."""
        encoder_input = x["encoder_cont"].permute(0, 2, 1)
        output = self.network(encoder_input)
        output = output[:, :, -1]  # Take last time step
        prediction = self.output_layer(output)
        return prediction

class ProphetLikeModel(pl.LightningModule):
    """Prophet-like model capturing trend and seasonality."""

    def __init__(self, seasonality: int, **kwargs):
        """Initialize Prophet-like model."""
        super().__init__()
        self.trend = torch.nn.Linear(1, 1)
        self.seasonality = torch.nn.Linear(seasonality, 1)
        self.save_hyperparameters()

    def forward(self, x: Dict[str, torch.Tensor]) -> torch.Tensor:
        """Forward pass."""
        time = x["encoder_cont"][:, :, 0].unsqueeze(-1)
        trend = self.trend(time)

        seasonal_features = x["encoder_cont"][:, :, 1:self.hparams.seasonality+1]
        seasonality = self.seasonality(seasonal_features)

        prediction = trend + seasonality
        return prediction[:, -1, :]

class BaseWrapper:
    """Base wrapper for all models."""

    def __init__(self, config: ModelConfig):
        """Initialize wrapper."""
        self.config = config
        self.logger = logging.getLogger(f"{self.__class__.__name__}")
        self.validator = DataValidator()
        self.checkpointer = ModelCheckpointer("checkpoints")
        self.model = None
        self.training_metrics = []

    def prepare_data(self, df: pd.DataFrame, time_idx: str,
                    target: str, group_ids: List[str]) -> None:
        """Prepare data for training."""
        # Validate data
        self.validator.validate_dataframe(df, [time_idx, target] + group_ids)

        # Create TimeSeriesDataSet
        self.training = TimeSeriesDataSet(
            df[df[time_idx] <= df[time_idx].max() - self.config.max_prediction_length],
            time_idx=time_idx,
            target=target,
            group_ids=group_ids,
            max_encoder_length=self.config.max_encoder_length,
            max_prediction_length=self.config.max_prediction_length,
            static_categoricals=group_ids,
            time_varying_known_reals=[time_idx],
            time_varying_unknown_reals=[target],
            target_normalizer=None,
            allow_missings=True,
        )

        self.validation = TimeSeriesDataSet.from_dataset(
            self.training,
            df[df[time_idx] > df[time_idx].max() - self.config.max_prediction_length],
            min_prediction_idx=df[time_idx].max() - self.config.max_prediction_length + 1
        )

        self.train_dataloader = DataLoader(
            self.training, batch_size=self.config.batch_size, shuffle=True
        )
        self.val_dataloader = DataLoader(
            self.validation, batch_size=self.config.batch_size
        )

    def fit(self) -> None:
        """Train the model."""
        if self.model is None:
            raise ValueError("Model not initialized")

        trainer = pl.Trainer(
            max_epochs=self.config.max_epochs,
            callbacks=[
                EarlyStopping(
                    monitor="val_loss",
                    min_delta=1e-4,
                    patience=5,
                    verbose=False,
                    mode="min"
                )
            ],
            gradient_clip_val=0.1,
        )

        trainer.fit(
            self.model,
            train_dataloaders=self.train_dataloader,
            val_dataloaders=self.val_dataloader
        )

    def predict(self, df: pd.DataFrame) -> np.ndarray:
        """Make predictions."""
        if self.model is None:
            raise ValueError("Model not trained")

        self.model.eval()
        with torch.no_grad():
            predictions = self.model(
                self.validation.to_dataloader(df, batch_size=self.config.batch_size)
            )
        return predictions.numpy()

def generate_sample_data(n_assets: int = 5, n_days: int = 100,
                        seed: int = 42) -> pd.DataFrame:
    """Generate sample financial data for testing.

    Args:
        n_assets: Number of assets
        n_days: Number of days
        seed: Random seed

    Returns:
        DataFrame containing sample data
    """
    np.random.seed(seed)

    dates = [datetime.today() - timedelta(days=x) for x in range(n_days)]
    dates.reverse()

    data = {
        'time': dates * n_assets,
        'group_id': np

SyntaxError: incomplete input (<ipython-input-2-92dd88ec71fe>, line 423)

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import cohen_kappa_score
from torch.utils.tensorboard import SummaryWriter
from itertools import product
import time

# Parameters
num_timesteps = 5000  # Total number of time steps
num_securities = 10  # Number of different securities
num_features_per_security = 4  # Features per security
num_classes = 3  # Number of target classes
num_features = num_securities * num_features_per_security  # Total number of features

# Generate timestamps
timestamps = np.arange(num_timesteps)

# Generate random data for the primary dataset (X_data) and labels (y_data)
X_data = np.random.rand(num_timesteps, num_features).astype(np.float32)  # Random features
y_data = np.random.randint(0, num_classes, size=(num_timesteps, num_securities)).astype(np.int64)  # Random labels

# Generate the second dataset with 3 categorical strings and 2 continuous floats per timestamp
str_columns = ['str1', 'str2', 'str3']
float_columns = ['float1', 'float2']
second_dataset = {
    'timestamp': timestamps,
    'str1': np.random.choice(['A', 'B', 'C'], num_timesteps),
    'str2': np.random.choice(['D', 'E', 'F'], num_timesteps),
    'str3': np.random.choice(['G', 'H', 'I'], num_timesteps),
    'float1': np.random.rand(num_timesteps),
    'float2': np.random.rand(num_timesteps),
}

# Encode categorical string columns using LabelEncoder
label_encoders = {}
for col in str_columns:
    le = LabelEncoder()
    second_dataset[col] = le.fit_transform(second_dataset[col])
    label_encoders[col] = le

# Combine all features from the second dataset into a single array
second_X_data = np.column_stack([second_dataset[col] for col in str_columns + float_columns]).astype(np.float32)

# Apply Min-Max scaling to both datasets
scaler_X = MinMaxScaler()
X_data = scaler_X.fit_transform(X_data)

scaler_second_X = MinMaxScaler()
second_X_data = scaler_second_X.fit_transform(second_X_data)

def add_positional_encoding(X, timestamps, option='shared'):
    """
    Add positional encoding to the feature matrix.

    Parameters:
    - X (ndarray): Feature matrix.
    - timestamps (ndarray): Array of timestamps.
    - option (str): 'shared' or 'per_security'.

    Returns:
    - X_pe (ndarray): X with positional encoding added.
    """
    if option == 'shared':
        # Shared positional encoding across all features
        pe = np.sin(timestamps[:, None] / 10000 ** (np.arange(X.shape[1]) / X.shape[1]))
        X_pe = X + pe.astype(np.float32)
    elif option == 'per_security':
        # Separate positional encoding for each security
        pe_list = []
        for i in range(num_securities):
            pe = np.sin(timestamps[:, None] / 10000 ** (np.arange(num_features_per_security) / num_features_per_security))
            pe_list.append(pe)
        pe_concat = np.hstack(pe_list)
        X_pe = X + pe_concat.astype(np.float32)
    else:
        X_pe = X  # No positional encoding
    return X_pe

# Apply positional encoding
positional_encoding_option = 'shared'  # 'shared' or 'per_security'
X_data = add_positional_encoding(X_data, timestamps, positional_encoding_option)

def create_windows(X1, X2, y, window_size, horizon):
    """
    Create sliding windows for time series data.

    Parameters:
    - X1 (ndarray): Primary dataset features.
    - X2 (ndarray): Secondary dataset features.
    - y (ndarray): Target variable.
    - window_size (int): Size of the window.
    - horizon (int): Prediction horizon.

    Returns:
    - Tuple[np.ndarray, np.ndarray, np.ndarray]: Tuples of windows for X1, X2, and y.
    """
    X1_windows = []
    X2_windows = []
    y_windows = []
    for i in range(len(X1) - window_size - horizon + 1):
        X1_windows.append(X1[i:i+window_size])
        X2_windows.append(X2[i:i+window_size])
        y_windows.append(y[i+window_size+horizon-1])
    return np.array(X1_windows), np.array(X2_windows), np.array(y_windows)

# Define window sizes
window_size = 20
horizon = 1

# Create windows
X1_windows, X2_windows, y_windows = create_windows(X_data, second_X_data, y_data, window_size, horizon)

# Split into train and test sets
train_ratio = 0.8
train_size = int(len(X1_windows) * train_ratio)

X1_train = X1_windows[:train_size]
X2_train = X2_windows[:train_size]
y_train = y_windows[:train_size]

X1_test = X1_windows[train_size:]
X2_test = X2_windows[train_size:]
y_test = y_windows[train_size:]

# Convert to PyTorch tensors
X1_train_tensor = torch.tensor(X1_train)
X2_train_tensor = torch.tensor(X2_train)
y_train_tensor = torch.tensor(y_train)

X1_test_tensor = torch.tensor(X1_test)
X2_test_tensor = torch.tensor(X2_test)
y_test_tensor = torch.tensor(y_test)

class TimeSeriesDataset(Dataset):
    """
    Custom Dataset for time series data with two feature sets.

    Parameters:
    - X1 (torch.Tensor): Primary dataset features.
    - X2 (torch.Tensor): Secondary dataset features.
    - y (torch.Tensor): Target variable.
    """
    def __init__(self, X1, X2, y):
        self.X1 = X1.float()
        self.X2 = X2.float()
        self.y = y.long()
    def __len__(self):
        return len(self.X1)
    def __getitem__(self, idx):
        return self.X1[idx], self.X2[idx], self.y[idx]

# DataLoaders
batch_size = 64

train_dataset = TimeSeriesDataset(X1_train_tensor, X2_train_tensor, y_train_tensor)
test_dataset = TimeSeriesDataset(X1_test_tensor, X2_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

class MatrixRegressionModel(nn.Module):
    """
    Matrix Regression Model: Y = A * X * B

    Advantages:
    - Simple and interpretable.
    - Fast training.

    Disadvantages:
    - Limited in capturing complex patterns.

    Parameters:
    - num_securities (int): Number of securities.
    - num_features (int): Total number of features.
    - num_classes (int): Number of target classes.
    """
    def __init__(self, num_securities, num_features, num_classes):
        super(MatrixRegressionModel, self).__init__()
        self.A = nn.Parameter(torch.randn(1, num_securities))
        self.B = nn.Parameter(torch.randn(num_features, num_classes))
        self.num_securities = num_securities
        self.num_classes = num_classes
    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten over time
        out = self.A @ x @ self.B  # Shape: (batch_size, num_classes)
        out = out.view(-1, self.num_securities, self.num_classes)
        return out

class LogisticRegressionModel(nn.Module):
    """
    Logistic Regression Model.

    Advantages:
    - Simple and interpretable.
    - Good baseline model.

    Disadvantages:
    - Assumes linear relationship.
    - May underfit complex data.

    Parameters:
    - input_size (int): Size of input features.
    - num_securities (int): Number of securities.
    - num_classes (int): Number of target classes.
    """
    def __init__(self, input_size, num_securities, num_classes):
        super(LogisticRegressionModel, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        self.linear = nn.Linear(input_size, num_securities * num_classes)
    def forward(self, x):
        x = x.view(x.size(0), -1)
        out = self.linear(x)
        out = out.view(-1, self.num_securities, self.num_classes)
        return out

class CNNModel(nn.Module):
    """
    Convolutional Neural Network Model.

    Advantages:
    - Captures local temporal patterns.
    - Efficient computation.

    Disadvantages:
    - Limited in capturing long-term dependencies.

    Parameters:
    - num_features (int): Total number of features.
    - num_securities (int): Number of securities.
    - num_classes (int): Number of target classes.
    """
    def __init__(self, num_features, num_securities, num_classes):
        super(CNNModel, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        self.conv1 = nn.Conv1d(in_channels=num_features, out_channels=64, kernel_size=3)
        self.conv2 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3)
        self.relu = nn.ReLU()
        self.fc = nn.Linear(128, num_securities * num_classes)
    def forward(self, x):
        x = x.permute(0, 2, 1)  # (batch_size, num_features, window_size)
        x = self.relu(self.conv1(x))
        x = self.relu(self.conv2(x))
        x = torch.mean(x, dim=2)  # Global average pooling
        x = self.fc(x)
        out = x.view(-1, self.num_securities, self.num_classes)
        return out

class LSTMModel(nn.Module):
    """
    Long Short-Term Memory Model.

    Advantages:
    - Captures long-term dependencies.
    - Suitable for sequential data.

    Disadvantages:
    - Computationally intensive.
    - Prone to overfitting.

    Parameters:
    - input_size (int): Size of input features.
    - hidden_size (int): Size of hidden state.
    - num_layers (int): Number of LSTM layers.
    - num_securities (int): Number of securities.
    - num_classes (int): Number of target classes.
    """
    def __init__(self, input_size, hidden_size, num_layers, num_securities, num_classes):
        super(LSTMModel, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_securities * num_classes)
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = out[:, -1, :]  # Last time step
        out = self.fc(out)
        out = out.view(-1, self.num_securities, self.num_classes)
        return out

class TransformerModel(nn.Module):
    """
    Transformer Model.

    Advantages:
    - Captures global dependencies.
    - Parallel computation.

    Disadvantages:
    - Requires large datasets.
    - Computationally intensive.

    Parameters:
    - num_features (int): Total number of features.
    - num_securities (int): Number of securities.
    - num_classes (int): Number of target classes.
    """
    def __init__(self, num_features, num_securities, num_classes):
        super(TransformerModel, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        self.embedding = nn.Linear(num_features, 128)
        self.pos_encoder = PositionalEncoding(128)
        encoder_layers = nn.TransformerEncoderLayer(d_model=128, nhead=8)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=2)
        self.decoder = nn.Linear(128, num_securities * num_classes)
    def forward(self, x):
        x = x.view(x.size(0), x.size(1), -1)  # (batch_size, window_size, num_features)
        x = self.embedding(x)
        x = self.pos_encoder(x)
        x = x.permute(1, 0, 2)  # (sequence_length, batch_size, embedding_dim)
        output = self.transformer_encoder(x)
        output = output[-1, :, :]  # Last output
        output = self.decoder(output)
        output = output.view(-1, self.num_securities, self.num_classes)
        return output

class PositionalEncoding(nn.Module):
    """
    Positional Encoding for Transformer.

    Adds positional information to the embeddings.

    Parameters:
    - d_model (int): Embedding dimension.
    - max_len (int): Maximum sequence length.
    """
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        if d_model % 2 == 1:
            pe[:, 1::2] = torch.sin(position * div_term)
        else:
            pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(1)
        self.register_buffer('pe', pe)
    def forward(self, x):
        x = x + self.pe[:x.size(0)]
        return x

class SimpleRNNModel(nn.Module):
    """
    Simple Recurrent Neural Network Model.

    Advantages:
    - Captures sequential dependencies.
    - Simpler than LSTM.

    Disadvantages:
    - Struggles with long-term dependencies.
    - May suffer from vanishing gradients.

    Parameters:
    - input_size (int): Size of input features.
    - hidden_size (int): Size of hidden state.
    - num_securities (int): Number of securities.
    - num_classes (int): Number of target classes.
    """
    def __init__(self, input_size, hidden_size, num_securities, num_classes):
        super(SimpleRNNModel, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_securities * num_classes)
    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.rnn(x, h0)
        out = out[:, -1, :]  # Last time step
        out = self.fc(out)
        out = out.view(-1, self.num_securities, self.num_classes)
        return out

# Missing Models with Docstrings and Comments

class GNNModel(nn.Module):
    """
    Graph Neural Network Model.

    Advantages:
    - Captures relationships between securities.
    - Utilizes attention mechanisms.

    Disadvantages:
    - Computationally intensive.
    - Requires graph structure.

    Parameters:
    - num_securities (int): Number of securities.
    - num_features_per_security (int): Features per security.
    - num_classes (int): Number of target classes.
    """
    def __init__(self, num_securities, num_features_per_security, num_classes):
        super(GNNModel, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        # Create a linear layer for each security
        self.gcn_layers = nn.ModuleList([nn.Linear(num_features_per_security, 64) for _ in range(num_securities)])
        # Multi-head attention mechanism
        self.attention = nn.MultiheadAttention(embed_dim=64, num_heads=4, batch_first=True)
        # Fully connected layer for classification
        self.fc = nn.Linear(64, num_classes)
    def forward(self, x):
        # x shape: (batch_size, window_size, num_features)
        batch_size = x.size(0)
        x = x[:, -1, :]  # Use the last time step
        x = x.view(batch_size, self.num_securities, -1)  # (batch_size, num_securities, num_features_per_security)
        node_embeddings = []
        for i in range(self.num_securities):
            h = self.gcn_layers[i](x[:, i, :])  # (batch_size, 64)
            node_embeddings.append(h)
        h = torch.stack(node_embeddings, dim=1)  # (batch_size, num_securities, 64)
        # Apply attention mechanism
        attn_output, _ = self.attention(h, h, h)
        out = self.fc(attn_output)  # (batch_size, num_securities, num_classes)
        return out

class MambaModel(nn.Module):
    """
    Mamba Model: Combines CNN and LSTM architectures.

    Advantages:
    - Captures both local and long-term dependencies.
    - Utilizes CNN for feature extraction and LSTM for temporal patterns.

    Disadvantages:
    - More complex and computationally intensive.

    Parameters:
    - num_features (int): Total number of features.
    - num_securities (int): Number of securities.
    - num_classes (int): Number of target classes.
    """
    def __init__(self, num_features, num_securities, num_classes):
        super(MambaModel, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        # Convolutional layer
        self.cnn = nn.Conv1d(in_channels=num_features, out_channels=64, kernel_size=3)
        # LSTM layer
        self.lstm = nn.LSTM(input_size=64, hidden_size=128, num_layers=2, batch_first=True)
        # Fully connected layer
        self.fc = nn.Linear(128, num_securities * num_classes)
    def forward(self, x):
        # x shape: (batch_size, window_size, num_features)
        x = x.permute(0, 2, 1)  # (batch_size, num_features, window_size)
        x = F.relu(self.cnn(x))  # Apply CNN
        x = x.permute(0, 2, 1)  # (batch_size, new_window_size, channels)
        h0 = torch.zeros(2, x.size(0), 128).to(x.device)  # Initial hidden state
        c0 = torch.zeros(2, x.size(0), 128).to(x.device)  # Initial cell state
        out, _ = self.lstm(x, (h0, c0))  # Apply LSTM
        out = out[:, -1, :]  # Get the last output
        out = self.fc(out)
        out = out.view(-1, self.num_securities, self.num_classes)
        return out

class LiquidNetModel(nn.Module):
    """
    Liquid Neural Network Model.

    Advantages:
    - Dynamic adaptation to inputs.
    - Good for time-series data.

    Disadvantages:
    - Relatively new, less tested.
    - May be complex to tune.

    Parameters:
    - input_size (int): Size of input features.
    - hidden_size (int): Size of hidden state.
    - num_securities (int): Number of securities.
    - num_classes (int): Number of target classes.
    """
    def __init__(self, input_size, hidden_size, num_securities, num_classes):
        super(LiquidNetModel, self).__init__()
        self.num_securities = num_securities
        self.num_classes = num_classes
        # RNN Cell with ReLU activation
        self.rnn_cell = nn.RNNCell(input_size, hidden_size, nonlinearity='relu')
        # Fully connected layer
        self.fc = nn.Linear(hidden_size, num_securities * num_classes)
    def forward(self, x):
        h_t = torch.zeros(x.size(0), self.rnn_cell.hidden_size).to(x.device)  # Initial hidden state
        for t in range(x.size(1)):
            h_t = self.rnn_cell(x[:, t, :], h_t)  # Update hidden state
        out = self.fc(h_t)
        out = out.view(-1, self.num_securities, self.num_classes)
        return out

class HiddenMarkovModel(nn.Module):
    """
    Hidden Markov Model.

    Advantages:
    - Probabilistic approach.
    - Good for sequential data.

    Disadvantages:
    - Simplified version here.
    - May not capture complex patterns.

    Parameters:
    - num_states (int): Number of hidden states.
    - num_securities (int): Number of securities.
    - num_classes (int): Number of target classes.
    """
    def __init__(self, num_states, num_securities, num_classes):
        super(HiddenMarkovModel, self).__init__()
        self.num_states = num_states
        self.num_securities = num_securities
        self.num_classes = num_classes
        # Start probabilities
        self.start_prob = nn.Parameter(torch.randn(num_states))
        # Transition probabilities
        self.transition_prob = nn.Parameter(torch.randn(num_states, num_states))
        # Emission probabilities
        self.emission_prob = nn.Parameter(torch.randn(num_states, num_classes))
    def forward(self, x):
        batch_size = x.size(0)
        # Simplified; in practice, you'd implement the forward algorithm
        out = torch.softmax(self.emission_prob, dim=1)
        # Corrected repeat dimensions to match tensor dimensions
        out = out.unsqueeze(0).unsqueeze(0).repeat(batch_size, self.num_securities, 1, 1)
        # Since the model expects output shape (batch_size, num_securities, num_classes)
        # We need to aggregate over the hidden states (num_states)
        out = torch.mean(out, dim=2)  # Average over hidden states
        return out

class CombinedModel(nn.Module):
    """
    Combined Model: Merges outputs from two models.

    Advantages:
    - Leverages multiple data sources.
    - Potentially better performance.

    Disadvantages:
    - More complex.
    - Computationally intensive.

    Parameters:
    - model1 (nn.Module): First model.
    - model2 (nn.Module): Second model.
    - num_classes (int): Number of target classes.
    - num_securities (int): Number of securities.
    """
    def __init__(self, model1, model2, num_classes, num_securities):
        super(CombinedModel, self).__init__()
        self.model1 = model1
        self.model2 = model2
        self.fc = nn.Linear(num_classes * 2, num_classes)
        self.num_securities = num_securities
        self.num_classes = num_classes
    def forward(self, x1, x2):
        outputs1 = self.model1(x1)
        outputs2 = self.model2(x2)
        outputs = torch.cat((outputs1, outputs2), dim=2)
        batch_size = outputs.size(0)
        outputs = outputs.view(-1, outputs.size(2))
        final_output = self.fc(outputs)
        final_output = final_output.view(batch_size, self.num_securities, -1)
        return final_output

def train_model(model, train_loader, criterion, optimizer, num_epochs, device, writer):
    """
    Training loop for the model.

    Parameters:
    - model (nn.Module): The model to train.
    - train_loader (DataLoader): DataLoader for training data.
    - criterion (nn.Module): Loss function.
    - optimizer (torch.optim.Optimizer): Optimizer.
    - num_epochs (int): Number of epochs.
    - device (torch.device): Computation device.
    - writer (SummaryWriter): TensorBoard SummaryWriter.
    """
    model = model.to(device)
    global_step = 0
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for X1_batch, X2_batch, y_batch in train_loader:
            X1_batch = X1_batch.to(device)
            X2_batch = X2_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            outputs = model(X1_batch, X2_batch)
            loss = 0
            for i in range(num_securities):
                loss += criterion(outputs[:, i, :], y_batch[:, i])
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            writer.add_scalar('Training Loss', loss.item(), global_step)
            global_step += 1
        avg_loss = total_loss / len(train_loader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')
        # Save checkpoint
        torch.save(model.state_dict(), f'checkpoint_epoch_{epoch+1}.pth')

def evaluate_model(model, test_loader, device):
    """
    Evaluate the model on the test set.

    Parameters:
    - model (nn.Module): The model to evaluate.
    - test_loader (DataLoader): DataLoader for test data.
    - device (torch.device): Computation device.

    Returns:
    - float: Cohen's kappa score.
    """
    model = model.to(device)
    model.eval()
    total = 0
    correct = 0
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for X1_batch, X2_batch, y_batch in test_loader:
            X1_batch = X1_batch.to(device)
            X2_batch = X2_batch.to(device)
            y_batch = y_batch.to(device)
            outputs = model(X1_batch, X2_batch)
            _, predicted = torch.max(outputs.data, 2)
            total += y_batch.numel()
            correct += (predicted == y_batch).sum().item()
            all_preds.extend(predicted.cpu().numpy().flatten())
            all_targets.extend(y_batch.cpu().numpy().flatten())
    accuracy = 100 * correct / total
    kappa = cohen_kappa_score(all_targets, all_preds)
    print(f'Accuracy on test set: {accuracy:.2f}%')
    print(f"Cohen's Kappa: {kappa:.4f}")
    return kappa

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Examples of combination models using the new models

# Example 1: Combine GNNModel and TransformerModel
print("\nTraining Combined Model 1: GNNModel + TransformerModel")
model1 = GNNModel(num_securities=num_securities, num_features_per_security=num_features_per_security, num_classes=num_classes)
model2 = TransformerModel(num_features=second_X_data.shape[1], num_securities=num_securities, num_classes=num_classes)
combined_model = CombinedModel(model1, model2, num_classes, num_securities)
optimizer = torch.optim.Adam(combined_model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter()
train_model(combined_model, train_loader, criterion, optimizer, num_epochs=5, device=device, writer=writer)
evaluate_model(combined_model, test_loader, device)
writer.close()

# Example 2: Combine MambaModel and LSTMModel
print("\nTraining Combined Model 2: MambaModel + LSTMModel")
model1 = MambaModel(num_features=num_features, num_securities=num_securities, num_classes=num_classes)
model2 = LSTMModel(input_size=second_X_data.shape[1], hidden_size=64, num_layers=1, num_securities=num_securities, num_classes=num_classes)
combined_model = CombinedModel(model1, model2, num_classes, num_securities)
optimizer = torch.optim.Adam(combined_model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter()
train_model(combined_model, train_loader, criterion, optimizer, num_epochs=5, device=device, writer=writer)
evaluate_model(combined_model, test_loader, device)
writer.close()

# Example 3: Combine LiquidNetModel and CNNModel
print("\nTraining Combined Model 3: LiquidNetModel + CNNModel")
model1 = LiquidNetModel(input_size=num_features, hidden_size=128, num_securities=num_securities, num_classes=num_classes)
model2 = CNNModel(num_features=second_X_data.shape[1], num_securities=num_securities, num_classes=num_classes)
combined_model = CombinedModel(model1, model2, num_classes, num_securities)
optimizer = torch.optim.Adam(combined_model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter()
train_model(combined_model, train_loader, criterion, optimizer, num_epochs=5, device=device, writer=writer)
evaluate_model(combined_model, test_loader, device)
writer.close()

# Example 4: Combine HiddenMarkovModel and LogisticRegressionModel
print("\nTraining Combined Model 4: HiddenMarkovModel + LogisticRegressionModel")
model1 = HiddenMarkovModel(num_states=5, num_securities=num_securities, num_classes=num_classes)
model2 = LogisticRegressionModel(input_size=second_X_data.shape[1]*window_size, num_securities=num_securities, num_classes=num_classes)
combined_model = CombinedModel(model1, model2, num_classes, num_securities)
optimizer = torch.optim.Adam(combined_model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter()
train_model(combined_model, train_loader, criterion, optimizer, num_epochs=5, device=device, writer=writer)
evaluate_model(combined_model, test_loader, device)
writer.close()

# Example 5: Combine GNNModel and MambaModel
print("\nTraining Combined Model 5: GNNModel + MambaModel")
model1 = GNNModel(num_securities=num_securities, num_features_per_security=num_features_per_security, num_classes=num_classes)
model2 = MambaModel(num_features=second_X_data.shape[1], num_securities=num_securities, num_classes=num_classes)
combined_model = CombinedModel(model1, model2, num_classes, num_securities)
optimizer = torch.optim.Adam(combined_model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
writer = SummaryWriter()
train_model(combined_model, train_loader, criterion, optimizer, num_epochs=5, device=device, writer=writer)
evaluate_model(combined_model, test_loader, device)
writer.close()


Training Combined Model 1: GNNModel + TransformerModel




Epoch [1/5], Loss: 11.0505
Epoch [2/5], Loss: 11.0070
Epoch [3/5], Loss: 11.0030
Epoch [4/5], Loss: 10.9930
Epoch [5/5], Loss: 10.9843
Accuracy on test set: 33.00%
Cohen's Kappa: -0.0043

Training Combined Model 2: MambaModel + LSTMModel
Epoch [1/5], Loss: 11.0014
Epoch [2/5], Loss: 10.9867
Epoch [3/5], Loss: 10.9818
Epoch [4/5], Loss: 10.9776
Epoch [5/5], Loss: 10.9677
Accuracy on test set: 33.00%
Cohen's Kappa: -0.0045

Training Combined Model 3: LiquidNetModel + CNNModel
Epoch [1/5], Loss: 10.9973
Epoch [2/5], Loss: 10.9749
Epoch [3/5], Loss: 10.9603
Epoch [4/5], Loss: 10.9382
Epoch [5/5], Loss: 10.9085
Accuracy on test set: 33.32%
Cohen's Kappa: -0.0008

Training Combined Model 4: HiddenMarkovModel + LogisticRegressionModel
Epoch [1/5], Loss: 11.0670
Epoch [2/5], Loss: 10.9946
Epoch [3/5], Loss: 10.9829
Epoch [4/5], Loss: 10.9717
Epoch [5/5], Loss: 10.9646
Accuracy on test set: 33.44%
Cohen's Kappa: 0.0014

Training Combined Model 5: GNNModel + MambaModel
Epoch [1/5], Loss: 11.0025

In [None]:
# ... [Previous code remains the same up to model definitions]

# Instantiate models for primary and second datasets
print("Training Combined Model with Transformer and LSTM...")

# Primary model
primary_model = TransformerModel(num_features=num_features, num_securities=num_securities, num_classes=num_classes)
# Secondary model
secondary_model = LSTMModel(input_size=second_input_size, hidden_size=128, num_layers=2, num_securities=num_securities, num_classes=num_classes)

# Define final linear layer
class FinalModel(nn.Module):
    def __init__(self, num_classes, num_securities):
        super(FinalModel, self).__init__()
        self.fc = nn.Linear(num_classes * 2, num_classes)
        self.num_securities = num_securities
    def forward(self, outputs1, outputs2):
        # Concatenate over class dimension
        outputs = torch.cat((outputs1, outputs2), dim=2)
        # Pass through linear layer
        batch_size = outputs.size(0)
        outputs = outputs.view(-1, outputs.size(2))
        final_output = self.fc(outputs)
        final_output = final_output.view(batch_size, self.num_securities, -1)
        return final_output

final_model = FinalModel(num_classes=num_classes, num_securities=num_securities)

# Combine the parameters of all models
params = list(primary_model.parameters()) + list(secondary_model.parameters()) + list(final_model.parameters())
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params, lr=learning_rate)

# Updated training function
def train_model(model1, model2, final_model, train_loader, criterion, optimizer, num_epochs, device):
    model1 = model1.to(device)
    model2 = model2.to(device)
    final_model = final_model.to(device)
    for epoch in range(num_epochs):
        model1.train()
        model2.train()
        final_model.train()
        total_loss = 0
        for X1_batch, X2_batch, y_batch in train_loader:
            X1_batch = X1_batch.to(device)
            X2_batch = X2_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            outputs1 = model1(X1_batch)
            outputs2 = model2(X2_batch)
            final_output = final_model(outputs1, outputs2)
            loss = 0
            for i in range(num_securities):
                loss += criterion(final_output[:, i, :], y_batch[:, i])
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_loader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

# Updated evaluation function
def evaluate_model(model1, model2, final_model, test_loader, device):
    model1 = model1.to(device)
    model2 = model2.to(device)
    final_model = final_model.to(device)
    model1.eval()
    model2.eval()
    final_model.eval()
    total = 0
    correct = 0
    with torch.no_grad():
        for X1_batch, X2_batch, y_batch in test_loader:
            X1_batch = X1_batch.to(device)
            X2_batch = X2_batch.to(device)
            y_batch = y_batch.to(device)
            outputs1 = model1(X1_batch)
            outputs2 = model2(X2_batch)
            final_output = final_model(outputs1, outputs2)
            _, predicted = torch.max(final_output.data, 2)
            total += y_batch.numel()
            correct += (predicted == y_batch).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy on test set: {accuracy:.2f}%')

# Start training
train_model(primary_model, secondary_model, final_model, train_loader, criterion, optimizer, num_epochs, device)
evaluate_model(primary_model, secondary_model, final_model, test_loader, device)

Training Combined Model with Transformer and LSTM...




Epoch [1/5], Loss: 11.0751
Epoch [2/5], Loss: 11.0117
Epoch [3/5], Loss: 11.0131
Epoch [4/5], Loss: 11.0011
Epoch [5/5], Loss: 10.9982
Accuracy on test set: 32.97%


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Time2Vec implementation
class Time2Vec(nn.Module):
    def __init__(self, seq_len, d_model):
        super(Time2Vec, self).__init__()
        self.seq_len = seq_len
        self.d_model = d_model
        self.w0 = nn.Parameter(torch.randn(1))
        self.b0 = nn.Parameter(torch.randn(1))
        self.w = nn.Parameter(torch.randn(d_model - 1))
        self.b = nn.Parameter(torch.randn(d_model - 1))

    def forward(self, batch_size):
        # Create time indices
        t = torch.arange(self.seq_len).unsqueeze(-1).float().to(self.w0.device)  # Shape: (seq_len, 1)
        # Linear component
        v = self.w0 * t + self.b0  # Shape: (seq_len, 1)
        # Periodic component
        vp = torch.sin(self.w * t + self.b)  # Broadcasting over (seq_len, d_model - 1)
        # Concatenate components
        time_emb = torch.cat([v, vp], dim=-1)  # Shape: (seq_len, d_model)
        # Expand to match batch size
        time_emb = time_emb.unsqueeze(0).expand(batch_size, -1, -1)  # Shape: (batch_size, seq_len, d_model)
        return time_emb

# Corrected Multi-Head Attention Mechanism
class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        assert d_model % num_heads == 0, "d_model must be divisible by num_heads"

        self.num_heads = num_heads
        self.d_k = d_model // num_heads

        self.q_linear = nn.Linear(d_model, d_model)
        self.k_linear = nn.Linear(d_model, d_model)
        self.v_linear = nn.Linear(d_model, d_model)
        self.out_linear = nn.Linear(d_model, d_model)

    def forward(self, q, k, v, mask=None):
        batch_size = q.size(0)

        # Perform linear projections and split into heads
        q = self.q_linear(q).view(batch_size, -1, self.num_heads, self.d_k).transpose(1,2)
        k = self.k_linear(k).view(batch_size, -1, self.num_heads, self.d_k).transpose(1,2)
        v = self.v_linear(v).view(batch_size, -1, self.num_heads, self.d_k).transpose(1,2)

        # Calculate attention scores
        scores = torch.matmul(q, k.transpose(-2, -1)) / np.sqrt(self.d_k)
        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e9)

        attn_weights = torch.softmax(scores, dim=-1)
        context = torch.matmul(attn_weights, v)

        # Concatenate heads
        context = context.transpose(1,2).contiguous().view(batch_size, -1, self.num_heads * self.d_k)
        output = self.out_linear(context)

        return output

# Corrected Gated Residual Network
class GatedResidualNetwork(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(GatedResidualNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.elu = nn.ELU()
        self.fc2 = nn.Linear(hidden_size, 2 * input_size)  # Output size doubled for GLU
        self.glu = nn.GLU(dim=-1)  # Splits last dimension into two halves

    def forward(self, x):
        residual = x
        x = self.fc1(x)
        x = self.elu(x)
        x = self.fc2(x)
        x = self.glu(x)
        return residual + x

# Corrected Transformer Encoder Block
class TransformerEncoder(nn.Module):
    def __init__(self, d_model, num_heads, hidden_size):
        super(TransformerEncoder, self).__init__()
        self.attention = MultiHeadAttention(d_model, num_heads)
        self.grn = GatedResidualNetwork(d_model, hidden_size)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, x, mask=None):
        attn_output = self.attention(x, x, x, mask)
        x = self.norm1(x + attn_output)
        grn_output = self.grn(x)
        x = self.norm2(x + grn_output)
        return x

# Corrected Transformer Decoder Block
class TransformerDecoder(nn.Module):
    def __init__(self, d_model, num_heads, hidden_size):
        super(TransformerDecoder, self).__init__()
        self.self_attention = MultiHeadAttention(d_model, num_heads)
        self.cross_attention = MultiHeadAttention(d_model, num_heads)
        self.grn = GatedResidualNetwork(d_model, hidden_size)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)

    def forward(self, x, enc_output, src_mask=None, tgt_mask=None):
        # Self-attention with masking
        attn_output = self.self_attention(x, x, x, tgt_mask)
        x = self.norm1(x + attn_output)

        # Cross-attention with encoder outputs
        attn_output = self.cross_attention(x, enc_output, enc_output, src_mask)
        x = self.norm2(x + attn_output)

        # Apply GRN and residual connection
        grn_output = self.grn(x)
        x = self.norm3(x + grn_output)
        return x

# Corrected Portfolio Transformer Model
class PortfolioTransformer(nn.Module):
    def __init__(self, num_assets, d_model, num_heads, hidden_size, num_layers, seq_len):
        super(PortfolioTransformer, self).__init__()
        self.num_assets = num_assets
        self.d_model = d_model
        self.seq_len = seq_len
        self.input_proj = nn.Linear(num_assets, d_model)
        self.time2vec = Time2Vec(seq_len, d_model)
        self.encoder_layers = nn.ModuleList(
            [TransformerEncoder(d_model, num_heads, hidden_size) for _ in range(num_layers)]
        )
        self.decoder_layers = nn.ModuleList(
            [TransformerDecoder(d_model, num_heads, hidden_size) for _ in range(num_layers)]
        )
        self.fc_out = nn.Linear(d_model, num_assets)

    def forward(self, x):
        # x shape: (batch_size, seq_len, num_assets)
        batch_size = x.size(0)
        # Project input features to d_model
        x_proj = self.input_proj(x)  # Shape: (batch_size, seq_len, d_model)
        # Get time embeddings
        time_emb = self.time2vec(batch_size)  # Shape: (batch_size, seq_len, d_model)
        # Combine input embeddings and time embeddings
        x = x_proj + time_emb  # Element-wise addition
        # Encoder
        for encoder in self.encoder_layers:
            x = encoder(x)
        # Decoder
        dec_input = x
        for decoder in self.decoder_layers:
            x = decoder(dec_input, x)
        s_i_t = self.fc_out(x)  # Output shape: (batch_size, seq_len, num_assets)
        # Compute weights using the compound function
        weights = torch.sign(s_i_t) * torch.softmax(torch.abs(s_i_t), dim=-1)
        return weights

In [None]:
import numpy as np
import torch

# Generate random returns data: 100 securities over 1000 time steps
num_securities = 100
num_time_steps = 1000
np.random.seed(42)  # For reproducibility

# Simulate random returns
returns_data = np.random.randn(num_time_steps, num_securities) * 0.01  # Small random returns
prices = 100 + np.cumsum(returns_data, axis=0)  # Simulate price paths

# Convert to PyTorch tensors
prices = torch.tensor(prices, dtype=torch.float32)
returns = torch.tensor(returns_data, dtype=torch.float32)

In [None]:
from torch.utils.data import DataLoader, TensorDataset

# Prepare the dataset
sequence_length = 20  # Use last 20 days for prediction
X = []
y = []

for i in range(len(prices) - sequence_length):
    X.append(returns[i:i+sequence_length])
    y.append(returns[i+sequence_length])

X = torch.stack(X)  # Shape: (num_samples, seq_len, num_assets)
y = torch.stack(y)  # Shape: (num_samples, num_assets)

dataset = TensorDataset(X, y)
data_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Initialize the model
num_assets = num_securities
model = PortfolioTransformer(
    num_assets=num_assets, d_model=64, num_heads=8, hidden_size=128, num_layers=4, seq_len=sequence_length
)

optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
def train(model, optimizer, data_loader, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for batch_X, batch_y in data_loader:
            optimizer.zero_grad()
            portfolio_weights = model(batch_X)  # Output shape: (batch_size, seq_len, num_assets)
            # Use the last time step's weights
            portfolio_weights = portfolio_weights[:, -1, :]  # Shape: (batch_size, num_assets)
            # Compute portfolio returns
            portfolio_returns = torch.sum(portfolio_weights * batch_y, dim=1)
            loss = sharpe_loss(portfolio_returns)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(data_loader):.4f}")

# Loss function (Negative Sharpe Ratio)
def sharpe_loss(portfolio_returns, risk_free_rate=0.0):
    # Assuming daily returns, annualize by multiplying by sqrt(252)
    mean_return = torch.mean(portfolio_returns)
    std_return = torch.std(portfolio_returns)
    sharpe_ratio = (mean_return - risk_free_rate) / (std_return + 1e-6)
    return -sharpe_ratio  # Negative for minimization

# Train the model
train(model, optimizer, data_loader, num_epochs=10)

Epoch 1/10, Loss: 0.0227
Epoch 2/10, Loss: -0.1482
Epoch 3/10, Loss: -0.1936
Epoch 4/10, Loss: -0.2280
Epoch 5/10, Loss: -0.2235
Epoch 6/10, Loss: -0.2329
Epoch 7/10, Loss: -0.2364
Epoch 8/10, Loss: -0.2379
Epoch 9/10, Loss: -0.2571
Epoch 10/10, Loss: -0.2559


In [None]:
## version 2

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Advanced Time Encoding: Learnable Positional Encoding
class LearnablePositionalEncoding(nn.Module):
    """
    Implements learnable positional encoding for sequences.

    Args:
        seq_len (int): The maximum length of the input sequences.
        d_model (int): The dimension of the model.
    """
    def __init__(self, seq_len, d_model):
        super(LearnablePositionalEncoding, self).__init__()
        self.position_embeddings = nn.Embedding(seq_len, d_model)
        self.seq_len = seq_len

    def forward(self, x):
        """
        Forward pass for positional encoding.

        Args:
            x (Tensor): Input tensor of shape (batch_size, seq_len, d_model).

        Returns:
            Tensor: Positionally encoded tensor.
        """
        positions = torch.arange(0, self.seq_len, device=x.device).unsqueeze(0)
        pos_embed = self.position_embeddings(positions)
        x = x + pos_embed
        return x

# PortfolioTransformer using PyTorch's nn.Transformer
class PortfolioTransformer(nn.Module):
    """
    Portfolio Transformer model for asset allocation.

    Args:
        num_assets (int): Number of assets in the portfolio.
        d_model (int): Dimension of the model.
        nhead (int): Number of attention heads.
        num_layers (int): Number of transformer encoder layers.
        seq_len (int): Length of the input sequences.
        dropout (float): Dropout probability.
    """
    def __init__(
        self, num_assets, d_model, nhead, num_layers, seq_len, dropout=0.1
    ):
        super(PortfolioTransformer, self).__init__()
        self.num_assets = num_assets
        self.d_model = d_model

        # Input projection layer to map asset returns to model dimension
        self.input_proj = nn.Linear(num_assets, d_model)

        # Learnable positional encoding
        self.pos_encoder = LearnablePositionalEncoding(seq_len, d_model)

        # Transformer encoder layers
        encoder_layers = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dropout=dropout, batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layers, num_layers=num_layers
        )

        # Output layer to map back to asset space
        self.fc_out = nn.Linear(d_model, num_assets)

        # Initialize weights
        self._init_weights()

    def _init_weights(self):
        """
        Initializes weights for better convergence.
        """
        initrange = 0.1
        self.input_proj.weight.data.uniform_(-initrange, initrange)
        self.input_proj.bias.data.zero_()
        self.fc_out.bias.data.zero_()
        self.fc_out.weight.data.uniform_(-initrange, initrange)

    def forward(self, x):
        """
        Forward pass of the model.

        Args:
            x (Tensor): Input tensor of shape (batch_size, seq_len, num_assets).

        Returns:
            Tensor: Portfolio weights of shape (batch_size, seq_len, num_assets).
        """
        # Project input to model dimension and scale
        x = self.input_proj(x) * np.sqrt(self.d_model)
        # Add positional encoding
        x = self.pos_encoder(x)
        # Pass through transformer encoder
        x = self.transformer_encoder(x)
        # Map back to asset space
        s_i_t = self.fc_out(x)  # Shape: (batch_size, seq_len, num_assets)
        # Apply activation and normalize weights
        weights = torch.tanh(s_i_t)
        weights = weights / torch.sum(torch.abs(weights), dim=-1, keepdim=True)
        return weights

def sharpe_loss(
    portfolio_returns, portfolio_weights, prev_weights, transaction_cost=0.0002
):
    """
    Computes the negative Sharpe ratio as the loss function, including transaction costs.

    Args:
        portfolio_returns (Tensor): Portfolio returns of shape (batch_size,).
        portfolio_weights (Tensor): Current portfolio weights of shape (batch_size, num_assets).
        prev_weights (Tensor): Previous portfolio weights of shape (batch_size, num_assets).
        transaction_cost (float): Transaction cost rate per unit weight change.

    Returns:
        Tensor: Loss value (negative Sharpe ratio).
    """
    # Compute transaction costs based on weight changes
    tc = transaction_cost * torch.sum(torch.abs(portfolio_weights - prev_weights), dim=1)
    # Net returns after subtracting transaction costs
    net_returns = portfolio_returns - tc
    # Compute mean and standard deviation of returns
    mean_return = torch.mean(net_returns)
    std_return = torch.std(net_returns)
    # Compute Sharpe ratio
    sharpe_ratio = mean_return / (std_return + 1e-6)
    # Return negative Sharpe ratio for minimization
    return -sharpe_ratio

def compute_expected_time_to_target(portfolio_returns, target_sharpe, window=20):
    """
    Estimates the expected time to reach the target Sharpe ratio.

    Args:
        portfolio_returns (list): List of past portfolio returns.
        target_sharpe (float): Target Sharpe ratio.
        window (int): Window size for rolling calculation.

    Returns:
        Tensor: Estimated time to reach the target Sharpe ratio.
    """
    # If insufficient data, return zero
    if len(portfolio_returns) < window:
        return torch.tensor(0.0)
    # Compute rolling mean and std
    recent_returns = torch.tensor(portfolio_returns[-window:])
    rolling_returns = torch.mean(recent_returns)
    rolling_std = torch.std(recent_returns)
    rolling_sharpe = rolling_returns / (rolling_std + 1e-6)
    # Estimate time to target
    if rolling_sharpe >= target_sharpe:
        time_to_target = torch.tensor(0.0)
    else:
        # Simplified estimation
        time_to_target = (target_sharpe - rolling_sharpe) * window
    return time_to_target

def adjust_position_scaling(portfolio_weights, time_to_target, scaling_factor=0.1):
    """
    Adjusts position scaling based on the estimated time to target Sharpe ratio.

    Args:
        portfolio_weights (Tensor): Current portfolio weights.
        time_to_target (Tensor): Estimated time to reach target Sharpe ratio.
        scaling_factor (float): Factor to control scaling sensitivity.

    Returns:
        Tensor: Adjusted portfolio weights.
    """
    # Compute scaling coefficient
    scaling = 1.0 - scaling_factor * torch.sigmoid(time_to_target)
    # Adjust portfolio weights
    adjusted_weights = portfolio_weights * scaling
    return adjusted_weights

def train(model, optimizer, data_loader, num_epochs, target_sharpe=1.0):
    """
    Trains the PortfolioTransformer model.

    Args:
        model (nn.Module): The PortfolioTransformer model.
        optimizer (torch.optim.Optimizer): The optimizer.
        data_loader (DataLoader): DataLoader for training data.
        num_epochs (int): Number of training epochs.
        target_sharpe (float): Target Sharpe ratio for scaling positions.
    """
    model.train()
    portfolio_returns_history = []
    for epoch in range(num_epochs):
        total_loss = 0
        for batch_X, batch_y in data_loader:
            optimizer.zero_grad()
            # Forward pass to get portfolio weights
            portfolio_weights = model(batch_X)
            # Use the last time step's weights
            portfolio_weights = portfolio_weights[:, -1, :]
            # Initialize prev_weights with zeros matching portfolio_weights
            prev_weights = torch.zeros_like(portfolio_weights)
            # Compute portfolio returns
            portfolio_returns = torch.sum(portfolio_weights * batch_y, dim=1)
            # Adjust positions based on expected time to target
            time_to_target = compute_expected_time_to_target(
                portfolio_returns_history, target_sharpe
            )
            portfolio_weights = adjust_position_scaling(
                portfolio_weights, time_to_target
            )
            # Update returns history
            portfolio_returns_history.extend(portfolio_returns.tolist())
            # Compute loss with transaction costs
            loss = sharpe_loss(portfolio_returns, portfolio_weights, prev_weights)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            # Update prev_weights for the next batch
            prev_weights = portfolio_weights.detach()
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(data_loader):.4f}")

def backtest(model, data, train_size, test_size, sequence_length, target_sharpe=1.0):
    """
    Performs backtesting by training on a subset of data and testing on future data.

    Args:
        model (nn.Module): The PortfolioTransformer model.
        data (Tensor): Asset returns data.
        train_size (int): Number of samples for training.
        test_size (int): Number of samples for testing.
        sequence_length (int): Length of input sequences.
        target_sharpe (float): Target Sharpe ratio for scaling positions.
    """
    num_samples = len(data) - sequence_length
    train_indices = range(train_size)
    test_indices = range(train_size, train_size + test_size)

    # Prepare training data
    X_train = []
    y_train = []
    for i in train_indices:
        X_train.append(data[i:i+sequence_length])
        y_train.append(data[i+sequence_length])
    X_train = torch.stack(X_train)
    y_train = torch.stack(y_train)
    train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=False)

    # Prepare test data
    X_test = []
    y_test = []
    for i in test_indices:
        X_test.append(data[i:i+sequence_length])
        y_test.append(data[i+sequence_length])
    X_test = torch.stack(X_test)
    y_test = torch.stack(y_test)

    # Train the model
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    train(model, optimizer, train_loader, num_epochs=10, target_sharpe=target_sharpe)

    # Backtest on test data
    model.eval()
    prev_weights = torch.zeros(y_test.size(1))
    portfolio_returns_history = []
    with torch.no_grad():
        for i in range(len(X_test)):
            batch_X = X_test[i:i+1]  # Shape: (1, seq_len, num_assets)
            batch_y = y_test[i:i+1]  # Shape: (1, num_assets)
            # Get portfolio weights
            portfolio_weights = model(batch_X)
            portfolio_weights = portfolio_weights[:, -1, :]
            # Compute portfolio returns
            portfolio_returns = torch.sum(portfolio_weights * batch_y, dim=1)
            # Adjust positions based on expected time to target
            time_to_target = compute_expected_time_to_target(
                portfolio_returns_history, target_sharpe=target_sharpe
            )
            portfolio_weights = adjust_position_scaling(
                portfolio_weights, time_to_target
            )
            # Update returns history
            portfolio_returns_history.extend(portfolio_returns.tolist())
            # Update prev_weights
            prev_weights = portfolio_weights.detach()
        # Compute cumulative returns and Sharpe ratio
        portfolio_returns = np.array(portfolio_returns_history)
        cumulative_returns = np.cumprod(1 + portfolio_returns) - 1
        mean_return = np.mean(portfolio_returns)
        std_return = np.std(portfolio_returns)
        sharpe_ratio = mean_return / (std_return + 1e-6) * np.sqrt(252)
        print(f"Cumulative Return: {cumulative_returns[-1]:.4f}")
        print(f"Annualized Sharpe Ratio: {sharpe_ratio:.4f}")

In [None]:
import numpy as np
import torch

# Set random seed for reproducibility
np.random.seed(42)
torch.manual_seed(42)

# Generate random returns data: 100 securities over 1000 time steps
num_securities = 100
num_time_steps = 1000

# Simulate random returns (e.g., normally distributed with small mean and std)
returns_data = np.random.normal(loc=0.001, scale=0.01, size=(num_time_steps, num_securities))
returns = torch.tensor(returns_data, dtype=torch.float32)

In [None]:
# Define parameters
sequence_length = 20
train_size = 700
test_size = 200
num_assets = num_securities

# Initialize the model
model = PortfolioTransformer(
    num_assets=num_assets,
    d_model=64,
    nhead=8,
    num_layers=4,
    seq_len=sequence_length,
    dropout=0.1
)

# Run backtest
backtest(model, returns, train_size, test_size, sequence_length, target_sharpe=1.0)

Epoch 1/10, Loss: -0.4861
Epoch 2/10, Loss: -0.7983
Epoch 3/10, Loss: -0.8931
Epoch 4/10, Loss: -0.9067
Epoch 5/10, Loss: -0.9006
Epoch 6/10, Loss: -0.9224
Epoch 7/10, Loss: -0.9115
Epoch 8/10, Loss: -0.9196
Epoch 9/10, Loss: -0.9191
Epoch 10/10, Loss: -0.9201
Cumulative Return: 0.2299
Annualized Sharpe Ratio: 12.6064


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from torch.utils.tensorboard import SummaryWriter
import time

# Parameters
num_timesteps = 5000  # Total number of time steps
num_assets = 10       # Number of assets in the portfolio
seq_len = 20          # Length of input sequences
d_model = 64          # Model dimension for Transformer
batch_size = 64       # Batch size for training
num_epochs = 5        # Number of training epochs

# Generate synthetic returns data
np.random.seed(42)
returns_data = np.random.normal(0, 0.01, size=(num_timesteps, num_assets)).astype(np.float32)
returns_data = torch.tensor(returns_data)

# Dataset and DataLoader for training
class ReturnsDataset(Dataset):
    """
    Dataset for asset returns.

    Args:
        data (Tensor): Asset returns data of shape (num_timesteps, num_assets).
        seq_len (int): Length of input sequences.
    """
    def __init__(self, data, seq_len):
        self.data = data
        self.seq_len = seq_len

    def __len__(self):
        return len(self.data) - self.seq_len

    def __getitem__(self, idx):
        X = self.data[idx:idx + self.seq_len]
        y = self.data[idx + self.seq_len]
        return X, y

# Split data into training and testing sets
train_ratio = 0.8
train_size = int(len(returns_data) * train_ratio)
train_data = returns_data[:train_size]
test_data = returns_data[train_size:]

train_dataset = ReturnsDataset(train_data, seq_len)
test_dataset = ReturnsDataset(test_data, seq_len)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Loss function: Negative Sharpe Ratio
def sharpe_loss(portfolio_returns, portfolio_weights, prev_weights, transaction_cost=0.0002):
    """
    Computes the negative Sharpe ratio as the loss function, including transaction costs.

    Args:
        portfolio_returns (Tensor): Portfolio returns of shape (batch_size,).
        portfolio_weights (Tensor): Current portfolio weights of shape (batch_size, num_assets).
        prev_weights (Tensor): Previous portfolio weights of shape (batch_size, num_assets).
        transaction_cost (float): Transaction cost rate per unit weight change.

    Returns:
        Tensor: Loss value (negative Sharpe ratio).
    """
    # Compute transaction costs based on weight changes
    tc = transaction_cost * torch.sum(torch.abs(portfolio_weights - prev_weights), dim=1)
    # Net returns after subtracting transaction costs
    net_returns = portfolio_returns - tc
    # Compute mean and standard deviation of returns
    mean_return = torch.mean(net_returns)
    std_return = torch.std(net_returns)
    # Compute Sharpe ratio
    sharpe_ratio = mean_return / (std_return + 1e-6)
    # Return negative Sharpe ratio for minimization
    return -sharpe_ratio

# Base class for models
class BaseModel(nn.Module):
    """
    Base class for asset allocation models.

    Args:
        num_assets (int): Number of assets in the portfolio.
    """
    def __init__(self, num_assets):
        super(BaseModel, self).__init__()
        self.num_assets = num_assets

    def forward(self, x):
        """
        Forward pass.

        Args:
            x (Tensor): Input tensor of shape (batch_size, seq_len, num_assets).

        Returns:
            Tensor: Portfolio weights of shape (batch_size, num_assets).
        """
        raise NotImplementedError

# Logistic Regression Model
class LogisticRegressionModel(BaseModel):
    """
    Logistic Regression Model for asset allocation.

    Args:
        input_size (int): Size of input features (seq_len * num_assets).
        num_assets (int): Number of assets.
    """
    def __init__(self, input_size, num_assets):
        super(LogisticRegressionModel, self).__init__(num_assets)
        self.linear = nn.Linear(input_size, num_assets)

    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten
        out = self.linear(x)
        weights = torch.tanh(out)
        weights = weights / torch.sum(torch.abs(weights), dim=1, keepdim=True)
        return weights

# LSTM Model
class LSTMModel(BaseModel):
    """
    LSTM Model for asset allocation.

    Args:
        num_assets (int): Number of assets.
        hidden_size (int): Size of hidden state.
        num_layers (int): Number of LSTM layers.
    """
    def __init__(self, num_assets, hidden_size=64, num_layers=2):
        super(LSTMModel, self).__init__(num_assets)
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size=num_assets, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_assets)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = out[:, -1, :]  # Last time step
        out = self.fc(out)
        weights = torch.tanh(out)
        weights = weights / torch.sum(torch.abs(weights), dim=1, keepdim=True)
        return weights

# CNN Model
class CNNModel(BaseModel):
    """
    CNN Model for asset allocation.

    Args:
        num_assets (int): Number of assets.
    """
    def __init__(self, num_assets):
        super(CNNModel, self).__init__(num_assets)
        self.conv1 = nn.Conv1d(in_channels=num_assets, out_channels=32, kernel_size=3)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3)
        self.fc = nn.Linear(64, num_assets)

    def forward(self, x):
        x = x.permute(0, 2, 1)  # (batch_size, num_assets, seq_len)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = torch.mean(x, dim=2)  # Global average pooling
        x = self.fc(x)
        weights = torch.tanh(x)
        weights = weights / torch.sum(torch.abs(weights), dim=1, keepdim=True)
        return weights

# Transformer Model
class TransformerModel(BaseModel):
    """
    Transformer Model for asset allocation.

    Args:
        num_assets (int): Number of assets.
        d_model (int): Dimension of the model.
        nhead (int): Number of attention heads.
        num_layers (int): Number of transformer encoder layers.
        seq_len (int): Length of the input sequences.
        dropout (float): Dropout probability.
    """
    def __init__(self, num_assets, d_model=64, nhead=4, num_layers=2, seq_len=20, dropout=0.1):
        super(TransformerModel, self).__init__(num_assets)
        self.d_model = d_model
        self.input_proj = nn.Linear(num_assets, d_model)
        self.pos_encoder = LearnablePositionalEncoding(seq_len, d_model)
        encoder_layers = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)
        self.fc_out = nn.Linear(d_model, num_assets)
        self._init_weights()

    def _init_weights(self):
        initrange = 0.1
        self.input_proj.weight.data.uniform_(-initrange, initrange)
        self.input_proj.bias.data.zero_()
        self.fc_out.bias.data.zero_()
        self.fc_out.weight.data.uniform_(-initrange, initrange)

    def forward(self, x):
        x = self.input_proj(x) * np.sqrt(self.d_model)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x)
        x = x[:, -1, :]  # Last time step
        x = self.fc_out(x)
        weights = torch.tanh(x)
        weights = weights / torch.sum(torch.abs(weights), dim=1, keepdim=True)
        return weights

# Learnable Positional Encoding
class LearnablePositionalEncoding(nn.Module):
    """
    Implements learnable positional encoding for sequences.

    Args:
        seq_len (int): The maximum length of the input sequences.
        d_model (int): The dimension of the model.
    """
    def __init__(self, seq_len, d_model):
        super(LearnablePositionalEncoding, self).__init__()
        self.position_embeddings = nn.Embedding(seq_len, d_model)
        self.seq_len = seq_len

    def forward(self, x):
        positions = torch.arange(0, self.seq_len, device=x.device).unsqueeze(0)
        pos_embed = self.position_embeddings(positions)
        x = x + pos_embed
        return x

# Training function
def train_model(model, train_loader, criterion, optimizer, num_epochs, device):
    """
    Training loop for the model.

    Args:
        model (nn.Module): The model to train.
        train_loader (DataLoader): DataLoader for training data.
        criterion (function): Loss function.
        optimizer (torch.optim.Optimizer): Optimizer.
        num_epochs (int): Number of epochs.
        device (torch.device): Computation device.
    """
    model = model.to(device)
    prev_weights = torch.zeros((batch_size, model.num_assets)).to(device)
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            # Forward pass
            portfolio_weights = model(X_batch)
            # Compute portfolio returns
            portfolio_returns = torch.sum(portfolio_weights * y_batch, dim=1)
            # Compute loss
            loss = criterion(portfolio_returns, portfolio_weights, prev_weights[:portfolio_weights.size(0)])
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            # Update prev_weights
            prev_weights[:portfolio_weights.size(0)] = portfolio_weights.detach()
        avg_loss = total_loss / len(train_loader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.6f}')

# Backtesting function
def backtest_model(model, test_loader, device):
    """
    Backtesting the model on test data.

    Args:
        model (nn.Module): The trained model.
        test_loader (DataLoader): DataLoader for test data.
        device (torch.device): Computation device.
    """
    model = model.to(device)
    model.eval()
    portfolio_returns_history = []
    prev_weights = torch.zeros((1, model.num_assets)).to(device)
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            portfolio_weights = model(X_batch)
            portfolio_returns = torch.sum(portfolio_weights * y_batch, dim=1)
            portfolio_returns_history.extend(portfolio_returns.cpu().numpy())
            # Update prev_weights
            prev_weights = portfolio_weights.detach()
    portfolio_returns = np.array(portfolio_returns_history)
    cumulative_returns = np.cumprod(1 + portfolio_returns) - 1
    mean_return = np.mean(portfolio_returns)
    std_return = np.std(portfolio_returns)
    sharpe_ratio = mean_return / (std_return + 1e-6) * np.sqrt(252)
    print(f"Cumulative Return: {cumulative_returns[-1]:.6f}")
    print(f"Annualized Sharpe Ratio: {sharpe_ratio:.6f}")

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Example usage with Logistic Regression Model
print("\nTraining Logistic Regression Model")
model = LogisticRegressionModel(input_size=seq_len * num_assets, num_assets=num_assets)
optimizer = optim.Adam(model.parameters(), lr=0.001)
train_model(model, train_loader, sharpe_loss, optimizer, num_epochs, device)
backtest_model(model, test_loader, device)

# Example usage with LSTM Model
print("\nTraining LSTM Model")
model = LSTMModel(num_assets=num_assets)
optimizer = optim.Adam(model.parameters(), lr=0.001)
train_model(model, train_loader, sharpe_loss, optimizer, num_epochs, device)
backtest_model(model, test_loader, device)

# Example usage with CNN Model
print("\nTraining CNN Model")
model = CNNModel(num_assets=num_assets)
optimizer = optim.Adam(model.parameters(), lr=0.001)
train_model(model, train_loader, sharpe_loss, optimizer, num_epochs, device)
backtest_model(model, test_loader, device)

# Example usage with Transformer Model
print("\nTraining Transformer Model")
model = TransformerModel(num_assets=num_assets, d_model=d_model, seq_len=seq_len)
optimizer = optim.Adam(model.parameters(), lr=0.001)
train_model(model, train_loader, sharpe_loss, optimizer, num_epochs, device)
backtest_model(model, test_loader, device)

# New Model provided by the user
print("\nTraining Portfolio Transformer Model")
model = PortfolioTransformer(num_assets=num_assets, d_model=d_model, nhead=4, num_layers=2, seq_len=seq_len)
optimizer = optim.Adam(model.parameters(), lr=0.001)
train_model(model, train_loader, sharpe_loss, optimizer, num_epochs, device)
backtest_model(model, test_loader, device)


Training Logistic Regression Model
Epoch [1/5], Loss: 0.009969
Epoch [2/5], Loss: -0.013706
Epoch [3/5], Loss: -0.030145
Epoch [4/5], Loss: -0.055203
Epoch [5/5], Loss: -0.082532
Cumulative Return: -0.071690
Annualized Sharpe Ratio: -0.277367

Training LSTM Model
Epoch [1/5], Loss: 0.015314
Epoch [2/5], Loss: -0.019160
Epoch [3/5], Loss: -0.023031
Epoch [4/5], Loss: -0.035522
Epoch [5/5], Loss: -0.023104
Cumulative Return: -0.001735
Annualized Sharpe Ratio: 0.024882

Training CNN Model
Epoch [1/5], Loss: 0.008797
Epoch [2/5], Loss: -0.006740
Epoch [3/5], Loss: -0.012274
Epoch [4/5], Loss: -0.018750
Epoch [5/5], Loss: -0.021238
Cumulative Return: 0.168103
Annualized Sharpe Ratio: 0.680245

Training Transformer Model
Epoch [1/5], Loss: 0.055456
Epoch [2/5], Loss: 0.000523
Epoch [3/5], Loss: -0.012663
Epoch [4/5], Loss: -0.023768
Epoch [5/5], Loss: -0.035341
Cumulative Return: -0.019282
Annualized Sharpe Ratio: -0.069464

Training Portfolio Transformer Model


RuntimeError: The size of tensor a (20) must match the size of tensor b (64) at non-singleton dimension 1

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from torch.utils.tensorboard import SummaryWriter
import time

# Parameters
num_timesteps = 5000  # Total number of time steps
num_assets = 10       # Number of assets in the portfolio
seq_len = 20          # Length of input sequences
d_model = 64          # Model dimension for Transformer
batch_size = 64       # Batch size for training
num_epochs = 5        # Number of training epochs

# Generate synthetic returns data
np.random.seed(42)
returns_data = np.random.normal(0, 0.01, size=(num_timesteps, num_assets)).astype(np.float32)
returns_data = torch.tensor(returns_data)

# Dataset and DataLoader for training
class ReturnsDataset(Dataset):
    """
    Dataset for asset returns.

    Args:
        data (Tensor): Asset returns data of shape (num_timesteps, num_assets).
        seq_len (int): Length of input sequences.
    """
    def __init__(self, data, seq_len):
        self.data = data
        self.seq_len = seq_len

    def __len__(self):
        return len(self.data) - self.seq_len

    def __getitem__(self, idx):
        X = self.data[idx:idx + self.seq_len]
        y = self.data[idx + self.seq_len]
        return X, y

# Split data into training and testing sets
train_ratio = 0.8
train_size = int(len(returns_data) * train_ratio)
train_data = returns_data[:train_size]
test_data = returns_data[train_size:]

train_dataset = ReturnsDataset(train_data, seq_len)
test_dataset = ReturnsDataset(test_data, seq_len)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)  # Use batch_size=1 for backtesting

# Loss function: Negative Sharpe Ratio
def sharpe_loss(portfolio_returns, portfolio_weights, prev_weights, transaction_cost=0.0002):
    """
    Computes the negative Sharpe ratio as the loss function, including transaction costs.

    Args:
        portfolio_returns (Tensor): Portfolio returns of shape (batch_size,).
        portfolio_weights (Tensor): Current portfolio weights of shape (batch_size, num_assets).
        prev_weights (Tensor): Previous portfolio weights of shape (batch_size, num_assets).
        transaction_cost (float): Transaction cost rate per unit weight change.

    Returns:
        Tensor: Loss value (negative Sharpe ratio).
    """
    # Compute transaction costs based on weight changes
    tc = transaction_cost * torch.sum(torch.abs(portfolio_weights - prev_weights), dim=1)
    # Net returns after subtracting transaction costs
    net_returns = portfolio_returns - tc
    # Compute mean and standard deviation of returns
    mean_return = torch.mean(net_returns)
    std_return = torch.std(net_returns)
    # Compute Sharpe ratio
    sharpe_ratio = mean_return / (std_return + 1e-6)
    # Return negative Sharpe ratio for minimization
    return -sharpe_ratio

# Base class for models
class BaseModel(nn.Module):
    """
    Base class for asset allocation models.

    Args:
        num_assets (int): Number of assets in the portfolio.
    """
    def __init__(self, num_assets):
        super(BaseModel, self).__init__()
        self.num_assets = num_assets

    def forward(self, x):
        """
        Forward pass.

        Args:
            x (Tensor): Input tensor of shape (batch_size, seq_len, num_assets).

        Returns:
            Tensor: Portfolio weights of shape (batch_size, num_assets).
        """
        raise NotImplementedError

# Logistic Regression Model
class LogisticRegressionModel(BaseModel):
    """
    Logistic Regression Model for asset allocation.

    Args:
        input_size (int): Size of input features (seq_len * num_assets).
        num_assets (int): Number of assets.
    """
    def __init__(self, input_size, num_assets):
        super(LogisticRegressionModel, self).__init__(num_assets)
        self.linear = nn.Linear(input_size, num_assets)

    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten
        out = self.linear(x)
        weights = torch.tanh(out)
        weights = weights / torch.sum(torch.abs(weights), dim=1, keepdim=True)
        return weights

# LSTM Model
class LSTMModel(BaseModel):
    """
    LSTM Model for asset allocation.

    Args:
        num_assets (int): Number of assets.
        hidden_size (int): Size of hidden state.
        num_layers (int): Number of LSTM layers.
    """
    def __init__(self, num_assets, hidden_size=64, num_layers=2):
        super(LSTMModel, self).__init__(num_assets)
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size=num_assets, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_assets)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.lstm(x, (h0, c0))
        out = out[:, -1, :]  # Last time step
        out = self.fc(out)
        weights = torch.tanh(out)
        weights = weights / torch.sum(torch.abs(weights), dim=1, keepdim=True)
        return weights

# CNN Model
class CNNModel(BaseModel):
    """
    CNN Model for asset allocation.

    Args:
        num_assets (int): Number of assets.
    """
    def __init__(self, num_assets):
        super(CNNModel, self).__init__(num_assets)
        self.conv1 = nn.Conv1d(in_channels=num_assets, out_channels=32, kernel_size=3)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3)
        self.fc = nn.Linear(64, num_assets)

    def forward(self, x):
        x = x.permute(0, 2, 1)  # (batch_size, num_assets, seq_len)
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = torch.mean(x, dim=2)  # Global average pooling
        x = self.fc(x)
        weights = torch.tanh(x)
        weights = weights / torch.sum(torch.abs(weights), dim=1, keepdim=True)
        return weights

# Transformer Model
class TransformerModel(BaseModel):
    """
    Transformer Model for asset allocation.

    Args:
        num_assets (int): Number of assets.
        d_model (int): Dimension of the model.
        nhead (int): Number of attention heads.
        num_layers (int): Number of transformer encoder layers.
        seq_len (int): Length of the input sequences.
        dropout (float): Dropout probability.
    """
    def __init__(self, num_assets, d_model=64, nhead=4, num_layers=2, seq_len=20, dropout=0.1):
        super(TransformerModel, self).__init__(num_assets)
        self.d_model = d_model
        self.input_proj = nn.Linear(num_assets, d_model)
        self.pos_encoder = LearnablePositionalEncoding(seq_len, d_model)
        encoder_layers = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, num_layers=num_layers)
        self.fc_out = nn.Linear(d_model, num_assets)
        self._init_weights()

    def _init_weights(self):
        initrange = 0.1
        self.input_proj.weight.data.uniform_(-initrange, initrange)
        self.input_proj.bias.data.zero_()
        self.fc_out.bias.data.zero_()
        self.fc_out.weight.data.uniform_(-initrange, initrange)

    def forward(self, x):
        x = self.input_proj(x) * np.sqrt(self.d_model)
        x = self.pos_encoder(x)
        x = x.permute(1, 0, 2)  # (seq_len, batch_size, d_model)
        x = self.transformer_encoder(x)
        x = x.permute(1, 0, 2)  # (batch_size, seq_len, d_model)
        x = x[:, -1, :]  # Last time step
        x = self.fc_out(x)
        weights = torch.tanh(x)
        weights = weights / torch.sum(torch.abs(weights), dim=1, keepdim=True)
        return weights

# Learnable Positional Encoding
class LearnablePositionalEncoding(nn.Module):
    """
    Implements learnable positional encoding for sequences.

    Args:
        seq_len (int): The maximum length of the input sequences.
        d_model (int): The dimension of the model.
    """
    def __init__(self, seq_len, d_model):
        super(LearnablePositionalEncoding, self).__init__()
        self.position_embeddings = nn.Embedding(seq_len, d_model)
        self.seq_len = seq_len

    def forward(self, x):
        positions = torch.arange(0, x.size(1), device=x.device).unsqueeze(0).long()
        pos_embed = self.position_embeddings(positions)
        x = x + pos_embed
        return x

# Portfolio Transformer Model
class PortfolioTransformer(BaseModel):
    """
    Portfolio Transformer model for asset allocation.

    Args:
        num_assets (int): Number of assets in the portfolio.
        d_model (int): Dimension of the model.
        nhead (int): Number of attention heads.
        num_layers (int): Number of transformer encoder layers.
        seq_len (int): Length of the input sequences.
        dropout (float): Dropout probability.
    """
    def __init__(
        self, num_assets, d_model, nhead, num_layers, seq_len, dropout=0.1
    ):
        super(PortfolioTransformer, self).__init__(num_assets)
        self.d_model = d_model

        # Input projection layer to map asset returns to model dimension
        self.input_proj = nn.Linear(num_assets, d_model)

        # Learnable positional encoding
        self.pos_encoder = LearnablePositionalEncoding(seq_len, d_model)

        # Transformer encoder layers
        encoder_layers = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dropout=dropout
        )
        self.transformer_encoder = nn.TransformerEncoder(
            encoder_layers, num_layers=num_layers
        )

        # Output layer to map back to asset space
        self.fc_out = nn.Linear(d_model, num_assets)

        # Initialize weights
        self._init_weights()

    def _init_weights(self):
        """
        Initializes weights for better convergence.
        """
        initrange = 0.1
        self.input_proj.weight.data.uniform_(-initrange, initrange)
        self.input_proj.bias.data.zero_()
        self.fc_out.bias.data.zero_()
        self.fc_out.weight.data.uniform_(-initrange, initrange)

    def forward(self, x):
        """
        Forward pass of the model.

        Args:
            x (Tensor): Input tensor of shape (batch_size, seq_len, num_assets).

        Returns:
            Tensor: Portfolio weights of shape (batch_size, num_assets).
        """
        # Project input to model dimension and scale
        x = self.input_proj(x) * np.sqrt(self.d_model)
        # Add positional encoding
        x = self.pos_encoder(x)
        # Permute dimensions to match expected input of TransformerEncoder
        x = x.permute(1, 0, 2)  # (seq_len, batch_size, d_model)
        # Pass through transformer encoder
        x = self.transformer_encoder(x)
        # Permute back to (batch_size, seq_len, d_model)
        x = x.permute(1, 0, 2)
        # Get the last time step
        x = x[:, -1, :]  # (batch_size, d_model)
        # Map back to asset space
        s_i_t = self.fc_out(x)  # Shape: (batch_size, num_assets)
        # Apply activation and normalize weights
        weights = torch.tanh(s_i_t)
        weights = weights / torch.sum(torch.abs(weights), dim=1, keepdim=True)
        return weights

# Training function
def train_model(model, train_loader, criterion, optimizer, num_epochs, device):
    """
    Training loop for the model.

    Args:
        model (nn.Module): The model to train.
        train_loader (DataLoader): DataLoader for training data.
        criterion (function): Loss function.
        optimizer (torch.optim.Optimizer): Optimizer.
        num_epochs (int): Number of epochs.
        device (torch.device): Computation device.
    """
    model = model.to(device)
    prev_weights = torch.zeros((batch_size, model.num_assets)).to(device)
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            optimizer.zero_grad()
            # Forward pass
            portfolio_weights = model(X_batch)
            # Compute portfolio returns
            portfolio_returns = torch.sum(portfolio_weights * y_batch, dim=1)
            # Compute loss
            loss = criterion(portfolio_returns, portfolio_weights, prev_weights[:portfolio_weights.size(0)])
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            # Update prev_weights
            prev_weights[:portfolio_weights.size(0)] = portfolio_weights.detach()
        avg_loss = total_loss / len(train_loader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.6f}')

# Backtesting function
def backtest_model(model, test_loader, device):
    """
    Backtesting the model on test data.

    Args:
        model (nn.Module): The trained model.
        test_loader (DataLoader): DataLoader for test data.
        device (torch.device): Computation device.
    """
    model = model.to(device)
    model.eval()
    portfolio_returns_history = []
    prev_weights = torch.zeros((1, model.num_assets)).to(device)
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)
            portfolio_weights = model(X_batch)
            portfolio_returns = torch.sum(portfolio_weights * y_batch, dim=1)
            portfolio_returns_history.extend(portfolio_returns.cpu().numpy())
            # Update prev_weights
            prev_weights = portfolio_weights.detach()
    portfolio_returns = np.array(portfolio_returns_history)
    cumulative_returns = np.cumprod(1 + portfolio_returns) - 1
    mean_return = np.mean(portfolio_returns)
    std_return = np.std(portfolio_returns)
    sharpe_ratio = mean_return / (std_return + 1e-6) * np.sqrt(252)
    print(f"Cumulative Return: {cumulative_returns[-1]:.6f}")
    print(f"Annualized Sharpe Ratio: {sharpe_ratio:.6f}")

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Example usage with Logistic Regression Model
print("\nTraining Logistic Regression Model")
model = LogisticRegressionModel(input_size=seq_len * num_assets, num_assets=num_assets)
optimizer = optim.Adam(model.parameters(), lr=0.001)
train_model(model, train_loader, sharpe_loss, optimizer, num_epochs, device)
backtest_model(model, test_loader, device)

# Example usage with LSTM Model
print("\nTraining LSTM Model")
model = LSTMModel(num_assets=num_assets)
optimizer = optim.Adam(model.parameters(), lr=0.001)
train_model(model, train_loader, sharpe_loss, optimizer, num_epochs, device)
backtest_model(model, test_loader, device)

# Example usage with CNN Model
print("\nTraining CNN Model")
model = CNNModel(num_assets=num_assets)
optimizer = optim.Adam(model.parameters(), lr=0.001)
train_model(model, train_loader, sharpe_loss, optimizer, num_epochs, device)
backtest_model(model, test_loader, device)

# Example usage with Transformer Model
print("\nTraining Transformer Model")
model = TransformerModel(num_assets=num_assets, d_model=d_model, seq_len=seq_len)
optimizer = optim.Adam(model.parameters(), lr=0.001)
train_model(model, train_loader, sharpe_loss, optimizer, num_epochs, device)
backtest_model(model, test_loader, device)

# New Model provided by the user
print("\nTraining Portfolio Transformer Model")
model = PortfolioTransformer(num_assets=num_assets, d_model=d_model, nhead=4, num_layers=2, seq_len=seq_len)
optimizer = optim.Adam(model.parameters(), lr=0.001)
train_model(model, train_loader, sharpe_loss, optimizer, num_epochs, device)
backtest_model(model, test_loader, device)


Training Logistic Regression Model
Epoch [1/5], Loss: 0.011724
Epoch [2/5], Loss: -0.026207
Epoch [3/5], Loss: -0.050812
Epoch [4/5], Loss: -0.090326
Epoch [5/5], Loss: -0.241300
Cumulative Return: 0.009765
Annualized Sharpe Ratio: 0.071672

Training LSTM Model
Epoch [1/5], Loss: 0.033284
Epoch [2/5], Loss: 0.007168


KeyboardInterrupt: 

In [None]:
!pip list

Package                            Version
---------------------------------- --------------------
absl-py                            1.4.0
accelerate                         0.34.2
aiohappyeyeballs                   2.4.3
aiohttp                            3.10.10
aiosignal                          1.3.1
alabaster                          0.7.16
albucore                           0.0.16
albumentations                     1.4.15
altair                             4.2.2
annotated-types                    0.7.0
anyio                              3.7.1
argon2-cffi                        23.1.0
argon2-cffi-bindings               21.2.0
array_record                       0.5.1
arviz                              0.19.0
astropy                            6.1.4
astropy-iers-data                  0.2024.10.14.0.32.55
astunparse                         1.6.3
async-timeout                      4.0.3
atpublic                           4.1.0
attrs                              24.2.0
audioread      

In [None]:
# torch_timeseriues

In [None]:
!pip freeze

absl-py==1.4.0
accelerate==0.34.2
aiohappyeyeballs==2.4.3
aiohttp==3.10.10
aiosignal==1.3.1
alabaster==0.7.16
albucore==0.0.16
albumentations==1.4.15
altair==4.2.2
annotated-types==0.7.0
anyio==3.7.1
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
array_record==0.5.1
arviz==0.19.0
astropy==6.1.4
astropy-iers-data==0.2024.10.14.0.32.55
astunparse==1.6.3
async-timeout==4.0.3
atpublic==4.1.0
attrs==24.2.0
audioread==3.0.1
autograd==1.7.0
babel==2.16.0
backcall==0.2.0
beautifulsoup4==4.12.3
bigframes==1.22.0
bigquery-magics==0.4.0
bleach==6.1.0
blinker==1.4
blis==0.7.11
blosc2==2.0.0
bokeh==3.4.3
Bottleneck==1.4.1
bqplot==0.12.43
branca==0.8.0
CacheControl==0.14.0
cachetools==5.5.0
catalogue==2.0.10
certifi==2024.8.30
cffi==1.17.1
chardet==5.2.0
charset-normalizer==3.4.0
chex==0.1.87
clarabel==0.9.0
click==8.1.7
cloudpathlib==0.19.0
cloudpickle==3.1.0
cmake==3.30.4
cmdstanpy==1.2.4
colorcet==3.1.0
colorlover==0.3.0
colour==0.1.5
community==1.0.0b1
confection==0.1.5
cons==0.4.6
contourpy==

In [None]:
!pip install pytorch_forecasting pytorch_lightning torchcde

Collecting pytorch_forecasting
  Downloading pytorch_forecasting-1.1.1-py3-none-any.whl.metadata (13 kB)
Collecting pytorch_lightning
  Downloading pytorch_lightning-2.4.0-py3-none-any.whl.metadata (21 kB)
Collecting torchcde
  Downloading torchcde-0.2.5-py3-none-any.whl.metadata (18 kB)
Collecting lightning<3.0.0,>=2.0.0 (from pytorch_forecasting)
  Downloading lightning-2.4.0-py3-none-any.whl.metadata (38 kB)
Collecting torchmetrics>=0.7.0 (from pytorch_lightning)
  Downloading torchmetrics-1.5.0-py3-none-any.whl.metadata (20 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch_lightning)
  Downloading lightning_utilities-0.11.8-py3-none-any.whl.metadata (5.2 kB)
Collecting torchdiffeq>=0.2.0 (from torchcde)
  Downloading torchdiffeq-0.2.4-py3-none-any.whl.metadata (440 bytes)
Collecting torchsde>=0.2.5 (from torchcde)
  Downloading torchsde-0.2.6-py3-none-any.whl.metadata (5.3 kB)
Collecting trampoline>=0.1.2 (from torchsde>=0.2.5->torchcde)
  Downloading trampoline-0.1.2-py3-no

In [None]:
import torch
from pytorch_forecasting.models.base_model import BaseModel
from pytorch_forecasting.metrics import RMSE

class AutoRegressiveModel(BaseModel):
    """
    Simple Autoregressive model using a Linear layer.
    """

    def __init__(self, input_size: int, output_size: int = 1, **kwargs):
        super().__init__(**kwargs)
        self.linear = torch.nn.Linear(input_size, output_size)
        self.save_hyperparameters()

    def forward(self, x):
        # x["encoder_cont"] shape: (batch_size, encoder_length, input_size)
        encoder_output = x["encoder_cont"][:, -1, :]  # Use last time step
        prediction = self.linear(encoder_output)
        return self.to_network_output(prediction=prediction)

    @classmethod
    def from_dataset(cls, dataset, **kwargs):
        new_kwargs = {
            "input_size": len(dataset.reals),
            "loss": RMSE(),
        }
        new_kwargs.update(kwargs)
        return super().from_dataset(dataset, **new_kwargs)

In [None]:
import torch
from pytorch_forecasting.models.base_model import BaseModelWithCovariates
from pytorch_forecasting.metrics import RMSE

class LSTMForecaster(BaseModelWithCovariates):
    """
    LSTM-based Forecaster.
    """

    def __init__(self, input_size: int, hidden_size: int, num_layers: int, dropout: float = 0.0, **kwargs):
        super().__init__(**kwargs)
        self.lstm = torch.nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.output_layer = torch.nn.Linear(hidden_size, 1)
        self.save_hyperparameters()

    def forward(self, x):
        # Concatenate continuous and categorical variables
        encoder_input = x["encoder_cont"]  # Shape: (batch_size, seq_len, input_size)
        output, (hidden, cell) = self.lstm(encoder_input)
        # Use the last hidden state
        hidden_last = hidden[-1]
        prediction = self.output_layer(hidden_last)
        return self.to_network_output(prediction=prediction)

    @classmethod
    def from_dataset(cls, dataset, **kwargs):
        new_kwargs = {
            "input_size": len(dataset.reals),
            "loss": RMSE(),
        }
        new_kwargs.update(kwargs)
        return super().from_dataset(dataset, **new_kwargs)

In [None]:
import torch
from pytorch_forecasting.models.base_model import BaseModelWithCovariates
from pytorch_forecasting.metrics import RMSE
from torch.nn.utils import weight_norm

class Chomp1d(torch.nn.Module):
    def __init__(self, chomp_size):
        super(Chomp1d, self).__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        return x[:, :, :-self.chomp_size].contiguous()

class TemporalBlock(torch.nn.Module):
    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
        super(TemporalBlock, self).__init__()
        self.conv1 = weight_norm(torch.nn.Conv1d(n_inputs, n_outputs, kernel_size,
                                                 stride=stride, padding=padding, dilation=dilation))
        self.chomp1 = Chomp1d(padding)
        self.relu1 = torch.nn.ReLU()
        self.dropout1 = torch.nn.Dropout(dropout)

        self.net = torch.nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1)
        self.downsample = torch.nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
        self.relu = torch.nn.ReLU()
        self.init_weights()

    def init_weights(self):
        self.conv1.weight.data.normal_(0, 0.01)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 0.01)

    def forward(self, x):
        out = self.net(x)
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)

class TemporalConvNet(torch.nn.Module):
    def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
        super(TemporalConvNet, self).__init__()
        layers = []
        num_levels = len(num_channels)
        for i in range(num_levels):
            dilation_size = 2 ** i
            in_channels = num_inputs if i == 0 else num_channels[i-1]
            out_channels = num_channels[i]
            layers += [TemporalBlock(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size,
                                     padding=(kernel_size-1) * dilation_size, dropout=dropout)]
        self.network = torch.nn.Sequential(*layers)

    def forward(self, x):
        return self.network(x)

class TCNForecaster(BaseModelWithCovariates):
    """
    Temporal Convolutional Network Forecaster.
    """

    def __init__(self, input_size: int, num_channels: list, kernel_size: int = 2, dropout: float = 0.2, **kwargs):
        super().__init__(**kwargs)
        self.tcn = TemporalConvNet(input_size, num_channels, kernel_size=kernel_size, dropout=dropout)
        self.output_layer = torch.nn.Linear(num_channels[-1], 1)
        self.save_hyperparameters()

    def forward(self, x):
        # x["encoder_cont"] shape: (batch_size, seq_len, input_size)
        encoder_input = x["encoder_cont"].permute(0, 2, 1)  # Reshape to (batch_size, input_size, seq_len)
        tcn_output = self.tcn(encoder_input)
        # Use the last time step
        last_output = tcn_output[:, :, -1]
        prediction = self.output_layer(last_output)
        return self.to_network_output(prediction=prediction)

    @classmethod
    def from_dataset(cls, dataset, **kwargs):
        new_kwargs = {
            "input_size": len(dataset.reals),
            "loss": RMSE(),
        }
        new_kwargs.update(kwargs)
        return super().from_dataset(dataset, **new_kwargs)

In [None]:
import torch
from pytorch_forecasting.models.base_model import BaseModel
from pytorch_forecasting.metrics import RMSE

class ProphetLikeModel(BaseModel):
    """
    Simplified Prophet-like model capturing trend and seasonality.
    """

    def __init__(self, seasonality: int, **kwargs):
        super().__init__(**kwargs)
        self.trend = torch.nn.Linear(1, 1)
        self.seasonality = torch.nn.Linear(seasonality, 1)
        self.save_hyperparameters()

    def forward(self, x):
        time = x["encoder_cont"][:, :, 0]  # Assuming the first real variable is time
        time = time.unsqueeze(-1)
        trend = self.trend(time)

        seasonal_features = x["encoder_cont"][:, :, 1:self.hparams.seasonality+1]
        seasonality = self.seasonality(seasonal_features)

        prediction = trend + seasonality
        # Use the last time step
        prediction = prediction[:, -1, :]
        return self.to_network_output(prediction=prediction)

    @classmethod
    def from_dataset(cls, dataset, **kwargs):
        new_kwargs = {
            "loss": RMSE(),
            "seasonality": 10,  # Example value, adjust as needed
        }
        new_kwargs.update(kwargs)
        return super().from_dataset(dataset, **new_kwargs)

In [None]:
class AssetAllocationWrapper:
    """
    Wrapper to train models for asset allocation based on returns.
    """

    def __init__(self, model_name, max_epochs=30):
        """
        Initialize the wrapper.

        Args:
            model_name (str): Name of the model to use ('TFT', 'RNN', 'DeepAR', 'NBeats', 'NHiTS', 'Baseline', 'AutoRegressive', 'LSTM', 'TCN', 'ProphetLike').
            max_epochs (int): Maximum number of training epochs.
        """
        self.model_name = model_name
        self.max_epochs = max_epochs
        self.model = None

    def prepare_data(self, df, time_idx, target, group_ids):
        """
        Prepare dataset for asset allocation.

        Args:
            df (pd.DataFrame): Dataset containing returns.
            time_idx (str): Name of the time index column.
            target (str): Name of the target column (e.g., returns).
            group_ids (list): List of group identifier column names (e.g., securities).
        """
        # Ensure time_idx is integer and sort data
        df[time_idx] = pd.to_datetime(df[time_idx])
        df.sort_values(by=[group_ids[0], time_idx], inplace=True)
        df[time_idx] = df[time_idx].dt.strftime('%Y%m%d').astype(int)

        # Create TimeSeriesDataSet
        max_encoder_length = 30
        max_prediction_length = 7

        self.training_cutoff = df[time_idx].max() - max_prediction_length

        self.training = TimeSeriesDataSet(
            df[df[time_idx] <= self.training_cutoff],
            time_idx=time_idx,
            target=target,
            group_ids=group_ids,
            max_encoder_length=max_encoder_length,
            max_prediction_length=max_prediction_length,
            static_categoricals=group_ids,
            time_varying_known_reals=[time_idx],
            time_varying_unknown_reals=[target],
            target_normalizer=GroupNormalizer(groups=group_ids),
            allow_missings=True,
        )

        self.validation = TimeSeriesDataSet.from_dataset(
            self.training, df[df[time_idx] > self.training_cutoff], min_prediction_idx=self.training_cutoff + 1
        )

        self.batch_size = 64
        self.train_dataloader = DataLoader(self.training, batch_size=self.batch_size, shuffle=True)
        self.val_dataloader = DataLoader(self.validation, batch_size=self.batch_size)

    def fit(self):
        """
        Fit the selected model.
        """
        # Choose the model
        if self.model_name == 'TFT':
            self.model = TemporalFusionTransformer.from_dataset(
                self.training,
                learning_rate=0.03,
                hidden_size=16,
                attention_head_size=1,
                dropout=0.1,
                hidden_continuous_size=8,
                loss=QuantileLoss(),
                log_interval=10,
                reduce_on_plateau_patience=4,
            )
        elif self.model_name == 'RNN':
            self.model = RecurrentNetwork.from_dataset(
                self.training,
                learning_rate=0.03,
                hidden_size=16,
                rnn_layers=2,
                dropout=0.1,
                loss=RMSE(),
            )
        elif self.model_name == 'DeepAR':
            self.model = DeepAR.from_dataset(
                self.training,
                learning_rate=0.03,
                rnn_layers=2,
                hidden_size=16,
                dropout=0.1,
            )
        elif self.model_name == 'NBeats':
            self.model = NBeats.from_dataset(
                self.training,
                learning_rate=0.03,
            )
        elif self.model_name == 'NHiTS':
            self.model = NHiTS.from_dataset(
                self.training,
                learning_rate=0.03,
            )
        elif self.model_name == 'Baseline':
            self.model = Baseline()
        elif self.model_name == 'AutoRegressive':
            self.model = AutoRegressiveModel.from_dataset(
                self.training,
                learning_rate=0.03,
            )
        elif self.model_name == 'LSTM':
            self.model = LSTMForecaster.from_dataset(
                self.training,
                learning_rate=0.03,
                hidden_size=16,
                num_layers=2,
                dropout=0.1,
            )
        elif self.model_name == 'TCN':
            self.model = TCNForecaster.from_dataset(
                self.training,
                learning_rate=0.03,
                num_channels=[16]*3,
                kernel_size=2,
                dropout=0.1,
            )
        elif self.model_name == 'ProphetLike':
            self.model = ProphetLikeModel.from_dataset(
                self.training,
                learning_rate=0.03,
                seasonality=10,
            )
        else:
            raise ValueError(f"Model {self.model_name} is not supported.")

        # Trainer
        early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=5, verbose=False, mode="min")
        trainer = pl.Trainer(
            max_epochs=self.max_epochs,
            callbacks=[early_stop_callback],
            gradient_clip_val=0.1,
        )

        # Fit the model
        trainer.fit(self.model, train_dataloaders=self.train_dataloader, val_dataloaders=self.val_dataloader)

    # The predict and backtest methods remain the same.

In [None]:
# Include all the necessary imports and classes from previous code blocks
import pandas as pd
import numpy as np
# Generate synthetic data for demonstration
def generate_synthetic_data(num_securities=10, num_days=500):
    dates = pd.date_range(start='2020-01-01', periods=num_days, freq='D')
    data = []
    for security_id in range(num_securities):
        price = 100 + np.cumsum(np.random.randn(num_days) * 0.5)
        returns = np.diff(price) / price[:-1]
        returns = np.insert(returns, 0, 0)  # Insert zero return for the first day
        df = pd.DataFrame({
            'date': dates,
            'security_id': f'sec_{security_id}',
            'price': price,
            'returns': returns,
        })
        data.append(df)
    df = pd.concat(data)
    return df

# Generate data
df = generate_synthetic_data()

# Initialize the wrapper
wrapper = AssetAllocationWrapper(model_name='Signature', max_epochs=10) # add , max_leverage=2.0)
wrapper.prepare_data(df, time_idx='date', target='returns', group_ids=['security_id'])
wrapper.fit()
backtest_results = wrapper.backtest()
print(backtest_results.head())

NameError: name 'TimeSeriesDataSet' is not defined

In [None]:
# Assuming df is your dataset with columns: 'time', 'group_id', 'returns'
wrapper = AssetAllocationWrapper(model_name='LSTM', max_epochs=20)
wrapper.prepare_data(df, time_idx='time', target='returns', group_ids=['group_id'])
wrapper.fit()
backtest_results = wrapper.backtest()
print(backtest_results.head())

NameError: name 'df' is not defined

In [None]:
## pytorch_forecdasting

In [None]:
import torch
import torchcde
from pytorch_forecasting.models.base_model import BaseModelWithCovariates
from pytorch_forecasting.metrics import RMSE

class SignatureModel(BaseModelWithCovariates):
    """
    Signature-based model using Neural Controlled Differential Equations (Neural CDEs).
    """
    def __init__(self, input_size: int, hidden_size: int, output_size: int = 1, **kwargs):
        super().__init__(**kwargs)
        self.hidden_size = hidden_size
        # Define the vector field for the CDE
        self.func = torch.nn.Sequential(
            torch.nn.Linear(hidden_size, hidden_size),
            torch.nn.Tanh(),
            torch.nn.Linear(hidden_size, hidden_size * input_size)
        )
        self.initial_linear = torch.nn.Linear(input_size, hidden_size)
        self.readout = torch.nn.Linear(hidden_size, output_size)
        self.save_hyperparameters()

    def forward(self, x):
        """
        Forward pass of the model.

        Args:
            x (Dict[str, Tensor]): Input dictionary from TimeSeriesDataSet.

        Returns:
            Prediction output.
        """
        # x["encoder_cont"] shape: (batch_size, seq_len, input_size)
        batch_size, seq_len, input_size = x["encoder_cont"].shape
        # Create continuous path using cubic splines
        coeffs = torchcde.hermite_cubic_coefficients_with_backward_differences(x["encoder_cont"])
        X = torchcde.CubicSpline(coeffs)
        # Initial hidden state
        z0 = self.initial_linear(X.evaluate(X.interval[0]))
        # Solve the CDE
        z_T = torchcde.cdeint(X=X, func=self.func, z0=z0, t=X.interval)
        # z_T shape: (batch_size, 2, hidden_size)
        # We take the last time point
        z_T = z_T[:, -1, :]
        prediction = self.readout(z_T)
        return self.to_network_output(prediction=prediction)

    @classmethod
    def from_dataset(cls, dataset, **kwargs):
        """
        Create the model from a TimeSeriesDataSet.

        Args:
            dataset (TimeSeriesDataSet): The dataset used for training.
            **kwargs: Additional arguments.

        Returns:
            SignatureModel instance.
        """
        new_kwargs = {
            "input_size": len(dataset.reals),
            "loss": RMSE(),
        }
        new_kwargs.update(kwargs)
        return super().from_dataset(dataset, **new_kwargs)

In [None]:
import pytorch_lightning as pl
from pytorch_forecasting import (
    TimeSeriesDataSet,
    Baseline,
    TemporalFusionTransformer,
    RecurrentNetwork,
    DeepAR,
    NBeats,
    NHiTS,
)
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import RMSE, QuantileLoss
from pytorch_lightning.callbacks import EarlyStopping
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
import torch

# Include the SignatureModel class code here (from previous code block)

class AssetAllocationWrapper:
    """
    Wrapper to train models for asset allocation based on returns.
    """

    def __init__(self, model_name, max_epochs=30, max_leverage=2.0):
        """
        Initialize the wrapper.

        Args:
            model_name (str): Name of the model to use.
            max_epochs (int): Maximum number of training epochs.
            max_leverage (float): Maximum leverage allowed.
        """
        self.model_name = model_name
        self.max_epochs = max_epochs
        self.max_leverage = max_leverage
        self.model = None

    def prepare_data(self, df, time_idx, target, group_ids):
        """
        Prepare dataset for asset allocation.

        Args:
            df (pd.DataFrame): Dataset containing returns.
            time_idx (str): Name of the time index column.
            target (str): Name of the target column (e.g., returns).
            group_ids (list): List of group identifier column names (e.g., securities).
        """
        # Ensure time_idx is integer and sort data
        df[time_idx] = pd.to_datetime(df[time_idx])
        df.sort_values(by=[group_ids[0], time_idx], inplace=True)
        df[time_idx] = df[time_idx].dt.strftime('%Y%m%d').astype(int)

        # Create TimeSeriesDataSet
        max_encoder_length = 30
        max_prediction_length = 7

        self.training_cutoff = df[time_idx].max() - max_prediction_length

        self.training = TimeSeriesDataSet(
            df[df[time_idx] <= self.training_cutoff],
            time_idx=time_idx,
            target=target,
            group_ids=group_ids,
            max_encoder_length=max_encoder_length,
            max_prediction_length=max_prediction_length,
            static_categoricals=group_ids,
            time_varying_known_reals=[time_idx],
            time_varying_unknown_reals=[target],
            target_normalizer=GroupNormalizer(groups=group_ids),
            allow_missings=True,
        )

        self.validation = TimeSeriesDataSet.from_dataset(
            self.training, df[df[time_idx] > self.training_cutoff], min_prediction_idx=self.training_cutoff + 1
        )

        self.batch_size = 64
        self.train_dataloader = DataLoader(self.training, batch_size=self.batch_size, shuffle=True)
        self.val_dataloader = DataLoader(self.validation, batch_size=self.batch_size)

    def fit(self):
        """
        Fit the selected model.
        """
        # Choose the model
        if self.model_name == 'TFT':
            self.model = TemporalFusionTransformer.from_dataset(
                self.training,
                learning_rate=0.03,
                hidden_size=16,
                attention_head_size=1,
                dropout=0.1,
                hidden_continuous_size=8,
                loss=QuantileLoss(),
                log_interval=10,
                reduce_on_plateau_patience=4,
            )
        elif self.model_name == 'RNN':
            self.model = RecurrentNetwork.from_dataset(
                self.training,
                learning_rate=0.03,
                hidden_size=16,
                rnn_layers=2,
                dropout=0.1,
                loss=RMSE(),
            )
        elif self.model_name == 'DeepAR':
            self.model = DeepAR.from_dataset(
                self.training,
                learning_rate=0.03,
                rnn_layers=2,
                hidden_size=16,
                dropout=0.1,
            )
        elif self.model_name == 'NBeats':
            self.model = NBeats.from_dataset(
                self.training,
                learning_rate=0.03,
            )
        elif self.model_name == 'NHiTS':
            self.model = NHiTS.from_dataset(
                self.training,
                learning_rate=0.03,
            )
        elif self.model_name == 'Baseline':
            self.model = Baseline()
        elif self.model_name == 'AutoRegressive':
            self.model = AutoRegressiveModel.from_dataset(
                self.training,
                learning_rate=0.03,
            )
        elif self.model_name == 'LSTM':
            self.model = LSTMForecaster.from_dataset(
                self.training,
                learning_rate=0.03,
                hidden_size=16,
                num_layers=2,
                dropout=0.1,
            )
        elif self.model_name == 'TCN':
            self.model = TCNForecaster.from_dataset(
                self.training,
                learning_rate=0.03,
                num_channels=[16]*3,
                kernel_size=2,
                dropout=0.1,
            )
        elif self.model_name == 'ProphetLike':
            self.model = ProphetLikeModel.from_dataset(
                self.training,
                learning_rate=0.03,
                seasonality=10,
            )
        elif self.model_name == 'Signature':
            self.model = SignatureModel.from_dataset(
                self.training,
                learning_rate=0.03,
                hidden_size=32,
            )
        else:
            raise ValueError(f"Model {self.model_name} is not supported.")

        # Trainer
        early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=5, verbose=False, mode="min")
        trainer = pl.Trainer(
            max_epochs=self.max_epochs,
            callbacks=[early_stop_callback],
            gradient_clip_val=0.1,
        )

        # Fit the model
        trainer.fit(self.model, train_dataloaders=self.train_dataloader, val_dataloaders=self.val_dataloader)

    def predict(self, df):
        """
        Make predictions on new data.

        Args:
            df (pd.DataFrame): DataFrame containing new data.

        Returns:
            pd.DataFrame: DataFrame with predictions.
        """
        # Prepare dataset
        raw_predictions, x = self.model.predict(df, mode="raw", return_x=True)
        predictions = self.model.to_prediction(raw_predictions)

        # Combine predictions with input data
        df_pred = df.copy()
        df_pred['prediction'] = predictions
        return df_pred

    def backtest(self):
        """
        Perform backtesting using the validation set.

        Returns:
            pd.DataFrame: DataFrame with backtesting predictions.
        """
        actuals = torch.cat([y[0] for x, y in iter(self.val_dataloader)])
        predictions = self.model.predict(self.val_dataloader)
        df_backtest = pd.DataFrame({
            'actual': actuals.numpy().flatten(),
            'prediction': predictions.numpy().flatten(),
        })

        # Adjust leverage based on confidence
        df_backtest['confidence'] = self.calculate_confidence(df_backtest)
        df_backtest['leverage'] = df_backtest['confidence'] * self.max_leverage
        df_backtest['leverage'] = df_backtest['leverage'].clip(0, self.max_leverage)

        # Apply leverage to predictions
        df_backtest['adjusted_prediction'] = df_backtest['prediction'] * df_backtest['leverage']
        return df_backtest

    def calculate_confidence(self, df):
        """
        Calculate confidence level based on prediction errors.

        Args:
            df (pd.DataFrame): DataFrame with 'actual' and 'prediction' columns.

        Returns:
            pd.Series: Confidence levels between 0 and 1.
        """
        errors = np.abs(df['actual'] - df['prediction'])
        max_error = errors.max()
        confidence = 1 - (errors / (max_error + 1e-6))
        return confidence

In [None]:
def prepare_data(self, df, time_idx, target, group_ids):
    """
    Prepare dataset for asset allocation.

    Args:
        df (pd.DataFrame): Dataset containing returns.
        time_idx (str): Name of the time index column.
        target (str): Name of the target column (e.g., returns).
        group_ids (list): List of group identifier column names (e.g., securities).
    """
    # Ensure time_idx is integer and sort data
    df[time_idx] = pd.to_datetime(df[time_idx])
    df.sort_values(by=[group_ids[0], time_idx], inplace=True)
    df[time_idx] = df[time_idx].dt.strftime('%Y%m%d').astype(int)

    # Create TimeSeriesDataSet
    max_encoder_length = 30
    max_prediction_length = 7

    self.training_cutoff = df[time_idx].max() - max_prediction_length

    self.training = TimeSeriesDataSet(
        df[df[time_idx] <= self.training_cutoff],
        time_idx=time_idx,
        target=target,
        group_ids=group_ids,
        max_encoder_length=max_encoder_length,
        max_prediction_length=max_prediction_length,
        static_categoricals=group_ids,
        time_varying_known_reals=[time_idx],
        time_varying_unknown_reals=[target],
        target_normalizer=GroupNormalizer(groups=group_ids),
        allow_missing_timesteps=True,  # Corrected argument name
    )

    self.validation = TimeSeriesDataSet.from_dataset(
        self.training, df[df[time_idx] > self.training_cutoff], min_prediction_idx=self.training_cutoff + 1
    )

    self.batch_size = 64
    self.train_dataloader = DataLoader(self.training, batch_size=self.batch_size, shuffle=True)
    self.val_dataloader = DataLoader(self.validation, batch_size=self.batch_size)

In [None]:
class AssetAllocationWrapper:
    """
    Wrapper to train models for asset allocation based on returns.
    """

    """
    Wrapper to train models for asset allocation based on returns.
    """

    def __init__(self, model_name, max_epochs=30, max_leverage=2.0):
        """
        Initialize the wrapper.

        Args:
            model_name (str): Name of the model to use.
            max_epochs (int): Maximum number of training epochs.
            max_leverage (float): Maximum leverage allowed.
        """
        self.model_name = model_name
        self.max_epochs = max_epochs
        self.max_leverage = max_leverage
        self.model = None

    def prepare_data(self, df, time_idx, target, group_ids):
        """
        Prepare dataset for asset allocation.
        """
        # Ensure time_idx is datetime and sort data
        df[time_idx] = pd.to_datetime(df[time_idx])
        df.sort_values(by=group_ids + [time_idx], inplace=True)

        # Assign a unique sequential integer time index within each group
        df['time_idx'] = df.groupby(group_ids).cumcount()

        # Reindex each group to fill missing time_idx
        df_list = []
        grouped = df.groupby(group_ids)
        for name, group in grouped:
            idx = pd.Index(range(group['time_idx'].min(), group['time_idx'].max() + 1), name='time_idx')
            group = group.set_index('time_idx').reindex(idx).reset_index()
            if isinstance(name, tuple):
                for i, gid in enumerate(group_ids):
                    group[gid] = name[i]
            else:
                group[group_ids[0]] = name
            # Fill missing values as needed
            group[target] = group[target].fillna(0)  # Or use appropriate method
            df_list.append(group)
        df = pd.concat(df_list).reset_index(drop=True)

        # Now, time_idx should be consecutive within each group
        max_encoder_length = 20  # Reduced from 30
        max_prediction_length = 5  # Reduced from 7

        self.training_cutoff = df['time_idx'].max() - max_prediction_length

        self.training = TimeSeriesDataSet(
            df[df['time_idx'] <= self.training_cutoff],
            time_idx='time_idx',
            target=target,
            group_ids=group_ids,
            max_encoder_length=max_encoder_length,
            min_encoder_length=10,
            max_prediction_length=max_prediction_length,
            min_prediction_length=1,
            static_categoricals=group_ids,
            time_varying_known_reals=['time_idx'],
            time_varying_unknown_reals=[target],
            target_normalizer=GroupNormalizer(groups=group_ids),
            allow_missing_timesteps=True,
        )

        self.validation = TimeSeriesDataSet.from_dataset(
            self.training, df[df['time_idx'] > self.training_cutoff], min_prediction_idx=self.training_cutoff + 1
        )

        self.batch_size = 64
        self.train_dataloader = DataLoader(self.training, batch_size=self.batch_size, shuffle=True)
        self.val_dataloader = DataLoader(self.validation, batch_size=self.batch_size)
    def fit(self):
        """
        Fit the selected model.
        """
        # Choose the model
        if self.model_name == 'TFT':
            self.model = TemporalFusionTransformer.from_dataset(
                self.training,
                learning_rate=0.03,
                hidden_size=16,
                attention_head_size=1,
                dropout=0.1,
                hidden_continuous_size=8,
                loss=QuantileLoss(),
                log_interval=10,
                reduce_on_plateau_patience=4,
            )
        elif self.model_name == 'RNN':
            self.model = RecurrentNetwork.from_dataset(
                self.training,
                learning_rate=0.03,
                hidden_size=16,
                rnn_layers=2,
                dropout=0.1,
                loss=RMSE(),
            )
        elif self.model_name == 'DeepAR':
            self.model = DeepAR.from_dataset(
                self.training,
                learning_rate=0.03,
                rnn_layers=2,
                hidden_size=16,
                dropout=0.1,
            )
        elif self.model_name == 'NBeats':
            self.model = NBeats.from_dataset(
                self.training,
                learning_rate=0.03,
            )
        elif self.model_name == 'NHiTS':
            self.model = NHiTS.from_dataset(
                self.training,
                learning_rate=0.03,
            )
        elif self.model_name == 'Baseline':
            self.model = Baseline()
        elif self.model_name == 'AutoRegressive':
            self.model = AutoRegressiveModel.from_dataset(
                self.training,
                learning_rate=0.03,
            )
        elif self.model_name == 'LSTM':
            self.model = LSTMForecaster.from_dataset(
                self.training,
                learning_rate=0.03,
                hidden_size=16,
                num_layers=2,
                dropout=0.1,
            )
        elif self.model_name == 'TCN':
            self.model = TCNForecaster.from_dataset(
                self.training,
                learning_rate=0.03,
                num_channels=[16]*3,
                kernel_size=2,
                dropout=0.1,
            )
        elif self.model_name == 'ProphetLike':
            self.model = ProphetLikeModel.from_dataset(
                self.training,
                learning_rate=0.03,
                seasonality=10,
            )
        else:
            raise ValueError(f"Model {self.model_name} is not supported.")

        # Trainer
        early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=5, verbose=False, mode="min")
        trainer = pl.Trainer(
            max_epochs=self.max_epochs,
            callbacks=[early_stop_callback],
            gradient_clip_val=0.1,
        )

        # Fit the model
        trainer.fit(self.model, train_dataloaders=self.train_dataloader, val_dataloaders=self.val_dataloader)

    # The predict and backtest methods remain the same.

In [None]:
# Generate synthetic data for demonstration
def generate_synthetic_data(num_securities=10, num_days=500):
    dates = pd.date_range(start='2020-01-01', periods=num_days, freq='D')
    data = []
    for security_id in range(num_securities):
        price = 100 + np.cumsum(np.random.randn(num_days) * 0.5)
        returns = np.diff(price) / price[:-1]
        returns = np.insert(returns, 0, 0)  # Insert zero return for the first day
        df = pd.DataFrame({
            'date': dates,
            'security_id': f'sec_{security_id}',
            'price': price,
            'returns': returns,
        })
        data.append(df)
    df = pd.concat(data)
    return df
max_encoder_length = 28
max_prediction_length = 7
# Generate data
df = generate_synthetic_data()

# Initialize the wrapper
wrapper = AssetAllocationWrapper(model_name='Signature', max_epochs=10, max_leverage=2.0)
wrapper.prepare_data(df, time_idx='date', target='returns', group_ids=['security_id'])
wrapper.fit()
backtest_results = wrapper.backtest()
print(backtest_results.head())



AssertionError: filters should not remove entries all entries - check encoder/decoder lengths and lags

In [None]:
import pytorch_lightning as pl
from pytorch_forecasting import (
    TimeSeriesDataSet,
    Baseline,
    TemporalFusionTransformer,
    RecurrentNetwork,
    DeepAR,
    NBeats,
    NHiTS,
)
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import RMSE, QuantileLoss
from pytorch_lightning.callbacks import EarlyStopping
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
import torch

# Include the SignatureModel class code here (from previous code block)
# Ensure that you have installed torchcde
# pip install torchcde

# The SignatureModel class remains the same...



import pytorch_lightning as pl
from pytorch_forecasting import (
    TimeSeriesDataSet,
    Baseline,
    TemporalFusionTransformer,
    RecurrentNetwork,
    DeepAR,
    NBeats,
    NHiTS,
)
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import RMSE, QuantileLoss
from pytorch_lightning.callbacks import EarlyStopping
from torch.utils.data import DataLoader
import pandas as pd
import numpy as np
import torch

# Include the SignatureModel class code here (from previous code block)
# Ensure that you have installed torchcde
# pip install torchcde

# The SignatureModel class remains the same...

class AssetAllocationWrapper:
    """
    Wrapper to train models for asset allocation based on returns.
    """

    def __init__(self, model_name, max_epochs=30, max_leverage=2.0):
        """
        Initialize the wrapper.

        Args:
            model_name (str): Name of the model to use.
            max_epochs (int): Maximum number of training epochs.
            max_leverage (float): Maximum leverage allowed.
        """
        self.model_name = model_name
        self.max_epochs = max_epochs
        self.max_leverage = max_leverage
        self.model = None

    def prepare_data(self, df, time_idx, target, group_ids):
        """
        Prepare dataset for asset allocation.

        Args:
            df (pd.DataFrame): Dataset containing returns.
            time_idx (str): Name of the time index column.
            target (str): Name of the target column (e.g., returns).
            group_ids (list): List of group identifier column names (e.g., securities).
        """
        # Ensure time_idx is datetime and sort data
        df[time_idx] = pd.to_datetime(df[time_idx])
        df.sort_values(by=group_ids + [time_idx], inplace=True)

        # Assign a unique sequential integer time index within each group
        df['time_idx'] = df.groupby(group_ids).cumcount()

        # Create TimeSeriesDataSet
        max_encoder_length = 30
        max_prediction_length = 7

        self.training_cutoff = df['time_idx'].max() - max_prediction_length

        self.training = TimeSeriesDataSet(
            df[df['time_idx'] <= self.training_cutoff],
            time_idx='time_idx',
            target=target,
            group_ids=group_ids,
            max_encoder_length=max_encoder_length,
            max_prediction_length=max_prediction_length,
            static_categoricals=group_ids,
            time_varying_known_reals=['time_idx'],
            time_varying_unknown_reals=[target],
            target_normalizer=GroupNormalizer(groups=group_ids),
            allow_missing_timesteps=True,
        )

        self.validation = TimeSeriesDataSet.from_dataset(
            self.training, df[df['time_idx'] > self.training_cutoff], min_prediction_idx=self.training_cutoff + 1
        )

        self.batch_size = 64
        self.train_dataloader = DataLoader(self.training, batch_size=self.batch_size, shuffle=True)
        self.val_dataloader = DataLoader(self.validation, batch_size=self.batch_size)

    # The rest of the class remains the same...
    def fit(self):
        """
        Fit the selected model.
        """
        # Choose the model
        if self.model_name == 'TFT':
            self.model = TemporalFusionTransformer.from_dataset(
                self.training,
                learning_rate=0.03,
                hidden_size=16,
                attention_head_size=1,
                dropout=0.1,
                hidden_continuous_size=8,
                loss=QuantileLoss(),
                log_interval=10,
                reduce_on_plateau_patience=4,
            )
        elif self.model_name == 'RNN':
            self.model = RecurrentNetwork.from_dataset(
                self.training,
                learning_rate=0.03,
                hidden_size=16,
                rnn_layers=2,
                dropout=0.1,
                loss=RMSE(),
            )
        elif self.model_name == 'DeepAR':
            self.model = DeepAR.from_dataset(
                self.training,
                learning_rate=0.03,
                rnn_layers=2,
                hidden_size=16,
                dropout=0.1,
            )
        elif self.model_name == 'NBeats':
            self.model = NBeats.from_dataset(
                self.training,
                learning_rate=0.03,
            )
        elif self.model_name == 'NHiTS':
            self.model = NHiTS.from_dataset(
                self.training,
                learning_rate=0.03,
            )
        elif self.model_name == 'Baseline':
            self.model = Baseline()
        elif self.model_name == 'AutoRegressive':
            self.model = AutoRegressiveModel.from_dataset(
                self.training,
                learning_rate=0.03,
            )
        elif self.model_name == 'LSTM':
            self.model = LSTMForecaster.from_dataset(
                self.training,
                learning_rate=0.03,
                hidden_size=16,
                num_layers=2,
                dropout=0.1,
            )
        elif self.model_name == 'TCN':
            self.model = TCNForecaster.from_dataset(
                self.training,
                learning_rate=0.03,
                num_channels=[16]*3,
                kernel_size=2,
                dropout=0.1,
            )
        elif self.model_name == 'ProphetLike':
            self.model = ProphetLikeModel.from_dataset(
                self.training,
                learning_rate=0.03,
                seasonality=10,
            )
        elif self.model_name == 'Signature':
            self.model = SignatureModel.from_dataset(
                self.training,
                learning_rate=0.03,
                hidden_size=32,
            )
        else:
            raise ValueError(f"Model {self.model_name} is not supported.")

        # Trainer
        early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=5, verbose=False, mode="min")
        trainer = pl.Trainer(
            max_epochs=self.max_epochs,
            callbacks=[early_stop_callback],
            gradient_clip_val=0.1,
        )

        # Fit the model
        trainer.fit(self.model, train_dataloaders=self.train_dataloader, val_dataloaders=self.val_dataloader)

    def predict(self, df):
        """
        Make predictions on new data.

        Args:
            df (pd.DataFrame): DataFrame containing new data.

        Returns:
            pd.DataFrame: DataFrame with predictions.
        """
        # Prepare dataset
        raw_predictions, x = self.model.predict(df, mode="raw", return_x=True)
        predictions = self.model.to_prediction(raw_predictions)

        # Combine predictions with input data
        df_pred = df.copy()
        df_pred['prediction'] = predictions
        return df_pred

    def backtest(self):
        """
        Perform backtesting using the validation set.

        Returns:
            pd.DataFrame: DataFrame with backtesting predictions.
        """
        actuals = torch.cat([y[0] for x, y in iter(self.val_dataloader)])
        predictions = self.model.predict(self.val_dataloader)
        df_backtest = pd.DataFrame({
            'actual': actuals.numpy().flatten(),
            'prediction': predictions.numpy().flatten(),
        })

        # Adjust leverage based on confidence
        df_backtest['confidence'] = self.calculate_confidence(df_backtest)
        df_backtest['leverage'] = df_backtest['confidence'] * self.max_leverage
        df_backtest['leverage'] = df_backtest['leverage'].clip(0, self.max_leverage)

        # Apply leverage to predictions
        df_backtest['adjusted_prediction'] = df_backtest['prediction'] * df_backtest['leverage']
        return df_backtest

    def calculate_confidence(self, df):
        """
        Calculate confidence level based on prediction errors.

        Args:
            df (pd.DataFrame): DataFrame with 'actual' and 'prediction' columns.

        Returns:
            pd.Series: Confidence levels between 0 and 1.
        """
        errors = np.abs(df['actual'] - df['prediction'])
        max_error = errors.max()
        confidence = 1 - (errors / (max_error + 1e-6))
        return confidence

# Include the implementations of the other models (AutoRegressiveModel, LSTMForecaster, TCNForecaster, ProphetLikeModel, SignatureModel)

# Example usage:
# Generate synthetic data for demonstration
def generate_synthetic_data(num_securities=10, num_days=500):
    dates = pd.date_range(start='2020-01-01', periods=num_days, freq='D')
    data = []
    for security_id in range(num_securities):
        price = 100 + np.cumsum(np.random.randn(num_days) * 0.5)
        returns = np.diff(price) / price[:-1]
        returns = np.insert(returns, 0, 0)  # Insert zero return for the first day
        df = pd.DataFrame({
            'date': dates,
            'security_id': f'sec_{security_id}',
            'price': price,
            'returns': returns,
        })
        data.append(df)
    df = pd.concat(data)
    return df

# Generate data
df = generate_synthetic_data()

# Initialize the wrapper
wrapper = AssetAllocationWrapper(model_name='Signature', max_epochs=10, max_leverage=2.0)
wrapper.prepare_data(df, time_idx='date', target='returns', group_ids=['security_id'])
wrapper.fit()
backtest_results = wrapper.backtest()
print(backtest_results.head())

AssertionError: data index has to be unique

In [None]:
df.reset_index(inplace=True)

In [None]:
model_names = ['TFT', 'RNN', 'DeepAR', 'NBeats', 'NHiTS', 'Baseline', 'AutoRegressive', 'LSTM', 'TCN', 'ProphetLike', 'Signature']
results = {}

for model_name in model_names:
    print(f"Training aand backtesting with model: {model_name}")
    wrapper = AssetAllocationWrapper(model_name=model_name, max_epochs=10, max_leverage=2.0)
    wrapper.prepare_data(df, time_idx='date', target='returns', group_ids=['security_id'])
    wrapper.fit()
    backtest_results = wrapper.backtest()
    results[model_name] = backtest_results

Training and backtesting with model: TFT


ValueError: Found array with 0 sample(s) (shape=(0, 1)) while a minimum of 1 is required by StandardScaler.

In [None]:
import matplotlib.pyplot as plt

for model_name, df_backtest in results.items():
    # Compute strategy returns
    df_backtest['strategy_returns'] = df_backtest['adjusted_prediction'] * df_backtest['actual']
    # Compute cumulative returns
    df_backtest['cumulative_returns'] = (1 + df_backtest['strategy_returns']).cumprod() - 1
    plt.plot(df_backtest['cumulative_returns'].values, label=model_name)

plt.legend()
plt.title('Cumulative Returns Comparison')
plt.xlabel('Time')
plt.ylabel('Cumulative Returns')
plt.show()

In [None]:
##claude

In [None]:
"""
Financial Time Series Forecasting Package
---------------------------------------
A comprehensive package for financial time series forecasting using various deep learning models.

This package includes:
- Multiple model architectures (AutoRegressive, LSTM, TCN, ProphetLike)
- Data validation and preprocessing
- Model checkpointing and logging
- Configuration management
- Comprehensive testing suite

Author: Assistant
Date: 2024-10-20
Version: 1.0.0
"""

import logging
import yaml
import torch
import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pathlib import Path
from dataclasses import dataclass
from typing import Optional, List, Dict, Any, Tuple, Union
from datetime import datetime, timedelta
from pytorch_lightning.callbacks import EarlyStopping
from torch.utils.data import DataLoader
from pytorch_forecasting import TimeSeriesDataSet
from pytorch_forecasting.metrics import RMSE, QuantileLoss
from scipy import stats

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

@dataclass
class ModelConfig:
    """Configuration class for model parameters.

    Attributes:
        model_name (str): Name of the model to use
        max_epochs (int): Maximum number of training epochs
        batch_size (int): Batch size for training
        learning_rate (float): Learning rate for optimization
        hidden_size (int): Number of hidden units in layers
        dropout (float): Dropout rate for regularization
        max_encoder_length (int): Maximum length of encoder sequence
        max_prediction_length (int): Maximum length of prediction sequence
    """
    model_name: str
    max_epochs: int = 30
    batch_size: int = 64
    learning_rate: float = 0.03
    hidden_size: int = 16
    dropout: float = 0.1
    max_encoder_length: int = 30
    max_prediction_length: int = 7

    @classmethod
    def from_yaml(cls, yaml_path: str) -> 'ModelConfig':
        """Load configuration from YAML file.

        Args:
            yaml_path: Path to YAML configuration file

        Returns:
            ModelConfig instance
        """
        with open(yaml_path, 'r') as f:
            config_dict = yaml.safe_load(f)
        return cls(**config_dict)

    def to_yaml(self, yaml_path: str) -> None:
        """Save configuration to YAML file.

        Args:
            yaml_path: Path to save configuration
        """
        with open(yaml_path, 'w') as f:
            yaml.dump(self.__dict__, f)

class DataValidator:
    """Data validation utilities."""

    @staticmethod
    def validate_dataframe(df: pd.DataFrame, required_columns: List[str]) -> bool:
        """Validate DataFrame structure and content.

        Args:
            df: Input DataFrame
            required_columns: List of required column names

        Returns:
            bool: True if validation passes

        Raises:
            ValueError: If validation fails
        """
        # Check for required columns
        missing_cols = set(required_columns) - set(df.columns)
        if missing_cols:
            raise ValueError(f"Missing required columns: {missing_cols}")

        # Check for null values
        null_cols = df.columns[df.isnull().any()].tolist()
        if null_cols:
            raise ValueError(f"Null values found in columns: {null_cols}")

        # Check for infinite values
        inf_cols = df.columns[np.isinf(df.select_dtypes(include=np.number)).any()].tolist()
        if inf_cols:
            raise ValueError(f"Infinite values found in columns: {inf_cols}")

        return True

class ModelCheckpointer:
    """Model checkpointing utilities."""

    def __init__(self, checkpoint_dir: str):
        """Initialize checkpointer.

        Args:
            checkpoint_dir: Directory to store checkpoints
        """
        self.checkpoint_dir = Path(checkpoint_dir)
        self.checkpoint_dir.mkdir(exist_ok=True)

    def save_checkpoint(self, model: torch.nn.Module, epoch: int,
                       optimizer: torch.optim.Optimizer, loss: float) -> str:
        """Save model checkpoint.

        Args:
            model: PyTorch model
            epoch: Current epoch number
            optimizer: PyTorch optimizer
            loss: Current loss value

        Returns:
            str: Path to saved checkpoint
        """
        checkpoint_path = self.checkpoint_dir / f"checkpoint_epoch_{epoch}.pt"
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
        }, checkpoint_path)
        return str(checkpoint_path)

    def load_checkpoint(self, model: torch.nn.Module,
                       optimizer: torch.optim.Optimizer,
                       checkpoint_path: str) -> Tuple[int, float]:
        """Load model checkpoint.

        Args:
            model: PyTorch model
            optimizer: PyTorch optimizer
            checkpoint_path: Path to checkpoint file

        Returns:
            Tuple containing (epoch_number, loss_value)
        """
        checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        return checkpoint['epoch'], checkpoint['loss']

class AutoRegressiveModel(pl.LightningModule):
    """Simple Autoregressive model using a Linear layer."""

    def __init__(self, input_size: int, output_size: int = 1, **kwargs):
        """Initialize AR model.

        Args:
            input_size: Number of input features
            output_size: Number of output features
            **kwargs: Additional arguments
        """
        super().__init__()
        self.linear = torch.nn.Linear(input_size, output_size)
        self.save_hyperparameters()

    def forward(self, x: Dict[str, torch.Tensor]) -> torch.Tensor:
        """Forward pass.

        Args:
            x: Dictionary containing input tensors

        Returns:
            Model predictions
        """
        encoder_output = x["encoder_cont"][:, -1, :]  # Use last time step
        prediction = self.linear(encoder_output)
        return prediction

class LSTMForecaster(pl.LightningModule):
    """LSTM-based Forecaster."""

    def __init__(self, input_size: int, hidden_size: int, num_layers: int,
                 dropout: float = 0.0, **kwargs):
        """Initialize LSTM model.

        Args:
            input_size: Number of input features
            hidden_size: Number of hidden units
            num_layers: Number of LSTM layers
            dropout: Dropout rate
            **kwargs: Additional arguments
        """
        super().__init__()
        self.lstm = torch.nn.LSTM(
            input_size, hidden_size, num_layers,
            batch_first=True, dropout=dropout
        )
        self.output_layer = torch.nn.Linear(hidden_size, 1)
        self.save_hyperparameters()

    def forward(self, x: Dict[str, torch.Tensor]) -> torch.Tensor:
        """Forward pass."""
        encoder_input = x["encoder_cont"]
        output, (hidden, _) = self.lstm(encoder_input)
        prediction = self.output_layer(hidden[-1])
        return prediction

class TCNBlock(torch.nn.Module):
    """Temporal Convolutional Network block."""

    def __init__(self, n_inputs: int, n_outputs: int, kernel_size: int,
                 stride: int, dilation: int, padding: int, dropout: float = 0.2):
        """Initialize TCN block."""
        super().__init__()
        self.conv1 = torch.nn.Conv1d(
            n_inputs, n_outputs, kernel_size,
            stride=stride, padding=padding, dilation=dilation
        )
        self.chomp1 = torch.nn.functional.pad  # Remove future timesteps
        self.relu1 = torch.nn.ReLU()
        self.dropout1 = torch.nn.Dropout(dropout)

        self.net = torch.nn.Sequential(
            self.conv1,
            self.relu1,
            self.dropout1
        )

        self.downsample = torch.nn.Conv1d(n_inputs, n_outputs, 1) \
            if n_inputs != n_outputs else None
        self.relu = torch.nn.ReLU()
        self.init_weights()

    def init_weights(self):
        """Initialize network weights."""
        self.conv1.weight.data.normal_(0, 0.01)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 0.01)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward pass."""
        out = self.net(x)
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)

class TCNForecaster(pl.LightningModule):
    """Temporal Convolutional Network Forecaster."""

    def __init__(self, input_size: int, num_channels: List[int],
                 kernel_size: int = 2, dropout: float = 0.2, **kwargs):
        """Initialize TCN model."""
        super().__init__()
        layers = []
        num_levels = len(num_channels)
        for i in range(num_levels):
            dilation_size = 2 ** i
            in_channels = input_size if i == 0 else num_channels[i-1]
            out_channels = num_channels[i]
            layers += [TCNBlock(
                in_channels, out_channels, kernel_size, stride=1,
                dilation=dilation_size,
                padding=(kernel_size-1) * dilation_size,
                dropout=dropout
            )]

        self.network = torch.nn.Sequential(*layers)
        self.output_layer = torch.nn.Linear(num_channels[-1], 1)
        self.save_hyperparameters()

    def forward(self, x: Dict[str, torch.Tensor]) -> torch.Tensor:
        """Forward pass."""
        encoder_input = x["encoder_cont"].permute(0, 2, 1)
        output = self.network(encoder_input)
        output = output[:, :, -1]  # Take last time step
        prediction = self.output_layer(output)
        return prediction

class ProphetLikeModel(pl.LightningModule):
    """Prophet-like model capturing trend and seasonality."""

    def __init__(self, seasonality: int, **kwargs):
        """Initialize Prophet-like model."""
        super().__init__()
        self.trend = torch.nn.Linear(1, 1)
        self.seasonality = torch.nn.Linear(seasonality, 1)
        self.save_hyperparameters()

    def forward(self, x: Dict[str, torch.Tensor]) -> torch.Tensor:
        """Forward pass."""
        time = x["encoder_cont"][:, :, 0].unsqueeze(-1)
        trend = self.trend(time)

        seasonal_features = x["encoder_cont"][:, :, 1:self.hparams.seasonality+1]
        seasonality = self.seasonality(seasonal_features)

        prediction = trend + seasonality
        return prediction[:, -1, :]

class BaseWrapper:
    """Base wrapper for all models."""

    def __init__(self, config: ModelConfig):
        """Initialize wrapper."""
        self.config = config
        self.logger = logging.getLogger(f"{self.__class__.__name__}")
        self.validator = DataValidator()
        self.checkpointer = ModelCheckpointer("checkpoints")
        self.model = None
        self.training_metrics = []

    def prepare_data(self, df: pd.DataFrame, time_idx: str,
                    target: str, group_ids: List[str]) -> None:
        """Prepare data for training."""
        # Validate data
        self.validator.validate_dataframe(df, [time_idx, target] + group_ids)

        # Create TimeSeriesDataSet
        self.training = TimeSeriesDataSet(
            df[df[time_idx] <= df[time_idx].max() - self.config.max_prediction_length],
            time_idx=time_idx,
            target=target,
            group_ids=group_ids,
            max_encoder_length=self.config.max_encoder_length,
            max_prediction_length=self.config.max_prediction_length,
            static_categoricals=group_ids,
            time_varying_known_reals=[time_idx],
            time_varying_unknown_reals=[target],
            target_normalizer=None,
            allow_missings=True,
        )

        self.validation = TimeSeriesDataSet.from_dataset(
            self.training,
            df[df[time_idx] > df[time_idx].max() - self.config.max_prediction_length],
            min_prediction_idx=df[time_idx].max() - self.config.max_prediction_length + 1
        )

        self.train_dataloader = DataLoader(
            self.training, batch_size=self.config.batch_size, shuffle=True
        )
        self.val_dataloader = DataLoader(
            self.validation, batch_size=self.config.batch_size
        )

    def fit(self) -> None:
        """Train the model."""
        if self.model is None:
            raise ValueError("Model not initialized")

        trainer = pl.Trainer(
            max_epochs=self.config.max_epochs,
            callbacks=[
                EarlyStopping(
                    monitor="val_loss",
                    min_delta=1e-4,
                    patience=5,
                    verbose=False,
                    mode="min"
                )
            ],
            gradient_clip_val=0.1,
        )

        trainer.fit(
            self.model,
            train_dataloaders=self.train_dataloader,
            val_dataloaders=self.val_dataloader
        )

    def predict(self, df: pd.DataFrame) -> np.ndarray:
        """Make predictions."""
        if self.model is None:
            raise ValueError("Model not trained")

        self.model.eval()
        with torch.no_grad():
            predictions = self.model(
                self.validation.to_dataloader(df, batch_size=self.config.batch_size)
            )
        return predictions.numpy()

def generate_sample_data(n_assets: int = 5, n_days: int = 100,
                        seed: int = 42) -> pd.DataFrame:
    """Generate sample financial data for testing.

    Args:
        n_assets: Number of assets
        n_days: Number of days
        seed: Random seed

    Returns:
        DataFrame containing sample data
    """
    np.random.seed(seed)

    dates = [datetime.today() - timedelta(days=x) for x in range(n_days)]
    dates.reverse()

    data = {
        'time': dates * n_assets,
        'group_id': np

SyntaxError: incomplete input (<ipython-input-1-92dd88ec71fe>, line 423)

In [None]:
import logging
import yaml
from dataclasses import dataclass
from typing import Optional, List, Dict, Any
import torch
import numpy as np
import pandas as pd
from pathlib import Path

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

@dataclass
class ModelConfig:
    """Base configuration for all models."""
    model_name: str
    max_epochs: int = 30
    batch_size: int = 64
    learning_rate: float = 0.03
    hidden_size: int = 16
    dropout: float = 0.1
    max_encoder_length: int = 30
    max_prediction_length: int = 7

    @classmethod
    def from_yaml(cls, yaml_path: str) -> 'ModelConfig':
        """Load configuration from YAML file."""
        with open(yaml_path, 'r') as f:
            config_dict = yaml.safe_load(f)
        return cls(**config_dict)

    def to_yaml(self, yaml_path: str) -> None:
        """Save configuration to YAML file."""
        with open(yaml_path, 'w') as f:
            yaml.dump(self.__dict__, f)

class DataValidator:
    """Validates input data quality."""

    @staticmethod
    def validate_dataframe(df: pd.DataFrame, required_columns: List[str]) -> bool:
        """
        Validate DataFrame structure and content.

        Args:
            df: Input DataFrame
            required_columns: List of required column names

        Returns:
            bool: True if validation passes

        Raises:
            ValueError: If validation fails
        """
        # Check for required columns
        missing_cols = set(required_columns) - set(df.columns)
        if missing_cols:
            raise ValueError(f"Missing required columns: {missing_cols}")

        # Check for null values
        null_cols = df.columns[df.isnull().any()].tolist()
        if null_cols:
            raise ValueError(f"Null values found in columns: {null_cols}")

        # Check for infinite values
        inf_cols = df.columns[np.isinf(df.select_dtypes(include=np.number)).any()].tolist()
        if inf_cols:
            raise ValueError(f"Infinite values found in columns: {inf_cols}")

        return True

class ModelCheckpointer:
    """Handles model checkpointing and recovery."""

    def __init__(self, checkpoint_dir: str):
        self.checkpoint_dir = Path(checkpoint_dir)
        self.checkpoint_dir.mkdir(exist_ok=True)

    def save_checkpoint(self, model: torch.nn.Module, epoch: int,
                       optimizer: torch.optim.Optimizer, loss: float) -> str:
        """Save model checkpoint."""
        checkpoint_path = self.checkpoint_dir / f"checkpoint_epoch_{epoch}.pt"
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
        }, checkpoint_path)
        return str(checkpoint_path)

    def load_checkpoint(self, model: torch.nn.Module,
                       optimizer: torch.optim.Optimizer,
                       checkpoint_path: str) -> tuple:
        """Load model checkpoint."""
        checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        return checkpoint['epoch'], checkpoint['loss']

class BaseWrapper:
    """Enhanced base wrapper with improved functionality."""

    def __init__(self, config: ModelConfig):
        self.config = config
        self.logger = logging.getLogger(f"{self.__class__.__name__}")
        self.validator = DataValidator()
        self.checkpointer = ModelCheckpointer("checkpoints")
        self.model = None
        self.training_metrics = []

    def log_metrics(self, metrics: Dict[str, Any]) -> None:
        """Log training metrics."""
        self.training_metrics.append(metrics)
        self.logger.info(f"Metrics: {metrics}")

    def save_metrics(self, path: str) -> None:
        """Save training metrics to file."""
        pd.DataFrame(self.training_metrics).to_csv(path, index=False)

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import torch
from torch.utils.data import DataLoader

def generate_sample_data(n_assets: int = 5, n_days: int = 100, seed: int = 42) -> pd.DataFrame:
    """Generate sample financial data for testing."""
    np.random.seed(seed)

    # Create dates
    dates = [datetime.today() - timedelta(days=x) for x in range(n_days)]
    dates.reverse()

    # Generate data
    data = {
        'time': dates * n_assets,
        'group_id': np.repeat(range(n_assets), n_days),
        'returns': np.random.normal(0, 0.02, n_assets * n_days),
        'volume': np.random.lognormal(0, 1, n_assets * n_days),
        'price': np.random.lognormal(4, 0.1, n_assets * n_days),
        'volatility': np.random.gamma(2, 0.1, n_assets * n_days)
    }

    return pd.DataFrame(data)

# Example 1: AutoRegressive Model
def run_autoregressive_example():
    # Generate data
    df = generate_sample_data()

    # Configure model
    config = ModelConfig(
        model_name='AutoRegressive',
        max_epochs=20,
        hidden_size=16
    )

    # Initialize and train
    model = AutoRegressiveModel(
        input_size=len(df.columns) - 2,  # Exclude time and group_id
        output_size=1
    )

    wrapper = BaseWrapper(config)
    wrapper.model = model

    # Train and evaluate
    wrapper.prepare_data(df, time_idx='time', target='returns', group_ids=['group_id'])
    wrapper.fit()
    predictions = wrapper.predict(df.tail(10))

    return predictions

# Example 2: LSTM Model
def run_lstm_example():
    df = generate_sample_data()

    config = ModelConfig(
        model_name='LSTM',
        max_epochs=20,
        hidden_size=32,
        num_layers=2
    )

    model = LSTMForecaster(
        input_size=len(df.columns) - 2,
        hidden_size=config.hidden_size,
        num_layers=2
    )

    wrapper = BaseWrapper(config)
    wrapper.model = model
    wrapper.prepare_data(df, time_idx='time', target='returns', group_ids=['group_id'])
    wrapper.fit()

    return wrapper.predict(df.tail(10))

# Example 3: TCN Model
def run_tcn_example():
    df = generate_sample_data()

    config = ModelConfig(
        model_name='TCN',
        max_epochs=20,
        hidden_size=32
    )

    model = TCNForecaster(
        input_size=len(df.columns) - 2,
        num_channels=[32, 32, 32],
        kernel_size=3
    )

    wrapper = BaseWrapper(config)
    wrapper.model = model
    wrapper.prepare_data(df, time_idx='time', target='returns', group_ids=['group_id'])
    wrapper.fit()

    return wrapper.predict(df.tail(10))

# Example 4: ProphetLike Model
def run_prophet_example():
    df = generate_sample_data()

    config = ModelConfig(
        model_name='ProphetLike',
        max_epochs=20,
        seasonality=10
    )

    model = ProphetLikeModel(seasonality=10)

    wrapper = BaseWrapper(config)
    wrapper.model = model
    wrapper.prepare_data(df, time_idx='time', target='returns', group_ids=['group_id'])
    wrapper.fit()

    return wrapper.predict(df.tail(10))

# Run all examples
if __name__ == "__main__":
    # Set random seeds for reproducibility
    torch.manual_seed(42)
    np.random.seed(42)

    models = {
        'AutoRegressive': run_autoregressive_example,
        'LSTM': run_lstm_example,
        'TCN': run_tcn_example,
        'ProphetLike': run_prophet_example
    }

    results = {}
    for model_name, run_func in models.items():
        try:
            logger.info(f"Running {model_name} example...")
            predictions = run_func()
            results[model_name] = predictions
            logger.info(f"{model_name} completed successfully")
        except Exception as e:
            logger.error(f"Error running {model_name}: {str(e)}")

    # Compare results
    comparison_df = pd.DataFrame({
        model_name: results[model_name]['predictions'].mean()
        for model_name in results.keys()
    })

    print("\nModel Comparison:")
    print(comparison_df)

ERROR:__main__:Error running AutoRegressive: name 'AutoRegressiveModel' is not defined
ERROR:__main__:Error running LSTM: ModelConfig.__init__() got an unexpected keyword argument 'num_layers'
ERROR:__main__:Error running TCN: name 'TCNForecaster' is not defined
ERROR:__main__:Error running ProphetLike: ModelConfig.__init__() got an unexpected keyword argument 'seasonality'



Model Comparison:
Empty DataFrame
Columns: []
Index: []


In [None]:
#example configuration file

In [None]:
# config.yaml
model_name: LSTM
max_epochs: 30
batch_size: 64
learning_rate: 0.03
hidden_size: 32
dropout: 0.1
max_encoder_length: 30
max_prediction_length: 7
num_layers: 2  # LSTM specific
kernel_size: 3  # TCN specific
seasonality: 10  # ProphetLike specific

In [None]:
import unittest
import torch
import pandas as pd
import numpy as np
from pathlib import Path

class TestModelBaseStructure(unittest.TestCase):
    def setUp(self):
        self.config = ModelConfig(model_name='LSTM')
        self.sample_data = generate_sample_data(n_assets=2, n_days=50)

    def test_data_validator(self):
        validator = DataValidator()

        # Test valid data
        self.assertTrue(validator.validate_dataframe(
            self.sample_data,
            ['time', 'group_id', 'returns']
        ))

        # Test invalid data
        invalid_df = self.sample_data.copy()
        invalid_df.loc[0, 'returns'] = np.nan
        with self.assertRaises(ValueError):
            validator.validate_dataframe(invalid_df, ['time', 'group_id', 'returns'])

    def test_model_checkpointing(self):
        checkpointer = ModelCheckpointer("test_checkpoints")
        model = LSTMForecaster(input_size=5, hidden_size=16, num_layers=2)
        optimizer = torch.optim.Adam(model.parameters())

        # Test saving
        checkpoint_path = checkpointer.save_checkpoint(model, 1, optimizer, 0.5)
        self.assertTrue(Path(checkpoint_path).exists())

        # Test loading
        new_model = LSTMForecaster(input_size=5, hidden_size=16, num_layers=2)
        new_optimizer = torch.optim.Adam(new_model.parameters())
        epoch, loss = checkpointer.load_checkpoint(new_model, new_optimizer, checkpoint_path)
        self.assertEqual(epoch, 1)
        self.assertEqual(loss, 0.5)

    def tearDown(self):
        # Clean up test checkpoints
        for file in Path("test_checkpoints").glob("*.pt"):
            file.unlink()
        Path("test_checkpoints").rmdir()

class TestModels(unittest.TestCase):
    def setUp(self):
        self.sample_data = generate_sample_data()

    def test_autoregressive_model(self):
        model = AutoRegressiveModel(input_size=5)
        self.assertIsInstance(model, torch.nn.Module)

        # Test forward pass
        x = {"encoder_cont": torch.randn(2, 10, 5)}
        output = model(x)
        self.assertEqual(output.shape, (2, 1))

    def test_lstm_model(self):
        model = LSTMForecaster(input_size=5, hidden_size=16, num_layers=2)
        self.assertIsInstance(model, torch.nn.Module)

        # Test forward pass
        x = {"encoder_cont": torch.randn(2, 10, 5)}
        output = model(x)
        self.assertEqual(output.shape, (2, 1))

if __name__ == '__main__':
    unittest.main()

E
ERROR: /root/ (unittest.loader._FailedTest)
----------------------------------------------------------------------
AttributeError: module '__main__' has no attribute '/root/'

----------------------------------------------------------------------
Ran 1 test in 0.002s

FAILED (errors=1)


SystemExit: True

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [1]:
#2024/10/24

In [3]:
!pip install pytorch_lightning

Collecting pytorch_lightning
  Downloading pytorch_lightning-2.4.0-py3-none-any.whl.metadata (21 kB)
Collecting torchmetrics>=0.7.0 (from pytorch_lightning)
  Downloading torchmetrics-1.5.1-py3-none-any.whl.metadata (20 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch_lightning)
  Downloading lightning_utilities-0.11.8-py3-none-any.whl.metadata (5.2 kB)
Downloading pytorch_lightning-2.4.0-py3-none-any.whl (815 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m815.2/815.2 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.11.8-py3-none-any.whl (26 kB)
Downloading torchmetrics-1.5.1-py3-none-any.whl (890 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m890.6/890.6 kB[0m [31m28.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lightning-utilities, torchmetrics, pytorch_lightning
Successfully installed lightning-utilities-0.11.8 pytorch_lightning-2.4.0 torchmetrics-1.5.1


In [4]:
import logging
import yaml
import torch
import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pathlib import Path
from dataclasses import dataclass
from typing import Optional, List, Dict, Any, Tuple, Union
from datetime import datetime, timedelta
from pytorch_lightning.callbacks import EarlyStopping
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
import torch.nn.functional as F
from sklearn.metrics import cohen_kappa_score
from torch.utils.tensorboard import SummaryWriter
from itertools import product
from scipy import stats

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

@dataclass
class ModelConfig:
    """Configuration for model parameters."""
    model_name: str
    max_epochs: int = 30
    batch_size: int = 64
    learning_rate: float = 0.001
    hidden_size: int = 128
    dropout: float = 0.1
    max_encoder_length: int = 30
    max_prediction_length: int = 7
    num_layers: int = 2
    num_heads: int = 8

    @classmethod
    def from_yaml(cls, yaml_path: str) -> 'ModelConfig':
        with open(yaml_path, 'r') as f:
            config_dict = yaml.safe_load(f)
        return cls(**config_dict)

class FinancialDataset(Dataset):
    """Dataset for financial time series with multiple outputs."""
    def __init__(self, X1, X2, y_class=None, y_alloc=None, is_train=True):
        self.X1 = torch.tensor(X1, dtype=torch.float32)
        self.X2 = torch.tensor(X2, dtype=torch.float32)
        if y_class is not None:
            self.y_class = torch.tensor(y_class, dtype=torch.long)
        if y_alloc is not None:
            self.y_alloc = torch.tensor(y_alloc, dtype=torch.float32)
        self.is_train = is_train

    def __len__(self):
        return len(self.X1)

    def __getitem__(self, idx):
        if self.is_train:
            return (self.X1[idx], self.X2[idx],
                   self.y_class[idx], self.y_alloc[idx])
        return self.X1[idx], self.X2[idx]

class BaseModel(pl.LightningModule):
    """Base model with common functionality."""
    def __init__(self, config: ModelConfig):
        super().__init__()
        self.config = config
        self.save_hyperparameters()

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(),
                                   lr=self.config.learning_rate)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=0.1, patience=5)
        return {
            "optimizer": optimizer,
            "lr_scheduler": scheduler,
            "monitor": "val_loss"
        }

    def training_step(self, batch, batch_idx):
        X1, X2, y_class, y_alloc = batch
        class_out, alloc_out = self(X1, X2)

        # Classification loss
        class_loss = F.cross_entropy(
            class_out.view(-1, class_out.size(-1)),
            y_class.view(-1)
        )

        # Allocation loss (MSE)
        alloc_loss = F.mse_loss(alloc_out, y_alloc)

        # Sharpe ratio computation for allocation
        returns = (alloc_out * y_alloc).sum(dim=1)
        sharpe = torch.mean(returns) / (torch.std(returns) + 1e-6)

        total_loss = class_loss + alloc_loss - 0.1 * sharpe

        self.log('train_loss', total_loss)
        return total_loss

    def validation_step(self, batch, batch_idx):
        X1, X2, y_class, y_alloc = batch
        class_out, alloc_out = self(X1, X2)

        class_loss = F.cross_entropy(
            class_out.view(-1, class_out.size(-1)),
            y_class.view(-1)
        )
        alloc_loss = F.mse_loss(alloc_out, y_alloc)

        returns = (alloc_out * y_alloc).sum(dim=1)
        sharpe = torch.mean(returns) / (torch.std(returns) + 1e-6)

        self.log('val_loss', class_loss + alloc_loss - 0.1 * sharpe)
        self.log('val_sharpe', sharpe)

# Model Implementations

class MatrixModel(BaseModel):
    """Matrix regression with dual output heads."""
    def __init__(self, config: ModelConfig, input_size: int,
                 num_securities: int):
        super().__init__(config)
        self.A = torch.nn.Parameter(
            torch.randn(1, num_securities))
        self.B_class = torch.nn.Parameter(
            torch.randn(input_size, 3))
        self.B_alloc = torch.nn.Parameter(
            torch.randn(input_size, 1))

    def forward(self, x1, x2):
        x = torch.cat([x1.view(x1.size(0), -1),
                      x2.view(x2.size(0), -1)], dim=1)
        class_out = self.A @ x @ self.B_class
        alloc_out = F.softmax(self.A @ x @ self.B_alloc, dim=1)
        return class_out, alloc_out

class LSTMModel(BaseModel):
    """LSTM with dual output heads."""
    def __init__(self, config: ModelConfig, input_size: int,
                 num_securities: int):
        super().__init__(config)
        self.lstm = torch.nn.LSTM(
            input_size=input_size,
            hidden_size=config.hidden_size,
            num_layers=config.num_layers,
            batch_first=True,
            dropout=config.dropout
        )
        self.class_head = torch.nn.Linear(
            config.hidden_size, num_securities * 3)
        self.alloc_head = torch.nn.Linear(
            config.hidden_size, num_securities)

    def forward(self, x1, x2):
        x = torch.cat([x1, x2], dim=2)
        out, _ = self.lstm(x)
        last_hidden = out[:, -1, :]

        class_out = self.class_head(last_hidden)
        class_out = class_out.view(-1, num_securities, 3)

        alloc_out = self.alloc_head(last_hidden)
        alloc_out = F.softmax(alloc_out, dim=1)

        return class_out, alloc_out

class TransformerModel(BaseModel):
    """Transformer with dual output heads."""
    def __init__(self, config: ModelConfig, input_size: int,
                 num_securities: int):
        super().__init__(config)
        self.embedding = torch.nn.Linear(input_size, config.hidden_size)
        encoder_layer = torch.nn.TransformerEncoderLayer(
            d_model=config.hidden_size,
            nhead=config.num_heads,
            dropout=config.dropout
        )
        self.transformer = torch.nn.TransformerEncoder(
            encoder_layer,
            num_layers=config.num_layers
        )
        self.class_head = torch.nn.Linear(
            config.hidden_size, num_securities * 3)
        self.alloc_head = torch.nn.Linear(
            config.hidden_size, num_securities)

    def forward(self, x1, x2):
        x = torch.cat([x1, x2], dim=2)
        x = self.embedding(x)
        x = x.permute(1, 0, 2)
        out = self.transformer(x)
        out = out[-1]

        class_out = self.class_head(out)
        class_out = class_out.view(-1, num_securities, 3)

        alloc_out = self.alloc_head(out)
        alloc_out = F.softmax(alloc_out, dim=1)

        return class_out, alloc_out

def prepare_data(df: pd.DataFrame,
                window_size: int = 20,
                prediction_horizon: int = 1):
    """Prepare data for training and testing."""
    # Assume df has columns: timestamp, features (X1), features (X2),
    # class labels, returns

    scaler_X1 = MinMaxScaler()
    scaler_X2 = MinMaxScaler()

    X1 = scaler_X1.fit_transform(df[X1_columns].values)
    X2 = scaler_X2.fit_transform(df[X2_columns].values)
    y_class = df[class_columns].values
    returns = df[return_columns].values

    # Create windows
    X1_windows, X2_windows, y_class_windows, return_windows = [], [], [], []

    for i in range(len(df) - window_size - prediction_horizon + 1):
        X1_windows.append(X1[i:i+window_size])
        X2_windows.append(X2[i:i+window_size])
        y_class_windows.append(y_class[i+window_size+prediction_horizon-1])
        return_windows.append(returns[i+window_size+prediction_horizon-1])

    # Convert to arrays
    X1_windows = np.array(X1_windows)
    X2_windows = np.array(X2_windows)
    y_class_windows = np.array(y_class_windows)
    return_windows = np.array(return_windows)

    # Train/test split
    train_size = int(0.8 * len(X1_windows))

    train_dataset = FinancialDataset(
        X1_windows[:train_size],
        X2_windows[:train_size],
        y_class_windows[:train_size],
        return_windows[:train_size]
    )

    test_dataset = FinancialDataset(
        X1_windows[train_size:],
        X2_windows[train_size:],
        y_class_windows[train_size:],
        return_windows[train_size:]
    )

    return train_dataset, test_dataset

def train_and_evaluate(model: BaseModel,
                      train_dataset: Dataset,
                      test_dataset: Dataset,
                      config: ModelConfig):
    """Train and evaluate a model."""
    train_loader = DataLoader(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True
    )

    test_loader = DataLoader(
        test_dataset,
        batch_size=config.batch_size
    )

    trainer = pl.Trainer(
        max_epochs=config.max_epochs,
        callbacks=[
            EarlyStopping(
                monitor="val_loss",
                patience=5
            )
        ],
        gradient_clip_val=0.1
    )

    trainer.fit(model, train_loader, test_loader)

    # Evaluate
    model.eval()
    all_returns = []
    all_class_preds = []
    all_class_true = []

    with torch.no_grad():
        for batch in test_loader:
            X1, X2, y_class, y_alloc = batch
            class_out, alloc_out = model(X1, X2)

            returns = (alloc_out * y_alloc).sum(dim=1)
            all_returns.extend(returns.cpu().numpy())

            _, predicted = torch.max(class_out.data, 2)
            all_class_preds.extend(predicted.cpu().numpy().flatten())
            all_class_true.extend(y_class.cpu().numpy().flatten())

    # Calculate metrics
    returns = np.array(all_returns)
    sharpe = np.mean(returns) / (np.std(returns) + 1e-6)
    kappa = cohen_kappa_score(all_class_true, all_class_preds)

    print(f"Sharpe Ratio: {sharpe:.4f}")
    print(f"Cohen's Kappa: {kappa:.4f}")

    return sharpe, kappa

# Example usage
if __name__ == "__main__":
    # Load configuration
    config = ModelConfig(
        model_name="transformer",
        max_epochs=30,
        batch_size=64,
        learning_rate=0.001,
        hidden_size=128,
        dropout=0.1
    )

    # Generate sample data (replace with real data)
    num_samples = 1000
    num_features_x1 = 10
    num_features_x2 = 5
    num_securities = 5

    df = pd.DataFrame({
        'timestamp': pd.date_range(
            start='2023-01-01',
            periods=num_samples,
            freq='D'
        )
    })

    # Add features and targets
    for i in range(num_features_x1):
        df[f'x1_{i}'] = np.random.randn(num_samples)
    for i in range(num_features_x2):
        df[f'x2_{i}'] = np.random.randn(num_samples)
    for i in range(num_securities):
        df[f'class_{i}'] = np.random.randint(0, 3, num_samples)
        df[f'return_{i}'] = np.random.randn(num_samples)

    # Prepare data
    X1_columns = [f'x1_{i}' for i in range(num_features_x1)]
    X2_columns = [f'x2_{i}' for i in range(num_features_x2)]
    class_columns = [f'class_{i}' for i in range(num_securities)]
    return_columns = [f'return_{i}' for i in range(num_securities)]

    train_dataset, test_dataset = prepare_data(df)

    # Initialize and train model
    model = TransformerModel(
        config,
        input_size=num_features_x1 + num_features_x2,
        num_securities=num_securities
    )

    sharpe, kappa = train_and_evaluate(
        model,
        train_dataset,
        test_dataset,
        config
    )


INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name        | Type               | Params | Mode 
-----------------------------------------------------------
0 | embedding   | Linear             | 2.0 K  | train
1 | transformer | TransformerEncoder | 1.2 M  | train
2 | class_head  | Linear             | 1.9 K  | train
3 | alloc_head  | Linear             | 645    | train
-----------------------------------------------------------
1.2 M     Trainable params
0         Non-trainable params
1.2 M     Total params
4.763     Total estimated model params size (MB)
25        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (13) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Sharpe Ratio: 0.0121
Cohen's Kappa: 0.0066


In [5]:
    model = MatrixModel(
        config,
        input_size=num_features_x1 + num_features_x2,
        num_securities=num_securities
    )

    sharpe, kappa = train_and_evaluate(
        model,
        train_dataset,
        test_dataset,
        config
    )

INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name         | Type | Params | Mode
---------------------------------------------
  | other params | n/a  | 65     | n/a 
---------------------------------------------
65        Trainable params
0         Non-trainable params
65        Total params
0.000     Total estimated model params size (MB)
0         Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x5 and 64x300)