# READ ME

This notebook contains the training pipeline as of 19.06.2025, the point at which we begin adding more complex predictive models to the project. The first two additions are the Temporal Fusion Transformer (TFT) and the Temporal ConvNet (TCN).

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
from typing import Dict, List, Tuple, Optional, Union, Any
import warnings
import random
from Base_models import FeedForwardPredictor, AutoEncoder, ElasticNetLoss, SharpeRatioLoss
from TM_models import TemporalConvNet, TemporalFusionTransformer, TemporalFusionTransformer2
from tedst import sp500_training_pipeline, set_seed
import math
import itertools

# Fix the random seed once, before any data is loaded or any model is built.
# NOTE(review): presumably this seeds the Python, NumPy and torch RNGs so that
# runs are repeatable — confirm in tedst.set_seed.
set_seed(42)

In [5]:
# Load the non-standardised dataset. The CSV's unnamed first column is parsed
# as datetimes and renamed to "Date".
df = (
    pd.read_csv("data_non_std.csv", parse_dates=["Unnamed: 0"])
    .rename(columns={"Unnamed: 0": "Date"})
)

# Regression target: the "returns" column as a float32 array.
target = df["returns"].values.astype(np.float32)

# Predictors: every column except the target and the date stamp.
features = df.drop(columns=["Date", "returns"])

# One timestamp per row, as a plain Python list.
dates = pd.to_datetime(df["Date"]).astype("datetime64[ns]").tolist()


In [7]:
class SimpleFeedForward(nn.Module):
    """Two-hidden-layer MLP producing one output value per input row.

    Layer widths are ``input_dim -> hidden_dim -> hidden_dim // 2 -> 1``.
    Each hidden layer is followed by a ReLU and a dropout layer.

    Args:
        input_dim: Size of the last dimension of the input.
        hidden_dim: Width of the first hidden layer; the second is half of it.
        dropout: Dropout probability used after both hidden layers.
    """

    def __init__(self, input_dim, hidden_dim=200, dropout=0):
        super().__init__()
        half_dim = hidden_dim // 2
        widths = [input_dim, hidden_dim, half_dim]

        # Hidden blocks are created in order so that parameter initialisation
        # and the Sequential indices (0, 3, 6 for the Linear layers) stay put.
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.extend((nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(dropout)))
        stack.append(nn.Linear(half_dim, 1))

        self.network = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.network(x)
    
class SimpleLSTM(nn.Module):
    """Stacked LSTM followed by a small MLP head, one output per sequence.

    The head reads only the LSTM output at the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: LSTM hidden size; the head's hidden layer is half of it.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability. Used between stacked LSTM layers (only
            when ``num_layers > 1``) and inside the head.
    """

    def __init__(self, input_dim, hidden_dim=200, num_layers=2, dropout=0):
        super().__init__()

        # nn.LSTM applies dropout only *between* stacked layers and emits a
        # UserWarning when dropout > 0 is combined with a single layer. With
        # one layer the value has no effect, so pass 0.0 and stay silent.
        recurrent_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers,
                            batch_first=True, dropout=recurrent_dropout)

        self.fc = nn.Sequential(nn.Linear(hidden_dim, hidden_dim // 2),
                                nn.ReLU(),
                                nn.Dropout(dropout),
                                nn.Linear(hidden_dim // 2, 1))

    def forward(self, x):
        """Map ``x`` of shape (batch, seq_len, input_dim) to (batch, 1)."""
        lstm_out, _ = self.lstm(x)          # (batch, seq_len, hidden_dim)
        return self.fc(lstm_out[:, -1, :])  # Use last time step output

class SimpleTransformer(nn.Module):
    """Encoder-only transformer for sequence-to-one regression.

    Raw features are projected to ``model_dim``, positional encodings are
    added, the sequence is passed through a stack of
    ``nn.TransformerEncoderLayer`` blocks, and the representation of the last
    time step is normalised and mapped to a single value.

    Args:
        input_dim: Number of features per time step.
        model_dim: Width of the transformer; must be divisible by the number
            of attention heads.
        nhead: Number of attention heads.
        num_layers: Number of encoder layers.
        dropout: Dropout probability used in the positional encoding, the
            encoder layers and before the output layer.
        max_seq_length: Longest sequence the positional encoding table covers.
        num_heads: Optional alias for ``nhead``. Other code in this notebook
            (the tuning grid and the TFT call) names the head count
            ``num_heads``; when given, it takes precedence over ``nhead``.
    """

    def __init__(self,
                 input_dim: int,
                 model_dim: int = 128,   # divisible by nhead
                 nhead: int = 8,
                 num_layers: int = 2,
                 dropout: float = 0.1,
                 max_seq_length: int = 500,  # for positional encoding
                 num_heads: Optional[int] = None):
        super().__init__()

        # Accept the `num_heads` spelling without breaking existing callers
        # that pass `nhead` (positionally or by keyword).
        if num_heads is not None:
            nhead = num_heads

        # project raw features to model_dim
        self.input_proj = nn.Linear(input_dim, model_dim)

        # Add positional encoding
        self.pos_encoding = PositionalEncoding(model_dim, dropout, max_seq_length)

        # vanilla encoder stack
        encoder_layer = nn.TransformerEncoderLayer(
                d_model=model_dim,
                nhead=nhead,
                dim_feedforward=model_dim * 4,  # feed-forward width is 4x the model width
                dropout=dropout,
                activation='gelu',
                batch_first=True        # so x is (B, T, F)
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers)

        # regression head with additional processing
        self.norm = nn.LayerNorm(model_dim)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(model_dim, 1)

    def forward(self, x):               # x: (B, T, input_dim)
        """Return one prediction per sequence, shape (B, 1)."""
        x = self.input_proj(x)          # (B, T, model_dim)
        x = self.pos_encoding(x)        # Add positional information
        x = self.encoder(x)             # (B, T, model_dim)
        x = self.norm(x[:, -1])         # Layer norm on last time step
        x = self.dropout(x)             # Additional dropout
        return self.fc(x)               # (B, 1)

class SimpleConvolutional(nn.Module):
    """
    1-D CNN for sequence-to-one forecasting.
    Assumes input tensor shape: (batch, seq_len, n_features)

    Each entry of ``num_channels`` adds a Conv1d -> ReLU -> MaxPool1d(2) ->
    Dropout stage. The final feature map is averaged over time, so the output
    layer does not depend on the sequence length.

    Args:
        input_dim: Number of features per time step.
        num_channels: Output channels of each conv stage, in order. ``None``
            selects the default ``[32, 64, 32]``.
        kernel_size: Convolution kernel size; padding is ``kernel_size // 2``.
        dropout: Dropout probability after each stage.
        seq_length: Accepted for interface compatibility; it is not read.

    Returns (from ``forward``):
        Tensor of shape ``(batch,)``.
        NOTE(review): the other models in this file return ``(batch, 1)``;
        confirm the training pipeline handles both shapes.
    """
    def __init__(
        self,
        input_dim: int,               # -- n_features after any AR-lag concat
        num_channels: Optional[List[int]] = None,
        kernel_size: int = 5,
        dropout: float = 0.25,
        seq_length: int = 12
    ):
        super().__init__()

        # Avoid a mutable default argument; build the default list per instance.
        if num_channels is None:
            num_channels = [32, 64, 32]

        layers, in_channels = [], input_dim
        for out_channels in num_channels:
            layers += [
                nn.Conv1d(in_channels, out_channels,
                          kernel_size, padding=kernel_size // 2),
                nn.ReLU(),
                nn.MaxPool1d(2),
                nn.Dropout(dropout)
            ]
            in_channels = out_channels

        self.conv = nn.Sequential(*layers)

        # Force a length-1 feature map so the fc input size is independent of
        # the sequence length.
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(in_channels, 1)   # in_channels == last out_channels

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        x: (batch, seq_len, n_features)
        """
        x = x.transpose(1, 2)          # (batch, n_features, seq_len)

        # Walk the stages by hand instead of calling self.conv(x): every
        # MaxPool1d(2) halves the length, so a deep stack on a short window
        # (e.g. 5 stages on 24 steps) would reach length 1 and the next pool
        # would fail. Pooling is skipped once the sequence is shorter than the
        # pool window; inputs that never get that short are processed exactly
        # as before.
        for layer in self.conv:
            if isinstance(layer, nn.MaxPool1d) and x.size(-1) < layer.kernel_size:
                continue
            x = layer(x)               # (batch, channels, L)

        x = self.pool(x)               # (batch, channels, 1)
        x = x.flatten(1)               # (batch, channels)
        return self.fc(x).squeeze(-1)  # (batch,)
    
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to batch-first embeddings.

    Even embedding columns hold ``sin(position * freq)`` and odd columns hold
    ``cos(position * freq)``. The table is stored as a non-trainable buffer
    ``pe`` of shape ``(1, max_len, d_model)``. Dropout is applied after the
    addition.

    Args:
        d_model: Embedding size (even or odd).
        dropout: Dropout probability applied to the sum.
        max_len: Number of positions in the table; inputs must not be longer.
    """

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 500):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)                 # (max_len, 1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        angles = position * div_term                                  # (max_len, ceil(d_model / 2))

        pe = torch.zeros(1, max_len, d_model)
        pe[0, :, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer cosine column than sine column,
        # so drop the surplus frequency instead of failing on a shape mismatch.
        pe[0, :, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x: Tensor, shape [batch_size, seq_len, embedding_dim]
        """
        seq_len = x.size(1)
        x = x + self.pe[:, :seq_len, :]   # broadcast the table over the batch
        return self.dropout(x)
    
# The "tbill3m" column as float32, handed to the pipeline separately from the
# feature matrix.
tbill3m_data = df["tbill3m"].values.astype(np.float32)

# Single rolling-window run with the Temporal Fusion Transformer.
# NOTE(review): the recorded run of this cell stopped in fold 1 with
# "ValueError: too many values to unpack (expected 2)". Presumably the model's
# forward output and what the pipeline unpacks disagree — verify in
# TM_models / tedst.
# NOTE(review): the result is bound to `results_tcn` although the configured
# model is the TFT; the name is kept in case other cells read it.
results_tcn = sp500_training_pipeline(
    X=features,
    y=target,
    dates=dates,
    tbill3m=tbill3m_data,
    model_class=TemporalFusionTransformer,
    model_type='temporalfusiontransformer',                    # or 'simpleconvolutional'
    model_kwargs={
        'hidden_dim': 64, 'num_heads': 4, 'num_layers': 2, 'dropout': 0.1,
    },
    window_strategy='rolling',
    train_window_years=3,
    test_window_years=1,
    use_autoencoder=True,
    encoding_dim=10,
    seq_length=24,
    ar_lags=1,
    batch_size=128,
    epochs=10,
    lr=0.001,
    plot_results=True,
    alpha=0.0,
    l1_ratio=0.0,
)

# Alternative model_kwargs for the convolutional model:
# model_kwargs={
#         'num_channels':[32, 64, 32], 'kernel_size':5, 'dropout':0.25
#     }
    

Starting rolling window training with 13 folds

— Fold 1/13 —
Training final model…


ValueError: too many values to unpack (expected 2)

In [None]:
def _tuning_search_space(
    model_key: str,
) -> Optional[Tuple[Dict[str, List[Any]], Tuple[str, ...]]]:
    """Return ``(parameter_grid, model_kwarg_names)`` for a lower-case model key.

    ``parameter_grid`` maps each tunable name to its candidate values.
    ``model_kwarg_names`` lists the grid entries forwarded to the model
    constructor; the remaining entries are pipeline settings. Returns ``None``
    for an unknown key.
    """

    def conv_grid() -> Dict[str, List[Any]]:
        # 'simpleconvolutional' and 'tempconvnet' search the same space.
        return {
            'use_autoencoder': [True, False],
            'encoding_dim': [10, 15],
            'seq_length': [24],
            'ar_lags': [1],
            'epochs': [40, 75, 100],
            'lr': [0.0001, 0.001],
            'batch_size': [32, 64, 128],
            'alpha': [0.0, 0.0001],
            'l1_ratio': [0.0, 0.5],
            'num_channels': [[32], [32, 64, 32], [16, 32, 64, 32, 16]],
            'kernel_size': [3, 5],
            'dropout': [0.0, 0.1],
        }

    conv_kwargs = ('num_channels', 'kernel_size', 'dropout')

    spaces = {
        'feedforward': (
            {
                'use_autoencoder': [True],
                'encoding_dim': [10],
                'ar_lags': [1, 5, 10],
                'seq_length': [0],
                'epochs': [60, 80, 120],
                'lr': [0.0001, 0.001],
                'batch_size': [32, 64],
                'alpha': [0.0, 0.0001],
                'l1_ratio': [0.0, 0.5],
                'hidden_dim': [50, 100, 150],
                'dropout': [0.1],
            },
            ('hidden_dim', 'dropout'),
        ),
        'lstm': (
            {
                'use_autoencoder': [False],
                'encoding_dim': [1],
                'seq_length': [24],
                'ar_lags': [1],
                'epochs': [40, 75, 100],
                'lr': [0.0001, 0.001],
                'batch_size': [32, 64, 128],
                'alpha': [0.0, 0.0001, 0.001],
                'l1_ratio': [0.0, 0.5],
                'hidden_dim': [50, 100, 150],
                'num_layers': [1, 2, 4],
                'dropout': [0.0, 0.1],
            },
            ('hidden_dim', 'num_layers', 'dropout'),
        ),
        # NOTE(review): this entry forwards the head count as `num_heads`;
        # confirm the model_class used for 'transformer' accepts that keyword.
        'transformer': (
            {
                'use_autoencoder': [False],
                'encoding_dim': [1],
                'seq_length': [24],
                'ar_lags': [1],
                'epochs': [40, 75, 100],
                'lr': [0.0001, 0.001],
                'batch_size': [32, 64, 128],
                'alpha': [0.0, 0.0001],
                'l1_ratio': [0.0, 0.5],
                'model_dim': [64, 128],
                'num_heads': [2, 4],
                'num_layers': [2, 4],
                'dropout': [0.0, 0.1],
            },
            ('model_dim', 'num_heads', 'num_layers', 'dropout'),
        ),
        'simpleconvolutional': (conv_grid(), conv_kwargs),
        'tempconvnet': (conv_grid(), conv_kwargs),
        'temporalfusiontransformer': (
            {
                'use_autoencoder': [False],
                'encoding_dim': [1],
                'seq_length': [24],
                'ar_lags': [1],
                'epochs': [40, 75, 100],
                'lr': [0.0001, 0.001],
                'batch_size': [32, 64, 128],
                'alpha': [0.0, 0.0001],
                'l1_ratio': [0.0, 0.5],
                'hidden_dim': [64, 128],
                'num_heads': [2, 4],
                'num_layers': [3, 4],
                'dropout': [0.0, 0.1],
            },
            ('hidden_dim', 'num_heads', 'num_layers', 'dropout'),
        ),
    }
    return spaces.get(model_key)


def _mse_rank(entry: Tuple[Dict[str, Any], Any]) -> Tuple[bool, Any]:
    """Sort key for ``(params, mse)`` pairs that ranks NaN scores last."""
    score = entry[1]
    return (math.isnan(score), score)


def hyperparameter_tuning(
    X: np.ndarray,
    y: np.ndarray,
    dates: pd.DatetimeIndex,
    tbill3m: np.ndarray,
    model_class: type,
    model_type: str = 'feedforward',
    window_strategy: str = 'rolling',
    train_window_years: int = 3,
    test_window_years: int = 1,
    device: str = 'cpu',
) -> Dict[str, Any]:
    """Exhaustive grid search for S&P-500 forecasting models.

    Every combination in the grid for ``model_type`` is passed to
    ``sp500_training_pipeline`` (plots and printing disabled), and the
    combination with the lowest ``overall_metrics['avg_test_mse']`` wins.

    Args:
        X, y, dates, tbill3m: Forwarded unchanged to the training pipeline.
        model_class: Model class forwarded to the training pipeline.
        model_type: One of 'feedforward', 'lstm', 'transformer',
            'simpleconvolutional', 'tempconvnet', 'temporalfusiontransformer'
            (case-insensitive). Selects the grid and the model kwargs.
        window_strategy, train_window_years, test_window_years, device:
            Forwarded unchanged to the training pipeline.

    Returns:
        Dict with ``best_params`` (the winning grid point), ``best_mse`` (its
        score) and ``all_results`` (list of ``(params, mse)`` in search order).

    Raises:
        ValueError: If ``model_type`` is not one of the supported types.
    """
    # Validate once, against every type that actually has a grid. The earlier
    # check listed only four types and made the last two grids unreachable.
    search_space = _tuning_search_space(model_type.lower())
    if search_space is None:
        raise ValueError(f"Unsupported model type: {model_type}. Supported types are 'feedforward', 'lstm', 'transformer', 'simpleconvolutional', 'tempconvnet', 'temporalfusiontransformer'.")
    parameter_grid, model_kwarg_names = search_space

    all_results = []
    for param_combination in itertools.product(*parameter_grid.values()):
        params = dict(zip(parameter_grid, param_combination))
        model_kwargs = {name: params[name] for name in model_kwarg_names}

        # Run the training pipeline with the current parameters
        results = sp500_training_pipeline(
            X=X,
            y=y,
            dates=dates,
            tbill3m=tbill3m,
            model_class=model_class,
            model_type=model_type,
            model_kwargs=model_kwargs,
            window_strategy=window_strategy,
            train_window_years=train_window_years,
            test_window_years=test_window_years,
            use_autoencoder=params['use_autoencoder'],
            encoding_dim=params['encoding_dim'],
            seq_length=params['seq_length'],
            ar_lags=params['ar_lags'],
            batch_size=params['batch_size'],
            epochs=params['epochs'],
            lr=params['lr'],
            plot_results=False,  # No need to plot during tuning
            alpha=params['alpha'],
            l1_ratio=params['l1_ratio'],
            device=device,
            do_print=False,  # Suppress print statements during tuning
        )

        all_results.append((params, results['overall_metrics']['avg_test_mse']))

    # Lowest MSE wins. A NaN score (e.g. from a diverged run) must never be
    # picked just because comparisons against NaN are always False.
    best_params, best_mse = min(all_results, key=_mse_rank)
    return {
        'best_params': best_params,
        'best_mse': best_mse,
        'all_results': all_results
    }


In [None]:
# Grid-search the feed-forward baseline with 3-year training and 1-year test
# rolling windows on the CPU.
# The 'feedforward' grid in hyperparameter_tuning expands to 432 combinations,
# and each one is a full pipeline run; the recorded execution of this cell
# ended in a KeyboardInterrupt.
# Here the features are passed as a NumPy array (`features.values`), whereas
# the single-run cell above passes the DataFrame itself.
hyperparameter_tuning_results = hyperparameter_tuning(
    X=features.values,
    y=target,
    dates=dates,
    tbill3m=tbill3m_data,
    model_class=SimpleFeedForward,  # or any other model class
    model_type='feedforward',
    window_strategy='rolling',
    train_window_years=3,
    test_window_years=1,
    device='cpu'
)

KeyboardInterrupt: 