# TimesNet Light Configuration - Financial Data Training

This notebook contains a **lightweight TimesNet configuration** optimized for:
- Fast experimentation and testing
- Quick iterations during development
- Resource-constrained environments
- Proof-of-concept validation

**Dataset**: Financial time series with 4 targets + 114 covariates (118 total features)

**Training Time**: ~5-10 minutes per epoch

In [None]:
# Import required libraries
import os
import sys
import time
import torch
import numpy as np
import pandas as pd
from datetime import datetime

# Add project root to path
sys.path.append(os.path.dirname(os.path.abspath('.')))

from models.TimesNet import Model as TimesNet
from utils.tools import EarlyStopping, adjust_learning_rate
from utils.metrics import metric
from utils.logger import logger
from data_provider.data_loader import Dataset_Custom
from torch.utils.data import DataLoader

# Environment report: confirm the imports worked and show which accelerator
# PyTorch can see. The availability probe is done once and reused below.
has_cuda = torch.cuda.is_available()

print("✅ All imports successful")
print(f"🔥 PyTorch version: {torch.__version__}")
print(f"💻 Device: {'CUDA' if has_cuda else 'CPU'}")

# Enhanced GPU information
if not has_cuda:
    print("⚠️  No GPU detected - will use CPU (training will be slower)")
else:
    gpu_report = (
        f"🚀 GPU Name: {torch.cuda.get_device_name(0)}",
        f"💾 GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB",
        f"⚡ CUDA Version: {torch.version.cuda}",
        "🎯 GPU acceleration will be used automatically!",
    )
    for report_line in gpu_report:
        print(report_line)

✅ All imports successful
🔥 PyTorch version: 2.7.1+cpu
💻 Device: CPU


## 🔧 Light Configuration Parameters

**Purpose**: Fast training for quick experimentation and validation

In [6]:
# ================================
# LIGHT CONFIGURATION - TIMESNET
# ================================

class LightConfig:
    """Lightweight experiment settings, stored as plain class attributes.

    An instance of this class is handed to `Dataset_Custom` (as `args=`) and to
    the `TimesNet` constructor, and is read directly by the notebook's
    training cells. All values are class-level defaults; later cells override
    some of them on the instance.
    """
    # === DATA CONFIGURATION ===
    data = 'custom'                    # Dataset type (custom for prepared financial data)
    root_path = './data/'              # Root directory for data files
    data_path = 'prepared_financial_data.csv'  # Main data file
    features = 'M'                     # Forecasting mode: 'M'=Multivariate, 'S'=Univariate, 'MS'=Multivariate-to-Univariate
    target = 'log_Close'               # Primary target column (for 'S' mode)
    freq = 'b'                         # Time frequency: 'b'=business day, 'h'=hourly, 'd'=daily
    
    # === SEQUENCE PARAMETERS ===
    seq_len = 50                       # Input sequence length (lookback window) - LIGHT: shorter for speed
    label_len = 10                     # Start token length for decoder input (overlap with seq_len)
    pred_len = 5                       # Prediction horizon (how many steps to forecast) - LIGHT: shorter predictions
    
    # === TRAIN/VAL/TEST SPLITS ===
    # The notebook copies val_len/test_len onto args.validation_length/args.test_length
    # before building each dataset.
    val_len = 10                       # Validation set length in time steps
    test_len = 10                      # Test set length in time steps
    prod_len = 5                       # Production forecast length (future predictions beyond data)
    
    # === TIMESNET MODEL ARCHITECTURE ===
    # Core dimensions
    enc_in = 118                       # Encoder input size (total features: 4 targets + 114 covariates)
    dec_in = 118                       # Decoder input size (usually same as enc_in)
    c_out = 118                        # Output size (must match enc_in to avoid dimension mismatch)
    d_model = 32                       # Model dimension (embedding size) - LIGHT: smaller for speed
    d_ff = 64                          # Feed-forward network dimension - LIGHT: smaller FFN
    
    # Attention mechanism
    # NOTE(review): whether the TimesNet model code actually consumes these
    # attention-style settings is not visible from this notebook - confirm.
    n_heads = 4                        # Number of attention heads - LIGHT: fewer heads
    e_layers = 2                       # Number of encoder layers - LIGHT: fewer layers
    d_layers = 1                       # Number of decoder layers (usually 1 for forecasting)
    
    # TimesNet specific parameters
    top_k = 3                          # Top-k frequencies for TimesNet decomposition - LIGHT: fewer frequencies
    num_kernels = 3                    # Number of convolution kernels in Inception blocks - LIGHT: fewer kernels
    
    # Regularization
    dropout = 0.1                      # Dropout rate for regularization
    
    # Additional model settings
    # embed: 'timeF' makes the data loader request time-feature encoding (timeenc=1);
    # any other value selects timeenc=0.
    # NOTE(review): 'fixed' presumably means a non-trainable embedding and 'learned'
    # a trainable one - confirm against the embedding implementation.
    embed = 'timeF'                    # Time feature embedding: 'timeF', 'fixed' or 'learned'
    activation = 'gelu'                # Activation function: 'gelu', 'relu', 'swish'
    factor = 1                         # Attention factor (usually 1)
    # NOTE(review): presumably the Informer-style encoder "distilling" switch rather
    # than knowledge distillation - confirm; it is not read anywhere in this notebook.
    distil = True                      # Encoder distilling flag (see note above)
    moving_avg = 25                    # Moving average window for trend decomposition
    output_attention = False           # Whether to output attention weights
    
    # === TRAINING CONFIGURATION ===
    train_epochs = 10                  # Number of training epochs - LIGHT: fewer epochs
    batch_size = 32                    # Batch size - LIGHT: moderate batch size
    learning_rate = 0.001              # Learning rate - LIGHT: slightly higher for faster convergence
    patience = 5                       # Early stopping patience - LIGHT: less patience
    lradj = 'type1'                    # Learning rate adjustment strategy
    
    # Loss and optimization
    loss = 'MSE'                       # Loss function: 'MSE', 'MAE', 'Huber'
    use_amp = False                    # Automatic mixed precision (can speed up training)
    
    # System settings
    num_workers = 4                    # DataLoader workers - LIGHT: fewer workers
    seed = 2024                        # Random seed for reproducibility
    
    # Task specific
    task_name = 'short_term_forecast'  # Task type: 'short_term_forecast' for financial prediction
    
    # Experiment tracking
    des = 'light_config'               # Experiment description
    # The timestamp is evaluated once, when this class body runs, so every instance
    # created from the same class definition shares one checkpoint directory.
    checkpoints = f'./checkpoints/TimesNet_light_{datetime.now().strftime("%Y%m%d_%H%M")}'
    
# Create config instance
args = LightConfig()

# Apply the configured seed. The config declares `seed` "for reproducibility",
# but nothing else in this notebook feeds it to an RNG. Seeding here makes
# weight initialisation and DataLoader shuffling repeatable on a fresh
# "Restart & Run All". torch.manual_seed also seeds the CUDA generators.
np.random.seed(args.seed)
torch.manual_seed(args.seed)

print("🔧 Light Configuration Loaded:")
print(f"   📏 Sequence Length: {args.seq_len}")
print(f"   🎯 Prediction Length: {args.pred_len}")
print(f"   🧠 Model Dimension: {args.d_model}")
print(f"   ⚡ Epochs: {args.train_epochs}")
print(f"   📊 Batch Size: {args.batch_size}")

🔧 Light Configuration Loaded:
   📏 Sequence Length: 50
   🎯 Prediction Length: 5
   🧠 Model Dimension: 32
   ⚡ Epochs: 10
   📊 Batch Size: 32


## 🎛️ Tweakable Parameters

Modify these parameters to experiment with different configurations:

In [7]:
# ================================
# TWEAKABLE PARAMETERS - EXPERIMENT
# ================================

# Modify these for quick experiments:

# Overrides applied on top of the LightConfig defaults, in the order listed.
# Edit a value and re-run this cell to try a different setup.
experiment_overrides = {
    # --- Sequence parameters (affect model complexity and data usage) ---
    'seq_len': 50,            # Try: 30, 50, 100 (longer = more context, slower training)
    'pred_len': 5,            # Try: 3, 5, 10 (longer = harder prediction task)

    # --- Model size (affect memory usage and training time) ---
    'd_model': 32,            # Try: 16, 32, 64 (larger = more capacity, slower)
    'd_ff': 64,               # Try: 32, 64, 128 (usually 2x d_model)
    'n_heads': 4,             # Try: 2, 4, 8 (must divide d_model evenly)
    'e_layers': 2,            # Try: 1, 2, 3 (more layers = deeper model)

    # --- TimesNet specific ---
    'top_k': 3,               # Try: 2, 3, 5 (more frequencies = more complex patterns)
    'num_kernels': 3,         # Try: 2, 3, 6 (more kernels = more feature extraction)

    # --- Training parameters ---
    'train_epochs': 10,       # Try: 5, 10, 20
    'batch_size': 32,         # Try: 16, 32, 64 (larger = faster but more memory)
    'learning_rate': 0.001,   # Try: 0.0001, 0.001, 0.01

    # --- Advanced tweaks ---
    'dropout': 0.1,           # Try: 0.0, 0.1, 0.2 (higher = more regularization)
    'moving_avg': 25,         # Try: 15, 25, 50 (window for trend decomposition)
}
for option_name, option_value in experiment_overrides.items():
    setattr(args, option_name, option_value)

# Echo the settings that matter most for run time and capacity.
summary_lines = [
    "✏️ Updated Configuration:",
    f"   Model Size: d_model={args.d_model}, d_ff={args.d_ff}, heads={args.n_heads}, layers={args.e_layers}",
    f"   TimesNet: top_k={args.top_k}, kernels={args.num_kernels}",
    f"   Training: epochs={args.train_epochs}, batch={args.batch_size}, lr={args.learning_rate}",
]
for summary_line in summary_lines:
    print(summary_line)

✏️ Updated Configuration:
   Model Size: d_model=32, d_ff=64, heads=4, layers=2
   TimesNet: top_k=3, kernels=3
   Training: epochs=10, batch=32, lr=0.001


## 🚀 Training Setup and Execution

In [None]:
# Choose the compute device once; later cells move the model and every batch to `device`.
gpu_available = torch.cuda.is_available()
device = torch.device('cuda' if gpu_available else 'cpu')

# Enhanced device information
print(f"🎯 Selected Device: {device}")
if gpu_available:
    device_report = [
        f"🚀 GPU: {torch.cuda.get_device_name(0)}",
        f"💾 GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB",
        "⚡ Parallel processing: ENABLED (automatic)",
        "💡 Tips: Model will automatically use GPU cores for faster training",
    ]
else:
    device_report = [
        "⚠️  Running on CPU - training will be slower",
        "💡 Tips: Install CUDA-compatible PyTorch for GPU acceleration",
    ]
for report_line in device_report:
    print(report_line)

# The training loop writes checkpoints here, so the directory must exist first.
os.makedirs(args.checkpoints, exist_ok=True)

print(f"🔥 Using device: {device}")
print(f"📁 Checkpoints: {args.checkpoints}")

# Data loader setup
def create_data_loader(flag):
    """Build a DataLoader over `Dataset_Custom` for one data split.

    Args:
        flag: Split selector forwarded to `Dataset_Custom` (the notebook uses
            'train', 'val' and 'test'). Only 'train' is treated specially
            here: it is shuffled and its incomplete final batch is dropped.

    Returns:
        torch.utils.data.DataLoader: loader over the requested split, using
        `args.batch_size` and `args.num_workers`.

    Side effects:
        Sets `args.validation_length` and `args.test_length` from
        `args.val_len` / `args.test_len` on every call.
    """
    # Mirror the split lengths under the attribute names set here; `args` is
    # passed to Dataset_Custom, which presumably reads them - verify.
    args.validation_length = args.val_len
    args.test_length = args.test_len

    dataset = Dataset_Custom(
        args=args,
        root_path=args.root_path,
        data_path=args.data_path,
        flag=flag,
        size=[args.seq_len, args.label_len, args.pred_len],
        features=args.features,
        target=args.target,
        scale=True,
        timeenc=1 if args.embed == 'timeF' else 0,
        freq=args.freq
    )

    is_train = (flag == 'train')
    data_loader = DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=is_train,
        num_workers=args.num_workers,
        # Drop the ragged final batch only while training. The val/test splits
        # can hold fewer samples than one batch, and dropping their only
        # (partial) batch would leave the loader with zero batches, so
        # evaluation would silently see no data.
        drop_last=is_train
    )
    return data_loader

# Create data loaders
train_loader = create_data_loader('train')
val_loader = create_data_loader('val')
test_loader = create_data_loader('test')

print(f"📊 Data loaders created:")
print(f"   Train: {len(train_loader)} batches")
print(f"   Val: {len(val_loader)} batches")
print(f"   Test: {len(test_loader)} batches")

# A loader with zero batches does not raise anywhere downstream: validation
# would report an infinite loss and the test cell would see no data.
# Flag it here, where the cause (split size vs. batch size) is still obvious.
for split_name, split_loader in (('Train', train_loader), ('Val', val_loader), ('Test', test_loader)):
    if len(split_loader) == 0:
        print(f"⚠️  {split_name} loader has 0 batches - the split yields no full batch "
              f"of {args.batch_size}; lower batch_size or enlarge the split")

🔥 Using device: cpu
📁 Checkpoints: ./checkpoints/TimesNet_light_20250616_0816
2025-06-16 08:16:43,805 [INFO] TSLib: Initializing Dataset_Custom with targets: log_Close
2025-06-16 08:16:44,153 [INFO] TSLib: Border calculation: n=7109, s=50, p=5, v=10, t=10
2025-06-16 08:16:44,153 [INFO] TSLib: border1s = [0, 7039, 7049]
2025-06-16 08:16:44,153 [INFO] TSLib: border2s = [7089, 7099, 7109]
2025-06-16 08:16:44,199 [INFO] TSLib: Loaded data shape: (7109, 119)
2025-06-16 08:16:44,199 [INFO] TSLib: Data_x shape: (7089, 118), Data_y shape: (7089, 118)
2025-06-16 08:16:44,199 [INFO] TSLib: Initializing Dataset_Custom with targets: log_Close
2025-06-16 08:16:44,487 [INFO] TSLib: Border calculation: n=7109, s=50, p=5, v=10, t=10
2025-06-16 08:16:44,487 [INFO] TSLib: border1s = [0, 7039, 7049]
2025-06-16 08:16:44,492 [INFO] TSLib: border2s = [7089, 7099, 7109]
2025-06-16 08:16:44,528 [INFO] TSLib: Loaded data shape: (7109, 119)
2025-06-16 08:16:44,530 [INFO] TSLib: Data_x shape: (60, 118), Data_y s

In [9]:
# Initialize TimesNet model
model = TimesNet(args).to(device)

# Setup training components
# Honour args.loss instead of always training with MSE. The config documents
# 'MSE', 'MAE' and 'Huber', and the default ('MSE') keeps the previous behaviour.
loss_factories = {
    'MSE': torch.nn.MSELoss,
    'MAE': torch.nn.L1Loss,
    'HUBER': torch.nn.HuberLoss,
}
loss_name = str(getattr(args, 'loss', 'MSE')).upper()
if loss_name not in loss_factories:
    raise ValueError(
        f"Unsupported args.loss={args.loss!r}; expected one of 'MSE', 'MAE', 'Huber'"
    )
criterion = loss_factories[loss_name]()
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
early_stopping = EarlyStopping(patience=args.patience, verbose=True)

# Model info
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"🧠 TimesNet Light Model Initialized:")
print(f"   📊 Total Parameters: {total_params:,}")
print(f"   🎯 Trainable Parameters: {trainable_params:,}")
# Size estimate assumes 4 bytes per parameter (float32 weights).
print(f"   💾 Model Size: ~{total_params * 4 / 1024 / 1024:.1f} MB")

2025-06-16 08:16:49,986 [INFO] TSLib: Initializing TimesNet model with configs: <__main__.LightConfig object at 0x000002A5B36B9310>
2025-06-16 08:16:49,987 [INFO] TSLib: Initializing Inception_Block_V1
2025-06-16 08:16:49,994 [INFO] TSLib: Initializing Inception_Block_V1
2025-06-16 08:16:49,998 [INFO] TSLib: Initializing Inception_Block_V1
2025-06-16 08:16:50,002 [INFO] TSLib: Initializing Inception_Block_V1
🧠 TimesNet Light Model Initialized:
   📊 Total Parameters: 305,483
   🎯 Trainable Parameters: 305,483
   💾 Model Size: ~1.2 MB


In [10]:
# Training function with progress tracking
def train_epoch():
    """Run one training pass over `train_loader` and return the mean batch loss.

    Uses the notebook globals `model`, `train_loader`, `optimizer`,
    `criterion`, `device` and `args`. The loss is computed only on the last
    `args.pred_len` time steps of the first 4 feature columns, which the
    notebook treats as the target columns.

    Returns:
        float: mean of the per-batch losses for this epoch.

    Raises:
        ValueError: if `train_loader` has no batches. Previously this surfaced
            as a ZeroDivisionError after the loop.
    """
    model.train()
    total_loss = 0.0
    num_batches = len(train_loader)
    if num_batches == 0:
        raise ValueError(
            "train_loader has 0 batches; lower batch_size or provide more training data"
        )

    epoch_start_time = time.time()
    print(f"🏃 Training on {num_batches} batches...")

    for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
        # Move to device
        batch_x = batch_x.float().to(device)
        batch_y = batch_y.float().to(device)
        batch_x_mark = batch_x_mark.float().to(device)
        batch_y_mark = batch_y_mark.float().to(device)

        # Prepare decoder input: the first label_len steps of batch_y followed
        # by zeros for the horizon. zeros_like already inherits batch_y's dtype
        # and device, so no extra casts are needed.
        dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :])
        dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1)

        # Forward pass
        optimizer.zero_grad()
        outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

        # Calculate loss (only on target columns - first 4 features)
        target_outputs = outputs[:, -args.pred_len:, :4]
        target_y = batch_y[:, -args.pred_len:, :4]
        loss = criterion(target_outputs, target_y)

        # Backward pass
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # Progress reporting: every 20th batch and the final one
        if i % 20 == 0 or i == num_batches - 1:
            progress_pct = (i + 1) / num_batches * 100
            avg_loss = total_loss / (i + 1)
            elapsed = time.time() - epoch_start_time
            # Linear extrapolation from the average time per finished batch
            remaining = elapsed / (i + 1) * (num_batches - i - 1)
            print(f"   Batch {i+1:3d}/{num_batches} ({progress_pct:5.1f}%) - "
                  f"Loss: {loss.item():.6f} (Avg: {avg_loss:.6f}) - "
                  f"Remaining: {remaining:.1f}s")

    epoch_time = time.time() - epoch_start_time
    avg_loss = total_loss / num_batches
    print(f"✅ Epoch completed in {epoch_time:.1f}s. Average loss: {avg_loss:.6f}")
    return avg_loss

# Validation function
def validate_epoch():
    """Evaluate `model` on `val_loader` without gradients and return the mean batch loss.

    Mirrors the training forward pass: the decoder input is the first
    `args.label_len` steps of the targets followed by zeros for the horizon,
    and the loss covers the last `args.pred_len` steps of the first 4 feature
    columns only.

    Returns:
        float: mean per-batch loss, or `float('inf')` when the loader yields
        no batches.
    """
    model.eval()
    batch_losses = []

    with torch.no_grad():
        for batch in val_loader:
            batch_x, batch_y, batch_x_mark, batch_y_mark = (
                tensor.float().to(device) for tensor in batch
            )

            # Known label window + zero placeholder for the steps to predict
            horizon_zeros = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float().to(device)
            known_part = batch_y[:, :args.label_len, :]
            dec_inp = torch.cat([known_part, horizon_zeros], dim=1).float().to(device)

            outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

            # Score only the forecast horizon of the first 4 columns
            predicted = outputs[:, -args.pred_len:, :4]
            expected = batch_y[:, -args.pred_len:, :4]
            batch_losses.append(criterion(predicted, expected).item())

    if not batch_losses:
        return float('inf')
    return sum(batch_losses) / len(batch_losses)

print("🔧 Training functions defined")

🔧 Training functions defined


In [None]:
# Main training loop
# Trains for up to args.train_epochs epochs, tracking train/val loss per epoch,
# and keeps the weights with the lowest validation loss in best_model.pth.
print(f"🚀 Starting TimesNet Light Training ({args.train_epochs} epochs)")
print(f"⏰ Estimated time: ~{args.train_epochs * 5} minutes")
print("="*60)

best_val_loss = float('inf')
best_model_path = f"{args.checkpoints}/best_model.pth"
best_model_saved = False
train_losses = []
val_losses = []

training_start_time = time.time()

for epoch in range(args.train_epochs):
    print(f"\n🔄 Epoch {epoch+1}/{args.train_epochs}")

    # Train
    train_loss = train_epoch()
    train_losses.append(train_loss)

    # Validate
    print("🔍 Running validation...")
    val_loss = validate_epoch()
    val_losses.append(val_loss)

    # Log progress
    print(f"📈 Epoch {epoch+1} Results: Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}")

    # Adjust learning rate
    adjust_learning_rate(optimizer, epoch + 1, args)

    # Save best model. Done before the early-stopping check so an epoch that
    # improves the validation loss is always checkpointed, even if the
    # stopper decides to end training on that same epoch.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), best_model_path)
        best_model_saved = True
        print(f"💾 New best model saved (Val Loss: {val_loss:.6f})")

    # Early stopping
    early_stopping(val_loss, model, args.checkpoints)
    if early_stopping.early_stop:
        print("⏹️ Early stopping triggered")
        break

# If validation loss never dropped below +inf (no validation batches, or a
# NaN/inf loss every epoch), no best_model.pth exists yet and the evaluation
# cell would fail to load it. Fall back to the final weights and say so.
if not best_model_saved:
    torch.save(model.state_dict(), best_model_path)
    print("⚠️ Validation loss never improved; saved the final weights as best_model.pth")

total_training_time = time.time() - training_start_time
print(f"\n🎉 Training completed in {total_training_time/60:.1f} minutes!")
print(f"🏆 Best validation loss: {best_val_loss:.6f}")

🚀 Starting TimesNet Light Training (10 epochs)
⏰ Estimated time: ~50 minutes

🔄 Epoch 1/10
🏃 Training on 219 batches...


## 📊 Results and Analysis

In [None]:
# Load best model and test
# The checkpoint holds only a state_dict, so the restricted loader
# (weights_only=True) is enough and avoids unpickling arbitrary objects.
# map_location keeps the load working when the weights were saved on a
# different device than the one in use now.
best_state = torch.load(
    f"{args.checkpoints}/best_model.pth",
    map_location=device,
    weights_only=True,
)
model.load_state_dict(best_state)
model.eval()

# Test evaluation
preds = []
trues = []

print("🧪 Testing model...")
with torch.no_grad():
    for batch_x, batch_y, batch_x_mark, batch_y_mark in test_loader:
        batch_x = batch_x.float().to(device)
        batch_y = batch_y.float().to(device)
        batch_x_mark = batch_x_mark.float().to(device)
        batch_y_mark = batch_y_mark.float().to(device)

        # Decoder input: known label window followed by zeros for the horizon
        dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float().to(device)
        dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to(device)

        outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

        # Keep only the forecast horizon of the first 4 (target) columns
        pred = outputs[:, -args.pred_len:, :4].detach().cpu().numpy()
        true = batch_y[:, -args.pred_len:, :4].detach().cpu().numpy()

        preds.append(pred)
        trues.append(true)

# Calculate metrics
if preds:
    preds = np.concatenate(preds, axis=0)
    trues = np.concatenate(trues, axis=0)

    # NOTE(review): the loaders are built with scale=True, so these metrics are
    # presumably in the dataset's scaled space rather than original units - confirm.
    mae, mse, rmse, mape, mspe = metric(preds, trues)

    print("\n📊 TimesNet Light - Test Results:")
    print(f"   🎯 MSE:  {mse:.6f}")
    print(f"   📏 MAE:  {mae:.6f}")
    print(f"   📐 RMSE: {rmse:.6f}")
    # NOTE(review): MAPE/MSPE are printed with a '%' suffix; confirm that
    # utils.metrics.metric returns percentages rather than ratios.
    print(f"   📈 MAPE: {mape:.6f}%")
    print(f"   📉 MSPE: {mspe:.6f}%")

    # Summary
    print(f"\n📋 Configuration Summary:")
    print(f"   ⚡ Model: Light ({total_params:,} params)")
    print(f"   📏 Sequence: {args.seq_len} → {args.pred_len}")
    print(f"   🧠 Architecture: d_model={args.d_model}, layers={args.e_layers}, heads={args.n_heads}")
    print(f"   ⏱️ Training time: {total_training_time/60:.1f} minutes")
    print(f"   🏆 Final performance: RMSE={rmse:.6f}")
else:
    print("⚠️ No test data available")