# TimesNet Light Configuration - Financial Data Training

This notebook contains a **lightweight TimesNet configuration** optimized for:
- Fast experimentation and testing
- Quick iterations during development
- Resource-constrained environments
- Proof of concept validations

**Dataset**: Financial time series with 4 targets + 114 covariates (118 total features)

**Training Time**: ~5-10 minutes per epoch

In [None]:
# Import required libraries
import os
import sys
import time
import torch
import numpy as np
import pandas as pd
from datetime import datetime

# === ROBUST PATH SETUP FOR GPU DEPLOYMENT ===
# This handles both local development and GPU server deployment

# Method 1: Auto-detect project root
def setup_project_path():
    """Locate the TimesNet project root and make sure it is on ``sys.path``.

    Candidates are checked in order: the current working directory, its
    parent, and the directory of the running script (when ``__file__`` is
    defined; otherwise the parent directory again). The first candidate that
    contains both a ``models`` and a ``utils`` sub-directory is treated as
    the project root.

    Returns:
        str: The detected project root, or the current working directory
        when no candidate qualifies.
    """
    current_dir = os.getcwd()
    possible_roots = [
        current_dir,  # Current working directory
        os.path.dirname(os.path.abspath('.')),  # Parent directory
        os.path.dirname(os.path.abspath(__file__ if '__file__' in globals() else '.')),  # Script directory
    ]

    # Google Drive mounts and directories named after the package are added
    # eagerly, before the structural check below.
    if '/content/drive/MyDrive' in current_dir or 'timesnet' in current_dir.lower():
        print(f"🔍 Custom deployment detected: {current_dir}")
        if current_dir not in sys.path:
            sys.path.insert(0, current_dir)
            print(f"✅ Added to path: {current_dir}")

    for root in possible_roots:
        if root and os.path.exists(os.path.join(root, 'models')) and os.path.exists(os.path.join(root, 'utils')):
            if root not in sys.path:
                sys.path.insert(0, root)
                print(f"✅ Project root found and added: {root}")
            # Return the match even when it was already on sys.path;
            # otherwise a valid root would be skipped and the function would
            # report the fallback directory instead.
            return root

    # No candidate looked like the project: fall back to the working directory.
    if current_dir not in sys.path:
        sys.path.insert(0, current_dir)
        print(f"⚠️  Using current directory as fallback: {current_dir}")

    return current_dir

# Resolve the project root once and show the first sys.path entries so a
# failed import below can be diagnosed from the cell output.
project_root = setup_project_path()
print(f"📁 Project root: {project_root}")
print(f"🐍 Python path includes: {[p for p in sys.path[:3]]}")

# Import the project modules; on failure print a checklist of the expected
# package directories and re-raise so the cell still fails visibly.
try:
    from models.TimesNet import Model as TimesNet
    from utils.tools import EarlyStopping, adjust_learning_rate
    from utils.metrics import metric
    from utils.logger import logger
    from data_provider.data_loader import Dataset_Custom
    from torch.utils.data import DataLoader
    
    print("✅ All imports successful!")
    
except ImportError as e:
    print(f"❌ Import error: {e}")
    print("\n🔧 TROUBLESHOOTING:")
    print("1. Verify you extracted the GPU package correctly")
    print("2. Check that these directories exist:")
    # Report which of the expected package directories exist under the root.
    for dir_name in ['models', 'utils', 'data_provider', 'layers', 'exp']:
        dir_path = os.path.join(project_root, dir_name)
        exists = os.path.exists(dir_path)
        print(f"   {'✅' if exists else '❌'} {dir_path}")
    
    print("\n💡 Quick fix - run this in the next cell:")
    print("import sys")
    print(f"sys.path.insert(0, '{project_root}')")
    print("# Then re-run the imports")
    
    # Propagate the original ImportError; nothing below can work without it.
    raise
# NOTE: this repeats the DataLoader import already done inside the try block,
# and the print below repeats the success message printed there.
from torch.utils.data import DataLoader

print("✅ All imports successful")
print(f"🔥 PyTorch version: {torch.__version__}")
print(f"💻 Device: {'CUDA' if torch.cuda.is_available() else 'CPU'}")

# Report details of CUDA device 0 when a GPU is visible to PyTorch.
if torch.cuda.is_available():
    print(f"🚀 GPU Name: {torch.cuda.get_device_name(0)}")
    print(f"💾 GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB")
    print(f"⚡ CUDA Version: {torch.version.cuda}")
    print("🎯 GPU acceleration will be used automatically!")
else:
    print("⚠️  No GPU detected - will use CPU (training will be slower)")

In [None]:
# === MANUAL PATH SETUP (Run this if imports fail) ===
# For GPU deployment troubleshooting

def manual_path_setup(custom_path=None):
    """Add a project directory to ``sys.path`` and verify its layout.

    Args:
        custom_path: Directory of the extracted package. When omitted (or
            empty) the current working directory is used.

    Returns:
        bool: ``True`` when all of ``models``, ``utils``, ``data_provider``,
        ``layers`` and ``exp`` exist under the chosen directory, ``False``
        otherwise. The directory is added to ``sys.path`` in both cases.
    """
    # Import at the top of the function, not inside one branch: a
    # function-level import makes the name local to the whole function, so
    # importing ``os`` only on the auto-detect path leaves it unbound when a
    # custom path is supplied.
    import os
    import sys

    # Every auto-detection case resolves to the working directory.
    project_path = custom_path if custom_path else os.getcwd()

    # Add to Python path
    if project_path not in sys.path:
        sys.path.insert(0, project_path)

    print(f"🔧 Manual path setup completed")
    print(f"📁 Project path: {project_path}")

    # Verify directories exist
    required_dirs = ['models', 'utils', 'data_provider', 'layers', 'exp']
    missing_dirs = []

    for dir_name in required_dirs:
        if os.path.exists(os.path.join(project_path, dir_name)):
            print(f"   ✅ {dir_name}/")
        else:
            print(f"   ❌ {dir_name}/ (MISSING)")
            missing_dirs.append(dir_name)

    if missing_dirs:
        print(f"\n⚠️ Missing directories: {missing_dirs}")
        print("💡 Make sure you extracted the GPU package correctly")
        return False

    print(f"\n✅ All required directories found!")
    return True

# Optional: uncomment the call below and replace the path with the directory
# the package was extracted to, then re-run the import cell.
# manual_path_setup('/content/drive/MyDrive/coding/timesnet_gpu_package_20250616_0827')

print("💡 If imports failed above, uncomment and run the manual_path_setup() with your path")

## 🚀 GPU Deployment Instructions

If you're running this on a **GPU server** (Google Colab, remote server, etc.) and getting `ModuleNotFoundError`, follow these steps:

### ✅ **Quick Fix:**

1. **Check your current directory:**
   ```python
   import os
   print("Current directory:", os.getcwd())
   print("Files here:", os.listdir('.'))
   ```

2. **If you see the TimesNet files, run this:**
   ```python
   manual_path_setup()  # Use the function above
   ```

3. **If you're in a different directory, pass the extracted package's path to the helper:**
   ```python
   # Example for Google Colab:
   manual_path_setup('/content/drive/MyDrive/coding/timesnet_gpu_package_20250616_0827')
   
   # Example for remote server:
   manual_path_setup('/home/username/timesnet_gpu_package')
   ```

### 🔧 **Common GPU Deployment Issues:**

- **Wrong directory**: Make sure you're in the extracted GPU package folder
- **Missing files**: Verify the ZIP was extracted completely
- **Path conflicts**: Clear and restart the kernel if needed

### 💡 **Google Colab Specific:**
```python
# Mount Google Drive first
from google.colab import drive
drive.mount('/content/drive')

# Navigate to your package
%cd /content/drive/MyDrive/your_path/timesnet_gpu_package/

# Then run the imports
```

## 🔧 Light Configuration Parameters

**Purpose**: Fast training for quick experimentation and validation

In [None]:
# ================================
# LIGHT CONFIGURATION - TIMESNET
# ================================

class LightConfig:
    """Attribute container holding every setting of the light TimesNet run.

    All values are plain class attributes. The notebook instantiates the
    class once as ``args`` and passes that object to ``Dataset_Custom`` and
    to the model constructor, so later cells can override individual
    settings on the instance.
    """
    # === DATA CONFIGURATION ===
    data = 'custom'                    # Dataset type (custom for prepared financial data)
    root_path = './data/'              # Root directory for data files
    data_path = 'prepared_financial_data.csv'  # Main data file
    features = 'M'                     # Forecasting mode: 'M'=Multivariate, 'S'=Univariate, 'MS'=Multivariate-to-Univariate
    target = 'log_Close'               # Primary target column (for 'S' mode)
    freq = 'b'                         # Time frequency: 'b'=business day, 'h'=hourly, 'd'=daily
    
    # === SEQUENCE PARAMETERS ===
    seq_len = 50                       # Input sequence length (lookback window) - LIGHT: shorter for speed
    label_len = 10                     # Start token length for decoder input (overlap with seq_len)
    pred_len = 5                       # Prediction horizon (how many steps to forecast) - LIGHT: shorter predictions
    
    # === TRAIN/VAL/TEST SPLITS ===
    # The data-loader cell copies val_len / test_len onto the instance as
    # validation_length / test_length before building each dataset.
    val_len = 10                       # Validation set length in time steps
    test_len = 10                      # Test set length in time steps
    prod_len = 5                       # Production forecast length (future predictions beyond data)
    
    # === TIMESNET MODEL ARCHITECTURE ===
    # Core dimensions
    enc_in = 118                       # Encoder input size (total features: 4 targets + 114 covariates)
    dec_in = 118                       # Decoder input size (usually same as enc_in)
    c_out = 118                        # Output size (must match enc_in to avoid dimension mismatch)
    d_model = 32                       # Model dimension (embedding size) - LIGHT: smaller for speed
    d_ff = 64                          # Feed-forward network dimension - LIGHT: smaller FFN
    
    # Attention-style options
    # NOTE(review): n_heads, d_layers, factor, distil, moving_avg and
    # output_attention look like options of attention-based models sharing
    # this args layout; confirm whether models.TimesNet reads any of them.
    n_heads = 4                        # Number of attention heads - LIGHT: fewer heads
    e_layers = 2                       # Number of encoder layers - LIGHT: fewer layers
    d_layers = 1                       # Number of decoder layers (usually 1 for forecasting)
    
    # TimesNet specific parameters
    top_k = 3                          # Top-k frequencies for TimesNet decomposition - LIGHT: fewer frequencies
    num_kernels = 3                    # Number of convolution kernels in Inception blocks - LIGHT: fewer kernels
    
    # Regularization
    dropout = 0.1                      # Dropout rate for regularization
    
    # Additional model settings
    # 'timeF' makes the data-loader cell pass timeenc=1; any other value gives
    # timeenc=0. NOTE(review): 'fixed' and 'learned' are presumably the other
    # embedding modes the project accepts - confirm in the embedding code.
    embed = 'timeF'                    # Time feature embedding mode
    activation = 'gelu'                # Activation function: 'gelu', 'relu', 'swish'
    factor = 1                         # Attention factor (usually 1)
    # NOTE(review): presumably the encoder "distilling" switch of
    # attention-based models, not knowledge distillation - confirm usage.
    distil = True                      # Distilling flag
    moving_avg = 25                    # Moving average window for trend decomposition
    output_attention = False           # Whether to output attention weights
    
    # === TRAINING CONFIGURATION ===
    train_epochs = 10                  # Number of training epochs - LIGHT: fewer epochs
    batch_size = 32                    # Batch size - LIGHT: moderate batch size
    learning_rate = 0.001              # Learning rate - LIGHT: slightly higher for faster convergence
    patience = 5                       # Early stopping patience - LIGHT: less patience
    lradj = 'type1'                    # Learning rate adjustment strategy
    
    # Loss and optimization
    loss = 'MSE'                       # Loss function: 'MSE', 'MAE', 'Huber'
    use_amp = False                    # Automatic mixed precision (can speed up training)
    
    # System settings
    num_workers = 4                    # DataLoader workers - LIGHT: fewer workers
    seed = 2024                        # Random seed (only effective where the notebook seeds its RNGs with it)
    
    # Task specific
    task_name = 'short_term_forecast'  # Task type: 'short_term_forecast' for financial prediction
    
    # Experiment tracking
    des = 'light_config'               # Experiment description
    # The timestamp is evaluated once, when this class body executes, so the
    # directory name stays fixed until the cell is re-run.
    checkpoints = f'./checkpoints/TimesNet_light_{datetime.now().strftime("%Y%m%d_%H%M")}'
    
# Instantiate the config; later cells read and override settings on `args`.
args = LightConfig()

# Echo the settings that most affect run time and task difficulty.
print("🔧 Light Configuration Loaded:")
print(f"   📏 Sequence Length: {args.seq_len}")
print(f"   🎯 Prediction Length: {args.pred_len}")
print(f"   🧠 Model Dimension: {args.d_model}")
print(f"   ⚡ Epochs: {args.train_epochs}")
print(f"   📊 Batch Size: {args.batch_size}")

## 🎛️ Tweakable Parameters

Modify these parameters to experiment with different configurations:

In [None]:
# ================================
# TWEAKABLE PARAMETERS - EXPERIMENT
# ================================

# Each assignment sets an attribute on the `args` instance, shadowing the
# LightConfig class default. Re-run this cell before the data-loader and
# model cells so the new values take effect.

# --- Sequence parameters (affect model complexity and data usage) ---
args.seq_len = 50          # Try: 30, 50, 100 (longer = more context, slower training)
args.pred_len = 5          # Try: 3, 5, 10 (longer = harder prediction task)

# --- Model size (affect memory usage and training time) ---
args.d_model = 32          # Try: 16, 32, 64 (larger = more capacity, slower)
args.d_ff = 64             # Try: 32, 64, 128 (usually 2x d_model)
args.n_heads = 4           # Try: 2, 4, 8 (must divide d_model evenly)
args.e_layers = 2          # Try: 1, 2, 3 (more layers = deeper model)

# --- TimesNet specific ---
args.top_k = 3             # Try: 2, 3, 5 (more frequencies = more complex patterns)
args.num_kernels = 3       # Try: 2, 3, 6 (more kernels = more feature extraction)

# --- Training parameters ---
args.train_epochs = 10     # Try: 5, 10, 20
args.batch_size = 32       # Try: 16, 32, 64 (larger = faster but more memory)
args.learning_rate = 0.001 # Try: 0.0001, 0.001, 0.01

# --- Advanced tweaks ---
args.dropout = 0.1         # Try: 0.0, 0.1, 0.2 (higher = more regularization)
args.moving_avg = 25       # Try: 15, 25, 50 (window for trend decomposition)

# Echo the effective values so the cell output records what was trained.
print(f"✏️ Updated Configuration:")
print(f"   Model Size: d_model={args.d_model}, d_ff={args.d_ff}, heads={args.n_heads}, layers={args.e_layers}")
print(f"   TimesNet: top_k={args.top_k}, kernels={args.num_kernels}")
print(f"   Training: epochs={args.train_epochs}, batch={args.batch_size}, lr={args.learning_rate}")

## 🚀 Training Setup and Execution

In [None]:
# Pick CUDA when available, otherwise CPU; later cells move the model and
# every batch to this device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Report the selected device (details are for CUDA device 0).
print(f"🎯 Selected Device: {device}")
if torch.cuda.is_available():
    print(f"🚀 GPU: {torch.cuda.get_device_name(0)}")
    print(f"💾 GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB")
    print("⚡ Parallel processing: ENABLED (automatic)")
    print("💡 Tips: Model will automatically use GPU cores for faster training")
else:
    print("⚠️  Running on CPU - training will be slower")
    print("💡 Tips: Install CUDA-compatible PyTorch for GPU acceleration")
# Create the checkpoint directory up front so later saves cannot fail on a
# missing folder; exist_ok keeps re-running the cell harmless.
os.makedirs(args.checkpoints, exist_ok=True)

print(f"🔥 Using device: {device}")
print(f"📁 Checkpoints: {args.checkpoints}")

# Data loader setup
def create_data_loader(flag):
    """Build the ``DataLoader`` for one split of the prepared dataset.

    Args:
        flag: Split selector forwarded to ``Dataset_Custom``; this notebook
            calls the function with 'train', 'val' and 'test'.

    Returns:
        torch.utils.data.DataLoader: Loader over the requested split. The
        training split is shuffled and drops its incomplete final batch; the
        other splits keep their order and every sample.
    """
    # Dataset_Custom receives the whole config object; the split lengths are
    # exposed under these attribute names before it is built.
    # NOTE(review): presumably these are the names Dataset_Custom reads.
    args.validation_length = args.val_len
    args.test_length = args.test_len

    dataset = Dataset_Custom(
        args=args,
        root_path=args.root_path,
        data_path=args.data_path,
        flag=flag,
        size=[args.seq_len, args.label_len, args.pred_len],
        features=args.features,
        target=args.target,
        scale=True,
        timeenc=1 if args.embed == 'timeF' else 0,
        freq=args.freq
    )

    is_train = (flag == 'train')
    return DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=is_train,
        num_workers=args.num_workers,
        # Dropping the ragged last batch is only useful for training. On the
        # evaluation splits it silently discards samples, and a split with
        # fewer samples than batch_size would yield no batches at all.
        drop_last=is_train,
    )

# Build one loader per split; the training and evaluation cells read these
# module-level names directly.
train_loader = create_data_loader('train')
val_loader = create_data_loader('val')
test_loader = create_data_loader('test')

# len() of a loader is its number of batches, not its number of samples.
print(f"📊 Data loaders created:")
print(f"   Train: {len(train_loader)} batches")
print(f"   Val: {len(val_loader)} batches")
print(f"   Test: {len(test_loader)} batches")

In [None]:
# Seed the RNGs before the model is built so weight initialisation and other
# torch / NumPy randomness are repeatable. The config declares `seed` for
# reproducibility, but no other cell in this notebook applies it.
torch.manual_seed(args.seed)
np.random.seed(args.seed)

# Initialize TimesNet model on the selected device
model = TimesNet(args).to(device)

# Training components shared by the training, validation and test cells
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
early_stopping = EarlyStopping(patience=args.patience, verbose=True)

# Model info
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"🧠 TimesNet Light Model Initialized:")
print(f"   📊 Total Parameters: {total_params:,}")
print(f"   🎯 Trainable Parameters: {trainable_params:,}")
# Size estimate assumes 4 bytes (float32) per parameter.
print(f"   💾 Model Size: ~{total_params * 4 / 1024 / 1024:.1f} MB")

In [None]:
# Training function with progress tracking
def train_epoch():
    """Run one optimisation pass over ``train_loader``.

    Uses the module-level ``model``, ``optimizer``, ``criterion``, ``device``
    and ``args``. The loss is computed on the last ``args.pred_len`` steps of
    the first 4 feature columns only, and progress is printed per batch.

    Returns:
        float: Mean of the per-batch losses for this epoch.

    Raises:
        RuntimeError: If ``train_loader`` yields no batches, which would
            otherwise surface as a bare ``ZeroDivisionError``.
    """
    model.train()
    num_batches = len(train_loader)
    if num_batches == 0:
        raise RuntimeError(
            "train_loader yielded no batches; the training split is smaller "
            "than batch_size (the incomplete batch is dropped) or empty"
        )

    total_loss = 0.0
    epoch_start_time = time.time()
    print(f"🏃 Training on {num_batches} batches...")

    for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
        # Move to device
        batch_x = batch_x.float().to(device)
        batch_y = batch_y.float().to(device)
        batch_x_mark = batch_x_mark.float().to(device)
        batch_y_mark = batch_y_mark.float().to(device)

        # Decoder input: the first label_len steps of batch_y followed by
        # zeros for the horizon. batch_y is already float and on `device`,
        # so zeros_like / cat need no further casts.
        dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :])
        dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1)

        # Forward pass
        optimizer.zero_grad()
        outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

        # Calculate loss (only on target columns - first 4 features)
        target_outputs = outputs[:, -args.pred_len:, :4]
        target_y = batch_y[:, -args.pred_len:, :4]
        loss = criterion(target_outputs, target_y)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the total and the log line.
        batch_loss = loss.item()
        total_loss += batch_loss

        # Progress reporting - EVERY BATCH for Light Config
        progress_pct = (i + 1) / num_batches * 100
        avg_loss = total_loss / (i + 1)
        elapsed = time.time() - epoch_start_time
        remaining = elapsed / (i + 1) * (num_batches - i - 1)

        print(f"   📊 Batch {i+1:3d}/{num_batches} ({progress_pct:5.1f}%) - "
              f"Loss: {batch_loss:.6f} (Avg: {avg_loss:.6f}) - "
              f"⏱️ Elapsed: {elapsed:.1f}s, ETA: {remaining:.1f}s")

    epoch_time = time.time() - epoch_start_time
    avg_loss = total_loss / num_batches
    print(f"✅ Epoch completed in {epoch_time:.1f}s. Average loss: {avg_loss:.6f}")
    return avg_loss

# Validation function
def validate_epoch():
    """Evaluate the model on ``val_loader`` without updating weights.

    Uses the module-level ``model``, ``criterion``, ``device`` and ``args``.
    The loss is computed on the last ``args.pred_len`` steps of the first 4
    feature columns, as in training.

    Returns:
        float: Mean of the per-batch losses, or ``float('inf')`` when the
        loader yields no batches.
    """
    model.eval()
    loss_sum = 0.0
    batches_seen = 0

    with torch.no_grad():
        for x, y, x_mark, y_mark in val_loader:
            x = x.float().to(device)
            y = y.float().to(device)
            x_mark = x_mark.float().to(device)
            y_mark = y_mark.float().to(device)

            # Decoder input: first label_len steps of y, zeros for the horizon.
            horizon_zeros = torch.zeros_like(y[:, -args.pred_len:, :]).float().to(device)
            dec_inp = torch.cat([y[:, :args.label_len, :], horizon_zeros], dim=1).float().to(device)

            outputs = model(x, x_mark, dec_inp, y_mark)

            # Score only the forecast horizon of the first 4 columns.
            predicted = outputs[:, -args.pred_len:, :4]
            expected = y[:, -args.pred_len:, :4]
            loss_sum += criterion(predicted, expected).item()
            batches_seen += 1

    if batches_seen > 0:
        return loss_sum / batches_seen
    return float('inf')

# Confirms the cell ran; no training has started at this point.
print("🔧 Training functions defined")

In [None]:
# Main training loop
# Per epoch: train, validate, adjust the learning rate, check early stopping,
# then save the weights when the validation loss improved.
print(f"🚀 Starting TimesNet Light Training ({args.train_epochs} epochs)")
# Rough estimate that assumes about 5 minutes per epoch.
print(f"⏰ Estimated time: ~{args.train_epochs * 5} minutes")
print("="*60)

best_val_loss = float('inf')
train_losses = []
val_losses = []

training_start_time = time.time()

for epoch in range(args.train_epochs):
    print(f"\n🔄 Epoch {epoch+1}/{args.train_epochs}")
    
    # Train
    train_loss = train_epoch()
    train_losses.append(train_loss)
    
    # Validate
    print("🔍 Running validation...")
    val_loss = validate_epoch()
    val_losses.append(val_loss)
    
    # Log progress
    print(f"📈 Epoch {epoch+1} Results: Train Loss: {train_loss:.6f}, Val Loss: {val_loss:.6f}")
    
    # Adjust learning rate (the helper is given the 1-based epoch number)
    adjust_learning_rate(optimizer, epoch + 1, args)
    
    # Early stopping
    # NOTE(review): EarlyStopping also receives the checkpoint directory and
    # presumably writes its own checkpoint there - confirm in utils.tools.
    early_stopping(val_loss, model, args.checkpoints)
    if early_stopping.early_stop:
        print("⏹️ Early stopping triggered")
        break
    
    # Save best model
    # This file is what the evaluation cell loads. It is only written when the
    # validation loss strictly improves, so it is never created if every
    # val_loss is inf (e.g. an empty validation loader).
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), f"{args.checkpoints}/best_model.pth")
        print(f"💾 New best model saved (Val Loss: {val_loss:.6f})")

total_training_time = time.time() - training_start_time
print(f"\n🎉 Training completed in {total_training_time/60:.1f} minutes!")
print(f"🏆 Best validation loss: {best_val_loss:.6f}")

## 📊 Results and Analysis

In [None]:
# Load best model and test
# map_location lets a checkpoint written on another device be restored onto
# the current one. weights_only=True is enough because the file holds only a
# state_dict, and it avoids unpickling arbitrary objects.
model.load_state_dict(
    torch.load(
        f"{args.checkpoints}/best_model.pth",
        map_location=device,
        weights_only=True,
    )
)
model.eval()

# Test evaluation: collect per-batch predictions and ground truth for the
# forecast horizon of the first 4 feature columns.
preds = []
trues = []

print("🧪 Testing model...")
with torch.no_grad():
    for batch_x, batch_y, batch_x_mark, batch_y_mark in test_loader:
        batch_x = batch_x.float().to(device)
        batch_y = batch_y.float().to(device)
        batch_x_mark = batch_x_mark.float().to(device)
        batch_y_mark = batch_y_mark.float().to(device)

        # Decoder input: first label_len steps of batch_y, zeros for the horizon.
        dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float().to(device)
        dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to(device)

        outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

        pred = outputs[:, -args.pred_len:, :4].detach().cpu().numpy()
        true = batch_y[:, -args.pred_len:, :4].detach().cpu().numpy()

        preds.append(pred)
        trues.append(true)

# Calculate metrics (skipped when the test loader produced no batches)
if preds:
    preds = np.concatenate(preds, axis=0)
    trues = np.concatenate(trues, axis=0)

    # NOTE(review): values are on whatever scale the dataset applies
    # (scale=True in the loader cell); confirm before reading them as prices.
    mae, mse, rmse, mape, mspe = metric(preds, trues)

    print("\n📊 TimesNet Light - Test Results:")
    print(f"   🎯 MSE:  {mse:.6f}")
    print(f"   📏 MAE:  {mae:.6f}")
    print(f"   📐 RMSE: {rmse:.6f}")
    print(f"   📈 MAPE: {mape:.6f}%")
    print(f"   📉 MSPE: {mspe:.6f}%")

    # Summary
    print(f"\n📋 Configuration Summary:")
    print(f"   ⚡ Model: Light ({total_params:,} params)")
    print(f"   📏 Sequence: {args.seq_len} → {args.pred_len}")
    print(f"   🧠 Architecture: d_model={args.d_model}, layers={args.e_layers}, heads={args.n_heads}")
    print(f"   ⏱️ Training time: {total_training_time/60:.1f} minutes")
    print(f"   🏆 Final performance: RMSE={rmse:.6f}")
else:
    print("⚠️ No test data available")

In [None]:
# 🚀 Enhanced Progress Monitoring for Light Config
print("⚡ Light Configuration - Enhanced Batch Monitoring Enabled")
print("📊 Every batch will show detailed progress for faster feedback")
print("💡 This helps track training progress in real-time")
print()

# Define train_epoch_verbose, a separate variant of train_epoch that adds a
# progress bar and per-batch timing. train_epoch itself is not replaced.
def train_epoch_verbose():
    """Run one optimisation pass over ``train_loader`` with detailed logging.

    Performs the same optimisation steps as ``train_epoch`` (module-level
    ``model``, ``optimizer``, ``criterion``, ``device`` and ``args``; loss on
    the last ``args.pred_len`` steps of the first 4 feature columns) and
    prints a progress bar and the processing time of every batch.

    Returns:
        float: Mean of the per-batch losses for this epoch.

    Raises:
        RuntimeError: If ``train_loader`` yields no batches, which would
            otherwise surface as a bare ``ZeroDivisionError``.
    """
    model.train()
    num_batches = len(train_loader)
    if num_batches == 0:
        raise RuntimeError(
            "train_loader yielded no batches; the training split is smaller "
            "than batch_size (the incomplete batch is dropped) or empty"
        )

    total_loss = 0.0
    bar_length = 20  # Width of the text progress bar, in characters

    epoch_start_time = time.time()
    print(f"🏃 Training on {num_batches} batches with DETAILED progress...")
    print(f"📊 Batch format: [Batch X/Y (Z%)] - Loss: current (average) - Time: elapsed/remaining")
    print("-" * 80)

    for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
        batch_start = time.time()

        # Move to device
        batch_x = batch_x.float().to(device)
        batch_y = batch_y.float().to(device)
        batch_x_mark = batch_x_mark.float().to(device)
        batch_y_mark = batch_y_mark.float().to(device)

        # Forward pass
        optimizer.zero_grad()

        # Decoder input: the first label_len steps of batch_y followed by
        # zeros for the horizon. batch_y is already float and on `device`,
        # so zeros_like / cat need no further casts.
        dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :])
        dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1)

        outputs = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
        target_outputs = outputs[:, -args.pred_len:, :4]
        target_y = batch_y[:, -args.pred_len:, :4]
        loss = criterion(target_outputs, target_y)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the total and the log line.
        batch_loss = loss.item()
        total_loss += batch_loss
        batch_time = time.time() - batch_start

        # DETAILED progress for every batch
        progress_pct = (i + 1) / num_batches * 100
        avg_loss = total_loss / (i + 1)
        elapsed = time.time() - epoch_start_time
        remaining = elapsed / (i + 1) * (num_batches - i - 1)

        # Progress bar visualization
        filled_length = int(bar_length * (i + 1) // num_batches)
        bar = '█' * filled_length + '-' * (bar_length - filled_length)

        print(f"   [{bar}] Batch {i+1:3d}/{num_batches} ({progress_pct:5.1f}%) - "
              f"Loss: {batch_loss:.6f} (Avg: {avg_loss:.6f}) - "
              f"Time: {elapsed:.1f}s/{remaining:.1f}s - "
              f"Batch: {batch_time:.2f}s")

    epoch_time = time.time() - epoch_start_time
    avg_loss = total_loss / num_batches
    print("-" * 80)
    print(f"✅ Epoch completed in {epoch_time:.1f}s. Average loss: {avg_loss:.6f}")
    print(f"⚡ Average time per batch: {epoch_time/num_batches:.2f}s")
    return avg_loss

# Summarise what train_epoch_verbose prints per batch; nothing is trained here.
print("🎯 Enhanced training function ready!")
print("💡 Now each batch will show:")
print("   - Progress bar visualization")
print("   - Current and average loss")
print("   - Elapsed and remaining time")
print("   - Individual batch processing time")

## ⚡ Enhanced Training with Detailed Batch Progress

Now you can use the enhanced training function that shows progress for **every single batch**:

### 🎯 **Features Added:**
- **📊 Progress Bar**: Visual progress indicator for each epoch
- **⏱️ Time Tracking**: Shows elapsed time and estimated remaining time
- **📈 Loss Monitoring**: Current batch loss and running average
- **🚀 Batch Timing**: Individual batch processing time

### 💡 **Why This Helps:**
- **No More Waiting**: See progress immediately, no need to wait for epoch completion
- **Performance Insights**: Identify if any batches are unusually slow
- **Loss Tracking**: Monitor if the model is learning batch by batch
- **Time Estimation**: See a running estimate of when the current epoch and the whole run will finish

In [None]:
# 🚀 RUN ENHANCED TRAINING LOOP WITH DETAILED BATCH PROGRESS
# Per epoch: verbose training, validation, learning-rate adjustment, summary,
# early-stopping check, then an estimate of the remaining time.
print("🎯 Starting Enhanced TimesNet Light Training")
print("⚡ Every batch will show detailed progress!")
print("=" * 80)

# Initialize tracking
train_losses = []
val_losses = []
start_time = time.time()

# Start with a fresh early-stopping tracker. The instance created with the
# model was already driven by the first training loop, so reusing it may carry
# over its patience counter or an already-set early_stop flag and end this run
# prematurely.
early_stopping = EarlyStopping(patience=args.patience, verbose=True)

# Training loop with enhanced progress
for epoch in range(args.train_epochs):
    epoch_start = time.time()

    print(f"\n🔥 EPOCH {epoch+1}/{args.train_epochs}")
    print(f"📅 Started at: {datetime.now().strftime('%H:%M:%S')}")

    # Enhanced training with detailed batch progress
    train_loss = train_epoch_verbose()

    # Validation (standard)
    print(f"\n🔍 Validating...")
    val_loss = validate_epoch()

    # Learning rate adjustment (the helper is given the 1-based epoch number)
    adjust_learning_rate(optimizer, epoch + 1, args)

    # Record losses
    train_losses.append(train_loss)
    val_losses.append(val_loss)

    epoch_time = time.time() - epoch_start
    total_elapsed = time.time() - start_time

    print(f"\n📊 EPOCH {epoch+1} SUMMARY:")
    print(f"   📈 Train Loss: {train_loss:.6f}")
    print(f"   📉 Val Loss: {val_loss:.6f}")
    print(f"   ⏱️ Epoch Time: {epoch_time:.1f}s ({epoch_time/60:.1f} min)")
    print(f"   🕒 Total Time: {total_elapsed:.1f}s ({total_elapsed/60:.1f} min)")

    # Early stopping check
    early_stopping(val_loss, model, args.checkpoints)
    if early_stopping.early_stop:
        print(f"\n⏹️ Early stopping triggered at epoch {epoch+1}")
        break

    # Estimate remaining time from the mean epoch duration so far
    if epoch < args.train_epochs - 1:
        avg_epoch_time = total_elapsed / (epoch + 1)
        remaining_epochs = args.train_epochs - (epoch + 1)
        estimated_remaining = avg_epoch_time * remaining_epochs
        print(f"   ⏳ Estimated remaining time: {estimated_remaining/60:.1f} minutes")

total_time = time.time() - start_time

print("\n" + "=" * 80)
print("🎉 ENHANCED TRAINING COMPLETED!")
print(f"⏰ Total training time: {total_time:.1f}s ({total_time/60:.1f} minutes)")
print(f"📊 Final train loss: {train_losses[-1]:.6f}")
print(f"📉 Final val loss: {val_losses[-1]:.6f}")
print(f"🏆 Best val loss: {min(val_losses):.6f} (epoch {val_losses.index(min(val_losses))+1})")
print("💡 Check the detailed batch progress above for training insights!")