In [32]:
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
import warnings
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.tuner import Tuner
# PyTorch Forecasting imports
import pytorch_forecasting as pf
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import QuantileLoss, SMAPE
from pytorch_forecasting.data.encoders import TorchNormalizer
warnings.filterwarnings('ignore')

In [33]:
# Prediction horizons configuration
PREDICTION_CONFIGS = {
    '12h': {
        'max_prediction_length': 12,
        'max_encoder_length': 168,  # 7 days of history
        'description': '12 hours ahead'
    },
    '24h': {
        'max_prediction_length': 24,
        'max_encoder_length': 168,  # 7 days of history  
        'description': '24 hours ahead'
    },
    '48h': {
        'max_prediction_length': 48,
        'max_encoder_length': 168,  # 7 days of history
        'description': '48 hours ahead'
    },
    '72h': {
        'max_prediction_length': 72,
        'max_encoder_length': 168,  # 7 days of history
        'description': '72 hours ahead'
    }
}

# TFT Model parameters
TFT_PARAMS = {
    'learning_rate': 0.03,
    'hidden_size': 64,
    'attention_head_size': 4,
    'dropout': 0.1,
    'hidden_continuous_size': 8,
    'output_size': 7,  # 7 quantiles
    'loss': QuantileLoss(),
    'log_interval': 10,
    'reduce_on_plateau_patience': 4,
    'optimizer': 'adam'
}

# Training parameters
TRAINING_PARAMS = {
    'max_epochs': 10,
    'batch_size': 128,
    'gradient_clip_val': 0.1,
    'patience': 10
}

# Quantiles for prediction intervals
QUANTILES = [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]

print("Temporal Fusion Transformer for Wind Power Forecasting")
print("=" * 60)

Temporal Fusion Transformer for Wind Power Forecasting


In [34]:
def load_wind_data(file_path):
    """Load and preprocess wind power data for TFT"""
    print("Loading wind power data...")
    
    # Load data
    data = pd.read_excel(file_path)
    print(f"Raw data shape: {data.shape}")
    
    # Parse timestamp
    data['TIMESTAMP'] = pd.to_datetime(data['TIMESTAMP'], format='%Y%m%d %H:%M')
    
    # Sort by timestamp
    data = data.sort_values('TIMESTAMP').reset_index(drop=True)
    
    # Add time index (required by TFT)
    data['time_idx'] = range(len(data))
    
    # Add group identifier (single series)
    data['series_id'] = 0
    
    # Add only basic time features
    data['hour'] = data['TIMESTAMP'].dt.hour
    data['day_of_week'] = data['TIMESTAMP'].dt.dayofweek
    data['month'] = data['TIMESTAMP'].dt.month
    data['day_of_year'] = data['TIMESTAMP'].dt.dayofyear
    
    # Drop rows with NaN values
    data = data.dropna().reset_index(drop=True)
    data['time_idx'] = range(len(data))
    
    print(f"Processed data shape: {data.shape}")
    print(f"Date range: {data['TIMESTAMP'].min()} to {data['TIMESTAMP'].max()}")
    
    return data

def create_tft_dataset(data, config_key):
    """Create TimeSeriesDataSet for TFT training"""
    config = PREDICTION_CONFIGS[config_key]
    
    # Define feature categories for TFT (only basic features)
    time_varying_known_reals = [
        'hour', 'day_of_week', 'month', 'day_of_year',
        'U10', 'V10', 'U100', 'V100'
    ]
    
    time_varying_unknown_reals = ['TARGETVAR']
    
    print(f"Known reals: {time_varying_known_reals}")
    print(f"Unknown reals: {time_varying_unknown_reals}")
    
    # Determine training cutoff (70% for training)
    max_prediction_length = config['max_prediction_length']
    max_encoder_length = config['max_encoder_length']
    training_cutoff = int(len(data) * 0.7)
    
    print(f"Training cutoff: {training_cutoff} (out of {len(data)})")
    
    # Create training dataset
    training_data = TimeSeriesDataSet(
        data=data[lambda x: x.time_idx <= training_cutoff],
        time_idx="time_idx",
        target="TARGETVAR",
        group_ids=["series_id"],
        min_encoder_length=max_encoder_length // 2,
        max_encoder_length=max_encoder_length,
        min_prediction_length=1,
        max_prediction_length=max_prediction_length,
        time_varying_known_reals=time_varying_known_reals,
        time_varying_unknown_reals=time_varying_unknown_reals,
        target_normalizer=GroupNormalizer(
            groups=["series_id"], 
            transformation="softplus",
            center=True
        ),
        add_relative_time_idx=True,
        add_target_scales=True,
        add_encoder_length=True,
        allow_missing_timesteps=True,
    )
    
    # Create validation dataset
    validation_data = TimeSeriesDataSet.from_dataset(
        training_data,
        data[lambda x: x.time_idx > training_cutoff],
        predict=True,
        stop_randomization=True
    )
    
    print(f"Training samples: {len(training_data)}")
    print(f"Validation samples: {len(validation_data)}")
    
    return training_data, validation_data

In [35]:
def train_tft_model(training_data, validation_data, config_key):
    """Train Temporal Fusion Transformer model"""
    config = PREDICTION_CONFIGS[config_key]
    
    print(f"\nTraining TFT for {config['description']}...")
    
    # Create data loaders
    train_dataloader = training_data.to_dataloader(
        train=True, 
        batch_size=TRAINING_PARAMS['batch_size'], 
        num_workers=0
    )
    
    val_dataloader = validation_data.to_dataloader(
        train=False, 
        batch_size=TRAINING_PARAMS['batch_size'] * 2, 
        num_workers=0
    )
    
    # Find optimal learning rate (optional)
    print("Finding optimal learning rate...")
    pl.seed_everything(42)
    
    # Create TFT model
    tft = TemporalFusionTransformer.from_dataset(
        training_data,
        learning_rate=TFT_PARAMS['learning_rate'],
        hidden_size=TFT_PARAMS['hidden_size'],
        attention_head_size=TFT_PARAMS['attention_head_size'],
        dropout=TFT_PARAMS['dropout'],
        hidden_continuous_size=TFT_PARAMS['hidden_continuous_size'],
        output_size=TFT_PARAMS['output_size'],
        loss=QuantileLoss(quantiles=QUANTILES),
        log_interval=TFT_PARAMS['log_interval'],
        reduce_on_plateau_patience=TFT_PARAMS['reduce_on_plateau_patience'],
        optimizer=TFT_PARAMS['optimizer'],
    )
    
    print(f"Model created with {tft.size()/1e3:.1f}k parameters")
    
    # Configure trainer
    trainer = pl.Trainer(
        max_epochs=TRAINING_PARAMS['max_epochs'],
        accelerator="auto",
        enable_model_summary=True,
        gradient_clip_val=TRAINING_PARAMS['gradient_clip_val'],
        limit_train_batches=50,  # Speed up training for demo
        callbacks=[
            LearningRateMonitor(logging_interval="step"),
            EarlyStopping(
                monitor="val_loss", 
                min_delta=1e-4, 
                patience=TRAINING_PARAMS['patience'], 
                verbose=False, 
                mode="min"
            ),
        ],
        
        enable_progress_bar=True,
    )
    
    # Train the model
    print("Starting training...")
    trainer.fit(
        tft,
        train_dataloaders=train_dataloader,
        val_dataloaders=val_dataloader,
    )
    
    return tft, trainer

In [36]:
# Fix the cell that creates the input tensor
# Replace the entire cell with:

# Create a simple input example for visualization
input_features = ['hour', 'day_of_week', 'month', 'day_of_year', 'U10', 'V10', 'U100', 'V100']
input_values = [0] * len(input_features)  # Placeholder values
input_tensor = torch.tensor([input_values], dtype=torch.float32).unsqueeze(0)  # shape: (1, 1, num_features)

In [42]:
# Replace the problematic cell with this code:

# Alternative approach to visualize the TFT model structure
from torchsummary import summary
import torch.nn as nn

# Load and process the data first
FILE_PATH = r"C:\Users\User\Documents\Semester 07\EE4750 - Data Analytics in Power Systems\WindPowerForecastingData TASK.xlsx\WindPowerForecastingData TASK.xlsx"
processed_data = load_wind_data(FILE_PATH)

# Create a proper input sample from the dataset
config = PREDICTION_CONFIGS['12h']
training_data, _ = create_tft_dataset(processed_data, '12h')

# Create a simple TFT model
tft_model = TemporalFusionTransformer.from_dataset(
    training_data,
    learning_rate=TFT_PARAMS['learning_rate'],
    hidden_size=16,  # Smaller for visualization
    attention_head_size=2,
    dropout=TFT_PARAMS['dropout'],
    hidden_continuous_size=4,
    output_size=TFT_PARAMS['output_size'],
    loss=QuantileLoss(quantiles=QUANTILES),
)

# Print model summary instead of using torchview
print("TFT Model Summary:")
print("=" * 50)

# Get a sample batch to understand input dimensions
sample_batch = next(iter(training_data.to_dataloader(train=True, batch_size=1)))
input_dict = sample_batch[0]  # Extract the input dictionary

# Print information about the model structure
print(f"Number of parameters: {sum(p.numel() for p in tft_model.parameters()):,}")
print(f"Model components:")
for name, module in tft_model.named_children():
    print(f"  - {name}: {module.__class__.__name__}")
    
# Print input shapes
print("\nInput shapes:")
for key, value in input_dict.items():
    if hasattr(value, 'shape'):
        print(f"  {key}: {value.shape}")
    else:
        print(f"  {key}: {type(value)}")

# Create a simple diagram of the model architecture
print("\nModel Architecture Diagram:")
print("=" * 50)
print("Input → Embeddings → Variable Selection → LSTM Encoder/Decoder")
print("→ Static Context → Attention → Output Layer")
def visualize_tft_flow(model, sample_batch):
    """
    Create a text-based flow visualization similar to the provided image
    but for the Temporal Fusion Transformer model
    """
    # Extract the input dictionary from the sample batch
    input_dict = sample_batch[0]
    
    print("TFT Model Flow Visualization")
    print("=" * 50)
    
    # Print input information
    print("input-tensor depth:0")
    for key, value in input_dict.items():
        if hasattr(value, 'shape'):
            print(f"{key}: {tuple(value.shape)}")
    print()
    
    # Manually trace through the main components of TFT
    print("InputEmbeddings")
    print("input: Various input features")
    print("depth:1")
    print("output: Embedded representations")
    print()
    
    print("VariableSelectionNetwork (Static)")
    print("input: Static embeddings")
    print("depth:1")
    print("output: Selected static features")
    print()
    
    print("VariableSelectionNetwork (Encoder)")
    print("input: Time-varying known inputs")
    print("depth:1")
    print("output: Selected encoder features")
    print()
    
    print("VariableSelectionNetwork (Decoder)")
    print("input: Time-varying known inputs (future)")
    print("depth:1")
    print("output: Selected decoder features")
    print()
    
    print("LSTM Encoder")
    print("input: Selected encoder features")
    print("depth:1")
    print("output: Encoder hidden states")
    print()
    
    print("LSTM Decoder")
    print("input: Selected decoder features + static context")
    print("depth:1")
    print("output: Decoder hidden states")
    print()
    
    print("InterpretableMultiHeadAttention")
    print("input: Encoder states + Decoder states")
    print("depth:1")
    print("output: Attention-weighted representations")
    print()
    
    print("GatedResidualNetwork")
    print("input: Attention output + static context")
    print("depth:1")
    print("output: Enriched representations")
    print()
    
    print("OutputLayer")
    print("input: Final representations")
    print("depth:1")
    print("output: Quantile predictions")
    print()
    
    # Get the output shape
    with torch.no_grad():
        output = model(input_dict)
        output_shape = output[0].shape if isinstance(output, tuple) else output.shape
    
    print("output-tensor depth:0")
    print(f"output: {tuple(output_shape)}")

# Use the function to visualize the TFT flow
print("Visualizing TFT Model Flow...")
visualize_tft_flow(tft_model, sample_batch)

Loading wind power data...
Raw data shape: (16800, 6)
Processed data shape: (16765, 12)
Date range: 2012-01-01 01:00:00 to 2013-11-30 00:00:00
Known reals: ['hour', 'day_of_week', 'month', 'day_of_year', 'U10', 'V10', 'U100', 'V100']
Unknown reals: ['TARGETVAR']
Training cutoff: 11735 (out of 16765)
Training samples: 11747
Validation samples: 1
TFT Model Summary:
Number of parameters: 22,079
Model components:
  - loss: QuantileLoss
  - logging_metrics: ModuleList
  - input_embeddings: MultiEmbedding
  - prescalers: ModuleDict
  - static_variable_selection: VariableSelectionNetwork
  - encoder_variable_selection: VariableSelectionNetwork
  - decoder_variable_selection: VariableSelectionNetwork
  - static_context_variable_selection: GatedResidualNetwork
  - static_context_initial_hidden_lstm: GatedResidualNetwork
  - static_context_initial_cell_lstm: GatedResidualNetwork
  - static_context_enrichment: GatedResidualNetwork
  - lstm_encoder: LSTM
  - lstm_decoder: LSTM
  - post_lstm_gate_e

In [38]:
def make_predictions(model, validation_data, trainer, config_key):
    """Generate predictions using trained TFT model"""
    config = PREDICTION_CONFIGS[config_key]
    
    print(f"Making predictions for {config['description']}...")
    
    # Make predictions
    val_dataloader = validation_data.to_dataloader(
        train=False, batch_size=TRAINING_PARAMS['batch_size'] * 4, num_workers=0
    )
    
    predictions = trainer.predict(model, dataloaders=val_dataloader)
    
    return predictions

def evaluate_tft_predictions(predictions, config_key):
    """Evaluate TFT predictions"""
    config = PREDICTION_CONFIGS[config_key]
    
    # Extract predictions and actuals
    prediction_values = torch.cat([p[0] for p in predictions], dim=0)
    prediction_index = pd.concat([p[1] for p in predictions], ignore_index=True)
    
    # Get median predictions (quantile 0.5)
    median_idx = len(QUANTILES) // 2
    median_predictions = prediction_values[:, :, median_idx].numpy()
    
    # Calculate metrics (simplified - you may want to get actual values from your data)
    print(f"\n{config['description']} TFT Results:")
    print(f"Prediction shape: {median_predictions.shape}")
    print(f"Number of forecasts: {len(median_predictions)}")
    print(f"Forecast horizon: {config['max_prediction_length']} hours")
    
    return median_predictions, prediction_index

def plot_tft_predictions(predictions, prediction_index, config_key, num_samples=3):
    """Plot TFT prediction results"""
    config = PREDICTION_CONFIGS[config_key]
    
    prediction_values = torch.cat([p[0] for p in predictions], dim=0)
    
    fig, axes = plt.subplots(num_samples, 1, figsize=(15, 4*num_samples))
    if num_samples == 1:
        axes = [axes]
    
    for i in range(min(num_samples, len(prediction_values))):
        ax = axes[i]
        
        # Get prediction quantiles
        pred_sample = prediction_values[i].numpy()  # Shape: (forecast_length, num_quantiles)
        time_steps = range(pred_sample.shape[0])
        
        # Plot median prediction
        median_idx = len(QUANTILES) // 2
        ax.plot(time_steps, pred_sample[:, median_idx], 'r-', 
                label='Median Prediction', linewidth=2)
        
        # Plot confidence intervals
        lower_idx = 1  # 10% quantile
        upper_idx = -2  # 90% quantile
        ax.fill_between(time_steps, 
                       pred_sample[:, lower_idx], 
                       pred_sample[:, upper_idx], 
                       alpha=0.3, color='red', 
                       label='80% Prediction Interval')
        
        # Plot 95% confidence interval
        ax.fill_between(time_steps, 
                       pred_sample[:, 0], 
                       pred_sample[:, -1], 
                       alpha=0.2, color='orange', 
                       label='95% Prediction Interval')
        
        ax.set_title(f'TFT {config["description"]} - Sample {i+1}')
        ax.set_xlabel('Hours ahead')
        ax.set_ylabel('Wind Power')
        ax.legend()
        ax.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.show()

def plot_feature_importance(model, training_data):
    """Plot TFT feature importance"""
    # Get interpretation
    interpretation = model.interpret_output(
        training_data.to_dataloader(train=False, batch_size=1000),
        reduce_on_plateau_patience=4,
        return_attention=True
    )
    
    # Plot variable importance
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    
    # Encoder variable importance
    model.plot_interpretation(interpretation, axes=axes[0, 0])
    axes[0, 0].set_title("Variable Importance")
    
    # Attention patterns
    model.plot_interpretation(interpretation, axes=axes[0, 1])
    axes[0, 1].set_title("Attention Patterns")
    
    plt.tight_layout()
    plt.show()


In [39]:
def run_tft_wind_forecasting(file_path, horizons_to_test=['12h']):
    """Run complete TFT wind power forecasting pipeline"""
    
    print("Starting TFT Wind Power Forecasting Pipeline")
    print("=" * 60)
    
    # Load and preprocess data
    data = load_wind_data(file_path)
    
    results = {}
    models = {}
    
    for horizon in horizons_to_test:
        print(f"\n{'='*80}")
        print(f"Processing {PREDICTION_CONFIGS[horizon]['description']} forecast with TFT")
        print(f"{'='*80}")
        
        try:
            # Create TFT dataset
            training_data, validation_data = create_tft_dataset(data, horizon)
            
            # Train TFT model
            tft_model, trainer = train_tft_model(training_data, validation_data, horizon)
            
            # Make predictions
            predictions = make_predictions(tft_model, validation_data, trainer, horizon)
            
            # Evaluate predictions
            pred_values, pred_index = evaluate_tft_predictions(predictions, horizon)
            
            # Plot results
            plot_tft_predictions(predictions, pred_index, horizon)
            
            # Plot feature importance (for the first model only)
            if horizon == horizons_to_test[0]:
                print("Plotting feature importance...")
                try:
                    plot_feature_importance(tft_model, training_data)
                except Exception as e:
                    print(f"Could not plot feature importance: {e}")
            
            # Store results
            results[horizon] = {
                'predictions': pred_values,
                'index': pred_index,
                'model_size': tft_model.size()
            }
            models[horizon] = tft_model
            
            print(f"✓ {horizon} forecast completed successfully")
            
        except Exception as e:
            print(f"✗ Error processing {horizon} forecast: {e}")
            continue
    
    # Print summary
    print(f"\n{'='*60}")
    print("TFT WIND FORECASTING RESULTS SUMMARY")
    print(f"{'='*60}")
    
    for horizon in results:
        config = PREDICTION_CONFIGS[horizon]
        result = results[horizon]
        print(f"\n{config['description']}:")
        print(f"  Model parameters: {result['model_size']:,}")
        print(f"  Prediction horizon: {config['max_prediction_length']} hours")
        print(f"  Number of forecasts: {len(result['predictions'])}")
    
    return results, models, data


In [40]:
from tst import PREDICTION_HORIZONS, load_and_prepare_data


def run_forecasting(file_path, horizons_to_test=['12h']):
    """Run complete forecasting pipeline"""
    
    # Load data
    features, timestamps = load_and_prepare_data(file_path)
    
    results = {}
    models = {}
    
    for horizon_name in horizons_to_test:
        horizon_hours = PREDICTION_HORIZONS[horizon_name]
        
        print(f"\n{'='*60}")
        print(f"Training model for {horizon_hours}-hour forecast")
        print(f"{'='*60}")
        
        # Prepare data
        train_loader, val_loader, test_loader, scaler = prepare_datasets(features, horizon_hours)
        
        # Create model
        model = WindPowerLSTM(
            input_size=features.shape[1],  # All features
            hidden_size=HIDDEN_SIZE,
            num_layers=NUM_LAYERS,
            forecast_horizon=horizon_hours,
            dropout=DROPOUT
        ).to(DEVICE)
        
        print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")
        
        # Train model
        train_losses, val_losses = train_model(model, train_loader, val_loader, NUM_EPOCHS)
        
        # Evaluate model
        predictions, actuals, metrics = evaluate_model(model, test_loader, scaler, f"{horizon_hours}h")
        
        # Plot results
        plot_predictions(predictions, actuals, f"{horizon_hours}h")
        
        # Plot training curves
        plt.figure(figsize=(10, 6))
        plt.plot(train_losses, label='Training Loss')
        plt.plot(val_losses, label='Validation Loss')
        plt.title(f'Training Progress - {horizon_hours}h Forecast')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.grid(True)
        plt.show()
        
        # Store results
        results[horizon_name] = metrics
        models[horizon_name] = model
    
    # Print summary
    print(f"\n{'='*60}")
    print("FORECASTING RESULTS SUMMARY")
    print(f"{'='*60}")
    
    for horizon_name, metrics in results.items():
        hours = PREDICTION_HORIZONS[horizon_name]
        print(f"\n{hours}-hour forecast:")
        print(f"  MAE: {metrics['mae']:.4f}")
        print(f"  RMSE: {metrics['rmse']:.4f}")
        print(f"  MAPE: {metrics['mape']:.2f}%")
    
    return results, models


Using device: cpu


In [41]:
if __name__ == "__main__":
    # Set your file path
    FILE_PATH = r"C:\Users\User\Documents\Semester 07\EE4750 - Data Analytics in Power Systems\WindPowerForecastingData TASK.xlsx\WindPowerForecastingData TASK.xlsx"
    
    # Choose horizons to test
    HORIZONS_TO_TEST = ['12h']  # Start with shorter horizons
    
    print("Running TFT Wind Power Forecasting...")
    
    # Run the complete pipeline
    tft_results, tft_models, processed_data = run_tft_wind_forecasting(
        FILE_PATH, 
        HORIZONS_TO_TEST
    )
    
    print("\nTFT Pipeline completed!")
    print(f"Trained models for: {list(tft_results.keys())}")

Running TFT Wind Power Forecasting...
Starting TFT Wind Power Forecasting Pipeline
Loading wind power data...


Seed set to 42


Raw data shape: (16800, 6)
Processed data shape: (16765, 12)
Date range: 2012-01-01 01:00:00 to 2013-11-30 00:00:00

Processing 12 hours ahead forecast with TFT
Known reals: ['hour', 'day_of_week', 'month', 'day_of_year', 'U10', 'V10', 'U100', 'V100']
Unknown reals: ['TARGETVAR']
Training cutoff: 11735 (out of 16765)
Training samples: 11747
Validation samples: 1

Training TFT for 12 hours ahead...
Finding optimal learning rate...


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Model created with 246.7k parameters
Starting training...



   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 0      | train
3  | prescalers                         | ModuleDict                      | 208    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 5.0 K  | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 18.2 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 16.2 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 16.8 K | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 16.8 K 

Epoch 0:  78%|███████▊  | 39/50 [07:27<02:06,  0.09it/s, v_num=9, train_loss_step=0.276]


Detected KeyboardInterrupt, attempting graceful shutdown ...


SystemExit: 1