# Physics-Aware Transformer Model

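This notebook trains the time-series transformer for wind power prediction. Training uses a standard MSE loss by default; a physics-aware loss is also constructed and can be swapped in to apply physics constraints.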


In [None]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import torch
import torch.nn as nn

# Repository root, taken as the parent of the current working directory
# (presumably the notebook is launched from a sub-folder of the repo — confirm).
project_root = Path().resolve().parent

# Make sure every directory this notebook reads from or writes to exists.
for _dir_parts in (
    ('results', 'figures'),
    ('results', 'metrics'),
    ('data', 'processed'),
):
    project_root.joinpath(*_dir_parts).mkdir(parents=True, exist_ok=True)

# Put <root>/src at the front of the import path so the project modules
# imported below resolve from there.
sys.path.insert(0, str(project_root.joinpath('src')))

from preprocessing import time_aware_split, prepare_sequences, FeatureScaler
from models.transformer import TimeSeriesTransformer
from physics_constraints import PhysicsLoss, wind_power_equation
from training import set_seed, train_model, TimeSeriesDataset
from torch.utils.data import DataLoader

set_seed(42)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

plt.style.use('seaborn-v0_8-darkgrid')
%matplotlib inline


## Load and Prepare Data


In [None]:
# Load cleaned data
# Both a gzip-compressed and a plain CSV version are accepted; the compressed
# file wins when both are present.
data_path_csv = project_root / 'data' / 'processed' / 'scada_cleaned.csv'
data_path_gz = project_root / 'data' / 'processed' / 'scada_cleaned.csv.gz'
mapping_path = project_root / 'data' / 'processed' / 'feature_mapping.json'

# Determine which file exists
# The first column becomes the index and is parsed as dates.
if data_path_gz.exists():
    print(f"Loading compressed data from: {data_path_gz}")
    df = pd.read_csv(data_path_gz, index_col=0, parse_dates=True, compression='gzip')
elif data_path_csv.exists():
    print(f"Loading data from: {data_path_csv}")
    df = pd.read_csv(data_path_csv, index_col=0, parse_dates=True)
else:
    raise FileNotFoundError(
        f"Cleaned data file not found!\n"
        f"Expected one of:\n"
        f"  - {data_path_csv}\n"
        f"  - {data_path_gz}\n\n"
        f"Please run notebook 01_data_exploration.ipynb first to generate the cleaned data."
    )

# Load feature mapping
if not mapping_path.exists():
    raise FileNotFoundError(
        f"Feature mapping file not found: {mapping_path}\n"
        f"Please run notebook 01_data_exploration.ipynb first."
    )

# Read the JSON explicitly as UTF-8 so the result does not depend on the
# platform's locale encoding.
with open(mapping_path, 'r', encoding='utf-8') as f:
    feature_mapping = json.load(f)
target_col = feature_mapping['target']
feature_cols = feature_mapping['features']
# Assumes 'all_features' is a dict; ws_col is None when it has no 'wind_speed' key.
ws_col = feature_mapping['all_features'].get('wind_speed')

# Split data (70% train / 15% validation / 15% test)
train_df, val_df, test_df = time_aware_split(df, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15)

# Scale features
# The scaler is fitted on the training split only and then applied unchanged
# to the validation and test splits.
scaler = FeatureScaler(method='standard')
X_train_scaled = scaler.fit_transform(train_df[feature_cols])
X_val_scaled = scaler.transform(val_df[feature_cols])
X_test_scaled = scaler.transform(test_df[feature_cols])

# Targets are taken as raw (unscaled) arrays.
y_train = train_df[target_col].values
y_val = val_df[target_col].values
y_test = test_df[target_col].values

print(f"Train: {len(train_df)}, Val: {len(val_df)}, Test: {len(test_df)}")

## Prepare Sequences


In [None]:
# Window length shared by the sequence arrays and the datasets below.
sequence_length = 48  # Use 48 timesteps as input

# Build (inputs, targets) sequence arrays for each split, in train/val/test order.
(X_train_seq, y_train_seq), (X_val_seq, y_val_seq), (X_test_seq, y_test_seq) = (
    prepare_sequences(scaled.values, targets, sequence_length)
    for scaled, targets in (
        (X_train_scaled, y_train),
        (X_val_scaled, y_val),
        (X_test_scaled, y_test),
    )
)

print(f"Train sequences: {X_train_seq.shape}")
print(f"Val sequences: {X_val_seq.shape}")
print(f"Test sequences: {X_test_seq.shape}")

# Wrap the same scaled features and targets in datasets, one per split.
train_dataset, val_dataset, test_dataset = (
    TimeSeriesDataset(scaled.values, targets, sequence_length)
    for scaled, targets in (
        (X_train_scaled, y_train),
        (X_val_scaled, y_val),
        (X_test_scaled, y_test),
    )
)

# Batches of 32; only the training loader shuffles.
train_loader, val_loader, test_loader = (
    DataLoader(dataset, batch_size=32, shuffle=reshuffle)
    for dataset, reshuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)


## Create Transformer Model


In [None]:
# Transformer hyperparameters. input_size is read from axis 2 of the training
# sequence array (presumably the per-timestep feature count — confirm against
# prepare_sequences).
_transformer_config = dict(
    input_size=X_train_seq.shape[2],
    d_model=128,
    nhead=8,
    num_layers=4,
    dim_feedforward=512,
    patch_size=4,
    dropout=0.1,
    output_size=1,
)

# Build the model and move it to the selected device.
model = TimeSeriesTransformer(**_transformer_config).to(device)

# Report the total and the trainable (requires_grad) parameter counts.
_param_info = [(p.numel(), p.requires_grad) for p in model.parameters()]
_total_params = sum(count for count, _ in _param_info)
_trainable_params = sum(count for count, needs_grad in _param_info if needs_grad)
print(f"Model parameters: {_total_params:,}")
print(f"Trainable parameters: {_trainable_params:,}")


## Train the Model (MSE Loss by Default, Physics-Aware Loss Optional)


In [None]:
# Create physics-aware loss
# NOTE: this object is constructed but NOT passed to train_model below; the
# run as written optimises plain MSE. Swap it in at the marked argument to
# train with the physics terms (confirm PhysicsLoss's call signature matches
# what train_model expects first).
physics_loss = PhysicsLoss(
    lambda_physics=0.1,
    lambda_negative=1.0,
    lambda_monotonic=0.5
)

# Standard MSE loss for comparison
mse_loss = nn.MSELoss()

# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)

# Train model
save_dir = project_root / 'results' / 'checkpoints'
# Create the checkpoint directory up front, like the other results
# directories, so saving 'best_model.pt' cannot fail on a missing folder.
save_dir.mkdir(parents=True, exist_ok=True)
history = train_model(
    model,
    train_loader,
    val_loader,
    mse_loss,  # Can switch to physics_loss for physics-aware training
    optimizer,
    epochs=100,
    device=device,
    save_dir=save_dir,
    early_stopping_patience=15
)

# Plot training history
# history is read by its 'train_loss' and 'val_loss' entries, one curve each.
plt.figure(figsize=(12, 5))
plt.plot(history['train_loss'], label='Train Loss')
plt.plot(history['val_loss'], label='Val Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training History')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
figures_dir = project_root / 'results' / 'figures'
figures_dir.mkdir(parents=True, exist_ok=True)
# Save before show() so the written file contains the rendered figure.
plt.savefig(figures_dir / 'transformer_training_history.png', dpi=150)
plt.show()


## Evaluate on Test Set


In [None]:
# Load best model
# map_location remaps stored tensors onto the device in use now, so a
# checkpoint written on a GPU still loads on a CPU-only machine.
checkpoint = torch.load(save_dir / 'best_model.pt', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
print(f"Loaded best model from epoch {checkpoint['epoch']}")

# Evaluate on test set
model.eval()
pred_batches = []
with torch.no_grad():
    for batch_X, batch_y in test_loader:
        batch_X = batch_X.to(device)
        predictions = model(batch_X)
        # Flatten each batch: with output_size=1 the model presumably returns
        # (batch, 1) — confirm — and a 0-d output would break list.extend.
        pred_batches.append(predictions.cpu().numpy().reshape(-1))

# One 1-D prediction vector; empty when the loader yielded no batches.
y_pred_test = np.concatenate(pred_batches) if pred_batches else np.empty(0)
y_true_test = np.asarray(y_test_seq).reshape(-1)

# Fail loudly on misaligned targets/predictions rather than letting the
# metrics silently broadcast or the DataFrame constructor raise obscurely.
if y_true_test.shape != y_pred_test.shape:
    raise ValueError(
        f"Prediction/target length mismatch: "
        f"{y_pred_test.shape[0]} predictions vs {y_true_test.shape[0]} targets"
    )

# Compute metrics
from evaluation import compute_metrics
metrics = compute_metrics(y_true_test, y_pred_test)
print("\nTransformer Model - Test Set Metrics:")
for metric, value in metrics.items():
    print(f"  {metric}: {value:.4f}")

# Save predictions
predictions_df = pd.DataFrame({
    'true': y_true_test,
    'predicted': y_pred_test
})
predictions_df.to_csv(project_root / 'results' / 'metrics' / 'transformer_predictions.csv', index=False)
