In [8]:
%%time

!pip install -r requirements.txt

CPU times: user 9.76 ms, sys: 14.6 ms, total: 24.4 ms
Wall time: 1.08 s


In [9]:
%%time

from utils.data_utils import (
    load_and_split_data,
    add_time_features,
    add_lag_features,
    add_rolling_features,
)

train, val, test = load_and_split_data(path="data/PJME_hourly.csv")
dfs = {"train": train, "val": val, "test": test}

for name, df in dfs.items():
    dfs[name] = (
        df.pipe(add_time_features)
        # .pipe(add_lag_features)  # Uncomment to add lag features
        .pipe(add_rolling_features)  # Uncomment to add rolling features
        .pipe(lambda x: x.dropna())
    )
    print(f"\n{name}: {dfs[name].shape}")
    print(dfs[name].head(2))

train, val, test = dfs.values()


train: (101732, 10)
              Datetime  PJME_MW  hour  dayofweek  month  day  year  \
24 2002-01-02 01:00:00  28121.0     1          2      1    2  2002   
25 2002-01-02 02:00:00  27437.0     2          2      1    2  2002   

    is_weekend  rolling_mean_24  rolling_std_24  
24           0     31017.500000     2423.666231  
25           0     30922.833333     2492.512817  

val: (21781, 10)
                  Datetime  PJME_MW  hour  dayofweek  month  day  year  \
101780 2013-08-12 21:00:00  42567.0    21          0      8   12  2013   
101781 2013-08-12 22:00:00  40735.0    22          0      8   12  2013   

        is_weekend  rolling_mean_24  rolling_std_24  
101780           0     36604.208333     6865.057389  
101781           0     36783.458333     6965.732334  

test: (21781, 10)
                  Datetime  PJME_MW  hour  dayofweek  month  day  year  \
123585 2016-02-07 11:00:00  30911.0    11          6      2    7  2016   
123586 2016-02-07 12:00:00  30504.0    12       

In [10]:
%%time

import numpy as np
from torch.utils.data import DataLoader
from utils.data_utils import scale, create_sequences
from utils.dataset import Dataset_ECF

# Takes some time; keep your eyes on memory usage

train_scaled, val_scaled, test_scaled, feature_scaler, target_scaler = scale(
    train, val, test, ignore_cols=["is_weekend"]
)

# Define the window size and forecast steps, e.g., use the last 30 days to forecast the next 3 days
window_size = 24 * 30  # 24h * 30days
forecast_steps = 24 * 3  # 24h * 3days

X_train, y_train = create_sequences(train_scaled, window_size=window_size, forecast_steps=forecast_steps)
X_val, y_val = create_sequences(val_scaled, window_size=window_size, forecast_steps=forecast_steps)
X_test, y_test = create_sequences(test_scaled, window_size=window_size, forecast_steps=forecast_steps)

# Shuffle the training data before creating DataLoader
perm = np.random.permutation(len(X_train))
X_train_shuffled = X_train[perm]
y_train_shuffled = y_train[perm]

train_ds = Dataset_ECF(X_train_shuffled, y_train_shuffled)
val_ds = Dataset_ECF(X_val, y_val)
test_ds = Dataset_ECF(X_test, y_test)

train_dl = DataLoader(train_ds, batch_size=128, shuffle=False, drop_last=True)  # shuffle=False as we have already shuffled
val_dl = DataLoader(val_ds, batch_size=128, shuffle=False, drop_last=True)
test_dl = DataLoader(test_ds, batch_size=128, shuffle=False, drop_last=True)

print(f"Train Dataloader: {len(train_dl)}")
print(f"Val Dataloader: {len(val_dl)}")
print(f"Test Dataloader: {len(test_dl)}")


100%|██████████| 100941/100941 [00:59<00:00, 1689.28it/s]
100%|██████████| 20990/20990 [00:04<00:00, 4420.69it/s]
100%|██████████| 20990/20990 [00:04<00:00, 4389.89it/s]


Train Dataloader: 788
Val Dataloader: 163
Test Dataloader: 163
CPU times: user 1min 3s, sys: 8.97 s, total: 1min 12s
Wall time: 1min 27s


In [11]:
device = "mps"  # Defined once; change to "cpu" or - if available - "cuda"

### Long Short-Term Memory

In [12]:
%%time

import torch
from models.lstm import LSTM_ECF
from utils.training_utils import set_seed, train, evaluate_multi_step
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
from utils.visualization_utils import plot_multi_sample_forecasts

set_seed(42)

lstm_model = LSTM_ECF(input_size=X_train.shape[2], forecast_steps=forecast_steps)

lstm_model = train(
    lstm_model,
    train_loader=train_dl,
    val_loader=val_dl,
    optimizer=torch.optim.Adam(lstm_model.parameters(), lr=1e-3),
    loss_fn=torch.nn.MSELoss(),
    num_epochs=30,
    patience=3,
    device=device,
)

lstm_y_true, lstm_y_pred = evaluate_multi_step(lstm_model, test_dl, device)
lstm_y_true = target_scaler.inverse_transform(lstm_y_true.reshape(-1, 1)).flatten()
lstm_y_pred = target_scaler.inverse_transform(lstm_y_pred.reshape(-1, 1)).flatten()

lstm_mae = mean_absolute_error(lstm_y_true, lstm_y_pred)
lstm_rmse = root_mean_squared_error(lstm_y_true, lstm_y_pred)

print(f"[LSTM] Test MAE: {lstm_mae:.2f}")
print(f"[LSTM] Test RMSE: {lstm_rmse:.2f}")

plot_multi_sample_forecasts(
    y_true=lstm_y_true.reshape(-1, forecast_steps),
    y_pred=lstm_y_pred.reshape(-1, forecast_steps),
    num_samples=2,
    title_prefix="LSTM",
)

Epoch 01: 100%|██████████| 788/788 [02:31<00:00,  5.22it/s]


Epoch 01 | Train Loss: 0.2342 | Val Loss: 0.2152


Epoch 02: 100%|██████████| 788/788 [02:18<00:00,  5.70it/s]


Epoch 02 | Train Loss: 0.1574 | Val Loss: 0.2481


Epoch 03: 100%|██████████| 788/788 [02:20<00:00,  5.61it/s]


Epoch 03 | Train Loss: 0.1373 | Val Loss: 0.2585


Epoch 04: 100%|██████████| 788/788 [02:21<00:00,  5.57it/s]


Epoch 04 | Train Loss: 0.1139 | Val Loss: 0.2764
Early stopping triggered at epoch 4
[LSTM] Test MAE: 2926.02
[LSTM] Test RMSE: 4118.17


CPU times: user 2min 14s, sys: 47.6 s, total: 3min 2s
Wall time: 10min 28s


### Temporal Convolutional Network

In [13]:
%%time

import torch
from models.tcn import TCN_ECF
from utils.training_utils import set_seed, train, evaluate_multi_step
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
from utils.visualization_utils import plot_multi_sample_forecasts

set_seed(42)

tcn_model = TCN_ECF(
    input_size=X_train.shape[2], num_channels=[64] * 4, kernel_size=5, dropout=0.3, forecast_steps=forecast_steps
)

tcn_model = train(
    tcn_model,
    train_loader=train_dl,
    val_loader=val_dl,
    optimizer=torch.optim.Adam(tcn_model.parameters(), lr=1e-3),
    loss_fn=torch.nn.MSELoss(),
    num_epochs=30,
    patience=3,
    device=device,
)

tcn_y_true, tcn_y_pred = evaluate_multi_step(tcn_model, test_dl, device)
tcn_y_true = target_scaler.inverse_transform(tcn_y_true.reshape(-1, 1)).flatten()
tcn_y_pred = target_scaler.inverse_transform(tcn_y_pred.reshape(-1, 1)).flatten()

tcn_mae = mean_absolute_error(tcn_y_true, tcn_y_pred)
tcn_rmse = root_mean_squared_error(tcn_y_true, tcn_y_pred)

print(f"[TCN] Test MAE: {tcn_mae:.2f}")
print(f"[TCN] Test RMSE: {tcn_rmse:.2f}")

plot_multi_sample_forecasts(
    y_true=tcn_y_true.reshape(-1, forecast_steps),
    y_pred=tcn_y_pred.reshape(-1, forecast_steps),
    num_samples=2,
    title_prefix="TCN"
)


Epoch 01: 100%|██████████| 788/788 [01:02<00:00, 12.67it/s]


Epoch 01 | Train Loss: 0.2701 | Val Loss: 0.2147


Epoch 02: 100%|██████████| 788/788 [01:00<00:00, 12.94it/s]


Epoch 02 | Train Loss: 0.1807 | Val Loss: 0.2344


Epoch 03: 100%|██████████| 788/788 [01:01<00:00, 12.90it/s]


Epoch 03 | Train Loss: 0.1597 | Val Loss: 0.2525


Epoch 04: 100%|██████████| 788/788 [01:01<00:00, 12.83it/s]


Epoch 04 | Train Loss: 0.1424 | Val Loss: 0.2323
Early stopping triggered at epoch 4
[TCN] Test MAE: 2577.69
[TCN] Test RMSE: 3668.50


CPU times: user 58.9 s, sys: 22 s, total: 1min 20s
Wall time: 4min 27s


### Transformer

In [14]:
%%time

import torch
from models.tf import TF_ECF
from utils.training_utils import set_seed, train, evaluate_multi_step
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
from utils.visualization_utils import plot_multi_sample_forecasts

set_seed(42)

tf_model = TF_ECF(input_size=X_train.shape[2], forecast_steps=forecast_steps)

tf_model = train(
    tf_model,
    train_loader=train_dl,
    val_loader=val_dl,
    optimizer=torch.optim.Adam(tf_model.parameters(), lr=1e-3),
    loss_fn=torch.nn.MSELoss(),
    num_epochs=30,
    patience=3,
    device=device,
)

tf_y_true, tf_y_pred = evaluate_multi_step(tf_model, test_dl, device)
tf_y_true = target_scaler.inverse_transform(tf_y_true.reshape(-1, 1)).flatten()
tf_y_pred = target_scaler.inverse_transform(tf_y_pred.reshape(-1, 1)).flatten()

tf_mae = mean_absolute_error(tf_y_true, tf_y_pred)
tf_rmse = root_mean_squared_error(tf_y_true, tf_y_pred)

print(f"[TF] Test MAE: {tf_mae:.2f}")
print(f"[TF] Test RMSE: {tf_rmse:.2f}")

plot_multi_sample_forecasts(
    y_true=tf_y_true.reshape(-1, forecast_steps),
    y_pred=tf_y_pred.reshape(-1, forecast_steps),
    num_samples=2,
    title_prefix="Transformer"
)


Epoch 01: 100%|██████████| 788/788 [13:16<00:00,  1.01s/it]  


Epoch 01 | Train Loss: 0.2760 | Val Loss: 0.2121


Epoch 02: 100%|██████████| 788/788 [08:26<00:00,  1.56it/s]


Epoch 02 | Train Loss: 0.1627 | Val Loss: 0.2199


Epoch 03: 100%|██████████| 788/788 [09:37<00:00,  1.37it/s]


Epoch 03 | Train Loss: 0.1309 | Val Loss: 0.2364


Epoch 04: 100%|██████████| 788/788 [08:47<00:00,  1.49it/s]


Epoch 04 | Train Loss: 0.1096 | Val Loss: 0.2506
Early stopping triggered at epoch 4
[TF] Test MAE: 2773.90
[TF] Test RMSE: 3939.49


CPU times: user 1min 46s, sys: 5min 30s, total: 7min 17s
Wall time: 42min 1s


**Note: Predictions reflect daily and hourly temporal patterns, as seen in the EDA.**