In [1]:
%%time

!pip install -r requirements.txt

CPU times: user 12.6 ms, sys: 9.24 ms, total: 21.8 ms
Wall time: 1.06 s


In [2]:
%%time

from utils.data_utils import (
    load_and_split_data,
    add_time_features,
    add_lag_features,
    add_rolling_features,
)

train, val, test = load_and_split_data(path="data/PJME_hourly.csv")
dfs = {"train": train, "val": val, "test": test}

for name, df in dfs.items():
    dfs[name] = (
        df.pipe(add_time_features)
        # .pipe(add_lag_features)  # Uncomment to add lag features
        .pipe(add_rolling_features)  # Uncomment to add rolling features
        .pipe(lambda x: x.dropna())
    )
    print(f"\n{name}: {dfs[name].shape}")
    print(dfs[name].head(2))

train, val, test = dfs.values()


train: (101732, 10)
              Datetime  PJME_MW  hour  dayofweek  month  day  year  \
24 2002-01-02 01:00:00  28121.0     1          2      1    2  2002   
25 2002-01-02 02:00:00  27437.0     2          2      1    2  2002   

    is_weekend  rolling_mean_24  rolling_std_24  
24           0     31017.500000     2423.666231  
25           0     30922.833333     2492.512817  

val: (21781, 10)
                  Datetime  PJME_MW  hour  dayofweek  month  day  year  \
101780 2013-08-12 21:00:00  42567.0    21          0      8   12  2013   
101781 2013-08-12 22:00:00  40735.0    22          0      8   12  2013   

        is_weekend  rolling_mean_24  rolling_std_24  
101780           0     36604.208333     6865.057389  
101781           0     36783.458333     6965.732334  

test: (21781, 10)
                  Datetime  PJME_MW  hour  dayofweek  month  day  year  \
123585 2016-02-07 11:00:00  30911.0    11          6      2    7  2016   
123586 2016-02-07 12:00:00  30504.0    12       

In [3]:
%%time

import numpy as np
from torch.utils.data import DataLoader
from utils.data_utils import scale, create_sequences
from utils.dataset import Dataset_ECF

# Takes some time; keep your eyes on memory usage

train_scaled, val_scaled, test_scaled, feature_scaler, target_scaler = scale(
    train, val, test, ignore_cols=["is_weekend"]
)

# Define the window size and forecast steps, e.g., use the last 30 days to forecast the next 3 days
window_size = 24 * 30  # 24h * 30days
forecast_steps = 24 * 3  # 24h * 3days

X_train, y_train = create_sequences(train_scaled, window_size=window_size, forecast_steps=forecast_steps)
X_val, y_val = create_sequences(val_scaled, window_size=window_size, forecast_steps=forecast_steps)
X_test, y_test = create_sequences(test_scaled, window_size=window_size, forecast_steps=forecast_steps)

# Shuffle the training data before creating DataLoader
perm = np.random.permutation(len(X_train))
X_train_shuffled = X_train[perm]
y_train_shuffled = y_train[perm]

train_ds = Dataset_ECF(X_train_shuffled, y_train_shuffled)
val_ds = Dataset_ECF(X_val, y_val)
test_ds = Dataset_ECF(X_test, y_test)

train_dl = DataLoader(train_ds, batch_size=128, shuffle=False, drop_last=True)  # shuffle=False as we have already shuffled
val_dl = DataLoader(val_ds, batch_size=128, shuffle=False, drop_last=True)
test_dl = DataLoader(test_ds, batch_size=128, shuffle=False, drop_last=True)

print(f"Train Dataloader: {len(train_dl)}")
print(f"Val Dataloader: {len(val_dl)}")
print(f"Test Dataloader: {len(test_dl)}")


100%|██████████| 100941/100941 [00:52<00:00, 1937.47it/s]
100%|██████████| 20990/20990 [00:04<00:00, 4545.62it/s]
100%|██████████| 20990/20990 [00:04<00:00, 4633.60it/s]


Train Dataloader: 788
Val Dataloader: 163
Test Dataloader: 163
CPU times: user 1min, sys: 4.7 s, total: 1min 5s
Wall time: 1min 7s


In [4]:
import torch

# Select the compute device once; every training/evaluation cell below reuses `device`.
# Preference order: CUDA GPU, then Apple-Silicon MPS, then CPU, so the notebook runs
# on any machine. Assign device = "cpu" / "cuda" / "mps" manually to force a backend.
if torch.cuda.is_available():
    device = "cuda"
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    # The getattr guard covers older torch builds that ship without the MPS backend module.
    device = "mps"
else:
    device = "cpu"

### Long Short-Term Memory

In [None]:
%%time

import torch
from models.lstm import LSTM_ECF
from utils.training_utils import set_seed, train, evaluate_multi_step
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
from utils.visualization_utils import plot_multi_sample_forecasts

set_seed(42)

lstm_model = LSTM_ECF(input_size=X_train.shape[2], forecast_steps=forecast_steps)

lstm_model = train(
    lstm_model,
    train_loader=train_dl,
    val_loader=val_dl,
    optimizer=torch.optim.Adam(lstm_model.parameters(), lr=1e-3),
    loss_fn=torch.nn.MSELoss(),
    num_epochs=30,
    patience=3,
    device=device,
)

lstm_y_true, lstm_y_pred = evaluate_multi_step(lstm_model, test_dl, device)
lstm_y_true = target_scaler.inverse_transform(lstm_y_true.reshape(-1, 1)).flatten()
lstm_y_pred = target_scaler.inverse_transform(lstm_y_pred.reshape(-1, 1)).flatten()

lstm_mae = mean_absolute_error(lstm_y_true, lstm_y_pred)
lstm_rmse = root_mean_squared_error(lstm_y_true, lstm_y_pred)

print(f"[LSTM] Test MAE: {lstm_mae:.2f}")
print(f"[LSTM] Test RMSE: {lstm_rmse:.2f}")

plot_multi_sample_forecasts(
    y_true=lstm_y_true.reshape(-1, forecast_steps),
    y_pred=lstm_y_pred.reshape(-1, forecast_steps),
    num_samples=2,
    title_prefix="LSTM",
)

Epoch 01: 100%|██████████| 788/788 [02:07<00:00,  6.19it/s]


Epoch 01 | Train Loss: 0.2324 | Val Loss: 0.2187


Epoch 02: 100%|██████████| 788/788 [01:56<00:00,  6.74it/s]


Epoch 02 | Train Loss: 0.1582 | Val Loss: 0.2320


Epoch 03: 100%|██████████| 788/788 [01:57<00:00,  6.72it/s]


Epoch 03 | Train Loss: 0.1360 | Val Loss: 0.2777


Epoch 04: 100%|██████████| 788/788 [01:57<00:00,  6.70it/s]


Epoch 04 | Train Loss: 0.1161 | Val Loss: 0.2749
Early stopping triggered at epoch 4
[LSTM] Test MAE: 2906.94
[LSTM] Test RMSE: 4142.23


CPU times: user 2min 28s, sys: 28.4 s, total: 2min 56s
Wall time: 8min 49s


### Temporal Convolutional Network

In [None]:
%%time

import torch
from models.tcn import TCN_ECF
from utils.training_utils import set_seed, train, evaluate_multi_step
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
from utils.visualization_utils import plot_multi_sample_forecasts

set_seed(42)

tcn_model = TCN_ECF(
    input_size=X_train.shape[2], num_channels=[64] * 4, kernel_size=5, dropout=0.3, forecast_steps=forecast_steps
)

tcn_model = train(
    tcn_model,
    train_loader=train_dl,
    val_loader=val_dl,
    optimizer=torch.optim.Adam(tcn_model.parameters(), lr=1e-3),
    loss_fn=torch.nn.MSELoss(),
    num_epochs=30,
    patience=3,
    device=device,
)

tcn_y_true, tcn_y_pred = evaluate_multi_step(tcn_model, test_dl, device)
tcn_y_true = target_scaler.inverse_transform(tcn_y_true.reshape(-1, 1)).flatten()
tcn_y_pred = target_scaler.inverse_transform(tcn_y_pred.reshape(-1, 1)).flatten()

tcn_mae = mean_absolute_error(tcn_y_true, tcn_y_pred)
tcn_rmse = root_mean_squared_error(tcn_y_true, tcn_y_pred)

print(f"[TCN] Test MAE: {tcn_mae:.2f}")
print(f"[TCN] Test RMSE: {tcn_rmse:.2f}")

plot_multi_sample_forecasts(
    y_true=tcn_y_true.reshape(-1, forecast_steps),
    y_pred=tcn_y_pred.reshape(-1, forecast_steps),
    num_samples=2,
    title_prefix="TCN"
)


Epoch 01:   0%|          | 0/788 [00:00<?, ?it/s]

Epoch 01: 100%|██████████| 788/788 [01:02<00:00, 12.69it/s]


Epoch 01 | Train Loss: 0.2739 | Val Loss: 0.2124


Epoch 02: 100%|██████████| 788/788 [01:00<00:00, 13.02it/s]


Epoch 02 | Train Loss: 0.1843 | Val Loss: 0.2300


Epoch 03: 100%|██████████| 788/788 [01:00<00:00, 13.03it/s]


Epoch 03 | Train Loss: 0.1659 | Val Loss: 0.2476


Epoch 04: 100%|██████████| 788/788 [01:00<00:00, 12.99it/s]


Epoch 04 | Train Loss: 0.1525 | Val Loss: 0.2802
Early stopping triggered at epoch 4
[TCN] Test MAE: 2922.14
[TCN] Test RMSE: 4102.02


CPU times: user 1min 2s, sys: 21.2 s, total: 1min 23s
Wall time: 4min 25s


### Transformer

In [None]:
%%time

import torch
from models.tf import TF_ECF
from utils.training_utils import set_seed, train, evaluate_multi_step
from sklearn.metrics import mean_absolute_error, root_mean_squared_error
from utils.visualization_utils import plot_multi_sample_forecasts

set_seed(42)

tf_model = TF_ECF(input_size=X_train.shape[2], forecast_steps=forecast_steps)

tf_model = train(
    tf_model,
    train_loader=train_dl,
    val_loader=val_dl,
    optimizer=torch.optim.Adam(tf_model.parameters(), lr=1e-3),
    loss_fn=torch.nn.MSELoss(),
    num_epochs=30,
    patience=3,
    device=device,
)

tf_y_true, tf_y_pred = evaluate_multi_step(tf_model, test_dl, device)
tf_y_true = target_scaler.inverse_transform(tf_y_true.reshape(-1, 1)).flatten()
tf_y_pred = target_scaler.inverse_transform(tf_y_pred.reshape(-1, 1)).flatten()

tf_mae = mean_absolute_error(tf_y_true, tf_y_pred)
tf_rmse = root_mean_squared_error(tf_y_true, tf_y_pred)

print(f"[TF] Test MAE: {tf_mae:.2f}")
print(f"[TF] Test RMSE: {tf_rmse:.2f}")

plot_multi_sample_forecasts(
    y_true=tf_y_true.reshape(-1, forecast_steps),
    y_pred=tf_y_pred.reshape(-1, forecast_steps),
    num_samples=2,
    title_prefix="Transformer"
)


Epoch 01: 100%|██████████| 788/788 [07:52<00:00,  1.67it/s]


Epoch 01 | Train Loss: 0.2782 | Val Loss: 0.2078


Epoch 02: 100%|██████████| 788/788 [07:54<00:00,  1.66it/s]


Epoch 02 | Train Loss: 0.1630 | Val Loss: 0.2096


Epoch 03: 100%|██████████| 788/788 [07:54<00:00,  1.66it/s]


Epoch 03 | Train Loss: 0.1383 | Val Loss: 0.2299


Epoch 04:  84%|████████▎ | 658/788 [06:50<01:21,  1.60it/s]


KeyboardInterrupt: 

**Note: Predictions reflect daily and hourly temporal patterns, as seen in the EDA.**