In [1]:
import optuna
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
train_df = pd.read_csv('../../data/electricity/train_df.csv')
test_df = pd.read_csv('../../data/electricity/test_df.csv')
X_train_df = pd.read_csv('../../data/electricity/y_train_df.csv')
X_test_df = pd.read_csv('../../data/electricity/X_test_df.csv')
y_train_df = pd.read_csv('../../data/electricity/y_train_df.csv')
y_test_df = pd.read_csv('../../data/electricity/y_test_df.csv')

In [3]:
# seq_length = 24*7
# target_seq_length = prediction_length
# input_size = 1
# hidden_size = 10
# num_layers = 1
# output_size = 1
# learning_rate = 0.013
# epochs = 800
# batch_n = 128

In [4]:
feature_variable = test_df.drop(columns='datetime_utc').columns
target_variable = 'price_de'
timestemp_col = 'datetime_utc'
step_size = 24

In [20]:
hyperparameters = {
    "seq_length": 24 * 7,             # Sequence length
    "target_seq_length": 24,          # Target sequence length for forecasting
    "input_size": len(feature_variable), #1,                  # Input size
    "hidden_size": 60,                # Hidden size of GRU
    "num_layers": 5,                  # Number of layers in GRU
    "output_size": len(feature_variable),                 # Output size
    "learning_rate": 0.001,           # Learning rate
    "epochs": 100,                    # Number of training epochs
    "batch_size": 64,                 # Batch size
    "dropout": 0.2                    # Dropout rate
}

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [7]:
def normalize_data(data):
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaled_data = scaler.fit_transform(data)#(data.reshape(-1, 1))
    # scaled_data = scaled_data.flatten()

    return scaled_data, scaler

In [8]:
def create_sequences(df, seq_length, target_seq_length):

    data = df.values
    data, scaler = normalize_data(data)
    X, Y = [], []
    sequences_dict = {}

    # price_de_idx = df.columns.get_loc('price_de')

    for i in range(len(data) - seq_length - target_seq_length):
        x = data[i:(i + seq_length)]
        y = data[i + seq_length]
        # y = data[(i + seq_length):(i + seq_length+target_seq_length)]

        X.append(x)
        Y.append(y)

    sequences_dict = {'X' : np.array(X), 'y': np.array(Y), 'scaler' : scaler}

    return sequences_dict

In [9]:
# def create_sequence(df, unique_id, seq_length):
#     # Xs, Ys = [], []
#     sequence_dict = {}
#     grouped = df.groupby(unique_id)

#     for group_id, group in grouped:
#         data = group['y'].values
#         data, scaler = normalize_data(data)
#         X, Y = [], []
#         for i in range(len(data) - seq_length):
#             X.append(data[i:(i + seq_length)])
#             Y.append(data[i + seq_length])
#         sequence_dict[group_id] = {'X' : np.array(X), 'y': np.array(Y), 'scaler' : scaler}

#     return sequence_dict

In [10]:
class GRU_Model(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        gru_out, _ = self.gru(x, h0)
        out = self.fc(gru_out[:, -1, :])
        return out

In [11]:
def smape_loss(y_true, y_pred):
    """
    Compute the Symmetric Mean Absolute Percentage Error (SMAPE).

    Args:
    y_true (torch.Tensor): The true values.
    y_pred (torch.Tensor): The predicted values.

    Returns:
    torch.Tensor: The SMAPE value.
    """
    epsilon = torch.finfo(y_true.dtype).eps
    denominator = torch.max(torch.abs(y_true) + torch.abs(y_pred) + epsilon, torch.tensor(0.5 + epsilon).to(y_true.device))

    diff = 2 * torch.abs(y_pred - y_true) / denominator
    smape_value = 100 / len(y_true) * torch.sum(diff)
    return smape_value

In [17]:
def train_model(model, 
                criterion, 
                optimizer, 
                X_train, 
                y_train, 
                batch_size,
                epochs):
    
    dataset = TensorDataset(X_train, y_train)
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    for epoch in range(epochs):
        model.train()

        for batch_idx, (sequences, targets) in enumerate(data_loader):
            sequences, targets = sequences.to(device), targets.to(device)

            optimizer.zero_grad()
            pred = model(sequences)
            loss = criterion(pred, targets.squeeze(1))
            loss.backward()
            optimizer.step()

        if (epoch + 1) % 20 == 0:
            print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item()}')

    # generating forecasts
    model.eval()
    last_sequence = X_train[-1:].to(device) # [1, 168, 16]
    # with torch.no_grad():
    #     forecast_seq = model(last_sequence) 


    forecast_seq = torch.Tensor().to(device)
    
    for _ in range(hyperparameters["target_seq_length"]):
        with torch.no_grad():
            next_step_forecast = model(last_sequence) # [1, 16]
            # print(next_step_forecast.size()) # [1, 16]
            # print(next_step_forecast[:, -1:].size()) # [1, 1]
            # print(next_step_forecast.unsqueeze(-1).size()) # [1, 16, 1]
            # print(next_step_forecast.unsqueeze(1).size()) # [1, 1, 16]
            # break
            # forecast_seq = torch.cat((forecast_seq, next_step_forecast[:, -1:]), dim=1)
            forecast_seq = torch.cat((forecast_seq, next_step_forecast), dim=0) # [1, 16, 1]
            # print(forecast_seq.size())
            # break
            last_sequence = torch.cat((last_sequence[:, 1:, :], next_step_forecast.unsqueeze(1)), dim=1)
    
    return model, forecast_seq

In [13]:
# creating data slices to generate forecasts for the next 8 days
index_cutoffs = [24*i for i in range(7, -1, -1)]
train_df_list = [train_df.iloc[:-idx] if idx != 0 else train_df for idx in index_cutoffs]
index_ceiling = [x.index.stop for x in train_df_list]
test_df_list = [train_df['price_de'].iloc[idx:idx+step_size] if idx!=index_ceiling[-1] else test_df['price_de'] for idx in index_ceiling]
y_hat_full = np.empty((0, 1))

In [14]:
sequences_dict = create_sequences(train_df[feature_variable], hyperparameters["seq_length"], hyperparameters["target_seq_length"])

In [67]:
# y_hat_df_gru = test_data.copy().rename(columns={'y' : 'y_hat'})
# y_hat_df_gru['y_hat'] = pd.Series(dtype='float64')

all_forecast_seq_descaled = []

X_train = torch.from_numpy(sequences_dict['X'].astype(np.float32))#.unsqueeze(-1)
y_train = torch.from_numpy(sequences_dict['y'].astype(np.float32)).unsqueeze(1)

model = GRU_Model(hyperparameters['input_size'], hyperparameters['hidden_size'], hyperparameters['num_layers'], hyperparameters['output_size'], hyperparameters['dropout'])
model.to(device)

criterion = nn.MSELoss()
# criterion = smape_loss
optimizer = torch.optim.Adam(model.parameters(), lr=hyperparameters['learning_rate'])

model_gru, forecast_seq = train_model(model,
                                    criterion=criterion,
                                    optimizer=optimizer,
                                    X_train=X_train,
                                    y_train=y_train,
                                    batch_size=hyperparameters['batch_size'],
                                    epochs=hyperparameters['epochs'])

forecast_seq_descaled = sequences_dict['scaler'].inverse_transform(forecast_seq.cpu().numpy())
# all_forecast_seq_descaled = np.hstack(all_forecast_seq_descaled, forecast_seq_descaled)
# all_forecast_seq_descaled.append(forecast_seq_descaled)

# all_forecast_seq_descaled = np.hstack(all_forecast_seq_descaled)

# y_hat_df_gru['y_hat'] = all_forecast_seq_descaled


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [1/250], Loss: 0.06142239272594452
Epoch [2/250], Loss: 0.0611252561211586
Epoch [3/250], Loss: 0.06729058921337128
Epoch [4/250], Loss: 0.06908556073904037
Epoch [5/250], Loss: 0.06151284649968147
Epoch [6/250], Loss: 0.04959110543131828
Epoch [7/250], Loss: 0.05036606639623642
Epoch [8/250], Loss: 0.049042873084545135
Epoch [9/250], Loss: 0.045563697814941406
Epoch [10/250], Loss: 0.04647109657526016
Epoch [11/250], Loss: 0.04457663744688034
Epoch [12/250], Loss: 0.044874001294374466
Epoch [13/250], Loss: 0.04588751867413521
Epoch [14/250], Loss: 0.042978931218385696
Epoch [15/250], Loss: 0.04551471397280693
Epoch [16/250], Loss: 0.041095953434705734
Epoch [17/250], Loss: 0.041370365768671036
Epoch [18/250], Loss: 0.04133692383766174
Epoch [19/250], Loss: 0.03860574960708618
Epoch [20/250], Loss: 0.03933161497116089
Epoch [21/250], Loss: 0.03860257938504219
Epoch [22/250], Loss: 0.03780432417988777
Epoch [23/250], Loss: 0.03769420459866524
Epoch [24/250], Loss: 0.03748597204685

In [68]:
forecast_seq.cpu().numpy().shape

(24, 16)

In [76]:
forecast_seq_descaled[:,-1]

array([84.739   , 84.565956, 84.22186 , 83.99491 , 83.97682 , 84.1916  ,
       84.98759 , 87.22561 , 89.94887 , 91.17782 , 91.51087 , 91.55625 ,
       91.51024 , 91.38597 , 90.81423 , 89.68096 , 88.58993 , 87.585304,
       86.908226, 85.51874 , 82.99952 , 80.14192 , 76.78395 , 74.68101 ],
      dtype=float32)

In [72]:
test_df.iloc[:,-1].values

array([ 87.3 ,  85.89,  85.88,  83.21,  82.71,  85.44,  86.3 ,  88.  ,
        92.  ,  94.89,  91.28,  90.22,  87.94,  87.8 ,  91.5 , 102.87,
       115.72, 127.98, 135.76, 124.47, 113.51, 102.58,  92.38,  87.41])

In [78]:
loss = smape_loss(torch.from_numpy(forecast_seq_descaled[:,-1]), torch.from_numpy(test_df.iloc[:,-1].values))

In [83]:
loss.item()

11.583248978055664

### Hyperparameter Tuning

In [21]:
import optuna

def objective(trial):

    seed_value = 42
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)

    # Hyperparameters to tune
    learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-1, log=True)
    hidden_size = trial.suggest_int("hidden_size", 5, 50, step=5)
    num_layers = trial.suggest_int("num_layers", 2, 10)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
    dropout = trial.suggest_float("dropout", 0.1, 0.5)
    # epochs = trial.suggest_int("epochs", 100, 1000)

    X_train = torch.from_numpy(sequences_dict['X'].astype(np.float32))#.unsqueeze(-1)
    y_train = torch.from_numpy(sequences_dict['y'].astype(np.float32)).unsqueeze(1)

    model = GRU_Model(hyperparameters['input_size'], hidden_size, num_layers, hyperparameters['output_size'], dropout)
    model.to(device)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    model_gru, forecast_seq = train_model(model,
                                        criterion=criterion,
                                        optimizer=optimizer,
                                        X_train=X_train,
                                        y_train=y_train,
                                        batch_size=batch_size,
                                        epochs=hyperparameters['epochs'])

    forecast_seq_descaled = sequences_dict['scaler'].inverse_transform(forecast_seq.cpu().numpy())

    loss = smape_loss(torch.from_numpy(forecast_seq_descaled[:,-1]), torch.from_numpy(test_df.iloc[:,-1].values))

    return loss.item()

In [22]:
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)
best_params = study.best_trial.params

[I 2023-12-23 18:37:28,459] A new study created in memory with name: no-name-7bb74874-9a25-4338-9789-40e3d79e2ebd


Epoch [20/100], Loss: 0.0867677852511406
Epoch [40/100], Loss: 0.06007111072540283
Epoch [60/100], Loss: 0.05978955700993538
Epoch [80/100], Loss: 0.05940881744027138


[I 2023-12-23 18:38:48,015] Trial 0 finished with value: 11.126343586501196 and parameters: {'learning_rate': 0.004006674501117757, 'hidden_size': 10, 'num_layers': 7, 'batch_size': 64, 'dropout': 0.45042215855549683}. Best is trial 0 with value: 11.126343586501196.


Epoch [100/100], Loss: 0.054651737213134766
Epoch [20/100], Loss: 0.09155221283435822
Epoch [40/100], Loss: 0.07811836153268814
Epoch [60/100], Loss: 0.06711537390947342
Epoch [80/100], Loss: 0.05931456759572029


[I 2023-12-23 18:41:17,734] Trial 1 finished with value: 20.828225049722935 and parameters: {'learning_rate': 0.002548827108170144, 'hidden_size': 40, 'num_layers': 10, 'batch_size': 128, 'dropout': 0.39743585913612256}. Best is trial 0 with value: 11.126343586501196.


Epoch [100/100], Loss: 0.05385013669729233
Epoch [20/100], Loss: 0.017406297847628593
Epoch [40/100], Loss: 0.011404613964259624
Epoch [60/100], Loss: 0.00820804014801979
Epoch [80/100], Loss: 0.00761644309386611


[I 2023-12-23 18:43:55,029] Trial 2 finished with value: 132.2431008602678 and parameters: {'learning_rate': 0.0006779460991653283, 'hidden_size': 20, 'num_layers': 8, 'batch_size': 32, 'dropout': 0.29298957616549914}. Best is trial 0 with value: 11.126343586501196.


Epoch [100/100], Loss: 0.009276356548070908
Epoch [20/100], Loss: 0.025515267625451088
Epoch [40/100], Loss: 0.02062283828854561
Epoch [60/100], Loss: 0.016243500635027885
Epoch [80/100], Loss: 0.01464024093002081


[I 2023-12-23 18:45:23,322] Trial 3 finished with value: 91.63723154653412 and parameters: {'learning_rate': 0.00022306742134387437, 'hidden_size': 35, 'num_layers': 6, 'batch_size': 128, 'dropout': 0.18195523348668183}. Best is trial 0 with value: 11.126343586501196.


Epoch [100/100], Loss: 0.010686234571039677
Epoch [20/100], Loss: 0.0037886607460677624
Epoch [40/100], Loss: 0.0033743176609277725
Epoch [60/100], Loss: 0.0024595262948423624
Epoch [80/100], Loss: 0.0021341638639569283


[I 2023-12-23 18:47:10,038] Trial 4 finished with value: 40.08491299666899 and parameters: {'learning_rate': 0.00028353462770069914, 'hidden_size': 25, 'num_layers': 2, 'batch_size': 32, 'dropout': 0.1384879090405875}. Best is trial 0 with value: 11.126343586501196.


Epoch [100/100], Loss: 0.0018128203228116035
Epoch [20/100], Loss: 0.07577995210886002
Epoch [40/100], Loss: 0.08135593682527542
Epoch [60/100], Loss: 0.08919282257556915
Epoch [80/100], Loss: 0.09299831837415695


[I 2023-12-23 18:47:57,044] Trial 5 finished with value: 20.344153024699573 and parameters: {'learning_rate': 0.006676158644850267, 'hidden_size': 15, 'num_layers': 7, 'batch_size': 128, 'dropout': 0.35567611817589917}. Best is trial 0 with value: 11.126343586501196.


Epoch [100/100], Loss: 0.08754562586545944
Epoch [20/100], Loss: 0.03002062998712063
Epoch [40/100], Loss: 0.044361092150211334
Epoch [60/100], Loss: 0.06052280589938164
Epoch [80/100], Loss: 0.03930492699146271


[I 2023-12-23 18:51:15,209] Trial 6 finished with value: 20.724087159989768 and parameters: {'learning_rate': 0.0012860845591615977, 'hidden_size': 40, 'num_layers': 10, 'batch_size': 32, 'dropout': 0.21328516071511203}. Best is trial 0 with value: 11.126343586501196.


Epoch [100/100], Loss: 0.028967274352908134
Epoch [20/100], Loss: 0.033728644251823425
Epoch [40/100], Loss: 0.023362979292869568
Epoch [60/100], Loss: 0.021399369463324547
Epoch [80/100], Loss: 0.01846548542380333


[I 2023-12-23 18:52:29,811] Trial 7 finished with value: 95.32590341372776 and parameters: {'learning_rate': 0.0068919185824332495, 'hidden_size': 15, 'num_layers': 6, 'batch_size': 64, 'dropout': 0.3480872857266776}. Best is trial 0 with value: 11.126343586501196.


Epoch [100/100], Loss: 0.01776016131043434
Epoch [20/100], Loss: 0.011517936363816261
Epoch [40/100], Loss: 0.009943433105945587
Epoch [60/100], Loss: 0.008007503114640713
Epoch [80/100], Loss: 0.007846965454518795


[I 2023-12-23 18:53:25,155] Trial 8 finished with value: 30.708906525902744 and parameters: {'learning_rate': 0.0006189093178816213, 'hidden_size': 45, 'num_layers': 3, 'batch_size': 128, 'dropout': 0.4348504540175179}. Best is trial 0 with value: 11.126343586501196.


Epoch [100/100], Loss: 0.0064843930304050446
Epoch [20/100], Loss: 0.05045502260327339
Epoch [40/100], Loss: 0.022689100354909897
Epoch [60/100], Loss: 0.019704649224877357
Epoch [80/100], Loss: 0.01931837759912014


[I 2023-12-23 18:56:26,965] Trial 9 finished with value: 74.37339802831114 and parameters: {'learning_rate': 0.0001939449624483117, 'hidden_size': 40, 'num_layers': 10, 'batch_size': 64, 'dropout': 0.21721558213059378}. Best is trial 0 with value: 11.126343586501196.


Epoch [100/100], Loss: 0.01765473000705242
Epoch [20/100], Loss: 0.06045743077993393
Epoch [40/100], Loss: 0.050409115850925446
Epoch [60/100], Loss: 0.05022186040878296
Epoch [80/100], Loss: 0.04827507585287094


[I 2023-12-23 18:57:36,215] Trial 10 finished with value: 19.13656602338714 and parameters: {'learning_rate': 0.03896500204512589, 'hidden_size': 10, 'num_layers': 4, 'batch_size': 64, 'dropout': 0.4857019170551138}. Best is trial 0 with value: 11.126343586501196.


Epoch [100/100], Loss: 0.04959206283092499
Epoch [20/100], Loss: 0.07098069787025452
Epoch [40/100], Loss: 0.05111311376094818
Epoch [60/100], Loss: 0.057478565722703934
Epoch [80/100], Loss: 0.07120084017515182


[I 2023-12-23 18:58:45,136] Trial 11 finished with value: 28.315548198068402 and parameters: {'learning_rate': 0.05429067799286113, 'hidden_size': 5, 'num_layers': 4, 'batch_size': 64, 'dropout': 0.4993710604732742}. Best is trial 0 with value: 11.126343586501196.


Epoch [100/100], Loss: 0.07122325897216797
Epoch [20/100], Loss: 0.049553144723176956
Epoch [40/100], Loss: 0.05020619556307793
Epoch [60/100], Loss: 0.04727892577648163
Epoch [80/100], Loss: 0.0654643103480339


[I 2023-12-23 18:59:52,519] Trial 12 finished with value: 10.857660870896238 and parameters: {'learning_rate': 0.045411478943589066, 'hidden_size': 5, 'num_layers': 4, 'batch_size': 64, 'dropout': 0.49852938147935916}. Best is trial 12 with value: 10.857660870896238.


Epoch [100/100], Loss: 0.05122563987970352
Epoch [20/100], Loss: 0.10791610181331635
Epoch [40/100], Loss: 0.06209280714392662
Epoch [60/100], Loss: 0.06322503089904785
Epoch [80/100], Loss: 0.06213310733437538


[I 2023-12-23 19:01:00,443] Trial 13 finished with value: 11.211921520212861 and parameters: {'learning_rate': 0.018827759065772862, 'hidden_size': 5, 'num_layers': 5, 'batch_size': 64, 'dropout': 0.4486868152310195}. Best is trial 12 with value: 10.857660870896238.


Epoch [100/100], Loss: 0.06289715319871902
Epoch [20/100], Loss: 0.06381219625473022
Epoch [40/100], Loss: 0.06386886537075043
Epoch [60/100], Loss: 0.06387466192245483
Epoch [80/100], Loss: 0.06387537717819214


[I 2023-12-23 19:02:20,686] Trial 14 finished with value: 24.723457206763666 and parameters: {'learning_rate': 0.08036485768837771, 'hidden_size': 5, 'num_layers': 8, 'batch_size': 64, 'dropout': 0.43890336076828995}. Best is trial 12 with value: 10.857660870896238.


Epoch [100/100], Loss: 0.06387543678283691
Epoch [20/100], Loss: 0.08965639770030975
Epoch [40/100], Loss: 0.08158917725086212
Epoch [60/100], Loss: 0.09685514867305756
Epoch [80/100], Loss: 0.10563474148511887


[I 2023-12-23 19:03:43,313] Trial 15 finished with value: 10.178426319494083 and parameters: {'learning_rate': 0.018546802539428205, 'hidden_size': 25, 'num_layers': 8, 'batch_size': 64, 'dropout': 0.48655738052492503}. Best is trial 15 with value: 10.178426319494083.


Epoch [100/100], Loss: 0.10653971135616302
Epoch [20/100], Loss: 0.05724477022886276
Epoch [40/100], Loss: 0.06681285798549652
Epoch [60/100], Loss: 0.06581161916255951
Epoch [80/100], Loss: 0.06678174436092377


[I 2023-12-23 19:05:12,118] Trial 16 finished with value: 27.258810419410935 and parameters: {'learning_rate': 0.02350892670264609, 'hidden_size': 30, 'num_layers': 8, 'batch_size': 64, 'dropout': 0.3971104259547732}. Best is trial 15 with value: 10.178426319494083.


Epoch [100/100], Loss: 0.06681732833385468
Epoch [20/100], Loss: 0.04327904060482979
Epoch [40/100], Loss: 0.08567192405462265
Epoch [60/100], Loss: 0.06335314363241196
Epoch [80/100], Loss: 0.08158881962299347


[I 2023-12-23 19:06:07,126] Trial 17 finished with value: 24.758842578568167 and parameters: {'learning_rate': 0.09248685575188308, 'hidden_size': 50, 'num_layers': 2, 'batch_size': 64, 'dropout': 0.4973134426404715}. Best is trial 15 with value: 10.178426319494083.


Epoch [100/100], Loss: 0.0624210461974144
Epoch [20/100], Loss: 0.09103534370660782
Epoch [40/100], Loss: 0.05351588502526283
Epoch [60/100], Loss: 0.054995354264974594
Epoch [80/100], Loss: 0.07138160616159439


[I 2023-12-23 19:07:11,329] Trial 18 finished with value: 30.945495755334942 and parameters: {'learning_rate': 0.016128708043272273, 'hidden_size': 25, 'num_layers': 4, 'batch_size': 64, 'dropout': 0.3954084356426193}. Best is trial 15 with value: 10.178426319494083.


Epoch [100/100], Loss: 0.07710012048482895
Epoch [20/100], Loss: 0.04724040627479553
Epoch [40/100], Loss: 0.04723721742630005
Epoch [60/100], Loss: 0.0472370907664299
Epoch [80/100], Loss: 0.04911429435014725


[I 2023-12-23 19:09:46,435] Trial 19 finished with value: 86.02770058998117 and parameters: {'learning_rate': 0.03391730227288369, 'hidden_size': 30, 'num_layers': 9, 'batch_size': 32, 'dropout': 0.29883021358169937}. Best is trial 15 with value: 10.178426319494083.


Epoch [100/100], Loss: 0.047839827835559845
Epoch [20/100], Loss: 0.03836609050631523
Epoch [40/100], Loss: 0.07369405031204224
Epoch [60/100], Loss: 0.08335959911346436
Epoch [80/100], Loss: 0.08731050044298172


[I 2023-12-23 19:10:54,324] Trial 20 finished with value: 12.759265790105454 and parameters: {'learning_rate': 0.012152792232769015, 'hidden_size': 20, 'num_layers': 5, 'batch_size': 64, 'dropout': 0.46911758928674513}. Best is trial 15 with value: 10.178426319494083.


Epoch [100/100], Loss: 0.05573653429746628
Epoch [20/100], Loss: 0.08361807465553284
Epoch [40/100], Loss: 0.06269584596157074
Epoch [60/100], Loss: 0.05718686804175377
Epoch [80/100], Loss: 0.08297432959079742


[I 2023-12-23 19:12:12,570] Trial 21 finished with value: 9.189601160583814 and parameters: {'learning_rate': 0.008064783050198608, 'hidden_size': 10, 'num_layers': 7, 'batch_size': 64, 'dropout': 0.4594444109038407}. Best is trial 21 with value: 9.189601160583814.


Epoch [100/100], Loss: 0.06929463893175125
Epoch [20/100], Loss: 0.12018130719661713
Epoch [40/100], Loss: 0.11958955228328705
Epoch [60/100], Loss: 0.11955852061510086
Epoch [80/100], Loss: 0.11956940591335297


[I 2023-12-23 19:13:29,923] Trial 22 finished with value: 10.795240808278253 and parameters: {'learning_rate': 0.009458604353316948, 'hidden_size': 10, 'num_layers': 7, 'batch_size': 64, 'dropout': 0.4986362980946092}. Best is trial 21 with value: 9.189601160583814.


Epoch [100/100], Loss: 0.11957070976495743
Epoch [20/100], Loss: 0.04683319106698036
Epoch [40/100], Loss: 0.06186496093869209
Epoch [60/100], Loss: 0.07312685251235962
Epoch [80/100], Loss: 0.069434754550457


[I 2023-12-23 19:14:45,066] Trial 23 finished with value: 9.892630069226673 and parameters: {'learning_rate': 0.010004337919291287, 'hidden_size': 15, 'num_layers': 7, 'batch_size': 64, 'dropout': 0.4607881788350834}. Best is trial 21 with value: 9.189601160583814.


Epoch [100/100], Loss: 0.07813885062932968
Epoch [20/100], Loss: 0.06902285665273666
Epoch [40/100], Loss: 0.09736047685146332
Epoch [60/100], Loss: 0.0959208682179451
Epoch [80/100], Loss: 0.0956764966249466


[I 2023-12-23 19:16:07,658] Trial 24 finished with value: 10.1315829105087 and parameters: {'learning_rate': 0.004358580511797656, 'hidden_size': 20, 'num_layers': 9, 'batch_size': 64, 'dropout': 0.45427870688996963}. Best is trial 21 with value: 9.189601160583814.


Epoch [100/100], Loss: 0.09561676532030106
Epoch [20/100], Loss: 0.07525647431612015
Epoch [40/100], Loss: 0.08140435069799423
Epoch [60/100], Loss: 0.08579450100660324
Epoch [80/100], Loss: 0.06521964818239212


[I 2023-12-23 19:17:30,224] Trial 25 finished with value: 11.68631029439908 and parameters: {'learning_rate': 0.0033705508261851628, 'hidden_size': 15, 'num_layers': 9, 'batch_size': 64, 'dropout': 0.41639785283833936}. Best is trial 21 with value: 9.189601160583814.


Epoch [100/100], Loss: 0.0644519180059433
Epoch [20/100], Loss: 0.06450732797384262
Epoch [40/100], Loss: 0.05797417461872101
Epoch [60/100], Loss: 0.06056022644042969
Epoch [80/100], Loss: 0.07379104942083359


[I 2023-12-23 19:18:52,549] Trial 26 finished with value: 10.48987735021206 and parameters: {'learning_rate': 0.006562409096645486, 'hidden_size': 20, 'num_layers': 9, 'batch_size': 64, 'dropout': 0.4596637989379206}. Best is trial 21 with value: 9.189601160583814.


Epoch [100/100], Loss: 0.07982713729143143
Epoch [20/100], Loss: 0.056720905005931854
Epoch [40/100], Loss: 0.059986378997564316
Epoch [60/100], Loss: 0.030960923060774803
Epoch [80/100], Loss: 0.021037118509411812


[I 2023-12-23 19:20:08,369] Trial 27 finished with value: 77.79897158059165 and parameters: {'learning_rate': 0.0024913666702540433, 'hidden_size': 15, 'num_layers': 7, 'batch_size': 64, 'dropout': 0.41399936326003184}. Best is trial 21 with value: 9.189601160583814.


Epoch [100/100], Loss: 0.01533550675958395
Epoch [20/100], Loss: 0.07792073488235474
Epoch [40/100], Loss: 0.0907500833272934
Epoch [60/100], Loss: 0.11163311451673508
Epoch [80/100], Loss: 0.12580886483192444


[I 2023-12-23 19:20:53,768] Trial 28 finished with value: 11.623465236580365 and parameters: {'learning_rate': 0.010227391073038054, 'hidden_size': 20, 'num_layers': 6, 'batch_size': 128, 'dropout': 0.4623126446094664}. Best is trial 21 with value: 9.189601160583814.


Epoch [100/100], Loss: 0.08894767612218857
Epoch [20/100], Loss: 0.04969954118132591
Epoch [40/100], Loss: 0.0772918313741684
Epoch [60/100], Loss: 0.06242728233337402
Epoch [80/100], Loss: 0.05288274586200714


[I 2023-12-23 19:23:33,820] Trial 29 finished with value: 11.467471327781125 and parameters: {'learning_rate': 0.004495488845486523, 'hidden_size': 10, 'num_layers': 9, 'batch_size': 32, 'dropout': 0.43509380552487575}. Best is trial 21 with value: 9.189601160583814.


Epoch [100/100], Loss: 0.06689434498548508


## Using Pytorch Forecasting Library

In [1]:
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger

In [12]:
import os
# os.environ['PYTORCH_CUDA_ALLOC_CONF'] = "max_split_size_mb:128"
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import pandas as pd
import pickle
import argparse

from pytorch_forecasting import GRU, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer, NaNLabelEncoder
from pytorch_forecasting.metrics import SMAPE
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

import warnings
# To ignore all FutureWarnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore", ".*does not have many workers.*")
warnings.filterwarnings("ignore", ".*and is already saved during checkpointing*")
warnings.filterwarnings("ignore", ".*The number of training batches*")

import logging
logging.getLogger("lightning").setLevel(logging.ERROR)
logging.getLogger("pytorch_lightning.utilities.rank_zero").setLevel(logging.ERROR)
logging.getLogger("pytorch_lightning.accelerators.cuda").setLevel(logging.ERROR)


In [22]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
df_original = pd.read_csv('../../data/electricity/df_electricity_processed.csv')
train_df = pd.read_csv('../../data/electricity/train_df.csv')
test_df = pd.read_csv('../../data/electricity/test_df.csv')
X_train_df = pd.read_csv('../../data/electricity/y_train_df.csv')
X_test_df = pd.read_csv('../../data/electricity/X_test_df.csv')
y_train_df = pd.read_csv('../../data/electricity/y_train_df.csv')
y_test_df = pd.read_csv('../../data/electricity/y_test_df.csv')

In [4]:
feature_variable = train_df.drop(columns=['datetime_utc','price_de']).columns
target_variable = 'price_de'
timestemp_col = 'datetime_utc'
step_size = 24

In [5]:
df = train_df.copy()
df['datetime_utc'] = pd.to_datetime(df['datetime_utc'])
df['datetime_utc'] = (df['datetime_utc'] - df['datetime_utc'].min()).dt.total_seconds() // 3600 + 1 #df_train_val['ds'].max() + 1
df['datetime_utc'] = df['datetime_utc'].astype(int)
df_train_val = df.reset_index(drop=True)

In [6]:
max_encoder_length = 24*7
max_prediction_length = 24

In [16]:
hyperparameters = {
    "seq_length": 24 * 7,             # Sequence length
    "target_seq_length": 24,          # Target sequence length for forecasting
    "input_size": len(feature_variable), #1,                  # Input size
    "hidden_size": 50,                # Hidden size of GRU
    "num_layers": 5,                  # Number of layers in GRU
    "output_size": len(feature_variable),                 # Output size
    "learning_rate": 0.005,           # Learning rate
    "epochs": 500,                    # Number of training epochs
    "batch_size": 128                 # Batch size
}

In [7]:
df_train_val['unique_id'] = 'H1'
df_train_val

Unnamed: 0,datetime_utc,price_de_lag_336,price_de_avg_24,price_at_lag_24,price_at_avg_24,price_fr_lag_24,price_fr_avg_24,load_de_lag_24,load_de_lag_168,load_at,gen_de,gen_de_lag_168,gen_at,gen_fr,windon_de,solar_de,price_de,unique_id
0,1,135.71,143.192083,130.59,145.302083,136.77,142.511250,48193.0200,49161.9775,4868.0,49417.49,50407.76,4079.2,40284.0,16063.7425,0.0,80.65,H1
1,2,140.35,141.425000,119.09,144.504167,119.09,140.804167,44770.8675,48136.9350,4378.0,49446.61,49715.33,4027.1,39246.0,16839.0400,0.0,76.68,H1
2,3,139.95,138.880833,116.32,143.657500,116.32,138.950417,43981.9450,47600.7900,4829.0,48568.35,49051.50,3933.9,39366.5,17616.8775,0.0,55.26,H1
3,4,150.30,135.647500,114.60,142.480000,114.60,136.477083,43422.0775,47753.3075,4773.0,48487.32,49083.27,3766.6,37003.5,17984.3850,0.0,37.00,H1
4,5,149.51,132.467917,115.07,141.435417,115.07,134.017500,43581.7450,49360.8175,4833.0,49072.56,50384.18,3585.8,34949.5,18902.0375,0.0,38.76,H1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8993,8996,120.89,90.798333,112.41,95.982917,112.22,90.937500,64363.1600,62435.3950,7798.0,54701.73,60625.63,7268.6,63410.5,15818.6625,0.0,123.58,H1
8994,8997,100.05,91.503333,99.15,96.557083,103.63,91.325000,62330.2025,59991.6850,7289.0,51345.73,60074.36,6770.3,61559.0,15185.0900,0.0,112.93,H1
8995,8998,96.84,92.433333,85.54,97.372083,92.50,91.850000,58638.3150,56167.5975,6780.0,47738.81,58340.22,6173.9,59780.5,14189.9350,0.0,105.10,H1
8996,8999,93.70,93.263750,84.04,98.202500,84.04,92.680417,55156.1275,52589.0625,6568.0,45213.73,56945.46,5898.0,58930.5,13123.5375,0.0,103.97,H1


In [8]:
training = TimeSeriesDataSet(
    df_train_val.iloc[:-max_prediction_length],
    time_idx="datetime_utc",
    target="price_de",
    # time_varying_unknown_reals = feature_variable,
    group_ids=['unique_id'],
    max_encoder_length=max_encoder_length,
    # min_encoder_length=min_encoder_length,
    # min_encoder_length=max_encoder_length // 2,
    # min_encoder_length=1,
    max_prediction_length=max_prediction_length,
    # min_prediction_length=max_prediction_length // 2,
    # min_prediction_length=1,
    # time_varying_known_reals=['y_arima', 'y_theta', 'y_xgb', 'y_gru', 'y_lstm'],  # Base model forecasts
    target_normalizer=GroupNormalizer(
        groups=["unique_id"], transformation="softplus"
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True
    )

In [9]:
validation = TimeSeriesDataSet.from_dataset(training, df_train_val, predict=True, stop_randomization=True)

In [15]:
train_dataloader = DataLoader(training, batch_size=32, shuffle=True)
val_dataloader = DataLoader(validation, batch_size=64)

In [10]:
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-5, patience=50, verbose=False, mode="min")

In [13]:
class GRU_Model(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        gru_out, _ = self.gru(x, h0)
        out = self.fc(gru_out[:, -1, :])
        return out

In [14]:
trainer = pl.Trainer(
        max_epochs=10,
        accelerator="gpu",
        gradient_clip_val=0.1,
        # limit_train_batches=50,  # coment in for training, running valiation every 30 batches
        # fast_dev_run=True,  # comment in to check that networkor dataset has no serious bugs
        callbacks=[early_stop_callback],
        logger=False,
        enable_model_summary=False,
        enable_checkpointing=False
    )


# tft = GRU.from_dataset(
#     training,
#     # not meaningful for finding the learning rate but otherwise very important
#     learning_rate=0.03,
#     hidden_size=8,  # most important hyperparameter apart from learning rate
#     # number of attention heads. Set to up to 4 for large datasets
#     attention_head_size=1,
#     dropout=0.1,  # between 0.1 and 0.3 are good values
#     hidden_continuous_size=8,  # set to <= hidden_size
#     loss=SMAPE(),
#     optimizer="Ranger"
#     # reduce learning rate if no improvement in validation loss after x epochs
#     # reduce_on_plateau_patience=1000,
# )


In [25]:
def train_model(model, 
                criterion, 
                optimizer, 
                # X_train, 
                # y_train, 
                batch_size,
                epochs):
    
    # dataset = TensorDataset(X_train, y_train)
    data_loader = train_dataloader

    for epoch in range(epochs):
        
        model.train()

        for batch_idx, (sequences, targets) in enumerate(data_loader):
            sequences, targets = sequences.to(device), targets.to(device)

            optimizer.zero_grad()
            pred = model(sequences)
            loss = criterion(pred, targets)
            loss.backward()
            optimizer.step()

        if (epoch + 1) % 1 == 0:
            print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item()}')

    # generating forecasts
    model.eval()
    last_sequence = X_train[-1:].to(device) # [1, 168, 16]
    # with torch.no_grad():
    #     forecast_seq = model(last_sequence) 


    forecast_seq = torch.Tensor().to(device)
    
    for _ in range(hyperparameters["target_seq_length"]):
        with torch.no_grad():
            next_step_forecast = model(last_sequence) # [1, 16]
            # print(next_step_forecast.size()) # [1, 16]
            # print(next_step_forecast[:, -1:].size()) # [1, 1]
            # print(next_step_forecast.unsqueeze(-1).size()) # [1, 16, 1]
            # print(next_step_forecast.unsqueeze(1).size()) # [1, 1, 16]
            # break
            # forecast_seq = torch.cat((forecast_seq, next_step_forecast[:, -1:]), dim=1)
            forecast_seq = torch.cat((forecast_seq, next_step_forecast), dim=0) # [1, 16, 1]
            # print(forecast_seq.size())
            # break
            last_sequence = torch.cat((last_sequence[:, 1:, :], next_step_forecast.unsqueeze(1)), dim=1)
    
    return model, forecast_seq

In [26]:
# y_hat_df_gru = test_data.copy().rename(columns={'y' : 'y_hat'})
# y_hat_df_gru['y_hat'] = pd.Series(dtype='float64')

all_forecast_seq_descaled = []

# X_train = torch.from_numpy(sequences_dict['X'].astype(np.float32))#.unsqueeze(-1)
# y_train = torch.from_numpy(sequences_dict['y'].astype(np.float32)).unsqueeze(1)

model = GRU_Model(hyperparameters['input_size'], hyperparameters['hidden_size'], hyperparameters['num_layers'], hyperparameters['output_size'])
model.to(device)

criterion = nn.MSELoss()
# criterion = smape_loss
optimizer = torch.optim.Adam(model.parameters(), lr=hyperparameters['learning_rate'])

model_gru, forecast_seq = train_model(model,
                                    criterion=criterion,
                                    optimizer=optimizer,
                                    # X_train=X_train,
                                    # y_train=y_train,
                                    batch_size=hyperparameters['batch_size'],
                                    epochs=hyperparameters['epochs'])

# forecast_seq_descaled = sequences_dict['scaler'].inverse_transform(forecast_seq.cpu().numpy())
# all_forecast_seq_descaled = np.hstack(all_forecast_seq_descaled, forecast_seq_descaled)
# all_forecast_seq_descaled.append(forecast_seq_descaled)

# all_forecast_seq_descaled = np.hstack(all_forecast_seq_descaled)

# y_hat_df_gru['y_hat'] = all_forecast_seq_descaled


TypeError: __init__() missing 1 required positional argument: 'dropout'

In [17]:
model = GRU_Model(hyperparameters['input_size'], hyperparameters['hidden_size'], hyperparameters['num_layers'], hyperparameters['output_size'])

In [19]:
trainer.fit(
    GRU,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
    )

TypeError: `model` must be a `LightningModule` or `torch._dynamo.OptimizedModule`, got `ABCMeta`

In [28]:
for x, y in train_dataloader:
    print(x.shape)
    print(y.shape)
    break

TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'NoneType'>