# DeepAR / Probalistic RNN

In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [20]:
# Import the data 
def load_and_prepare_data(file_path):
    """
    Load energy prices data from a CSV file, ensure chronological order, and convert 'Date' to datetime.
    """
    df = pd.read_csv(file_path)
    df.sort_values('Date', inplace=True)
    # Convert 'date' column to datetime
    df['Date'] = pd.to_datetime(df['Date'])
    #df.set_index('Date', inplace=True)
    df = pd.DataFrame(df)
    return df

In [21]:
# Load in the train and test data
train_df = load_and_prepare_data('../../data/Final_data/train_df.csv')
test_df = load_and_prepare_data('../../data/Final_data/test_df.csv')

# Concatenate the train and test data
df = pd.concat([train_df, test_df])
df['Date'] = pd.to_datetime(df['Date'])

# Create a time series object
#series_train = TimeSeries.from_dataframe(train_df, 'Date', 'Day_ahead_price (€/MWh)').astype('float32')
#series_test = TimeSeries.from_dataframe(test_df, 'Date', 'Day_ahead_price (€/MWh)').astype('float32')

# Show 
df

Unnamed: 0,Date,Day_ahead_price (€/MWh),Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),Other (GWh),Pumped_storage_generation (GWh),Solar_energy (GWh),Wind_offshore (GWh),Wind_onshore (GWh),Net_total_export_import (GWh),BEV_vehicles,Oil_price (EUR),TTF_gas_price (€/MWh),Nuclear_energy (GWh)
0,2012-01-01,18.19,14.75,4.95,8.39,98.605,108.454,51.011,325.337,188.811,54.040,19.314,6.263,3.404,235.467,54.662,6,99.64,21.10,250.979
1,2012-01-02,33.82,15.12,5.00,7.41,98.605,222.656,51.862,343.168,229.293,54.166,28.892,6.312,3.350,231.772,-64.477,6,100.04,20.00,258.671
2,2012-01-03,35.03,31.88,7.77,5.23,98.605,162.204,48.851,336.773,241.297,53.518,21.072,24.226,7.292,504.484,-35.078,6,100.44,20.90,271.495
3,2012-01-04,32.16,25.21,8.04,4.78,98.605,189.633,47.101,323.976,252.289,52.194,28.300,14.157,7.828,541.528,22.924,6,103.15,21.40,270.613
4,2012-01-05,20.35,13.46,9.98,4.23,98.605,175.733,45.854,327.502,259.018,52.179,31.887,4.728,8.280,572.819,35.618,6,103.92,21.30,287.555
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
754,2024-07-24,66.61,225.04,3.47,17.54,110.007,43.469,85.857,199.246,194.291,54.026,20.934,325.285,49.360,179.921,-168.705,992,75.75,32.63,0.000
755,2024-07-25,78.34,272.71,2.12,17.85,110.410,50.676,82.632,195.983,209.610,52.963,18.766,394.116,51.053,42.885,-194.496,992,76.36,31.70,0.000
756,2024-07-26,93.04,172.33,2.60,19.09,110.852,42.333,79.531,205.273,205.773,52.616,19.081,256.246,40.449,129.267,-241.786,993,75.21,32.20,0.000
757,2024-07-27,80.74,176.67,2.05,19.63,110.479,33.307,74.958,184.012,216.412,50.927,18.856,244.051,2.180,32.001,-251.655,992,74.79,32.90,0.000


In [22]:
# Define the future covariates columns from your dataframe
future_covariates_columns = ['Solar_radiation (W/m2)', 'Wind_speed (m/s)', 'Temperature (°C)', 
                             'Biomass (GWh)', 'Hard_coal (GWh)', 'Hydro (GWh)', 'Lignite (GWh)', 
                             'Natural_gas (GWh)', 'Other (GWh)', 'Pumped_storage_generation (GWh)', 
                             'Solar_energy (GWh)', 'Wind_offshore (GWh)', 'Wind_onshore (GWh)', 
                             'Net_total_export_import (GWh)', 'BEV_vehicles', 'Oil_price (EUR)', 
                             'TTF_gas_price (€/MWh)', 'Nuclear_energy (GWh)']

In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import shutil
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm_notebook as tqdm
import optuna

import matplotlib.pyplot as plt

from darts import TimeSeries
from darts.utils.callbacks import TFMProgressBar
from darts.utils.statistics import check_seasonality, plot_acf
import darts.utils.timeseries_generation as tg
from darts.utils.likelihood_models import GaussianLikelihood
from darts.models import RNNModel
from darts.dataprocessing.transformers import Scaler
from darts.timeseries import concatenate
from sklearn.preprocessing import MaxAbsScaler
import warnings

warnings.filterwarnings("ignore")


In [17]:
import traceback  # Add this import at the beginning
import pandas as pd
from darts import TimeSeries
from darts.models import RNNModel
from darts.dataprocessing.transformers import Scaler
from darts.metrics import mape, mae, rmse, mse
from sklearn.preprocessing import MaxAbsScaler

# Create a time series object
series_train = TimeSeries.from_dataframe(train_df, 'Date', 'Day_ahead_price (€/MWh)').astype('float32')
series_test = TimeSeries.from_dataframe(test_df, 'Date', 'Day_ahead_price (€/MWh)').astype('float32')

# Convert future covariates to TimeSeries objects
future_covariates_train = TimeSeries.from_dataframe(train_df, 'Date', future_covariates_columns).astype('float32')
future_covariates_test = TimeSeries.from_dataframe(test_df, 'Date', future_covariates_columns).astype('float32')

# Determine required start date for future covariates
max_input_chunk_length = 200  # Maximum input_chunk_length from your hyperparameter search
required_start_date = series_test.start_time() - pd.DateOffset(days=(max_input_chunk_length - 1))

# Ensure future_covariates_full covers the required range
required_end_date = pd.Timestamp(test_df['Date'].iloc[0]) + pd.DateOffset(days=len(series_test)-1)

# Check if future_covariates_full has sufficient data
if future_covariates_full.start_time() > required_start_date or future_covariates_full.end_time() < required_end_date:
    print("Warning: The future_covariates_full is not long enough to cover the required input chunk length and prediction range.")
    
# Slice the future covariates to the required range, including data from the training period
future_covariates_test = future_covariates_full.slice(required_start_date, required_end_date)

# Scaling the data
scaler_series = Scaler(MaxAbsScaler())
scaler_covariates = Scaler(MaxAbsScaler())

# Fit the scaler on the training data
series_train_scaled = scaler_series.fit_transform(series_train)
future_covariates_train_scaled = scaler_covariates.fit_transform(future_covariates_train)

# Transform the test series and future covariates using the same scaler
series_test_scaled = scaler_series.transform(series_test)
future_covariates_test_scaled = scaler_covariates.transform(future_covariates_test)

# Slice the future covariates to the required range, including data from the training period
future_covariates_train = future_covariates_full.slice(required_start_date, series_train.end_time())
future_covariates_test = future_covariates_full.slice(series_train.end_time() + pd.DateOffset(days=1), required_end_date)

# Scaling the data
scaler_series = Scaler(MaxAbsScaler())
scaler_covariates = Scaler(MaxAbsScaler())

# Fit the scaler on the training data
series_train_scaled = scaler_series.fit_transform(series_train)
future_covariates_train_scaled = scaler_covariates.fit_transform(future_covariates_train)

# Transform the test series and future covariates using the same scaler
series_test_scaled = scaler_series.transform(series_test)
future_covariates_test_scaled = scaler_covariates.transform(future_covariates_test)

# Define the Optuna objective function without backtesting
def objective(trial):
    # Suggest hyperparameters
    hidden_dim = trial.suggest_int('hidden_dim', 10, 100)
    n_layers = trial.suggest_int('n_layers', 1, 5)
    dropout = trial.suggest_uniform('dropout', 0.0, 0.5)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
    input_chunk_length = trial.suggest_int('input_chunk_length', 30, 200)
    n_epochs = 3  # Adjust epochs for optimization

    # Ensure training_length >= input_chunk_length
    training_length = max(200, input_chunk_length)

    # Define the model
    model = RNNModel(
        model='LSTM',
        input_chunk_length=input_chunk_length,
        training_length=training_length,
        hidden_dim=hidden_dim,
        n_rnn_layers=n_layers,
        dropout=dropout,
        likelihood=GaussianLikelihood(),
        batch_size=batch_size,
        n_epochs=n_epochs,
        optimizer_kwargs={'lr': learning_rate},
        random_state=42,
        pl_trainer_kwargs={
            'accelerator': 'gpu',  # Use GPU
            'devices': 1,
            'enable_progress_bar': True,
            'logger': False,
            'enable_model_summary': False,
        }
    )

    try:
        # Fit the model on the training data
        model.fit(
            series_train_scaled,
            future_covariates=future_covariates_full,
            verbose=False
        )

        # Make predictions
        forecast_val = model.predict(
            n=1,
            future_covariates=future_covariates_test_scaled
        )

        # Calculate MAPE on the validation set
        error = mape(series_test_scaled, forecast_val)
    except Exception as e:
        print(f'Exception during model training: {e}')
        traceback.print_exc()
        return float('inf')

    return error

# Run Optuna optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=1)

# Get the best hyperparameters
best_params = study.best_params
print('Best hyperparameters:')
for key, value in best_params.items():
    print(f'  {key}: {value}')

# Ensure training_length >= input_chunk_length
best_training_length = max(200, best_params['input_chunk_length'])

# Train the best model on the full training data
best_model = RNNModel(
    model='LSTM',
    input_chunk_length=best_params['input_chunk_length'],
    training_length=best_training_length,
    hidden_dim=best_params['hidden_dim'],
    n_rnn_layers=best_params['n_layers'],
    dropout=best_params['dropout'],
    likelihood=GaussianLikelihood(),
    batch_size=best_params['batch_size'],
    n_epochs=10,  # Increase epochs for final training
    optimizer_kwargs={'lr': best_params['learning_rate']},
    random_state=42,
    pl_trainer_kwargs={
        'accelerator': 'gpu',  # Use GPU
        'devices': 1,
        'enable_progress_bar': True,
        'logger': False,
        'enable_model_summary': False,
    }
)

best_model.fit(
    series_train_scaled,
    future_covariates=future_covariates_full,
    verbose=True
)

# Determine forecast horizon based on available covariates
max_n = (future_covariates_test.end_time() - series_test.start_time()).days + 1
n = min(len(series_test_scaled), max_n)

# Make predictions on the test set
forecast = best_model.predict(
    n=n,
    future_covariates=future_covariates_test_scaled
)

# Adjust test_series to match forecast length
test_series = test_series[:n]

# Continue with inverse transformation, plotting, and error metrics as before

# Inverse transform the forecast and test_series
forecast = scaler_series.inverse_transform(forecast)
test_series = scaler_series.inverse_transform(series_test_scaled)

# Plot the forecast
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=test_series.time_index,
    y=test_series.values().squeeze(),
    mode='lines',
    name='Actual',
    line=dict(color='blue')
))

fig.add_trace(go.Scatter(
    x=forecast.time_index,
    y=forecast.values().squeeze(),
    mode='lines',
    name='Forecast',
    line=dict(color='red', dash='dash')
))

fig.update_layout(
    title='RNN Model - Time Series Forecast',
    xaxis_title='Date',
    yaxis_title='Day Ahead Price (€/MWh)',
    legend=dict(
        x=1,
        y=1,
        xanchor='right',
        yanchor='top',
        bordercolor='black',
        borderwidth=1
    ),
    template='plotly'
)

fig.show()

# Calculate and print error metrics
print('Error Metrics on Test Set:')
print(f'  Mean Absolute Percentage Error (MAPE): {mape(test_series, forecast):.2f}%')
print(f'  Mean Absolute Error (MAE): {mae(test_series, forecast):.2f}')
print(f'  Root Mean Squared Error (RMSE): {rmse(test_series, forecast):.2f}')
print(f'  Mean Squared Error (MSE): {mse(test_series, forecast):.2f}')


[I 2024-09-19 11:07:45,760] A new study created in memory with name: no-name-6e30a061-fa4c-4829-a140-3032af822e01
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=3` reached.
ValueError: For the given forecasting case, the provided future covariates at dataset index `0` do not extend far enough into the past. The future covariates must start at time step `2022-05-16 00:00:00`, whereas now they start at time step `2022-07-01 00:00:00`.
Traceback (most recent call last):
  File "/var/folders/8w/b_0gc01d70g6h4k62sb7ytyw0000gn/T/ipykernel_47611/2817176609.py", line 104, in objective
    forecast_val = model.predict(
  File "/Users/skyfano/Documents/Masterarbeit/Prediction_of_energy_prices/master_thesis/lib/python3.10/site-packages/darts/utils/torch.py", line 103, in decorator
    return decorated(self, *args, **kwargs)
  File "/Users/skyfano/Documents/Masterarbeit/Prediction_of_energy_price

Exception during model training: For the given forecasting case, the provided future covariates at dataset index `0` do not extend far enough into the past. The future covariates must start at time step `2022-05-16 00:00:00`, whereas now they start at time step `2022-07-01 00:00:00`.
Best hyperparameters:
  hidden_dim: 21
  n_layers: 1
  dropout: 0.44776392192658937
  batch_size: 64
  learning_rate: 0.0078545751357899
  input_chunk_length: 46


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
ValueError: For the given forecasting case, the provided future covariates at dataset index `0` do not extend far enough into the past. The future covariates must start at time step `2022-05-16 00:00:00`, whereas now they start at time step `2022-07-01 00:00:00`.


ValueError: For the given forecasting case, the provided future covariates at dataset index `0` do not extend far enough into the past. The future covariates must start at time step `2022-05-16 00:00:00`, whereas now they start at time step `2022-07-01 00:00:00`.

In [26]:
import traceback  
import pandas as pd
from darts import TimeSeries
from darts.models import RNNModel
from darts.dataprocessing.transformers import Scaler
from darts.metrics import mape, mae, rmse, mse
from sklearn.preprocessing import MaxAbsScaler
import optuna
import plotly.graph_objects as go
from darts.utils.callbacks import TFMProgressBar

# Create time series objects
series_train = TimeSeries.from_dataframe(train_df, 'Date', 'Day_ahead_price (€/MWh)').astype('float32')
series_test = TimeSeries.from_dataframe(test_df, 'Date', 'Day_ahead_price (€/MWh)').astype('float32')

# Convert future covariates to TimeSeries objects

future_covariates_train = TimeSeries.from_dataframe(train_df, 'Date', future_covariates_columns).astype('float32')

# Determine required start date for future covariates during prediction
max_input_chunk_length = 200  # Maximum input_chunk_length from your hyperparameter search
required_covariate_start = series_test.start_time() - pd.DateOffset(days=(max_input_chunk_length - 1))
required_covariate_end = series_test.end_time()

# Ensure future_covariates_full covers the required range
future_covariates_full = TimeSeries.from_dataframe(
    df, 'Date', future_covariates_columns, fill_missing_dates=True, freq="D"
).astype('float32')

# Slice future covariates for prediction
future_covariates_for_prediction = future_covariates_full.slice(
    required_covariate_start, required_covariate_end
)

# Scaling the data
scaler_series = Scaler(MaxAbsScaler())
scaler_covariates = Scaler(MaxAbsScaler())

# Fit the scaler on the training data
series_train_scaled = scaler_series.fit_transform(series_train)
future_covariates_train_scaled = scaler_covariates.fit_transform(future_covariates_train)

# Transform the test series and future covariates using the same scaler
series_test_scaled = scaler_series.transform(series_test)
future_covariates_for_prediction_scaled = scaler_covariates.transform(future_covariates_for_prediction)

# Define the Optuna objective function without backtesting
def objective(trial):
    # Suggest hyperparameters
    hidden_dim = trial.suggest_int('hidden_dim', 10, 100)
    n_layers = trial.suggest_int('n_layers', 1, 5)
    dropout = trial.suggest_uniform('dropout', 0.0, 0.5)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
    input_chunk_length = trial.suggest_int('input_chunk_length', 30, 200)
    n_epochs = 10  # Adjust epochs for optimization

    # Ensure training_length >= input_chunk_length
    training_length = max(200, input_chunk_length)

    # Define the model
    model = RNNModel(
        model='LSTM',
        input_chunk_length=input_chunk_length,
        training_length=training_length,
        hidden_dim=hidden_dim,
        n_rnn_layers=n_layers,
        dropout=dropout,
        likelihood=GaussianLikelihood(),
        batch_size=batch_size,
        n_epochs=n_epochs,
        optimizer_kwargs={'lr': learning_rate},
        random_state=42,
        save_checkpoints=True,
        model_name= "rnn_model",
        pl_trainer_kwargs={
            'accelerator': 'gpu',  # Use GPU if available
            'devices': 1,
            'enable_progress_bar': True,
            'logger': False,
            'enable_model_summary': False,
            "callbacks": [TFMProgressBar(enable_train_bar_only=True)],
        }
    )

    try:
        # Fit the model on the training data
        model.fit(
            series_train_scaled,
            future_covariates=future_covariates_train_scaled,
            verbose=False
        )

        # Determine forecast horizon
        n = len(series_test_scaled)

        # Make predictions
        forecast_val = model.predict(
            n=n,
            future_covariates=future_covariates_for_prediction_scaled
        )

        # Calculate MAPE on the validation set
        error = mape(series_test_scaled, forecast_val)
    except Exception as e:
        print(f'Exception during model training: {e}')
        traceback.print_exc()
        return float('inf')

    return error

# Run Optuna optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)

# Get the best hyperparameters
best_params = study.best_params
print('Best hyperparameters:')
for key, value in best_params.items():
    print(f'  {key}: {value}')

# Ensure training_length >= input_chunk_length
best_training_length = max(200, best_params['input_chunk_length'])

# Train the best model on the full training data
best_model = RNNModel(
    model='LSTM',
    input_chunk_length=best_params['input_chunk_length'],
    training_length=best_training_length,
    hidden_dim=best_params['hidden_dim'],
    n_rnn_layers=best_params['n_layers'],
    dropout=best_params['dropout'],
    likelihood=GaussianLikelihood(),
    batch_size=best_params['batch_size'],
    n_epochs=10, 
    optimizer_kwargs={'lr': best_params['learning_rate']},
    random_state=42,
    pl_trainer_kwargs={
        'accelerator': 'gpu',  
        'devices': 1,
        'enable_progress_bar': True,
        'logger': False,
        'enable_model_summary': False,
    }
)

best_model.fit(
    series_train_scaled,
    future_covariates=future_covariates_train_scaled,
    verbose=True
)

# Determine forecast horizon based on available covariates
n = len(series_test_scaled)

# Make predictions on the test set
forecast = best_model.predict(
    n=n,
    future_covariates=future_covariates_for_prediction_scaled
)


# Inverse transform the forecast and test_series
forecast = scaler_series.inverse_transform(forecast)
test_series = scaler_series.inverse_transform(series_test_scaled)

# Plot the forecast
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=test_series.time_index,
    y=test_series.values().squeeze(),
    mode='lines',
    name='Actual',
    line=dict(color='blue')
))

fig.add_trace(go.Scatter(
    x=forecast.time_index,
    y=forecast.values().squeeze(),
    mode='lines',
    name='Forecast',
    line=dict(color='red', dash='dash')
))

fig.update_layout(
    title='RNN Model - Time Series Forecast',
    xaxis_title='Date',
    yaxis_title='Day Ahead Price (€/MWh)',
    legend=dict(
        x=1,
        y=1,
        xanchor='right',
        yanchor='top',
        bordercolor='black',
        borderwidth=1
    ),
    template='plotly_white'
)

fig.show()

# Calculate and print error metrics
print('Error Metrics on Test Set:')
print(f'  Mean Absolute Percentage Error (MAPE): {mape(test_series, forecast):.2f}%')
print(f'  Mean Absolute Error (MAE): {mae(test_series, forecast):.2f}')
print(f'  Root Mean Squared Error (RMSE): {rmse(test_series, forecast):.2f}')
print(f'  Mean Squared Error (MSE): {mse(test_series, forecast):.2f}')


[I 2024-09-19 11:22:19,938] A new study created in memory with name: no-name-18df9c02-df78-4348-ada7-ad910869a0e2
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2024-09-19 11:22:45,779] Trial 0 finished with value: 205.57139587402344 and parameters: {'hidden_dim': 51, 'n_layers': 2, 'dropout': 0.39910456304305625, 'batch_size': 64, 'learning_rate': 0.0005359145324041791, 'input_chunk_length': 196}. Best is trial 0 with value: 205.57139587402344.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2024-09-19 11:23:07,364] Trial 1 finished with value: 400.8519287109375 and parameters: {'hidden_dim': 46, 'n_layers': 2, 'dropout': 0.08393738906984405, 'batch_size': 64, 'learning_rate': 3.4620746840546855e-05, 'input_chunk_length': 141}. Best is trial 0 with value: 205.57139587402344.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2024-09-19 11:23:53,252] Trial 2 finished with value: 11689.0869140625 and parameters: {'hidden_dim': 87, 'n_layers': 2, 'dropout': 0.1944015192022635, 'batch_size': 32, 'learning_rate': 0.026217265038648525, 'input_chunk_length': 194}. Best is trial 0 with value: 205.57139587402344.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2024-09-19 11:25:13,799] Trial 3 finished with value: 114.58106994628906 and parameters: {'hidden_dim': 70, 'n_layers': 5, 'dropout': 0.4523217467058362, 'batch_size': 32, 'learning_rate': 1.3874588634864121e-05, 'input_chunk_length': 172}. Best is trial 3 with value: 114.58106994628906.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2024-09-19 11:25:58,197] Trial 4 finished with value: 118.36807250976562 and parameters: {'hidden_dim': 63, 'n_layers': 3, 'dropout': 0.1509263419430691, 'batch_size': 32, 'learning_rate': 0.0338770459746194, 'input_chunk_length': 38}. Best is trial 3 with value: 114.58106994628906.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2024-09-19 11:26:23,840] Trial 5 finished with value: 217.9854278564453 and parameters: {'hidden_dim': 49, 'n_layers': 2, 'dropout': 0.2606596339863262, 'batch_size': 64, 'learning_rate': 7.522677189627802e-05, 'input_chunk_length': 50}. Best is trial 3 with value: 114.58106994628906.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2024-09-19 11:27:05,375] Trial 6 finished with value: 160.12881469726562 and parameters: {'hidden_dim': 11, 'n_layers': 4, 'dropout': 0.3779622727692257, 'batch_size': 32, 'learning_rate': 0.061274746716043715, 'input_chunk_length': 193}. Best is trial 3 with value: 114.58106994628906.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2024-09-19 11:27:59,814] Trial 7 finished with value: 287.53607177734375 and parameters: {'hidden_dim': 78, 'n_layers': 5, 'dropout': 0.3629235123676253, 'batch_size': 64, 'learning_rate': 0.003691784419843991, 'input_chunk_length': 48}. Best is trial 3 with value: 114.58106994628906.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2024-09-19 11:29:00,160] Trial 8 finished with value: 66.19490051269531 and parameters: {'hidden_dim': 64, 'n_layers': 1, 'dropout': 0.34160105679751707, 'batch_size': 16, 'learning_rate': 0.015265711876397851, 'input_chunk_length': 34}. Best is trial 8 with value: 66.19490051269531.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2024-09-19 11:29:40,784] Trial 9 finished with value: 134.1493377685547 and parameters: {'hidden_dim': 51, 'n_layers': 4, 'dropout': 0.263331663740493, 'batch_size': 64, 'learning_rate': 0.05665671897174339, 'input_chunk_length': 64}. Best is trial 8 with value: 66.19490051269531.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2024-09-19 11:30:38,342] Trial 10 finished with value: 89.33930206298828 and parameters: {'hidden_dim': 25, 'n_layers': 1, 'dropout': 0.49423574035255324, 'batch_size': 16, 'learning_rate': 0.004455357513063347, 'input_chunk_length': 97}. Best is trial 8 with value: 66.19490051269531.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2024-09-19 11:31:35,284] Trial 11 finished with value: 103.44505310058594 and parameters: {'hidden_dim': 25, 'n_layers': 1, 'dropout': 0.49944684021141705, 'batch_size': 16, 'learning_rate': 0.0028574059930340893, 'input_chunk_length': 83}. Best is trial 8 with value: 66.19490051269531.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2024-09-19 11:32:41,946] Trial 12 finished with value: 55.464134216308594 and parameters: {'hidden_dim': 100, 'n_layers': 1, 'dropout': 0.3181620047751069, 'batch_size': 16, 'learning_rate': 0.008501557673410083, 'input_chunk_length': 102}. Best is trial 12 with value: 55.464134216308594.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[I 2024-09-19 11:33:48,416] Trial 13 finished with value: 85.6839828491211 and parameters: {'hidden_dim': 100, 'n_layers': 1, 'dropout': 0.32357135208735077, 'batch_size': 16, 'learning_rate': 0.01018350008673993, 'input_chunk_length': 127}. Best is trial 12 with value: 55.464134216308594.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


## Backtesting

In [None]:
#Backtesting
from darts.utils.timeseries_generation import concatenate

# Combine training and test future covariates
future_covariates_full_scaled = future_covariates_train_scaled.concatenate(future_covariates_for_prediction_scaled)

# Combine the training and test series for the full series
series_full_scaled = series_train_scaled.concatenate(series_test_scaled)

# Define backtesting parameters
forecast_horizon = 7  # Adjust based on your requirements
stride = 1  # Number of time steps between forecasts
start = series_train_scaled.end_time()  # Start backtesting after the training period

# Run backtesting using historical_forecasts
backtest_forecasts = best_model.historical_forecasts(
    series=series_full_scaled,
    future_covariates=future_covariates_full_scaled,
    start=start,
    forecast_horizon=forecast_horizon,
    stride=stride,
    retrain=False,
    verbose=True,
    last_points_only=False,
)

# Concatenate the list of forecasts into a single TimeSeries
backtest_forecasts_concat = concatenate(backtest_forecasts, axis=0)

# Get the actual series corresponding to the forecasts
actual_series = series_full_scaled.slice_intersect(backtest_forecasts_concat)

# Inverse transform the forecasts and actual series
backtest_forecasts_inv = scaler_series.inverse_transform(backtest_forecasts_concat)
actual_series_inv = scaler_series.inverse_transform(actual_series)

# Calculate error metrics
backtest_mape = mape(actual_series_inv, backtest_forecasts_inv)
backtest_mae = mae(actual_series_inv, backtest_forecasts_inv)
backtest_rmse = rmse(actual_series_inv, backtest_forecasts_inv)
backtest_mse = mse(actual_series_inv, backtest_forecasts_inv)

print('Backtesting Error Metrics:')
print(f'  Mean Absolute Percentage Error (MAPE): {backtest_mape:.2f}%')
print(f'  Mean Absolute Error (MAE): {backtest_mae:.2f}')
print(f'  Root Mean Squared Error (RMSE): {backtest_rmse:.2f}')
print(f'  Mean Squared Error (MSE): {backtest_mse:.2f}')

# Plot the backtest forecasts against the actual series
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=actual_series_inv.time_index,
    y=actual_series_inv.values().squeeze(),
    mode='lines',
    name='Actual',
    line=dict(color='blue')
))

fig.add_trace(go.Scatter(
    x=backtest_forecasts_inv.time_index,
    y=backtest_forecasts_inv.values().squeeze(),
    mode='lines',
    name='Backtest Forecast',
    line=dict(color='red', dash='dash')
))

fig.update_layout(
    title='RNN Model - Backtesting Forecast',
    xaxis_title='Date',
    yaxis_title='Day Ahead Price (€/MWh)',
    legend=dict(
        x=1,
        y=1,
        xanchor='right',
        yanchor='top',
        bordercolor='black',
        borderwidth=1
    ),
    template='plotly'
)

fig.show()
