## Temporal Fusion Transformer

In [1]:
import pandas as pd 
import numpy as np
import torch 
from darts import TimeSeries
from optuna.integration import PyTorchLightningPruningCallback
from pytorch_lightning.callbacks import EarlyStopping

In [2]:
# Import the data 
def load_and_prepare_data(file_path):
    """
    Load energy prices data from a CSV file, ensure chronological order, and convert 'Date' to datetime.
    """
    df = pd.read_csv(file_path)
    df.sort_values('Date', inplace=True)
    # Convert 'date' column to datetime
    df['Date'] = pd.to_datetime(df['Date'])
    #df.set_index('Date', inplace=True)
    df = pd.DataFrame(df)
    return df

In [3]:
# Load in the train and test data
train_df = load_and_prepare_data('../../data/Final_data/train_df.csv')
test_df = load_and_prepare_data('../../data/Final_data/test_df.csv')

# Concatenate the train and test data
df = pd.concat([train_df, test_df])
df['Date'] = pd.to_datetime(df['Date'])

# Create a time series object
series_train = TimeSeries.from_dataframe(train_df, 'Date', 'Day_ahead_price (€/MWh)').astype('float32')
series_test = TimeSeries.from_dataframe(test_df, 'Date', 'Day_ahead_price (€/MWh)').astype('float32')

# Show 
df

Unnamed: 0,Date,Day_ahead_price (€/MWh),Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),Other (GWh),Pumped_storage_generation (GWh),Solar_energy (GWh),Wind_offshore (GWh),Wind_onshore (GWh),Net_total_export_import (GWh),BEV_vehicles,Oil_price (EUR),TTF_gas_price (€/MWh),Nuclear_energy (GWh)
0,2012-01-01,18.19,14.75,4.95,8.39,98.605,108.454,51.011,325.337,188.811,54.040,19.314,6.263,3.404,235.467,54.662,6,99.64,21.10,250.979
1,2012-01-02,33.82,15.12,5.00,7.41,98.605,222.656,51.862,343.168,229.293,54.166,28.892,6.312,3.350,231.772,-64.477,6,100.04,20.00,258.671
2,2012-01-03,35.03,31.88,7.77,5.23,98.605,162.204,48.851,336.773,241.297,53.518,21.072,24.226,7.292,504.484,-35.078,6,100.44,20.90,271.495
3,2012-01-04,32.16,25.21,8.04,4.78,98.605,189.633,47.101,323.976,252.289,52.194,28.300,14.157,7.828,541.528,22.924,6,103.15,21.40,270.613
4,2012-01-05,20.35,13.46,9.98,4.23,98.605,175.733,45.854,327.502,259.018,52.179,31.887,4.728,8.280,572.819,35.618,6,103.92,21.30,287.555
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
754,2024-07-24,66.61,225.04,3.47,17.54,110.007,43.469,85.857,199.246,194.291,54.026,20.934,325.285,49.360,179.921,-168.705,992,75.75,32.63,0.000
755,2024-07-25,78.34,272.71,2.12,17.85,110.410,50.676,82.632,195.983,209.610,52.963,18.766,394.116,51.053,42.885,-194.496,992,76.36,31.70,0.000
756,2024-07-26,93.04,172.33,2.60,19.09,110.852,42.333,79.531,205.273,205.773,52.616,19.081,256.246,40.449,129.267,-241.786,993,75.21,32.20,0.000
757,2024-07-27,80.74,176.67,2.05,19.63,110.479,33.307,74.958,184.012,216.412,50.927,18.856,244.051,2.180,32.001,-251.655,992,74.79,32.90,0.000


In [4]:
# Define the future covariates columns from your dataframe
future_covariates_columns = ['Solar_radiation (W/m2)', 'Wind_speed (m/s)', 'Temperature (°C)', 
                             'Biomass (GWh)', 'Hard_coal (GWh)', 'Hydro (GWh)', 'Lignite (GWh)', 
                             'Natural_gas (GWh)', 'Other (GWh)', 'Pumped_storage_generation (GWh)', 
                             'Solar_energy (GWh)', 'Wind_offshore (GWh)', 'Wind_onshore (GWh)', 
                             'Net_total_export_import (GWh)', 'BEV_vehicles', 'Oil_price (EUR)', 
                             'TTF_gas_price (€/MWh)', 'Nuclear_energy (GWh)']

## Temporal Fusion Transformer (TFT)

In [6]:
from darts.models import TFTModel
from darts import TimeSeries
from darts.models.forecasting.tft_model import QuantileRegression
import pandas as pd
import numpy as np
import plotly.graph_objs as go
from darts.dataprocessing.transformers import Scaler
from sklearn.preprocessing import MaxAbsScaler, RobustScaler, StandardScaler
from darts.metrics import mape, rmse, mse, mae

# Convert future covariates to TimeSeries objects
future_covariates_train = TimeSeries.from_dataframe(train_df, 'Date', future_covariates_columns).astype('float32')
future_covariates_test = TimeSeries.from_dataframe(test_df, 'Date', future_covariates_columns).astype('float32')

# Convert future covariates to TimeSeries objects
future_covariates_train = TimeSeries.from_dataframe(train_df, 'Date', future_covariates_columns).astype('float32')
future_covariates_full = TimeSeries.from_dataframe(df, 'Date', future_covariates_columns, fill_missing_dates=True, freq="D").astype('float32')

# Determine required start date for future covariates
input_chunk_length = 200  # Set based on desired look-back period
required_start_date = pd.Timestamp(test_df['Date'].iloc[0]) - pd.DateOffset(days=input_chunk_length)

# Ensure future_covariates_full covers the required range
required_end_date = pd.Timestamp(test_df['Date'].iloc[0]) + pd.DateOffset(days=len(series_test)-1)

# Check if future_covariates_full has sufficient data
if future_covariates_full.start_time() > required_start_date or future_covariates_full.end_time() < required_end_date:
    print("Warning: The future_covariates_full is not long enough to cover the required input chunk length and prediction range.")
    # Extend the future_covariates_full or adjust your dataset

# Slice the future covariates to the required range, including data from the training period
future_covariates_test = future_covariates_full.slice(required_start_date, required_end_date)

# Scaling the data
scaler_series = Scaler(MaxAbsScaler())
scaler_covariates = Scaler(MaxAbsScaler())

# Fit the scaler on the training data
series_train_scaled = scaler_series.fit_transform(series_train)
future_covariates_train_scaled = scaler_covariates.fit_transform(future_covariates_train)

# Transform the test series and future covariates using the same scaler
series_test_scaled = scaler_series.transform(series_test)
future_covariates_test_scaled = scaler_covariates.transform(future_covariates_test)

# Define the TFT model
model = TFTModel(
    input_chunk_length=input_chunk_length,  # History length (number of past time steps used to predict the future)
    output_chunk_length=len(series_test),  # Adjusted to predict the length of the test set
    hidden_size=16,
    lstm_layers=4,
    num_attention_heads=4,
    dropout=0.1,
    likelihood=QuantileRegression(quantiles=[0.1, 0.5, 0.9]),  # Correctly using QuantileRegression
    random_state=42,
    add_relative_index=True,  # Automatically generate relative index as a future covariate
    loss_fn=torch.nn.MSELoss()  # Correctly using MSELoss
)

# Train the model with future covariates
model.fit(series_train_scaled, future_covariates=future_covariates_train_scaled, epochs=30, verbose=True)

# Make predictions on the test set
forecast_scaled = model.predict(n=len(series_test), future_covariates=future_covariates_test_scaled)

# Inverse transform the forecast to the original scale
forecast = scaler_series.inverse_transform(forecast_scaled)

# Convert TimeSeries to DataFrame for Plotly plotting
test_df_plotly = series_test.pd_dataframe()
forecast_df_plotly = forecast.pd_dataframe()

# Plot the results using Plotly
fig = go.Figure()

# Add actual test data trace
fig.add_trace(go.Scatter(x=test_df_plotly.index, y=test_df_plotly['Day_ahead_price (€/MWh)'],
                         mode='lines', name='Actual Test Data', line=dict(color='darkblue')))

# Add forecast data trace
fig.add_trace(go.Scatter(x=forecast_df_plotly.index, y=forecast_df_plotly['Day_ahead_price (€/MWh)'],
                         mode='lines', name='TFT Model on Test Data', line=dict(color='red')))

# Update layout
fig.update_layout(
    title='TFT Model - Test Performance Only',
    xaxis_title='Date',
    yaxis_title='Day Ahead Price (€/MWh)',
    legend=dict(
        x=1,   # Set x position to 1 (far right)
        y=1,   # Set y position to 1 (top)
        xanchor='right',  # Anchor the legend's x position to the right
        yanchor='top',    # Anchor the legend's y position to the top
        bordercolor='black',  # Optional: Add a border around the legend
        borderwidth=1        # Optional: Set the border width
    ),
    template='plotly'
)

# Show the plot
fig.show()

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

   | Name                              | Type                             | Params | Mode 
------------------------------------------------------------------------------------------------
0  | criterion                         | MSELoss                          | 0      | train
1  | train_criterion                   | MSELoss                          | 0      | train
2  | val_criterion                     | MSELoss                          | 0      | train
3  | train_metrics                     | MetricCollection                 | 0      | train
4  | val_metrics                       | MetricCollection                 | 0      | train
5  | input_embeddings                  | _MultiEmbedding                  | 0      | train
6  | static_covariates_vsn             | _VariableSelectionNetwork        | 0      | train
7  | encoder_vsn                       | _VariableSelection

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=30` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

AttributeError: 'Figure' object has no attribute 'write'

In [8]:
# Evaluate the model using Darts' metrics
print(f'Mean Absolute Error on Test Set: {mae(series_test, forecast)}')
print(f'Mean Absolute Percentage Error on Test Set: {mape(series_test, forecast)}')
print(f'Mean Squared Error on Test Set: {mse(series_test, forecast)}')
print(f'Root Mean Squared Error on Test Set: {rmse(series_test, forecast)}')

Mean Absolute Error on Test Set: 30.65642547607422
Mean Absolute Percentage Error on Test Set: 57.051856994628906
Mean Squared Error on Test Set: 2314.962890625
Root Mean Squared Error on Test Set: 48.11405944824219


In [9]:
# Save the created figure as png file and the error metrics 
fig.write_image("../../predictions/Other_Ideas/TFT_epochs_30.png")
error_metrics = pd.DataFrame({'MAE': [mae(series_test, forecast)], 'MAPE': [mape(series_test, forecast)], 'MSE': [mse(series_test, forecast)], 'RMSE': [rmse(series_test, forecast)]})
error_metrics.to_csv('../../predictions/Other_Ideas/TFT_metrics_epochs_30.csv', index=False)


### Hyperparameter Optimiziation 

In [10]:
import optuna
from darts.models import TFTModel
from darts import TimeSeries
from darts.models.forecasting.tft_model import QuantileRegression
import pandas as pd
import numpy as np
import torch
import plotly.graph_objs as go
from darts.dataprocessing.transformers import Scaler
from darts.metrics import mape, rmse, mse, mae

# Convert future covariates to TimeSeries objects
future_covariates_train = TimeSeries.from_dataframe(train_df, 'Date', future_covariates_columns).astype('float32')

# Convert full covariates to TimeSeries objects
future_covariates_full = TimeSeries.from_dataframe(df, 'Date', future_covariates_columns, fill_missing_dates=True, freq="D").astype('float32')

# Scaling the data
scaler_series = Scaler()
scaler_covariates = Scaler()

# Fit the scaler on the training data
series_train_scaled = scaler_series.fit_transform(series_train)
future_covariates_train_scaled = scaler_covariates.fit_transform(future_covariates_train)

# Transform the test series using the same scaler
series_test_scaled = scaler_series.transform(series_test)

def objective(trial):
    # Suggest hyperparameters
    input_chunk_length = trial.suggest_int('input_chunk_length', 30, 300)
    hidden_size = trial.suggest_int('hidden_size', 8, 64)
    lstm_layers = trial.suggest_int('lstm_layers', 1, 4)
    num_attention_heads = trial.suggest_int('num_attention_heads', 1, 8)
    dropout = trial.suggest_float('dropout', 0.0, 0.5)

    # Recalculate required start and end dates based on input_chunk_length
    required_start_date = pd.Timestamp(test_df['Date'].iloc[0]) - pd.DateOffset(days=input_chunk_length)
    required_end_date = pd.Timestamp(test_df['Date'].iloc[0]) + pd.DateOffset(days=len(series_test)-1)

    # Check if future_covariates_full has sufficient data
    if future_covariates_full.start_time() > required_start_date or future_covariates_full.end_time() < required_end_date:
        print(f"Warning: The future_covariates_full is not long enough for input_chunk_length = {input_chunk_length}")
        return float('inf')

    # Slice and scale the future covariates
    future_covariates_test = future_covariates_full.slice(required_start_date, required_end_date)
    future_covariates_test_scaled = scaler_covariates.transform(future_covariates_test)

    # Initialize the TFT model with suggested hyperparameters
    model = TFTModel(
        input_chunk_length=input_chunk_length,
        output_chunk_length=1,
        hidden_size=hidden_size,
        lstm_layers=lstm_layers,
        num_attention_heads=num_attention_heads,
        dropout=dropout,
        likelihood=QuantileRegression(quantiles=[0.1, 0.5, 0.9]),
        random_state=42,
        add_relative_index=True
    )

    try:
        # Train the model
        model.fit(series_train_scaled, future_covariates=future_covariates_train_scaled, epochs=1, verbose=False)

        # Make predictions
        forecast_scaled = model.predict(n=len(series_test_scaled), future_covariates=future_covariates_test_scaled)

        # Inverse transform the forecast to the original scale
        forecast = scaler_series.inverse_transform(forecast_scaled)

        # Return Mean Squared Error as the objective metric for Optuna to minimize
        error = mse(series_test, forecast)
        if np.isnan(error):
            return float('inf')
        return error
    except Exception as e:
        print(f"Trial failed due to: {e}")
        return float('inf')

# Run the Optuna study with more trials
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=5, n_jobs=1)

# Print the best hyperparameters
print('Best hyperparameters: ', study.best_params)

# Use the best hyperparameters
best_params = study.best_params

# Recalculate required start and end dates based on best input_chunk_length
best_input_chunk_length = best_params['input_chunk_length']
required_start_date = pd.Timestamp(test_df['Date'].iloc[0]) - pd.DateOffset(days=best_input_chunk_length)
required_end_date = pd.Timestamp(test_df['Date'].iloc[0]) + pd.DateOffset(days=len(series_test)-1)

# Check if future_covariates_full has sufficient data
if future_covariates_full.start_time() > required_start_date or future_covariates_full.end_time() < required_end_date:
    raise ValueError(f"The future_covariates_full is not long enough for input_chunk_length = {best_input_chunk_length}")

# Slice and scale the future covariates
future_covariates_test = future_covariates_full.slice(required_start_date, required_end_date)
future_covariates_test_scaled = scaler_covariates.transform(future_covariates_test)

# Initialize the final model with best hyperparameters
best_model = TFTModel(
    input_chunk_length=best_input_chunk_length,
    output_chunk_length=1,
    hidden_size=best_params['hidden_size'],
    lstm_layers=best_params['lstm_layers'],
    num_attention_heads=best_params['num_attention_heads'],
    dropout=best_params['dropout'],
    likelihood=QuantileRegression(quantiles=[0.1, 0.5, 0.9]),
    random_state=42,
    add_relative_index=True,
    loss_fn=torch.nn.MSELoss()
)

# Train the best model
best_model.fit(series_train_scaled, future_covariates=future_covariates_train_scaled, epochs=1, verbose=True)

# Create a prediction series that includes the required history
prediction_start = series_test_scaled.start_time() - pd.DateOffset(days=best_input_chunk_length)
prediction_series = series_train_scaled.append(series_test_scaled).slice(
    prediction_start,
    series_test_scaled.end_time()
)

# Adjust future covariates for prediction
future_covariates_prediction = future_covariates_full.slice(
    prediction_start,
    series_test_scaled.end_time()
)
future_covariates_prediction_scaled = scaler_covariates.transform(future_covariates_prediction)

# Make predictions starting from the correct point
forecast_scaled = best_model.predict(
    n=len(series_test_scaled),
    series=prediction_series,
    future_covariates=future_covariates_prediction_scaled
)

# Inverse transform the forecast to the original scale
forecast = scaler_series.inverse_transform(forecast_scaled)

# Convert TimeSeries to DataFrame for Plotly plotting
test_df_plotly = series_test.pd_dataframe()
forecast_df_plotly = forecast.pd_dataframe()

# Plot the results using Plotly
fig = go.Figure()

# Add actual test data trace
fig.add_trace(go.Scatter(x=test_df_plotly.index, y=test_df_plotly['Day_ahead_price (€/MWh)'],
                         mode='lines', name='Actual Test Data', line=dict(color='darkblue')))

# Add forecast data trace
fig.add_trace(go.Scatter(x=forecast_df_plotly.index, y=forecast_df_plotly['Day_ahead_price (€/MWh)'],
                         mode='lines', name='TFT Forecast on Test Data', line=dict(color='red')))

# Update layout
fig.update_layout(
    title='TFT Model - Test Performance with Optuna Hyperparameter Tuning',
    xaxis_title='Date',
    yaxis_title='Day Ahead Price (€/MWh)',
    legend=dict(
        x=1,
        y=1,
        xanchor='right',
        yanchor='top',
        bordercolor='black',
        borderwidth=1
    ),
    template='plotly_white'
)

# Show the plot
fig.show()

# Evaluate the model using Darts' metrics
print(f'Mean Absolute Error on Test Set: {mae(series_test, forecast)}')
print(f'Mean Absolute Percentage Error on Test Set: {mape(series_test, forecast)}')
print(f'Mean Squared Error on Test Set: {mse(series_test, forecast)}')
print(f'Root Mean Squared Error on Test Set: {rmse(series_test, forecast)}')


[I 2024-09-19 13:54:08,963] A new study created in memory with name: no-name-41a2923b-c78e-449d-820d-355534903066
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=1` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]


Mean of empty slice


Mean of empty slice

[I 2024-09-19 13:55:53,071] Trial 0 finished with value: inf and parameters: {'input_chunk_length': 131, 'hidden_size': 11, 'lstm_layers': 3, 'num_attention_heads': 4, 'dropout': 0.16768321132499386}. Best is trial 0 with value: inf.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=1` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]


Mean of empty slice


Mean of empty slice

[I 2024-09-19 13:57:44,285] Trial 1 finished with value: inf and parameters: {'input_chunk_length': 216, 'hidden_size': 44, 'lstm_layers': 3, 'num_attention_heads': 2, 'dropout': 0.21687556238209665}. Best is trial 0 with value: inf.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=1` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]


Mean of empty slice


Mean of empty slice

[I 2024-09-19 13:59:26,044] Trial 2 finished with value: inf and parameters: {'input_chunk_length': 227, 'hidden_size': 18, 'lstm_layers': 2, 'num_attention_heads': 4, 'dropout': 0.28160062414105275}. Best is trial 0 with value: inf.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=1` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]


Mean of empty slice


Mean of empty slice

[I 2024-09-19 14:01:06,864] Trial 3 finished with value: inf and parameters: {'input_chunk_length': 63, 'hidden_size': 29, 'lstm_layers': 4, 'num_attention_heads': 4, 'dropout': 0.3248447315962685}. Best is trial 0 with value: inf.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
`Trainer.fit` stopped: `max_epochs=1` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]


Mean of empty slice


Mean of empty slice

[I 2024-09-19 14:02:54,763] Trial 4 finished with value: inf and parameters: {'input_chunk_length': 230, 'hidden_size': 53, 'lstm_layers': 2, 'num_attention_heads': 6, 'dropout': 0.22243531917156212}. Best is trial 0 with value: inf.


Best hyperparameters:  {'input_chunk_length': 131, 'hidden_size': 11, 'lstm_layers': 3, 'num_attention_heads': 4, 'dropout': 0.16768321132499386}


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

   | Name                              | Type                             | Params | Mode 
------------------------------------------------------------------------------------------------
0  | criterion                         | MSELoss                          | 0      | train
1  | train_criterion                   | MSELoss                          | 0      | train
2  | val_criterion                     | MSELoss                          | 0      | train
3  | train_metrics                     | MetricCollection                 | 0      | train
4  | val_metrics                       | MetricCollection                 | 0      | train
5  | input_embeddings                  | _MultiEmbedding                  | 0      | train
6  | static_covariates_vsn             | _VariableSelectionNetwork        | 0      | train
7  | encoder_vsn                       | _VariableSelection

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
ValueError: For the given forecasting horizon `n=759`, the provided future covariates at dataset index `0` do not extend far enough into the future. As `n > output_chunk_length` the future covariates must end at time step `2026-08-26 00:00:00`, whereas now they end at time step `2024-07-28 00:00:00`.


ValueError: For the given forecasting horizon `n=759`, the provided future covariates at dataset index `0` do not extend far enough into the future. As `n > output_chunk_length` the future covariates must end at time step `2026-08-26 00:00:00`, whereas now they end at time step `2024-07-28 00:00:00`.

In [8]:
print("series_test time index:", series_test.time_index)
print("forecast time index:", forecast.time_index)

series_test time index: DatetimeIndex(['2022-07-01', '2022-07-02', '2022-07-03', '2022-07-04',
               '2022-07-05', '2022-07-06', '2022-07-07', '2022-07-08',
               '2022-07-09', '2022-07-10',
               ...
               '2024-07-19', '2024-07-20', '2024-07-21', '2024-07-22',
               '2024-07-23', '2024-07-24', '2024-07-25', '2024-07-26',
               '2024-07-27', '2024-07-28'],
              dtype='datetime64[ns]', name='Date', length=759, freq='D')
forecast time index: DatetimeIndex(['2022-07-01', '2022-07-02', '2022-07-03', '2022-07-04',
               '2022-07-05', '2022-07-06', '2022-07-07', '2022-07-08',
               '2022-07-09', '2022-07-10',
               ...
               '2024-07-19', '2024-07-20', '2024-07-21', '2024-07-22',
               '2024-07-23', '2024-07-24', '2024-07-25', '2024-07-26',
               '2024-07-27', '2024-07-28'],
              dtype='datetime64[ns]', name='Date', length=759, freq='D')


In [13]:
print("NaN values in series_train:", np.isnan(series_train.values()).any())
print("NaN values in future_covariates_train:", np.isnan(future_covariates_train.values()).any())
print("series_test components:", series_test.components)


NaN values in series_train: False
NaN values in future_covariates_train: False
series_test components: Index(['Day_ahead_price (€/MWh)'], dtype='object', name='component')


In [14]:
# Import necessary libraries
from darts.models import TFTModel
from darts import TimeSeries
from darts.models.forecasting.tft_model import QuantileRegression
import pandas as pd
import numpy as np
import plotly.graph_objs as go
from darts.dataprocessing.transformers import Scaler
from sklearn.preprocessing import MaxAbsScaler
from darts.metrics import rmse
import optuna
import torch
from optuna.integration import PyTorchLightningPruningCallback

# Convert future covariates to TimeSeries objects
future_covariates_train = TimeSeries.from_dataframe(train_df, 'Date', future_covariates_columns).astype('float32')
future_covariates_test = TimeSeries.from_dataframe(test_df, 'Date', future_covariates_columns).astype('float32')

# Combine full future covariates for slicing
future_covariates_full = TimeSeries.from_dataframe(df, 'Date', future_covariates_columns, fill_missing_dates=True, freq="D").astype('float32')

# Determine required start and end dates for future covariates
input_chunk_length = 200  # Initial value; will be tuned
required_start_date = pd.Timestamp(test_df['Date'].iloc[0]) - pd.DateOffset(days=input_chunk_length)
required_end_date = pd.Timestamp(test_df['Date'].iloc[0]) + pd.DateOffset(days=len(series_test)-1)

# Check if future_covariates_full has sufficient data
if future_covariates_full.start_time() > required_start_date or future_covariates_full.end_time() < required_end_date:
    print("Warning: The future_covariates_full is not long enough to cover the required input chunk length and prediction range.")
    # Extend the future_covariates_full or adjust your dataset

# Slice the future covariates to the required range, including data from the training period
future_covariates_test = future_covariates_full.slice(required_start_date, required_end_date)

# Scaling the data
scaler_series = Scaler(MaxAbsScaler())
scaler_covariates = Scaler(MaxAbsScaler())

# Fit the scaler on the entire training data
series_train_scaled = scaler_series.fit_transform(series_train)
future_covariates_train_scaled = scaler_covariates.fit_transform(future_covariates_train)

# Transform the test series and future covariates using the same scaler
series_test_scaled = scaler_series.transform(series_test)
future_covariates_test_scaled = scaler_covariates.transform(future_covariates_test)

# Split the training data into training and validation sets
validation_ratio = 0.2
split_point = int(len(series_train_scaled) * (1 - validation_ratio))

series_train_sub = series_train_scaled[:split_point]
series_val = series_train_scaled[split_point:]

future_covariates_train_sub = future_covariates_train_scaled[:split_point]
future_covariates_val = future_covariates_train_scaled[split_point:]

# Define the objective function for Optuna
def objective(trial):
    # Suggest hyperparameters
    input_chunk_length = trial.suggest_int('input_chunk_length', 30, 200)
    hidden_size = trial.suggest_int('hidden_size', 8, 128)
    lstm_layers = trial.suggest_int('lstm_layers', 1, 8)
    num_attention_heads = trial.suggest_int('num_attention_heads', 1, 8)
    dropout = trial.suggest_float('dropout', 0.0, 0.5)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128])
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)

    # Build the model with suggested hyperparameters
    model = TFTModel(
        input_chunk_length=input_chunk_length,
        output_chunk_length=len(series_val),
        hidden_size=hidden_size,
        lstm_layers=lstm_layers,
        num_attention_heads=num_attention_heads,
        dropout=dropout,
        likelihood=QuantileRegression(quantiles=[0.1, 0.5, 0.9]),
        random_state=42,
        add_relative_index=True,
        loss_fn=torch.nn.MSELoss(),
        batch_size=batch_size,
        optimizer_kwargs={'lr': learning_rate}
    )

    # Train the model on training data
    try:
        model.fit(series_train_sub, future_covariates=future_covariates_train_sub, epochs=30, verbose=False)
    except Exception as e:
        print(f"Exception during model fitting: {e}")
        return float('inf')  # Return a large number to indicate failure

    # Predict on validation data
    try:
        forecast_scaled = model.predict(n=len(series_val), future_covariates=future_covariates_val)
    except Exception as e:
        print(f"Exception during prediction: {e}")
        return float('inf')  # Return a large number to indicate failure

    # Inverse transform the forecast and validation data to original scale
    forecast = scaler_series.inverse_transform(forecast_scaled)
    actual = scaler_series.inverse_transform(series_val)

    # Compute RMSE on validation data
    rmse_score = rmse(actual, forecast)
    return rmse_score

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50, verbose=True, n_jobs=1)

# Retrieve the best hyperparameters
best_params = study.best_params
print("Best hyperparameters:", best_params)

# Retrain the model with the best hyperparameters on the full training data
best_model = TFTModel(
    input_chunk_length=best_params['input_chunk_length'],
    output_chunk_length=len(series_test),
    hidden_size=best_params['hidden_size'],
    lstm_layers=best_params['lstm_layers'],
    num_attention_heads=best_params['num_attention_heads'],
    dropout=best_params['dropout'],
    likelihood=QuantileRegression(quantiles=[0.1, 0.5, 0.9]),
    random_state=42,
    add_relative_index=True,
    loss_fn=torch.nn.MSELoss(),
    batch_size=best_params['batch_size'],
    optimizer_kwargs={'lr': best_params['learning_rate']}
)

# Train the best model on the full training data
best_model.fit(series_train_scaled, future_covariates=future_covariates_train_scaled, epochs=30, verbose=True)

# Make predictions on the test set
forecast_scaled = best_model.predict(n=len(series_test_scaled), future_covariates=future_covariates_test_scaled)

# Inverse transform the forecast to the original scale
forecast = scaler_series.inverse_transform(forecast_scaled)

# Convert TimeSeries to DataFrame for Plotly plotting
test_df_plotly = series_test.pd_dataframe()
forecast_df_plotly = forecast.pd_dataframe()

# Plot the results using Plotly
fig = go.Figure()

# Add actual test data trace
fig.add_trace(go.Scatter(x=test_df_plotly.index, y=test_df_plotly['Day_ahead_price (€/MWh)'],
                         mode='lines', name='Actual Test Data', line=dict(color='darkblue')))

# Add forecast data trace
fig.add_trace(go.Scatter(x=forecast_df_plotly.index, y=forecast_df_plotly['Day_ahead_price (€/MWh)'],
                         mode='lines', name='TFT Model Forecast', line=dict(color='red')))

# Update layout
fig.update_layout(
    title='TFT Model - Test Performance with Hyperparameter Tuning',
    xaxis_title='Date',
    yaxis_title='Day Ahead Price (€/MWh)',
    legend=dict(
        x=1,
        y=1,
        xanchor='right',
        yanchor='top',
        bordercolor='black',
        borderwidth=1
    ),
    template='plotly'
)

# Show the plot
fig.show()


[I 2024-09-19 14:11:38,741] A new study created in memory with name: no-name-ae8d0b1d-9b6a-4e83-a6da-a87f90f9b3f2
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
