## Temporal Convolutional Networks (TCNs)

In [1]:
import pandas as pd 
import numpy as np
import torch 

from darts import TimeSeries, concatenate

In [2]:
# Import the data 
def load_and_prepare_data(file_path):
    """
    Load the energy price data from a CSV file and return it in chronological order.

    Parameters
    ----------
    file_path : str, path object or file-like
        Anything accepted by ``pd.read_csv``. The data must contain a 'Date' column.

    Returns
    -------
    pd.DataFrame
        All rows of the file sorted ascending by 'Date', with 'Date' converted to
        datetime. The index labels are kept as read (the index is not reset).

    Raises
    ------
    KeyError
        If the file has no 'Date' column.
    """
    df = pd.read_csv(file_path)
    # Parse the dates *before* sorting: ordering the raw strings is only
    # chronological when they happen to be zero-padded ISO dates.
    df['Date'] = pd.to_datetime(df['Date'])
    return df.sort_values('Date')

In [3]:
# Read the two pre-split CSV files
train_df = load_and_prepare_data('../../data/Final_data/train_df.csv')
test_df = load_and_prepare_data('../../data/Final_data/test_df.csv')

# Stack train on top of test to get one frame spanning both periods
df = pd.concat([train_df, test_df])
df['Date'] = pd.to_datetime(df['Date'])

# Target series (day-ahead price) for each split, stored as float32
series_train, series_test = (
    TimeSeries.from_dataframe(frame, 'Date', 'Day_ahead_price (€/MWh)').astype('float32')
    for frame in (train_df, test_df)
)

# Display the combined frame
df

Unnamed: 0,Date,Day_ahead_price (€/MWh),Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),Other (GWh),Pumped_storage_generation (GWh),Solar_energy (GWh),Wind_offshore (GWh),Wind_onshore (GWh),Net_total_export_import (GWh),BEV_vehicles,Oil_price (EUR),TTF_gas_price (€/MWh),Nuclear_energy (GWh)
0,2012-01-01,18.19,14.75,4.95,8.39,98.605,108.454,51.011,325.337,188.811,54.040,19.314,6.263,3.404,235.467,54.662,6,99.64,21.10,250.979
1,2012-01-02,33.82,15.12,5.00,7.41,98.605,222.656,51.862,343.168,229.293,54.166,28.892,6.312,3.350,231.772,-64.477,6,100.04,20.00,258.671
2,2012-01-03,35.03,31.88,7.77,5.23,98.605,162.204,48.851,336.773,241.297,53.518,21.072,24.226,7.292,504.484,-35.078,6,100.44,20.90,271.495
3,2012-01-04,32.16,25.21,8.04,4.78,98.605,189.633,47.101,323.976,252.289,52.194,28.300,14.157,7.828,541.528,22.924,6,103.15,21.40,270.613
4,2012-01-05,20.35,13.46,9.98,4.23,98.605,175.733,45.854,327.502,259.018,52.179,31.887,4.728,8.280,572.819,35.618,6,103.92,21.30,287.555
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
754,2024-07-24,66.61,225.04,3.47,17.54,110.007,43.469,85.857,199.246,194.291,54.026,20.934,325.285,49.360,179.921,-168.705,992,75.75,32.63,0.000
755,2024-07-25,78.34,272.71,2.12,17.85,110.410,50.676,82.632,195.983,209.610,52.963,18.766,394.116,51.053,42.885,-194.496,992,76.36,31.70,0.000
756,2024-07-26,93.04,172.33,2.60,19.09,110.852,42.333,79.531,205.273,205.773,52.616,19.081,256.246,40.449,129.267,-241.786,993,75.21,32.20,0.000
757,2024-07-27,80.74,176.67,2.05,19.63,110.479,33.307,74.958,184.012,216.412,50.927,18.856,244.051,2.180,32.001,-251.655,992,74.79,32.90,0.000


In [4]:
# Names of the dataframe columns that are used as covariates (every column except the price target)
future_covariates_columns = [
    'Solar_radiation (W/m2)',
    'Wind_speed (m/s)',
    'Temperature (°C)',
    'Biomass (GWh)',
    'Hard_coal (GWh)',
    'Hydro (GWh)',
    'Lignite (GWh)',
    'Natural_gas (GWh)',
    'Other (GWh)',
    'Pumped_storage_generation (GWh)',
    'Solar_energy (GWh)',
    'Wind_offshore (GWh)',
    'Wind_onshore (GWh)',
    'Net_total_export_import (GWh)',
    'BEV_vehicles',
    'Oil_price (EUR)',
    'TTF_gas_price (€/MWh)',
    'Nuclear_energy (GWh)',
]

In [5]:
import numpy as np
import pandas as pd
import plotly.graph_objs as go
from pytorch_lightning.callbacks import TQDMProgressBar

from darts import TimeSeries, concatenate
from darts.utils.callbacks import TFMProgressBar
from darts.models import TCNModel, RNNModel
from darts.dataprocessing.transformers import Scaler
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.metrics import mape, r2_score
from darts.utils.missing_values import fill_missing_values
from darts.datasets import AirPassengersDataset, SunspotsDataset, EnergyDataset

import warnings

# Suppress every Python warning from here on (no category or module filter is given).
warnings.filterwarnings("ignore")

import logging

# Disable all logging calls of severity CRITICAL and below, for every logger.
# NOTE(review): together with the warnings filter this hides all library diagnostics —
# presumably meant to keep the notebook output clean; re-enable when debugging training.
logging.disable(logging.CRITICAL)


def generate_torch_kwargs():
    """
    Build the extra keyword arguments shared by the torch-based models.

    Returns
    -------
    dict
        A single-entry dict ``{"pl_trainer_kwargs": {...}}`` that selects the CPU
        accelerator and installs a progress bar limited to the training stage.
    """
    train_only_bar = TFMProgressBar(enable_train_bar_only=True)
    trainer_options = {"accelerator": "cpu", "callbacks": [train_only_bar]}
    return {"pl_trainer_kwargs": trainer_options}



In [6]:
# Build the covariate series (each one is created exactly once).
# NOTE: despite the "future_covariates" naming, the model cells pass these series as `past_covariates`.
future_covariates_train = TimeSeries.from_dataframe(
    train_df, 'Date', future_covariates_columns
).astype('float32')
# Full train+test history on a daily grid; dates missing from the data are inserted by darts
future_covariates_full = TimeSeries.from_dataframe(
    df, 'Date', future_covariates_columns, fill_missing_dates=True, freq="D"
).astype('float32')

# Look-back window the test covariates must provide ahead of the first test date
input_chunk_length = 500  # equals the upper bound of the input_chunk_length range searched by Optuna
required_start_date = series_test.start_time() - pd.DateOffset(days=input_chunk_length)

# The covariates must reach the last time stamp of the test series
required_end_date = series_test.end_time()

# Check if future_covariates_full has sufficient data
if future_covariates_full.start_time() > required_start_date or future_covariates_full.end_time() < required_end_date:
    print("Warning: The future_covariates_full is not long enough to cover the required input chunk length and prediction range.")
    # Extend the future_covariates_full or adjust your dataset

# Test covariates = look-back window taken from the training period + the whole test period
future_covariates_test = future_covariates_full.slice(required_start_date, required_end_date)

# Separate scalers for the target and for the covariates
scaler_series = Scaler()
scaler_covariates = Scaler()

# Fit the scalers on the training data only
series_train_scaled = scaler_series.fit_transform(series_train)
future_covariates_train_scaled = scaler_covariates.fit_transform(future_covariates_train)

# Re-use the fitted scalers for the test series and test covariates
series_test_scaled = scaler_series.transform(series_test)
future_covariates_test_scaled = scaler_covariates.transform(future_covariates_test)

In [7]:
import optuna
from darts.metrics import mse, mae, rmse, r2_score, mape
from darts.models import TCNModel

# Define the objective function for Optuna
def objective(trial):
    """
    Optuna objective: train a TCN with the trial's hyperparameters and return its forecast MSE.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the input/output chunk lengths, dropout, kernel size,
        number of filters and dilation base.

    Returns
    -------
    float
        MSE between `series_test_scaled` and a forecast of the same length (scaled units).

    Raises
    ------
    optuna.TrialPruned
        If the sampled output_chunk_length is not strictly smaller than input_chunk_length.

    Notes
    -----
    NOTE(review): `series_test_scaled` is used both as the validation series during
    fitting and as the target of the returned score, so the hyperparameters are
    selected on the test data. Consider carving a validation split out of the
    training period instead.
    """
    # Suggest values for the hyperparameters
    input_chunk_length = trial.suggest_int('input_chunk_length', 100, 500)
    output_chunk_length = trial.suggest_int('output_chunk_length', 30, 100)
    dropout = trial.suggest_float('dropout', 0.0, 0.5)
    kernel_size = trial.suggest_int('kernel_size', 2, 7)
    num_filters = trial.suggest_int('num_filters', 4, 16)
    dilation_base = trial.suggest_int('dilation_base', 2, 10)

    # TCNModel requires output_chunk_length < input_chunk_length. Both ranges include
    # 100, so skip that combination instead of letting the constructor error abort the study.
    if output_chunk_length >= input_chunk_length:
        raise optuna.TrialPruned()

    # Create the TCN model with the trial's hyperparameters
    model = TCNModel(
        input_chunk_length=input_chunk_length,
        output_chunk_length=output_chunk_length,
        dropout=dropout,
        dilation_base=dilation_base,
        kernel_size=kernel_size,
        num_filters=num_filters,
        n_epochs=10,
        weight_norm=True,
        random_state=42,
        model_name="TCN_optuna",
        force_reset=True,  # every trial reuses the same model name
        **generate_torch_kwargs()
    )

    # Train the model with the current set of hyperparameters
    model.fit(
        series=series_train_scaled,
        past_covariates=future_covariates_train_scaled,
        val_series=series_test_scaled,
        val_past_covariates=future_covariates_test_scaled,
        verbose=False
    )

    # Forecast as many steps as the test series is long
    forecast = model.predict(n=len(series_test_scaled), past_covariates=future_covariates_test_scaled)

    # Mean Squared Error (on the scaled series) is the quantity to minimize
    return mse(series_test_scaled, forecast)

# Create the Optuna study and start optimization.
# The sampler is seeded so that a fresh "Restart & Run All" proposes the same
# sequence of trials (TPE is also Optuna's default single-objective sampler).
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=50, n_jobs=1)

# Print the best hyperparameters
print('Best hyperparameters: ', study.best_params)


Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Best hyperparameters:  {'input_chunk_length': 480, 'output_chunk_length': 30, 'dropout': 0.2951302515698671, 'kernel_size': 5, 'num_filters': 16, 'dilation_base': 10}


In [8]:
# Collect the tuned hyperparameters once, then build the final model from them
best_params = study.best_params
tuned_keys = (
    'input_chunk_length',
    'output_chunk_length',
    'dropout',
    'dilation_base',
    'kernel_size',
    'num_filters',
)
tuned_kwargs = {key: best_params[key] for key in tuned_keys}

best_model = TCNModel(
    **tuned_kwargs,
    n_epochs=20,  # twice the number of epochs used during the search
    weight_norm=True,
    random_state=0,
    model_name="TCN_optuna_best",
    force_reset=True,
    **generate_torch_kwargs()
)

# Fit on the scaled training data; the scaled test data serves as validation set
best_model.fit(
    series=series_train_scaled,
    past_covariates=future_covariates_train_scaled,
    val_series=series_test_scaled,
    val_past_covariates=future_covariates_test_scaled,
    verbose=True
)

# Forecast the whole test horizon in one call
forecast = best_model.predict(
    n=len(series_test_scaled),
    past_covariates=future_covariates_test_scaled,
)

# Bring forecast and test series back to the original price scale
forecast = scaler_series.inverse_transform(forecast)
series_test_original = scaler_series.inverse_transform(series_test_scaled)



Training: 0it [00:00, ?it/s]

NameError: name 'test_df_plotly' is not defined

In [13]:
from darts.metrics import mse, mae, rmse, r2_score, mape

# Plotly works on DataFrames, so convert both series first
test_df_plotly = series_test_original.pd_dataframe()
forecast_df_plotly = forecast.pd_dataframe()

# One line per series, test period only: (frame, legend label, line colour)
fig = go.Figure()
for frame, label, colour in (
    (test_df_plotly, 'Actual Test Data', 'darkblue'),
    (forecast_df_plotly, 'TCN Forecast on Test Data', 'red'),
):
    fig.add_trace(
        go.Scatter(
            x=frame.index,
            y=frame['Day_ahead_price (€/MWh)'],
            mode='lines',
            name=label,
            line=dict(color=colour),
        )
    )

# Legend pinned to the top-right corner of the plot, with a thin black border
legend_options = dict(
    x=1,
    y=1,
    xanchor='right',
    yanchor='top',
    bordercolor='black',
    borderwidth=1,
)
fig.update_layout(
    title='TCN Model - Test Performance',
    xaxis_title='Date',
    yaxis_title='Day Ahead Price (€/MWh)',
    legend=legend_options,
    template='plotly_white',
)
fig.show()

# Error metrics of the forecast against the unscaled test series
print(f'Mean Absolute Error on Test Set: {mae(series_test, forecast)}')
print(f'Mean Squared Error on Test Set: {mse(series_test, forecast)}')
print(f'Root Mean Squared Error on Test Set: {rmse(series_test, forecast)}')
print(f'R2 Score on Test Set: {r2_score(series_test, forecast)}')
print(f'Mean Absolute Percentage Error on Test Set: {mape(series_test, forecast)}')


Mean Absolute Error on Test Set: 178.89859008789062
Mean Squared Error on Test Set: 40766.86328125
Root Mean Squared Error on Test Set: 201.90805053710938
R2 Score on Test Set: -2.01061749458313
Mean Absolute Percentage Error on Test Set: 589.3053588867188
