In [1]:
import pandas as pd 
import numpy as np
import torch 
from darts import TimeSeries
from optuna.integration import PyTorchLightningPruningCallback
from pytorch_lightning.callbacks import EarlyStopping

In [2]:
# Import the data 
def load_and_prepare_data(file_path):
    """
    Load energy prices data from a CSV file, ensure chronological order, and convert 'Date' to datetime.
    """
    df = pd.read_csv(file_path)
    df.sort_values('Date', inplace=True)
    # Convert 'date' column to datetime
    df['Date'] = pd.to_datetime(df['Date'])
    #df.set_index('Date', inplace=True)
    df = pd.DataFrame(df)
    return df

In [3]:
# Load in the train and test data
train_df = load_and_prepare_data('../../data/Final_data/train_df.csv')
test_df = load_and_prepare_data('../../data/Final_data/test_df.csv')

# Concatenate the train and test data
df = pd.concat([train_df, test_df])
df['Date'] = pd.to_datetime(df['Date'])

# Create a time series object
series_train = TimeSeries.from_dataframe(train_df, 'Date', 'Day_ahead_price (€/MWh)').astype('float32')
series_test = TimeSeries.from_dataframe(test_df, 'Date', 'Day_ahead_price (€/MWh)').astype('float32')

# Show 
df

Unnamed: 0,Date,Day_ahead_price (€/MWh),Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),Other (GWh),Pumped_storage_generation (GWh),Solar_energy (GWh),Wind_offshore (GWh),Wind_onshore (GWh),Net_total_export_import (GWh),BEV_vehicles,Oil_price (EUR),TTF_gas_price (€/MWh),Nuclear_energy (GWh)
0,2012-01-01,18.19,14.75,4.95,8.39,98.605,108.454,51.011,325.337,188.811,54.040,19.314,6.263,3.404,235.467,54.662,6,99.64,21.10,250.979
1,2012-01-02,33.82,15.12,5.00,7.41,98.605,222.656,51.862,343.168,229.293,54.166,28.892,6.312,3.350,231.772,-64.477,6,100.04,20.00,258.671
2,2012-01-03,35.03,31.88,7.77,5.23,98.605,162.204,48.851,336.773,241.297,53.518,21.072,24.226,7.292,504.484,-35.078,6,100.44,20.90,271.495
3,2012-01-04,32.16,25.21,8.04,4.78,98.605,189.633,47.101,323.976,252.289,52.194,28.300,14.157,7.828,541.528,22.924,6,103.15,21.40,270.613
4,2012-01-05,20.35,13.46,9.98,4.23,98.605,175.733,45.854,327.502,259.018,52.179,31.887,4.728,8.280,572.819,35.618,6,103.92,21.30,287.555
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
754,2024-07-24,66.61,225.04,3.47,17.54,110.007,43.469,85.857,199.246,194.291,54.026,20.934,325.285,49.360,179.921,-168.705,992,75.75,32.63,0.000
755,2024-07-25,78.34,272.71,2.12,17.85,110.410,50.676,82.632,195.983,209.610,52.963,18.766,394.116,51.053,42.885,-194.496,992,76.36,31.70,0.000
756,2024-07-26,93.04,172.33,2.60,19.09,110.852,42.333,79.531,205.273,205.773,52.616,19.081,256.246,40.449,129.267,-241.786,993,75.21,32.20,0.000
757,2024-07-27,80.74,176.67,2.05,19.63,110.479,33.307,74.958,184.012,216.412,50.927,18.856,244.051,2.180,32.001,-251.655,992,74.79,32.90,0.000


In [4]:
# Define the future covariates columns from your dataframe
future_covariates_columns = ['Solar_radiation (W/m2)', 'Wind_speed (m/s)', 'Temperature (°C)', 
                             'Biomass (GWh)', 'Hard_coal (GWh)', 'Hydro (GWh)', 'Lignite (GWh)', 
                             'Natural_gas (GWh)', 'Other (GWh)', 'Pumped_storage_generation (GWh)', 
                             'Solar_energy (GWh)', 'Wind_offshore (GWh)', 'Wind_onshore (GWh)', 
                             'Net_total_export_import (GWh)', 'BEV_vehicles', 'Oil_price (EUR)', 
                             'TTF_gas_price (€/MWh)', 'Nuclear_energy (GWh)']

## TFT Pytorch Forecasting


In [6]:
# add time index
df["time_idx"] = df["Date"].dt.year * 12 + df["Date"].dt.month
df["time_idx"] -= df["time_idx"].min()

# add additional features
df["Month"] = df.Date.dt.month.astype(str).astype("category")  # categories have be strings

In [7]:
df

Unnamed: 0,Date,Day_ahead_price (€/MWh),Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),...,Solar_energy (GWh),Wind_offshore (GWh),Wind_onshore (GWh),Net_total_export_import (GWh),BEV_vehicles,Oil_price (EUR),TTF_gas_price (€/MWh),Nuclear_energy (GWh),time_idx,Month
0,2012-01-01,18.19,14.75,4.95,8.39,98.605,108.454,51.011,325.337,188.811,...,6.263,3.404,235.467,54.662,6,99.64,21.10,250.979,0,1
1,2012-01-02,33.82,15.12,5.00,7.41,98.605,222.656,51.862,343.168,229.293,...,6.312,3.350,231.772,-64.477,6,100.04,20.00,258.671,0,1
2,2012-01-03,35.03,31.88,7.77,5.23,98.605,162.204,48.851,336.773,241.297,...,24.226,7.292,504.484,-35.078,6,100.44,20.90,271.495,0,1
3,2012-01-04,32.16,25.21,8.04,4.78,98.605,189.633,47.101,323.976,252.289,...,14.157,7.828,541.528,22.924,6,103.15,21.40,270.613,0,1
4,2012-01-05,20.35,13.46,9.98,4.23,98.605,175.733,45.854,327.502,259.018,...,4.728,8.280,572.819,35.618,6,103.92,21.30,287.555,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
754,2024-07-24,66.61,225.04,3.47,17.54,110.007,43.469,85.857,199.246,194.291,...,325.285,49.360,179.921,-168.705,992,75.75,32.63,0.000,150,7
755,2024-07-25,78.34,272.71,2.12,17.85,110.410,50.676,82.632,195.983,209.610,...,394.116,51.053,42.885,-194.496,992,76.36,31.70,0.000,150,7
756,2024-07-26,93.04,172.33,2.60,19.09,110.852,42.333,79.531,205.273,205.773,...,256.246,40.449,129.267,-241.786,993,75.21,32.20,0.000,150,7
757,2024-07-27,80.74,176.67,2.05,19.63,110.479,33.307,74.958,184.012,216.412,...,244.051,2.180,32.001,-251.655,992,74.79,32.90,0.000,150,7


In [8]:
# Create a time series object
series_train = TimeSeries.from_dataframe(train_df, 'Date', 'Day_ahead_price (€/MWh)').astype('float32')
series_test = TimeSeries.from_dataframe(test_df, 'Date', 'Day_ahead_price (€/MWh)').astype('float32')

## Approach 2

In [9]:
import pandas as pd
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
from sklearn.preprocessing import RobustScaler
import torch
from pytorch_lightning import Trainer
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# Target and known covariates
target = "Day_ahead_price (€/MWh)"
time_idx = "time_idx"
group_id = "group"  # Dummy group ID

# Future and historical covariates (known for all time)
known_reals = ["Solar_radiation (W/m2)", "Wind_speed (m/s)", "Temperature (°C)", 
               "Biomass (GWh)", "Hard_coal (GWh)", "Hydro (GWh)", "Lignite (GWh)", 
               "Natural_gas (GWh)", "Other (GWh)", "Pumped_storage_generation (GWh)", 
               "Solar_energy (GWh)", "Wind_offshore (GWh)", "Wind_onshore (GWh)", 
               "Net_total_export_import (GWh)", "Oil_price (EUR)", "TTF_gas_price (€/MWh)", 
               "Nuclear_energy (GWh)", "BEV_vehicles"]

# Convert date column to time_idx integer
train_df["Date"] = pd.to_datetime(train_df["Date"])
test_df["Date"] = pd.to_datetime(test_df["Date"])

# Create integer time index (0 to len(train_df) - 1)
train_df["time_idx"] = (train_df["Date"] - train_df["Date"].min()).dt.days
test_df["time_idx"] = (test_df["Date"] - train_df["Date"].min()).dt.days

# Add a dummy group ID column
train_df["group"] = 0  # Single group with ID 0
test_df["group"] = 0  # Same for the test set

# Scaling the data
scaler = RobustScaler()

# Applying scaling to known reals
train_df[known_reals] = scaler.fit_transform(train_df[known_reals])
test_df[known_reals] = scaler.transform(test_df[known_reals])

# Now creating the dataset using PyTorch Forecasting's TimeSeriesDataSet
max_encoder_length = 90  # Lookback period
max_prediction_length = 30  # Forecast period

# Define the training TimeSeriesDataSet (group_ids is required)
training = TimeSeriesDataSet(
    train_df,
    time_idx=time_idx,
    target=target,
    group_ids=[group_id],  # Single group ID column
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    time_varying_known_reals=known_reals,
    time_varying_unknown_reals=[target],  # The target is unknown for the future
    add_relative_time_idx=True,  # Adds a time index column automatically
    add_target_scales=True,  # Adds scales for the target variable
    add_encoder_length=True,  # Adds the length of each series as a feature
)

# Validation dataset
validation = TimeSeriesDataSet.from_dataset(training, test_df, min_prediction_idx=train_df["time_idx"].max())

# Dataloaders for training and validation
batch_size = 64
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=8)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size, num_workers=8)

### Step 3: Defining the TFT Model

# Define the TFT model correctly, as a LightningModule
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,  # learning rate for training
    hidden_size=32,  # number of hidden units
    attention_head_size=4,  # number of attention heads
    dropout=0.1,  # dropout rate
    hidden_continuous_size=8,  # continuous variable size
    loss=QuantileLoss(),  # quantile loss for multi-quantile forecasting
    log_interval=10,  # interval for logging
    reduce_on_plateau_patience=4,  # reduce learning rate if validation loss does not improve
)

# This should work correctly with Trainer now:
trainer = Trainer(
    max_epochs=5,  # Set this to a higher value for real training
    devices=1,  # Number of GPUs or CPUs
    accelerator='gpu' if torch.cuda.is_available() else 'cpu',  # Automatically select GPU if available
    gradient_clip_val=0.1,
)

# Train the model
trainer.fit(
    tft, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader
)


### Step 4: Hyperparameter Optimization

# You can also use hyperparameter optimization if desired (optional)
hyperparameters = optimize_hyperparameters(
     train_dataloader=train_dataloader,
     val_dataloader=val_dataloader,
     model_path="optuna_test",
     n_trials=10,
     max_epochs=5,
     gradient_clip_val_range=(0.01, 1.0),
     hidden_size_range=(8, 128),
     hidden_continuous_size_range=(8, 128),
     attention_head_size_range=(1, 4),
     learning_rate_range=(0.001, 0.1),
     dropout_range=(0.1, 0.5),
     trainer_kwargs=dict(gpus=1 if torch.cuda.is_available() else 0),
 )

### Step 5: Make Predictions

# Best model path
best_model_path = trainer.checkpoint_callback.best_model_path
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)

# Make predictions
raw_predictions, x = best_tft.predict(val_dataloader, mode="raw", return_x=True)

# Inverse scale the predictions if necessary
predictions = scaler.inverse_transform(raw_predictions)

# Calculate metrics like MAE, MAPE, and RMSE
mae_metric = MAE()
mape_metric = MAPE()
rmse_metric = RMSE()

mae_score = mae_metric(predicted_target, actual_target)
mape_score = mape_metric(predicted_target, actual_target)
rmse_score = rmse_metric(predicted_target, actual_target)

print(f"Mean Absolute Error (MAE): {mae_score}")
print(f"Mean Absolute Percentage Error (MAPE): {mape_score}")
print(f"Root Mean Squared Error (RMSE): {rmse_score}")

### Step 6: Plot Forecast vs Actual using Plotly

# Prepare data for plotting
test_dates = test_df["Date"].values[-len(predicted_target):]

fig = go.Figure()

# Add actual test data trace
fig.add_trace(go.Scatter(x=test_dates, y=actual_target,
                         mode='lines', name='Actual Data', line=dict(color='darkblue')))

# Add forecast data trace
fig.add_trace(go.Scatter(x=test_dates, y=predicted_target,
                         mode='lines', name='TFT Forecast', line=dict(color='red')))

# Update layout
fig.update_layout(
    title='TFT Model - Forecast vs Actual on Test Data',
    xaxis_title='Date',
    yaxis_title='Day Ahead Price (€/MWh)',
    legend=dict(
        x=1,
        y=1,
        xanchor='right',
        yanchor='top',
        bordercolor='black',
        borderwidth=1
    ),
    template='plotly_white'
)

# Show the plot
fig.show()


AttributeError: module 'numpy' has no attribute 'float'.
`np.float` was a deprecated alias for the builtin `float`. To avoid this error in existing code, use `float` by itself. Doing this will not modify any behavior and is safe. If you specifically wanted the numpy scalar type, use `np.float64` here.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations