## Data Simulation for future covariates to predict energy prices 


In [74]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import STL
from darts import TimeSeries
from darts.models import ExponentialSmoothing


In [75]:
# Import the data 
def load_and_prepare_data(file_path):
    """
    Load energy prices data from a CSV file, ensure chronological order, and convert 'Date' to datetime.
    """
    df = pd.read_csv(file_path)
    df.sort_values('Date', inplace=True)
    df.set_index('Date', inplace=True)
    df = pd.DataFrame(df)
    return df

In [76]:
# Import the data
df = load_and_prepare_data('../../data/Final_data/final_data_july.csv')

# Reset the index
df.reset_index(inplace=True)
# Ensure the 'Date' column is properly parsed and set as the index
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# Ensure the data has a daily frequency (Filling missing dates if necessary)
df = df.asfreq('D')  # This makes sure the data has a daily frequency

In [77]:
df

Unnamed: 0_level_0,Day_ahead_price (€/MWh),Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),Other (GWh),Pumped_storage_generation (GWh),Solar_energy (GWh),Wind_offshore (GWh),Wind_onshore (GWh),Net_total_export_import (GWh),BEV_vehicles,Oil_price (EUR),TTF_gas_price (€/MWh),Nuclear_energy (GWh)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2012-01-01,18.19,14.75,4.95,8.39,98.605,108.454,51.011,325.337,188.811,54.040,19.314,6.263,3.404,235.467,54.662,6,99.64,21.10,250.979
2012-01-02,33.82,15.12,5.00,7.41,98.605,222.656,51.862,343.168,229.293,54.166,28.892,6.312,3.350,231.772,-64.477,6,100.04,20.00,258.671
2012-01-03,35.03,31.88,7.77,5.23,98.605,162.204,48.851,336.773,241.297,53.518,21.072,24.226,7.292,504.484,-35.078,6,100.44,20.90,271.495
2012-01-04,32.16,25.21,8.04,4.78,98.605,189.633,47.101,323.976,252.289,52.194,28.300,14.157,7.828,541.528,22.924,6,103.15,21.40,270.613
2012-01-05,20.35,13.46,9.98,4.23,98.605,175.733,45.854,327.502,259.018,52.179,31.887,4.728,8.280,572.819,35.618,6,103.92,21.30,287.555
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-07-24,66.61,225.04,3.47,17.54,110.007,43.469,85.857,199.246,194.291,54.026,20.934,325.285,49.360,179.921,-168.705,992,75.75,32.63,0.000
2024-07-25,78.34,272.71,2.12,17.85,110.410,50.676,82.632,195.983,209.610,52.963,18.766,394.116,51.053,42.885,-194.496,992,76.36,31.70,0.000
2024-07-26,93.04,172.33,2.60,19.09,110.852,42.333,79.531,205.273,205.773,52.616,19.081,256.246,40.449,129.267,-241.786,993,75.21,32.20,0.000
2024-07-27,80.74,176.67,2.05,19.63,110.479,33.307,74.958,184.012,216.412,50.927,18.856,244.051,2.180,32.001,-251.655,992,74.79,32.90,0.000


## Check if additive or mulitplicative model works better for predictions

In [78]:
from prophet import Prophet
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Function to evaluate model performance
def evaluate_model(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    return mse, mae, mape

# Split the data into 80% for training and 20% for backtesting
train_size = int(len(df) * 0.8)
train_df = df[:train_size]
test_df = df[train_size:]

train_df = train_df.reset_index().rename(columns={'Date': 'ds', 'Solar_radiation (W/m2)': 'y'})
test_df = test_df.reset_index().rename(columns={'Date': 'ds', 'Solar_radiation (W/m2)': 'y'})

# Create a future DataFrame for backtesting (to predict the next 20%)
future_test_dates = test_df['ds']

# Train an additive model
additive_model = Prophet(
    seasonality_mode='additive', 
    yearly_seasonality=20,
    weekly_seasonality=False,
    daily_seasonality=False )
additive_model.fit(train_df)

# Make future predictions for the backtest period
forecast_additive = additive_model.predict(test_df)

# Evaluate the additive model
additive_mse, additive_mae, additive_mape = evaluate_model(test_df['y'], forecast_additive['yhat'])
print(f"Additive Model: MSE = {additive_mse}, MAE = {additive_mae}, MAPE = {additive_mape}")

# Train a multiplicative model
multiplicative_model = Prophet(
    seasonality_mode='multiplicative', 
    yearly_seasonality=20,
    weekly_seasonality=False,  
    daily_seasonality=False )
multiplicative_model.fit(train_df)

# Make future predictions for the backtest period
forecast_multiplicative = multiplicative_model.predict(test_df)

# Evaluate the multiplicative model
multiplicative_mse, multiplicative_mae, multiplicative_mape = evaluate_model(test_df['y'], forecast_multiplicative['yhat'])
print(f"Multiplicative Model: MSE = {multiplicative_mse}, MAE = {multiplicative_mae}, MAPE = {multiplicative_mape}")

# Compare the results and decide which is better
if additive_mse < multiplicative_mse:
    print("Additive model performs better.")
else:
    print("Multiplicative model performs better.")


14:10:37 - cmdstanpy - INFO - Chain [1] start processing
14:10:37 - cmdstanpy - INFO - Chain [1] done processing
14:10:38 - cmdstanpy - INFO - Chain [1] start processing


Additive Model: MSE = 1734.6298192391282, MAE = 31.498838368119518, MAPE = 0.2568660630639314


14:10:38 - cmdstanpy - INFO - Chain [1] done processing


Multiplicative Model: MSE = 1878.1001067329864, MAE = 32.39687451206886, MAPE = 0.25838165906311944
Additive model performs better.


In [79]:
import pandas as pd
from prophet import Prophet
from prophet.plot import plot_plotly
import plotly.graph_objs as go

# Load your data (adjust the path and data columns)
# Prepare data for Prophet
df_prophet = pd.DataFrame({
    'ds': df.index, 
    'y': df['Temperature (°C)']
})

# Initialize the Prophet model
m = Prophet(
    seasonality_mode='additive',  # Additive seasonality is not always necessary
    yearly_seasonality=20,
    weekly_seasonality=False,  # Disable weekly seasonality if not needed
    daily_seasonality=False    # Disable daily seasonality
)

# Fit the model
m.fit(df_prophet)

# Make future predictions for the next 2 years (730 days)
future = m.make_future_dataframe(periods=730)
forecast = m.predict(future)

# Use Plotly for visualization 
fig = plot_plotly(m, forecast)

# Add additional layout elements if necessary
fig.update_layout(
    title="Forecasted Solar Radiation",
    xaxis_title="Date",
    yaxis_title="Solar Radiation (W/m²)",
    legend=dict(
        x=1, y=1,
        xanchor="right",
        yanchor="top"
    ),
    template="plotly_white"
)

# Show the Plotly plot
fig.show()


14:10:38 - cmdstanpy - INFO - Chain [1] start processing
14:10:39 - cmdstanpy - INFO - Chain [1] done processing


In [80]:
forecast

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2012-01-01,8.701837,-1.851120,6.453316,8.701837,8.701837,-6.377179,-6.377179,-6.377179,-6.377179,-6.377179,-6.377179,0.0,0.0,0.0,2.324658
1,2012-01-02,8.702947,-1.602707,6.306857,8.702947,8.702947,-6.488921,-6.488921,-6.488921,-6.488921,-6.488921,-6.488921,0.0,0.0,0.0,2.214026
2,2012-01-03,8.704057,-1.763832,6.013120,8.704057,8.704057,-6.579226,-6.579226,-6.579226,-6.579226,-6.579226,-6.579226,0.0,0.0,0.0,2.124832
3,2012-01-04,8.705167,-1.830484,6.028777,8.705167,8.705167,-6.652618,-6.652618,-6.652618,-6.652618,-6.652618,-6.652618,0.0,0.0,0.0,2.052549
4,2012-01-05,8.706277,-2.032344,6.038913,8.706277,8.706277,-6.717422,-6.717422,-6.717422,-6.717422,-6.717422,-6.717422,0.0,0.0,0.0,1.988856
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5318,2026-07-24,11.825729,17.641110,25.368894,11.660704,11.996445,9.771340,9.771340,9.771340,9.771340,9.771340,9.771340,0.0,0.0,0.0,21.597069
5319,2026-07-25,11.826806,17.768479,25.910711,11.661347,11.997908,9.800434,9.800434,9.800434,9.800434,9.800434,9.800434,0.0,0.0,0.0,21.627240
5320,2026-07-26,11.827884,17.736341,25.707574,11.661989,11.999371,9.813446,9.813446,9.813446,9.813446,9.813446,9.813446,0.0,0.0,0.0,21.641329
5321,2026-07-27,11.828961,17.512075,25.683835,11.662640,12.000834,9.812940,9.812940,9.812940,9.812940,9.812940,9.812940,0.0,0.0,0.0,21.641901


In [81]:
import pandas as pd
from prophet import Prophet
from prophet.plot import plot_plotly
import plotly.graph_objs as go

# Assuming df contains your original data with the covariates
covariates = ['Solar_radiation (W/m2)', 'Wind_speed (m/s)', 'Wind_onshore (GWh)', 'Wind_offshore (GWh)', 'Temperature (°C)']

# DataFrame to store the forecast results
forecast_results = pd.DataFrame()

# Iterate over each covariate
for covariate in covariates:
    print(f"Forecasting for {covariate}...")

    # Prepare data for Prophet for each covariate
    df_prophet = pd.DataFrame({
        'ds': df.index, 
        'y': df[covariate]
    })
    
    # Initialize the Prophet model with multiplicative and additive options
    m_additive = Prophet(
        seasonality_mode='additive',  
        yearly_seasonality=20,
        weekly_seasonality=False,
        daily_seasonality=False, 
        changepoint_prior_scale=0.1, 
        n_changepoints=50 # Adjust the changepoint prior scale if needed
    )

    # Fit the models
    m_additive.fit(df_prophet)

    # Make future predictions for the next 2 years (730 days)
    future = m_additive.make_future_dataframe(periods=730)
    forecast_additive = m_additive.predict(future)

    # Slice the forecast to get values only after the end of the original data
    forecast_after = forecast_additive[forecast_additive['ds'] > df.index[-1]]

    # Store the forecasts in the results DataFrame, only for the forecast period
    forecast_results[f'{covariate}'] = forecast_after['yhat'].values

    # Store the corresponding dates in the 'Date' column (for the first iteration only)
    if 'Date' not in forecast_results.columns:
        forecast_results['Date'] = forecast_after['ds'].values

    # Use Plotly for visualization (only for one seasonality mode, change if needed)
    fig = plot_plotly(m_additive, forecast_additive)

    # Add layout for the plot
    fig.update_layout(
        title=f"Forecasted {covariate}",
        xaxis_title="Date",
        yaxis_title=covariate,
        legend=dict(
            x=1, y=1,
            xanchor="right",
            yanchor="top"
        ),
        template="plotly_white"
    )

    # Show the Plotly plot
    fig.show()

# After the loop, forecast_results will contain forecasted values for all covariates
forecast_results.set_index('Date', inplace=True)  # Set 'Date' as the index after adding all covariates
print(forecast_results.head())  # Show the first few rows of the combined forecast


Forecasting for Solar_radiation (W/m2)...


14:10:40 - cmdstanpy - INFO - Chain [1] start processing
14:10:40 - cmdstanpy - INFO - Chain [1] done processing


14:10:41 - cmdstanpy - INFO - Chain [1] start processing


Forecasting for Wind_speed (m/s)...


14:10:41 - cmdstanpy - INFO - Chain [1] done processing


14:10:42 - cmdstanpy - INFO - Chain [1] start processing


Forecasting for Wind_onshore (GWh)...


14:10:43 - cmdstanpy - INFO - Chain [1] done processing


14:10:44 - cmdstanpy - INFO - Chain [1] start processing


Forecasting for Wind_offshore (GWh)...


14:10:44 - cmdstanpy - INFO - Chain [1] done processing


14:10:45 - cmdstanpy - INFO - Chain [1] start processing


Forecasting for Temperature (°C)...


14:10:46 - cmdstanpy - INFO - Chain [1] done processing


            Solar_radiation (W/m2)  Wind_speed (m/s)  Wind_onshore (GWh)  \
Date                                                                       
2024-07-29              213.606087          3.226179          221.376995   
2024-07-30              211.485189          3.245633          224.183103   
2024-07-31              209.887579          3.262665          226.400266   
2024-08-01              208.823461          3.276353          227.951218   
2024-08-02              208.257361          3.285928          228.790108   

            Wind_offshore (GWh)  Temperature (°C)  
Date                                               
2024-07-29            53.051468         20.932865  
2024-07-30            52.916804         20.894650  
2024-07-31            52.859785         20.843814  
2024-08-01            52.907991         20.779382  
2024-08-02            53.072985         20.700881  


## Multistepforecasting with TFT

In [91]:
import pandas as pd
import numpy as np
from darts import TimeSeries
from darts.models import TFTModel
from darts.dataprocessing.transformers import Scaler
from darts.metrics import mape, rmse
import plotly.graph_objs as go

# Step 1: Prepare the Data
# Assuming df is your DataFrame with 'Date' as index and columns like 'Day_ahead_price (€/MWh)', 'Wind_speed', 'Solar_radiation', etc.

# Target: Energy Price
series_price = TimeSeries.from_dataframe(df, 'Date', 'Day_ahead_price (€/MWh)').astype('float32')

# Covariates: Wind speed, Solar radiation, etc.
covariates_columns = ['Wind_speed (m/s)', 'Solar_radiation (W/m2)', 'Temperature (°C)', 'Wind_onshore (GWh)', 'Wind_offshore (GWh)']
covariates = TimeSeries.from_dataframe(df, 'Date', covariates_columns).astype('float32')

# Scale the data (Scaling helps neural network models to converge faster)
scaler_price = Scaler()
scaler_covariates = Scaler()

series_price_scaled = scaler_price.fit_transform(series_price)
covariates_scaled = scaler_covariates.fit_transform(covariates)

# Step 2: Split Data into Train and Validation sets
train_size = int(0.8 * len(series_price_scaled))  # 80% training, 20% validation
series_price_train, series_price_val = series_price_scaled[:train_size], series_price_scaled[train_size:]
covariates_train, covariates_val = covariates_scaled[:train_size], covariates_scaled[train_size:]

# Step 3: Define the TFT Model
model = TFTModel(
    input_chunk_length=300,    # Lookback window size
    output_chunk_length=1,     # Forecasting horizon (forecast one day at a time)
    hidden_size=64,            # Size of the hidden layers
    lstm_layers=2,             # Number of LSTM layers
    num_attention_heads=4,     # Attention heads for the attention mechanism
    dropout=0.1,               # Dropout to prevent overfitting
    add_relative_index=True,   # Adds a relative time index as a future covariate
    random_state=42
)

# Step 4: Ensure that covariates cover the required range for the forecast

# Set based on desired look-back period
input_chunk_length = 300  # Lookback period for the model

# Get the required start date for the covariates, considering the lookback period
required_start_date = pd.Timestamp(df['Date'].iloc[train_size]) - pd.DateOffset(days=input_chunk_length)
required_end_date = pd.Timestamp(df['Date'].iloc[train_size]) + pd.DateOffset(days=n_forecast_steps - 1)

# Ensure covariates cover the required date range
if covariates.start_time() > required_start_date or covariates.end_time() < required_end_date:
    print("Warning: The covariates do not cover the required date range. Adjust your dataset accordingly.")
    
# Slice the covariates to match the required range
covariates_val = covariates_scaled.slice(required_start_date, required_end_date)

# Step 5: Train the Model
model.fit(
    series=series_price_train,              # Target series (energy price)      # Past covariates (known for history and future)
    future_covariates=covariates_train,       # Ensure future covariates cover the required range
    verbose=True,
    epochs=1                               # Number of epochs for training
)

# Step 6: Forecast the Future
# Forecast for the next 730 days (2 years)
n_forecast_steps = 730
forecast = model.predict(n=n_forecast_steps, future_covariates=covariates_val)

# Inverse transform the forecast back to original scale
forecast_original = scaler_price.inverse_transform(forecast)

# Step 7: Evaluate the Model
mape_val = mape(series_price_val, forecast_original)
rmse_val = rmse(series_price_val, forecast_original)

print(f"MAPE: {mape_val:.2f}%")
print(f"RMSE: {rmse_val:.2f}")

# Step 8: Plot the Results with Plotly

# Create a Plotly figure
fig = go.Figure()

# Add the actual series (validation data)
fig.add_trace(go.Scatter(
    x=series_price_val.time_index,
    y=series_price_val.values().flatten(),
    mode='lines',
    name='Actual Price',
    line=dict(color='blue')
))

# Add the forecasted series
fig.add_trace(go.Scatter(
    x=forecast_original.time_index,
    y=forecast_original.values().flatten(),
    mode='lines',
    name='TFT Forecast',
    line=dict(color='red')
))

# Update the layout
fig.update_layout(
    title="TFT Model - Energy Price Forecast",
    xaxis_title="Date",
    yaxis_title="Day-ahead Price (€/MWh)",
    legend=dict(x=0.01, y=0.99, bordercolor="Black", borderwidth=1),
    template="plotly_white"
)

# Show the Plotly plot
fig.show()


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

   | Name                              | Type                             | Params | Mode 
------------------------------------------------------------------------------------------------
0  | train_metrics                     | MetricCollection                 | 0      | train
1  | val_metrics                       | MetricCollection                 | 0      | train
2  | input_embeddings                  | _MultiEmbedding                  | 0      | train
3  | static_covariates_vsn             | _VariableSelectionNetwork        | 0      | train
4  | encoder_vsn                       | _VariableSelectionNetwork        | 12.5 K | train
5  | decoder_vsn                       | _VariableSelectionNetwork        | 10.6 K | train
6  | static_context_grn                | _GatedResidualNetwork            | 16.8 K | train
7  | static_context_hidden_encoder_grn | _GatedResidualNetw

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

MAPE: 19532.88%
RMSE: 57.16


## TFT Forecasting for one covariate 

In [96]:
import pandas as pd
import numpy as np
from darts import TimeSeries
from darts.models import TFTModel
from darts.dataprocessing.transformers import Scaler
from darts.metrics import mape, rmse
import plotly.graph_objs as go

# Step 1: Prepare the Data
# Assuming df is your DataFrame with 'Date' as index and columns like 'Day_ahead_price (€/MWh)', 'Wind_speed', 'Solar_radiation', etc.

# Target: Energy Price
series_price = TimeSeries.from_dataframe(df, 'Date', 'Temperature (°C)').astype('float32')

# Scale the data (Scaling helps neural network models to converge faster)
scaler_price = Scaler()

series_price_scaled = scaler_price.fit_transform(series_price)


# Step 2: Split Data into Train and Validation sets
train_size = int(0.8 * len(series_price_scaled))  # 80% training, 20% validation
series_price_train, series_price_val = series_price_scaled[:train_size], series_price_scaled[train_size:]

# Step 3: Define the TFT Model
model = TFTModel(
    input_chunk_length=300,    # Lookback window size
    output_chunk_length=1,     # Forecasting horizon (forecast one day at a time)
    hidden_size=64,            # Size of the hidden layers
    lstm_layers=2,             # Number of LSTM layers
    num_attention_heads=4,     # Attention heads for the attention mechanism
    dropout=0.1,               # Dropout to prevent overfitting
    add_relative_index=True,  
    add_encoders={"cyclic": {"future": ["month"]}},
    random_state=42
)

# Step 4: Ensure that covariates cover the required range for the forecast

# Set based on desired look-back period
input_chunk_length = 300  # Lookback period for the model

# Get the required start date for the covariates, considering the lookback period
required_start_date = pd.Timestamp(df['Date'].iloc[train_size]) - pd.DateOffset(days=input_chunk_length)
required_end_date = pd.Timestamp(df['Date'].iloc[train_size]) + pd.DateOffset(days=n_forecast_steps - 1)

# Step 5: Train the Model
model.fit(
    series=series_price_train,              # Target series (energy price)      # Past covariates (known for history and future)      # Ensure future covariates cover the required range
    verbose=True,
    epochs=10                               # Number of epochs for training
)

# Step 6: Forecast the Future
# Forecast for the next 730 days (2 years)
n_forecast_steps = len(series_price_val)
forecast = model.predict(n=n_forecast_steps)

# Inverse transform the forecast back to original scale
forecast_original = scaler_price.inverse_transform(forecast)
series_price_og = scaler_price.inverse_transform(series_price_val)

# Step 7: Evaluate the Model
mape_val = mape(series_price_og, forecast_original)
rmse_val = rmse(series_price_og, forecast_original)

print(f"MAPE: {mape_val:.2f}%")
print(f"RMSE: {rmse_val:.2f}")

# Step 8: Plot the Results with Plotly

# Create a Plotly figure
fig = go.Figure()

# Add the actual series (validation data)
fig.add_trace(go.Scatter(
    x=series_price_og.time_index,
    y=series_price_og.values().flatten(),
    mode='lines',
    name='Actual Price',
    line=dict(color='blue')
))

# Add the forecasted series
fig.add_trace(go.Scatter(
    x=forecast_original.time_index,
    y=forecast_original.values().flatten(),
    mode='lines',
    name='TFT Forecast',
    line=dict(color='red')
))

# Update the layout
fig.update_layout(
    title="TFT Model - Temperature Forecast",
    xaxis_title="Date",
    yaxis_title="Day-ahead Price (€/MWh)",
    legend=dict(x=0.01, y=0.99, bordercolor="Black", borderwidth=1),
    template="plotly_white"
)

# Show the Plotly plot
fig.show()


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

   | Name                              | Type                             | Params | Mode 
------------------------------------------------------------------------------------------------
0  | train_metrics                     | MetricCollection                 | 0      | train
1  | val_metrics                       | MetricCollection                 | 0      | train
2  | input_embeddings                  | _MultiEmbedding                  | 0      | train
3  | static_covariates_vsn             | _VariableSelectionNetwork        | 0      | train
4  | encoder_vsn                       | _VariableSelectionNetwork        | 7.0 K  | train
5  | decoder_vsn                       | _VariableSelectionNetwork        | 5.2 K  | train
6  | static_context_grn                | _GatedResidualNetwork            | 16.8 K | train
7  | static_context_hidden_encoder_grn | _GatedResidualNetw

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

MAPE: 119.61%
RMSE: 5.21


In [97]:
import pandas as pd
import numpy as np
from darts import TimeSeries
from darts.models import TFTModel
from darts.dataprocessing.transformers import Scaler
from darts.metrics import mape, rmse
import plotly.graph_objs as go

# Step 1: Prepare the Data
# Assuming df is your DataFrame with 'Date' as index and columns like 'Temperature (°C)', etc.

# Target: Temperature
series_price = TimeSeries.from_dataframe(df, 'Date', 'Temperature (°C)').astype('float32')

# Scale the data (Scaling helps neural network models to converge faster)
scaler_price = Scaler()
series_price_scaled = scaler_price.fit_transform(series_price)

# Step 2: No splitting; use the entire dataset for training
series_price_train = series_price_scaled

# Step 3: Define the TFT Model (no covariates are used)
model = TFTModel(
    input_chunk_length=300,    # Lookback window size (how many days in the past the model will look at)
    output_chunk_length=1,     # Forecasting horizon (forecast one day at a time)
    hidden_size=64,            # Size of the hidden layers
    lstm_layers=2,             # Number of LSTM layers
    num_attention_heads=4,     # Attention heads for the attention mechanism
    dropout=0.1,               # Dropout to prevent overfitting
    add_relative_index=True,  
    random_state=42
)

# Step 4: Train the Model on the entire dataset
model.fit(
    series=series_price_train,  # Target series (temperature)
    verbose=True,
    epochs=10                   # Number of epochs for training
)

# Step 5: Forecast the Future
n_forecast_steps = 365  # Predict for the next 365 days (or adjust as needed)
forecast = model.predict(n=n_forecast_steps)

# Inverse transform the forecast back to original scale
forecast_original = scaler_price.inverse_transform(forecast)

# Step 6: Plot the Forecast Results with Plotly

# Create a Plotly figure
fig = go.Figure()

# Add the actual series (training data)
fig.add_trace(go.Scatter(
    x=series_price.time_index,
    y=series_price.values().flatten(),
    mode='lines',
    name='Training Data',
    line=dict(color='blue')
))

# Add the forecasted series (future predictions)
fig.add_trace(go.Scatter(
    x=forecast_original.time_index,
    y=forecast_original.values().flatten(),
    mode='lines',
    name='TFT Forecast',
    line=dict(color='red')
))

# Update the layout
fig.update_layout(
    title="TFT Model - Temperature Forecast",
    xaxis_title="Date",
    yaxis_title="Temperature (°C)",
    legend=dict(x=0.01, y=0.99, bordercolor="Black", borderwidth=1),
    template="plotly_white"
)

# Show the Plotly plot
fig.show()


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

   | Name                              | Type                             | Params | Mode 
------------------------------------------------------------------------------------------------
0  | train_metrics                     | MetricCollection                 | 0      | train
1  | val_metrics                       | MetricCollection                 | 0      | train
2  | input_embeddings                  | _MultiEmbedding                  | 0      | train
3  | static_covariates_vsn             | _VariableSelectionNetwork        | 0      | train
4  | encoder_vsn                       | _VariableSelectionNetwork        | 3.5 K  | train
5  | decoder_vsn                       | _VariableSelectionNetwork        | 1.6 K  | train
6  | static_context_grn                | _GatedResidualNetwork            | 16.8 K | train
7  | static_context_hidden_encoder_grn | _GatedResidualNetw

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=10` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]