# Final Day Ahead Price Prediction

In [66]:
# Import all csv files from the forecasted_data folder and merge them into one csv file
import os
import pandas as pd
from pathlib import Path


def load_and_merge_csv_files(forecasted_data_dir, csv_files):
    # Initialize an empty DataFrame
    merged_df = pd.DataFrame()

    for csv_file in csv_files:
        # Read the CSV file into a DataFrame
        df = pd.read_csv(os.path.join(forecasted_data_dir, csv_file))

        # Merge the DataFrame on the 'Date' column
        if merged_df.empty:
            merged_df = df
        else:
            merged_df = pd.merge(merged_df, df, on='Date', how='outer')

    return merged_df


# Project root: one level above the current working directory
# (assumes the notebook is started from a direct sub-folder of the project).
cwd = Path(os.getcwd()).parent

# Folder that holds one forecast CSV per feature
forecasted_data_dir = cwd / 'data' / 'Future_data' / \
    'Manual_simulation' / 'forecasted_data'

# Fail early with a clear message. Only printing here would let the cell run
# on and crash later with an unrelated NameError because `csv_files` was never set.
if not forecasted_data_dir.exists():
    raise FileNotFoundError(
        f"Directory does not exist: {forecasted_data_dir}")

# Get all the CSV files in the forecasted_data folder
csv_files = [f for f in os.listdir(
    forecasted_data_dir) if f.endswith('.csv')]

# Load and merge the CSV files (exactly once)
merged_df = load_and_merge_csv_files(forecasted_data_dir, csv_files)

merged_df

Unnamed: 0,Date,Lignite (GWh),Natural_gas (GWh),Hard_coal (GWh),Biomass (GWh),Other (GWh),Wind_speed (m/s),Temperature (°C),Wind_offshore (GWh),Wind_onshore (GWh),Pumped_storage_generation (GWh),Solar_radiation (W/m2),Net_total_export_import (GWh),Hydro (GWh),Solar_energy (GWh)
0,2024-07-29,236.412769,161.729196,77.131892,114.467181,60.589910,3.763740,19.677840,39.251187,186.406952,20.967216,217.053659,-191.470585,51.164239,274.989011
1,2024-07-30,198.529067,135.785707,72.426480,113.739188,66.760590,3.338270,18.784963,42.308327,194.113928,17.658391,219.402981,1.475163,50.560365,289.499426
2,2024-07-31,208.821403,168.197331,71.723027,114.277482,60.193604,3.523603,20.269650,39.480807,313.203398,20.118478,242.176329,-150.735820,50.425785,260.453237
3,2024-08-01,220.638712,198.698191,106.894001,110.041468,63.041389,3.557334,21.287117,47.330189,357.542795,22.168196,271.883692,-175.684717,50.152274,257.157486
4,2024-08-02,194.058603,134.624710,88.976278,108.614761,58.138594,2.592341,18.286833,66.875940,228.297288,17.953564,194.746012,-160.080532,48.400867,269.136151
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
881,2026-12-27,117.110038,169.146932,45.128759,116.062974,60.615720,4.023179,2.727938,142.636220,524.012611,19.572419,23.109718,-55.377232,46.773712,70.109556
882,2026-12-28,87.176535,180.352931,40.477918,116.144244,55.883586,3.569411,2.533574,162.537815,863.528769,18.342296,22.202108,239.700967,46.392569,44.178287
883,2026-12-29,98.931078,253.753669,53.978579,117.999628,61.870596,5.019709,3.209663,155.986292,1136.581529,25.978222,31.392542,410.843744,49.568048,46.375693
884,2026-12-30,95.350181,223.832415,47.858825,117.413011,60.694992,4.985357,3.785974,165.603991,836.832535,23.985740,31.985752,199.807324,48.644834,70.941593


In [67]:
# Show which forecast files were picked up (and in which order they were merged)
csv_files

['Lignite_(GWh)_forecast.csv',
 'Natural_gas_(GWh)_forecast.csv',
 'Hard_coal_(GWh)_forecast.csv',
 'Biomass_(GWh)_forecast.csv',
 'Other_(GWh)_forecast.csv',
 'Wind_speed_(m_s)_forecast.csv',
 'Temperature_(°C)_forecast.csv',
 'Wind_offshore_(GWh)_forecast.csv',
 'Wind_onshore_(GWh)_forecast.csv',
 'Pumped_storage_generation_(GWh)_forecast.csv',
 'Solar_radiation_(W_m2)_forecast.csv',
 'Net_total_export_import_(GWh)_forecast.csv',
 'Hydro_(GWh)_forecast.csv',
 'Solar_energy_(GWh)_forecast.csv']

In [70]:
# The gas and oil price forecasts sit directly in the Manual_simulation folder
prices_dir = cwd.joinpath('data', 'Future_data', 'Manual_simulation')

# NOTE: despite its name, `gas_prices_dir` holds the path of the gas price FILE
gas_prices_dir = os.path.join(
    prices_dir, 'forecast_values_large_730_TTF_gas_price_EUR_MWh_300_32.csv')
oil_prices_path = os.path.join(
    prices_dir, 'forecast_values_large_730_Oil_price_EUR_400_32_large.csv')

gas_prices = pd.read_csv(gas_prices_dir)
oil_prices = pd.read_csv(oil_prices_path)

In [71]:
# Inspect the raw gas price forecast before it is reduced to the 'Mean' column
gas_prices

Unnamed: 0,Date,Low_10,Mean,Mean_Smoothed,High_90
0,2024-07-29,32.177422,33.072580,33.309678,33.895161
1,2024-07-30,32.129034,33.290320,33.309678,34.379029
2,2024-07-31,31.669356,33.508064,33.309678,34.620965
3,2024-08-01,31.887099,33.459680,33.425807,34.717739
4,2024-08-02,31.161290,33.217747,33.479033,34.669352
...,...,...,...,...,...
725,2026-07-24,39.195382,42.066080,42.027586,45.642410
726,2026-07-25,39.195382,41.921738,42.149471,45.546185
727,2026-07-26,39.195382,41.953810,42.373994,45.546185
728,2026-07-27,40.734970,42.691536,42.373994,45.642410


In [72]:
# Reduce each price forecast to its 'Mean' column, give that column its feature
# name and index the result by Date. Every step of the chain returns a new
# frame, so the originals are never modified through a slice.
gas_prices = (
    gas_prices[['Date', 'Mean']]
    .rename(columns={'Mean': 'TTF_gas_price (EUR/MWh)'})
    .set_index('Date')
)
oil_prices = (
    oil_prices[['Date', 'Mean']]
    .rename(columns={'Mean': 'Oil_price (EUR)'})
    .set_index('Date')
)

# Attach both price series to the merged forecast frame. 'Date' is a column on
# the left side and the index name on the right side; `on='Date'` resolves both.
for price_frame in (gas_prices, oil_prices):
    merged_df = pd.merge(merged_df, price_frame, on='Date', how='outer')

# Keep only the dates for which every feature has a value
merged_df = merged_df.dropna()

# Round all values to 2 decimal places
merged_df = merged_df.round(2)

# Add a column of zeros called "Nuclear_energy (GWh)"
merged_df['Nuclear_energy (GWh)'] = 0

# Use the euro sign in the gas price column name:
# "TTF_gas_price (EUR/MWh)" -> "TTF_gas_price (€/MWh)"
merged_df = merged_df.rename(
    columns={'TTF_gas_price (EUR/MWh)': 'TTF_gas_price (€/MWh)'})

merged_df

Unnamed: 0,Date,Lignite (GWh),Natural_gas (GWh),Hard_coal (GWh),Biomass (GWh),Other (GWh),Wind_speed (m/s),Temperature (°C),Wind_offshore (GWh),Wind_onshore (GWh),Pumped_storage_generation (GWh),Solar_radiation (W/m2),Net_total_export_import (GWh),Hydro (GWh),Solar_energy (GWh),TTF_gas_price (€/MWh),Oil_price (EUR),Nuclear_energy (GWh)
0,2024-07-29,236.41,161.73,77.13,114.47,60.59,3.76,19.68,39.25,186.41,20.97,217.05,-191.47,51.16,274.99,33.07,74.60,0
1,2024-07-30,198.53,135.79,72.43,113.74,66.76,3.34,18.78,42.31,194.11,17.66,219.40,1.48,50.56,289.50,33.29,74.94,0
2,2024-07-31,208.82,168.20,71.72,114.28,60.19,3.52,20.27,39.48,313.20,20.12,242.18,-150.74,50.43,260.45,33.51,75.02,0
3,2024-08-01,220.64,198.70,106.89,110.04,63.04,3.56,21.29,47.33,357.54,22.17,271.88,-175.68,50.15,257.16,33.46,74.83,0
4,2024-08-02,194.06,134.62,88.98,108.61,58.14,2.59,18.29,66.88,228.30,17.95,194.75,-160.08,48.40,269.14,33.22,74.85,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,2026-07-24,187.03,161.53,43.88,112.93,57.04,2.71,19.84,74.45,242.05,18.64,234.49,-154.09,54.43,380.35,42.07,56.30,0
726,2026-07-25,200.38,170.04,56.37,112.21,59.70,3.07,20.31,88.52,237.06,17.15,224.19,-215.40,55.06,392.35,41.92,56.89,0
727,2026-07-26,208.27,188.15,59.60,113.57,60.30,3.31,20.41,89.90,275.73,19.54,236.87,-243.09,56.30,332.18,41.95,56.91,0
728,2026-07-27,190.38,148.32,49.12,113.34,56.76,2.77,18.77,50.03,194.81,13.88,189.39,-215.89,52.74,273.64,42.69,56.20,0


In [74]:
# Folder with the BEV vehicle forecasts, one CSV file per scenario
bev_dir = cwd.joinpath('data', 'Future_data', 'Manual_simulation',
                       'BEV_vehicles_data')

bev_constant = pd.read_csv(bev_dir / 'BEV_vehicles_forecast_constant.csv')
bev_growth = pd.read_csv(bev_dir / 'BEV_vehicles_forecast_growth.csv')
bev_decline = pd.read_csv(bev_dir / 'BEV_vehicles_forecast_decline.csv')

### Load old data and combine new and old 

In [75]:
# Historical data set. The path is built from the project root `cwd`, like the
# other inputs of this notebook, instead of a user-specific absolute path.
# NOTE(review): assumes `cwd` is the repository root that contains
# data/Final_data/final_data_july.csv — confirm.
df_old = pd.read_csv(cwd / 'data' / 'Final_data' / 'final_data_july.csv')

# Remove the last 10 columns from the old data
# NOTE(review): positional slice; it silently removes different columns if the
# CSV layout changes — selecting the columns to keep by name would be safer.
df_old = df_old.iloc[:, :-10]
df_old

Unnamed: 0,Date,Day_ahead_price (€/MWh),Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),Other (GWh),Pumped_storage_generation (GWh),Solar_energy (GWh),Wind_offshore (GWh),Wind_onshore (GWh),Net_total_export_import (GWh),BEV_vehicles,Oil_price (EUR),TTF_gas_price (€/MWh),Nuclear_energy (GWh)
0,2012-01-08,26.83,17.54,5.21,3.74,98.605,189.718,48.467,354.178,256.892,52.178,16.006,8.039,3.639,251.722,26.066,6,103.71,21.33,286.206
1,2012-01-09,47.91,13.04,4.24,3.80,98.605,344.154,49.054,382.756,282.438,60.752,25.507,4.942,2.225,153.948,-59.712,6,103.64,22.45,294.593
2,2012-01-10,45.77,28.71,4.30,4.81,98.605,360.126,51.143,334.267,267.311,62.106,18.835,17.101,2.892,200.075,-31.384,6,104.22,21.95,293.534
3,2012-01-11,47.83,21.58,4.08,5.14,98.605,360.330,50.693,385.000,277.343,60.862,18.164,10.254,3.059,211.599,-2.110,6,103.93,21.95,295.340
4,2012-01-12,43.10,25.12,6.77,4.98,98.605,306.521,50.732,332.985,266.820,56.922,19.129,17.576,6.901,477.412,32.997,6,102.26,22.08,294.394
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4581,2024-07-24,66.61,225.04,3.47,17.54,110.007,43.469,85.857,199.246,194.291,54.026,20.934,325.285,49.360,179.921,-168.705,992,75.75,32.63,0.000
4582,2024-07-25,78.34,272.71,2.12,17.85,110.410,50.676,82.632,195.983,209.610,52.963,18.766,394.116,51.053,42.885,-194.496,992,76.36,31.70,0.000
4583,2024-07-26,93.04,172.33,2.60,19.09,110.852,42.333,79.531,205.273,205.773,52.616,19.081,256.246,40.449,129.267,-241.786,993,75.21,32.20,0.000
4584,2024-07-27,80.74,176.67,2.05,19.63,110.479,33.307,74.958,184.012,216.412,50.927,18.856,244.051,2.180,32.001,-251.655,992,74.79,32.90,0.000


In [76]:
# Build one data set per BEV scenario (constant / growth / decline), append it
# to the historical data and write each combined frame to its own CSV file.


def _with_bev_scenario(base_df, bev_df):
    """Outer-merge a BEV forecast onto ``base_df`` by 'Date', drop rows with
    any NaN value and round all values to 2 decimal places."""
    return pd.merge(base_df, bev_df, on='Date', how='outer').dropna().round(2)


# Forecast features plus the BEV column of the respective scenario
merged_df_constant = _with_bev_scenario(merged_df, bev_constant)
merged_df_growth = _with_bev_scenario(merged_df, bev_growth)
merged_df_decline = _with_bev_scenario(merged_df, bev_decline)

# Historical rows first, scenario rows after them, with a fresh 0..n-1 index
final_df_constant = pd.concat([df_old, merged_df_constant], ignore_index=True)
final_df_growth = pd.concat([df_old, merged_df_growth], ignore_index=True)
final_df_decline = pd.concat([df_old, merged_df_decline], ignore_index=True)

# Persist the three combined data sets (paths are relative to the working directory)
final_df_constant.to_csv('final_df_constant.csv', index=False)
final_df_growth.to_csv('final_df_growth.csv', index=False)
final_df_decline.to_csv('final_df_decline.csv', index=False)

# Implementation of DeepAR model due to best performance 

### First version for the BEV decline scenario 

In [53]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch
from darts import TimeSeries
from darts.models import RNNModel
from darts.dataprocessing.transformers import Scaler
from darts.utils.likelihood_models import GaussianLikelihood

# Work on a copy so the scenario frame built earlier stays untouched
data = final_df_decline.copy()
target_column = 'Day_ahead_price (€/MWh)'

# List of covariates available for future data (excluding lag features)
covariates = ['Solar_radiation (W/m2)',
              'Wind_speed (m/s)', 'Temperature (°C)', 'Biomass (GWh)',
              'Hard_coal (GWh)', 'Hydro (GWh)', 'Lignite (GWh)', 'Natural_gas (GWh)',
              'Other (GWh)', 'Pumped_storage_generation (GWh)', 'Solar_energy (GWh)',
              'Wind_offshore (GWh)', 'Wind_onshore (GWh)',
              'Net_total_export_import (GWh)', 'BEV_vehicles', 'Oil_price (EUR)',
              'TTF_gas_price (€/MWh)', 'Nuclear_energy (GWh)']

# Convert target column and covariates to float32
data[target_column] = data[target_column].astype(np.float32)
data[covariates] = data[covariates].astype(np.float32)

# Unparseable dates become NaT (errors='coerce') instead of raising
data['Date'] = pd.to_datetime(data['Date'], errors='coerce')

# Rows before 2024-07-29 are used for training; rows from that date on carry
# only the forecasted covariates and define the prediction horizon.
train_data = data[data['Date'] < '2024-07-29']
prediction_data = data[data['Date'] >= '2024-07-29']

# Create TimeSeries objects
target_series = TimeSeries.from_dataframe(
    train_data, time_col='Date', value_cols=target_column)
covariate_series = TimeSeries.from_dataframe(
    train_data, time_col='Date', value_cols=covariates)

# Separate scalers so the forecast can be inverse-transformed with the target
# scaler alone
target_scaler = Scaler()
covariate_scaler = Scaler()

# Fit both scalers on the training period only
target_series_scaled = target_scaler.fit_transform(target_series)
covariate_series_scaled = covariate_scaler.fit_transform(covariate_series)

# Define model with previously found hyperparameters
# NOTE(review): lr=0.01 differs from the learning rate in the
# "Best hyperparameters" listing further down in this notebook — confirm.
model = RNNModel(
    model='LSTM',
    input_chunk_length=39,
    training_length=300,
    optimizer_kwargs={'lr': 0.01},
    n_rnn_layers=1,
    n_epochs=50,
    likelihood=GaussianLikelihood(),
    batch_size=16,
    hidden_dim=153,
    dropout=0,
    random_state=42,
    pl_trainer_kwargs={
        # "auto" selects an available accelerator and falls back to the CPU;
        # a hard-coded "gpu" makes the cell fail on machines without one.
        "accelerator": "auto",
    },
)

# Train the model
model.fit(
    series=target_series_scaled,
    future_covariates=covariate_series_scaled,
    verbose=True
)

# Set up the start and end dates as Timestamp objects
forecast_start_date = pd.to_datetime(prediction_data['Date'].iloc[0])
# The covariates must also cover the input window that precedes the forecast
required_start_date = forecast_start_date - \
    pd.Timedelta(days=model.input_chunk_length)
forecast_end_date = pd.to_datetime(prediction_data['Date'].iloc[-1])

# Select the required time range from 'data'
covariates_df = data.loc[
    (data['Date'] >= required_start_date) & (
        data['Date'] <= forecast_end_date),
    ['Date'] + covariates
]

# Convert this DataFrame to a TimeSeries object
combined_covariate_series = TimeSeries.from_dataframe(
    covariates_df, time_col='Date', value_cols=covariates)

# Scale with the scaler fitted on the training covariates (no refit)
combined_covariate_series_scaled = covariate_scaler.transform(
    combined_covariate_series)

# Forecast exactly as many steps as there are rows with future covariates,
# instead of a hard-coded horizon that can drift out of sync with the data.
n_forecast_steps = len(prediction_data)
# NOTE(review): the model has a likelihood, so predict() with its default
# num_samples presumably returns a single random draw rather than an expected
# value — confirm whether num_samples > 1 plus a mean/quantile is wanted.
forecast = model.predict(
    n=n_forecast_steps, future_covariates=combined_covariate_series_scaled)

# Inverse scaling for only the target series forecast
forecast_original_scale = target_scaler.inverse_transform(forecast)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | rnn             | LSTM             | 106 K  | train
6 | V               | Linear           | 308    | train
-------------------------------------------------------------
106 K     Trainable params
0         Non-trainable params
106 K     Total params
0.427     Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=50` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

In [64]:
import plotly.graph_objects as go

# Interactive plot of the historical prices and the forecast
fig = go.Figure()

actual_values = target_series.univariate_values()
forecast_values = forecast_original_scale.univariate_values()

# y-axis limits with some padding. The lower bound only stays at 0 when no
# value is negative; a fixed 0 would cut negative prices off the plot.
y_padding = 50
y_upper = max(actual_values.max(), forecast_values.max()) + y_padding
lowest_value = min(actual_values.min(), forecast_values.min())
y_lower = lowest_value - y_padding if lowest_value < 0 else 0

# Add historical target series with thicker lines and hover text
fig.add_trace(go.Scatter(
    x=target_series.time_index,
    y=actual_values,
    mode='lines',
    name='Actual',
    line=dict(color='royalblue', width=3),  # Use a more vibrant blue
    hovertemplate='<b>Date</b>: %{x}<br><b>Price</b>: %{y:.2f} EUR/MWh<br>',
))

# Add forecast data with thicker lines and hover text
fig.add_trace(go.Scatter(
    x=forecast_original_scale.time_index,
    y=forecast_values,
    mode='lines',
    name='Forecast',
    # Orange dashed line for contrast (same style as in create_and_save_plot)
    line=dict(color='darkorange', width=3, dash='dash'),
    hovertemplate='<b>Date</b>: %{x}<br><b>Forecast</b>: %{y:.2f} EUR/MWh<br>',
))

# Update layout for better aesthetics
fig.update_layout(
    title=dict(
        text='Day Ahead Price Forecast',
        font=dict(size=24, color='darkslategray'),
        x=0.5,  # Center the title
        xanchor='center'
    ),
    xaxis_title='Date',
    yaxis_title='Day Ahead Price (EUR/MWh)',
    legend_title='Series',
    width=1000,
    height=600,
    plot_bgcolor='white',  # Clean white background
    xaxis=dict(
        showgrid=True,  # Show vertical gridlines
        gridcolor='LightGrey',
        tickformat='%Y',  # Format the x-axis as year
        title_font=dict(size=18),
        tickfont=dict(size=14),
    ),
    yaxis=dict(
        showgrid=True,  # Show horizontal gridlines
        gridcolor='LightGrey',
        title_font=dict(size=18),
        tickfont=dict(size=14),
        range=[y_lower, y_upper]
    ),
    legend=dict(
        x=0.8,  # Place the legend in the top-right corner
        y=0.95,
        font=dict(size=14),
        # Nearly opaque white background for the legend
        bgcolor="rgba(255,255,255,0.9)",
        bordercolor="Black",
        borderwidth=1
    )
)

# Show the figure
fig.show()

In [80]:
# Function to create and save the plot as PNG
def create_and_save_plot(target_series, forecast_original_scale, potential, save_dir, n_epochs):
    """Plot historical prices and the forecast and write the figure to a PNG file.

    Parameters
    ----------
    target_series : TimeSeries
        Historical target values; read via ``time_index`` and ``univariate_values()``.
    forecast_original_scale : TimeSeries
        Forecast in the original (unscaled) units; read the same way.
    potential : str
        Scenario name; used in the plot title and in the file name.
    save_dir : str or path-like
        Output folder; it is created if it does not exist yet.
    n_epochs : int
        Number of training epochs; only used as part of the file name.

    Returns
    -------
    pathlib.Path
        Location of the written PNG file.
    """
    actual_values = target_series.univariate_values()
    forecast_values = forecast_original_scale.univariate_values()

    # y-axis limits with some padding. The lower bound only stays at 0 when no
    # value is negative; a fixed 0 would cut negative prices off the plot.
    y_padding = 50
    y_upper = max(actual_values.max(), forecast_values.max()) + y_padding
    lowest_value = min(actual_values.min(), forecast_values.min())
    y_lower = lowest_value - y_padding if lowest_value < 0 else 0

    # Create a Plotly figure
    fig = go.Figure()

    # Add historical target series with thicker lines and hover text
    fig.add_trace(go.Scatter(
        x=target_series.time_index,
        y=actual_values,
        mode='lines',
        name='Actual',
        line=dict(color='royalblue', width=3),
        hovertemplate='<b>Date</b>: %{x}<br><b>Price</b>: %{y:.2f} EUR/MWh<br>',
    ))

    # Add forecast data with thicker lines and hover text
    fig.add_trace(go.Scatter(
        x=forecast_original_scale.time_index,
        y=forecast_values,
        mode='lines',
        name='Forecast',
        line=dict(color='darkorange', width=3, dash='dash'),
        hovertemplate='<b>Date</b>: %{x}<br><b>Forecast</b>: %{y:.2f} EUR/MWh<br>',
    ))

    # Update layout for better aesthetics
    fig.update_layout(
        title=dict(
            text=f'Day Ahead Price Forecast - {potential.capitalize()}',
            font=dict(size=24, color='darkslategray'),
            x=0.5,
            xanchor='center'
        ),
        xaxis_title='Date',
        yaxis_title='Day Ahead Price (EUR/MWh)',
        legend_title='Series',
        width=1000,
        height=600,
        plot_bgcolor='white',
        xaxis=dict(
            showgrid=True,
            gridcolor='LightGrey',
            tickformat='%Y',
            title_font=dict(size=18),
            tickfont=dict(size=14),
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor='LightGrey',
            title_font=dict(size=18),
            tickfont=dict(size=14),
            range=[y_lower, y_upper]
        ),
        legend=dict(
            x=0.8,
            y=0.95,
            font=dict(size=14),
            bgcolor="rgba(255,255,255,0.9)",
            bordercolor="Black",
            borderwidth=1
        )
    )

    # Make sure the output folder exists so write_image does not fail on a
    # missing directory
    output_dir = Path(save_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Save the figure as a PNG file
    save_path = output_dir / f'{potential}_forecast_plot_{n_epochs}.png'
    fig.write_image(str(save_path), format='png')
    print(f"Saved plot for {potential} scenario at {save_path}")
    return save_path

In [82]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch
from darts import TimeSeries
from darts.models import RNNModel
from darts.dataprocessing.transformers import Scaler
from darts.utils.likelihood_models import GaussianLikelihood
import plotly.graph_objects as go
from pathlib import Path


# Function to perform the forecasting and plotting process
def forecast_and_plot(data, potential, save_dir, n_epochs,
                      split_date='2024-07-29', accelerator='auto'):
    """Train the LSTM model on the historical rows, forecast the remaining rows
    and save the resulting plot.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'Date' column, the target column and all covariate
        columns listed below. It is copied and never modified.
    potential : str
        Scenario name, forwarded to ``create_and_save_plot``.
    save_dir : str or path-like
        Folder for the PNG, forwarded to ``create_and_save_plot``.
    n_epochs : int
        Number of training epochs.
    split_date : str, optional
        Rows with ``Date < split_date`` are used for training; rows from
        ``split_date`` on define the forecast horizon.
    accelerator : str, optional
        Passed to the trainer via ``pl_trainer_kwargs``. The default "auto"
        lets the trainer choose, so the function also runs without a GPU.

    Returns
    -------
    TimeSeries
        The forecast in the original (unscaled) units.
    """
    # Copy first: the dtype and date conversions below would otherwise change
    # the caller's DataFrame as a side effect.
    data = data.copy()

    # Set the target column
    target_column = 'Day_ahead_price (€/MWh)'

    # List of covariates available for future data (excluding lag features)
    covariates = ['Solar_radiation (W/m2)',
                  'Wind_speed (m/s)', 'Temperature (°C)', 'Biomass (GWh)',
                  'Hard_coal (GWh)', 'Hydro (GWh)', 'Lignite (GWh)', 'Natural_gas (GWh)',
                  'Other (GWh)', 'Pumped_storage_generation (GWh)', 'Solar_energy (GWh)',
                  'Wind_offshore (GWh)', 'Wind_onshore (GWh)',
                  'Net_total_export_import (GWh)', 'BEV_vehicles', 'Oil_price (EUR)',
                  'TTF_gas_price (€/MWh)', 'Nuclear_energy (GWh)']

    # Convert target column and covariates to float32
    data[target_column] = data[target_column].astype(np.float32)
    data[covariates] = data[covariates].astype(np.float32)

    # Unparseable dates become NaT (errors='coerce') instead of raising
    data['Date'] = pd.to_datetime(data['Date'], errors='coerce')

    # Split into training rows and rows that only carry future covariates
    train_data = data[data['Date'] < split_date]
    prediction_data = data[data['Date'] >= split_date]

    # Create TimeSeries objects
    target_series = TimeSeries.from_dataframe(
        train_data, time_col='Date', value_cols=target_column)
    covariate_series = TimeSeries.from_dataframe(
        train_data, time_col='Date', value_cols=covariates)

    # Separate scalers so the forecast can be inverse-transformed with the
    # target scaler alone
    target_scaler = Scaler()
    covariate_scaler = Scaler()

    # Fit both scalers on the training period only
    target_series_scaled = target_scaler.fit_transform(target_series)
    covariate_series_scaled = covariate_scaler.fit_transform(covariate_series)

    # Define model with previously found hyperparameters
    model = RNNModel(
        model='LSTM',
        input_chunk_length=39,
        training_length=300,
        optimizer_kwargs={'lr': 0.01},
        n_rnn_layers=1,
        n_epochs=n_epochs,
        likelihood=GaussianLikelihood(),
        batch_size=16,
        hidden_dim=153,
        dropout=0,
        random_state=42,
        pl_trainer_kwargs={
            "accelerator": accelerator,
        },
    )

    # Train the model
    model.fit(
        series=target_series_scaled,
        future_covariates=covariate_series_scaled,
        verbose=True
    )

    # Set up the start and end dates as Timestamp objects
    forecast_start_date = pd.to_datetime(prediction_data['Date'].iloc[0])
    # The covariates must also cover the input window that precedes the forecast
    required_start_date = forecast_start_date - \
        pd.Timedelta(days=model.input_chunk_length)
    forecast_end_date = pd.to_datetime(prediction_data['Date'].iloc[-1])

    # Select the required time range from 'data'
    covariates_df = data.loc[
        (data['Date'] >= required_start_date) & (
            data['Date'] <= forecast_end_date),
        ['Date'] + covariates
    ]

    # Convert this DataFrame to a TimeSeries object
    combined_covariate_series = TimeSeries.from_dataframe(
        covariates_df, time_col='Date', value_cols=covariates)

    # Scale with the scaler fitted on the training covariates (no refit)
    combined_covariate_series_scaled = covariate_scaler.transform(
        combined_covariate_series)

    # Forecast exactly as many steps as there are rows with future covariates,
    # instead of a hard-coded horizon that can drift out of sync with the data.
    forecast = model.predict(
        n=len(prediction_data),
        future_covariates=combined_covariate_series_scaled)

    # Inverse scaling for only the target series forecast
    forecast_original_scale = target_scaler.inverse_transform(forecast)

    # Call the plot function to save the plot as a PNG file
    create_and_save_plot(
        target_series, forecast_original_scale, potential, save_dir, n_epochs)

    return forecast_original_scale


# Scenario name -> combined historical + forecast frame of that scenario.
# An explicit mapping (instead of looking names up in globals()) makes the
# dependency on the three frames visible and fails right here if one is missing.
scenario_frames = {
    'growth': final_df_growth,
    'decline': final_df_decline,
    'constant': final_df_constant,
}

# Define the three scenarios (in processing order)
potentials = list(scenario_frames)
# NOTE(review): a single epoch looks like a quick smoke-test setting; the
# single-scenario run earlier in this notebook trains for 50 epochs — confirm.
N_EPOCHS = 1

# Define the directory to save plots
save_directory = 'forecasted_plots'

# Ensure the save directory exists
Path(save_directory).mkdir(parents=True, exist_ok=True)

# Loop through each scenario and run the forecasting + plot saving
for potential in potentials:
    data = scenario_frames[potential]
    forecast_and_plot(data, potential, save_directory, N_EPOCHS)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | rnn             | LSTM             | 106 K  | train
6 | V               | Linear           | 308    | train
-------------------------------------------------------------
106 K     Trainable params
0         Non-trainable params
106 K     Total params
0.427     Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | rnn             | LSTM             | 106 K  | train
6 | V               | Linear           | 308    | train
-------------------------------------------------------------
106 K     Trainable params
0         Non-trainable params
106 K     Total params
0.427     Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Saved plot for growth scenario at forecasted_plots/growth_forecast_plot_1.png


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | rnn             | LSTM             | 106 K  | train
6 | V               | Linear           | 308    | train
-------------------------------------------------------------
106 K     Trainable params
0         Non-trainable params
106 K     Total params
0.427     Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Saved plot for decline scenario at forecasted_plots/decline_forecast_plot_1.png


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

Saved plot for constant scenario at forecasted_plots/constant_forecast_plot_1.png


Error Metrics on Test Set:
  MAPE: 39.18%
  MAE: 14.42
  RMSE: 23.62
  MSE: 557.98

Best hyperparameters:
  n_layers: 1
  input_chunk_length: 39
  hidden_dim: 153
  learning_rate: 0.0034907220631375884
  batch_size: 16

# Test with ARIMA

In [None]:
from darts.models import AutoARIMA
from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
import pandas as pd
import plotly.graph_objects as go
from darts.metrics import mape, mae, rmse, mse

# Initialize AutoARIMA model (weekly seasonality: m=7)
model = AutoARIMA(seasonal=True, m=7, max_p=5, max_d=2,
                  max_q=5, information_criterion='aic')

# Fit the model with covariates.
# Reuses the scaled series and scalers created in the DeepAR cell further up.
model.fit(target_series_scaled, future_covariates=covariate_series_scaled)

# Forecast one step per row that only carries future covariates
n = len(prediction_data)
forecast_scaled = model.predict(
    n=n, future_covariates=combined_covariate_series_scaled)

# Inverse transform the forecast
forecast = target_scaler.inverse_transform(forecast_scaled)

# Plot the actual vs forecasted data
fig = go.Figure()

# Add the historical actual prices.
# NOTE(review): this cell used to plot `test_series`, which is not defined
# anywhere in the visible notebook; `target_series` is the only series of
# actual prices available here — confirm this is the intended comparison.
fig.add_trace(go.Scatter(x=target_series.time_index,
                         y=target_series.values().squeeze(),
                         mode='lines', name='Actual', line=dict(color='blue')))

# Add forecasted data
fig.add_trace(go.Scatter(x=forecast.time_index,
                         y=forecast.values().squeeze(),
                         mode='lines', name='Forecast', line=dict(color='red')))

# Update layout
fig.update_layout(title='AutoARIMA Model - Forecast with Covariates',
                  xaxis_title='Date', yaxis_title='Day Ahead Price (€/MWh)',
                  legend=dict(x=1, y=1, xanchor='right', yanchor='top',
                              bordercolor='black', borderwidth=1),
                  template='plotly_white')

# Save the plot as a PNG image
plot_path = "final_arima_prediction.png"
fig.write_image(plot_path)
print(f"Forecast plot saved at: {plot_path}")