In [104]:
# Import all csv files from the forecasted_data folder and merge them into one csv file
import os
import pandas as pd


def load_and_merge_csv_files(forecasted_data_dir, csv_files):
    # Initialize an empty DataFrame
    merged_df = pd.DataFrame()

    for csv_file in csv_files:
        # Read the CSV file into a DataFrame
        df = pd.read_csv(os.path.join(forecasted_data_dir, csv_file))

        # Merge the DataFrame on the 'Date' column
        if merged_df.empty:
            merged_df = df
        else:
            merged_df = pd.merge(merged_df, df, on='Date', how='outer')

    return merged_df


# Example usage
# Get the current working directory
cwd = os.getcwd()
# Get the directory of the forecasted_data folder
forecasted_data_dir = os.path.join(cwd, 'forecasted_data')
# Get all the CSV files in the forecasted_data folder
csv_files = [f for f in os.listdir(forecasted_data_dir) if f.endswith('.csv')]
merged_df = load_and_merge_csv_files(forecasted_data_dir, csv_files)

merged_df

Unnamed: 0,Date,Lignite (GWh),Natural_gas (GWh),Hard_coal (GWh),Biomass (GWh),Other (GWh),BEV_vehicles,Wind_speed (m/s),Temperature (°C),Wind_offshore (GWh),Wind_onshore (GWh),Pumped_storage_generation (GWh),Solar_radiation (W/m2),Net_total_export_import (GWh),Hydro (GWh),Solar_energy (GWh)
0,2024-07-29,223.360661,161.729196,69.088323,114.467181,60.589910,1013.457751,3.763740,19.677840,96.653642,317.229620,20.967216,217.053659,-191.470585,51.164239,247.250141
1,2024-07-30,218.071269,135.785707,59.969954,113.739188,66.760590,951.904038,3.338270,18.784963,49.424443,180.394796,17.658391,219.402981,1.475163,50.560365,230.687057
2,2024-07-31,237.024951,168.197331,56.812073,114.277482,60.193604,1039.480594,3.523603,20.269650,51.967861,322.998717,20.118478,242.176329,-150.735820,50.425785,240.273297
3,2024-08-01,266.668389,198.698191,77.015942,110.041468,63.041389,1443.619584,3.557334,21.287117,76.322452,309.554800,22.168196,271.883692,-175.684717,50.152274,270.447722
4,2024-08-02,223.105485,134.624710,71.376632,108.614761,58.138594,1223.709411,2.592341,18.286833,52.432233,171.298677,17.953564,194.746012,-160.080532,48.400867,210.150011
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
881,2026-12-27,164.479597,169.146932,67.313059,116.062974,60.615720,1821.174336,4.023179,2.727938,82.449250,411.332734,19.572419,23.109718,-55.377232,46.773712,45.928522
882,2026-12-28,169.827967,180.352931,70.608102,116.144244,55.883586,1829.142336,3.569411,2.533574,76.204128,456.417497,18.342296,22.202108,239.700967,46.392569,29.224473
883,2026-12-29,219.744514,253.753669,96.883479,117.999628,61.870596,2124.095851,5.019709,3.209663,94.771923,683.270957,25.978222,31.392542,410.843744,49.568048,40.867571
884,2026-12-30,153.231596,223.832415,76.408091,117.413011,60.694992,2054.928875,4.985357,3.785974,137.129298,650.042670,23.985740,31.985752,199.807324,48.644834,59.042280


In [105]:
csv_files

['Lignite_(GWh)_forecast.csv',
 'Natural_gas_(GWh)_forecast.csv',
 'Hard_coal_(GWh)_forecast.csv',
 'Biomass_(GWh)_forecast.csv',
 'Other_(GWh)_forecast.csv',
 'BEV_vehicles_forecast.csv',
 'Wind_speed_(m_s)_forecast.csv',
 'Temperature_(°C)_forecast.csv',
 'Wind_offshore_(GWh)_forecast.csv',
 'Wind_onshore_(GWh)_forecast.csv',
 'Pumped_storage_generation_(GWh)_forecast.csv',
 'Solar_radiation_(W_m2)_forecast.csv',
 'Net_total_export_import_(GWh)_forecast.csv',
 'Hydro_(GWh)_forecast.csv',
 'Solar_energy_(GWh)_forecast.csv']

In [106]:
# Load the other two files
gas_prices_dir = os.path.join(
    cwd, 'forecast_values_large_730_TTF_gas_price_EUR_MWh_300_32.csv')
gas_prices = pd.read_csv(gas_prices_dir)

oil_prices = pd.read_csv(os.path.join(
    cwd, 'forecast_values_large_730_Oil_price_EUR_400_32_large.csv'))

In [107]:
gas_prices

Unnamed: 0,Date,Low_10,Mean,Mean_Smoothed,High_90
0,2024-07-29,32.177422,33.072580,33.309678,33.895161
1,2024-07-30,32.129034,33.290320,33.309678,34.379029
2,2024-07-31,31.669356,33.508064,33.309678,34.620965
3,2024-08-01,31.887099,33.459680,33.425807,34.717739
4,2024-08-02,31.161290,33.217747,33.479033,34.669352
...,...,...,...,...,...
725,2026-07-24,39.195382,42.066080,42.027586,45.642410
726,2026-07-25,39.195382,41.921738,42.149471,45.546185
727,2026-07-26,39.195382,41.953810,42.373994,45.546185
728,2026-07-27,40.734970,42.691536,42.373994,45.642410


In [108]:
# Select only Date and Mean columns
# Create copies before modifying to avoid SettingWithCopyWarning
gas_prices = gas_prices.copy()
oil_prices = oil_prices.copy()

gas_prices = gas_prices[['Date', 'Mean']]
oil_prices = oil_prices[['Date', 'Mean']]

# Rename mean to gas_price and oil_price
gas_prices.rename(columns={'Mean': 'TTF_gas_price (EUR/MWh)'}, inplace=True)
oil_prices.rename(columns={'Mean': 'Oil_price (EUR)'}, inplace=True)

# Set the Date column as the index
gas_prices.set_index('Date', inplace=True)
oil_prices.set_index('Date', inplace=True)

# Merge the gas_prices and oil_prices DataFrames with the merged_df DataFrame
merged_df = pd.merge(merged_df, gas_prices, on='Date', how='outer')

# Drop all rows with NaN values
merged_df.dropna(inplace=True)


# Round all values to 2 decimal places
merged_df = merged_df.round(2)

# Create a column with zeros called "Nuclear_energy (MWh)"
merged_df['Nuclear_energy (MWh)'] = 0


merged_df

Unnamed: 0,Date,Lignite (GWh),Natural_gas (GWh),Hard_coal (GWh),Biomass (GWh),Other (GWh),BEV_vehicles,Wind_speed (m/s),Temperature (°C),Wind_offshore (GWh),Wind_onshore (GWh),Pumped_storage_generation (GWh),Solar_radiation (W/m2),Net_total_export_import (GWh),Hydro (GWh),Solar_energy (GWh),TTF_gas_price (EUR/MWh),Nuclear_energy (MWh)
0,2024-07-29,223.36,161.73,69.09,114.47,60.59,1013.46,3.76,19.68,96.65,317.23,20.97,217.05,-191.47,51.16,247.25,33.07,0
1,2024-07-30,218.07,135.79,59.97,113.74,66.76,951.90,3.34,18.78,49.42,180.39,17.66,219.40,1.48,50.56,230.69,33.29,0
2,2024-07-31,237.02,168.20,56.81,114.28,60.19,1039.48,3.52,20.27,51.97,323.00,20.12,242.18,-150.74,50.43,240.27,33.51,0
3,2024-08-01,266.67,198.70,77.02,110.04,63.04,1443.62,3.56,21.29,76.32,309.55,22.17,271.88,-175.68,50.15,270.45,33.46,0
4,2024-08-02,223.11,134.62,71.38,108.61,58.14,1223.71,2.59,18.29,52.43,171.30,17.95,194.75,-160.08,48.40,210.15,33.22,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
725,2026-07-24,225.66,161.53,51.48,112.93,57.04,952.94,2.71,19.84,46.57,177.56,18.64,234.49,-154.09,54.43,269.28,42.07,0
726,2026-07-25,225.07,170.04,59.80,112.21,59.70,972.59,3.07,20.31,62.88,153.70,17.15,224.19,-215.40,55.06,289.55,41.92,0
727,2026-07-26,241.45,188.15,76.65,113.57,60.30,1004.75,3.31,20.41,63.62,206.55,19.54,236.87,-243.09,56.30,260.94,41.95,0
728,2026-07-27,204.21,148.32,70.46,113.34,56.76,918.40,2.77,18.77,34.57,126.88,13.88,189.39,-215.89,52.74,178.88,42.69,0


### Load old data and combine new and old 

In [109]:
df_old = pd.read_csv(
    "/Users/skyfano/Documents/Masterarbeit/Prediction_of_energy_prices/data/Final_data/final_data_july.csv")
df_old

Unnamed: 0,Date,Day_ahead_price (€/MWh),Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),...,Lag_1_day,Lag_2_days,Lag_3_days,Lag_4_days,Lag_5_days,Lag_6_days,Lag_7_days,Day_of_week,Month,Rolling_mean_7
0,2012-01-08,26.83,17.54,5.21,3.74,98.605,189.718,48.467,354.178,256.892,...,32.58,36.26,20.35,32.16,35.03,33.82,18.19,6,1,31.00
1,2012-01-09,47.91,13.04,4.24,3.80,98.605,344.154,49.054,382.756,282.438,...,26.83,32.58,36.26,20.35,32.16,35.03,33.82,0,1,33.02
2,2012-01-10,45.77,28.71,4.30,4.81,98.605,360.126,51.143,334.267,267.311,...,47.91,26.83,32.58,36.26,20.35,32.16,35.03,1,1,34.55
3,2012-01-11,47.83,21.58,4.08,5.14,98.605,360.330,50.693,385.000,277.343,...,45.77,47.91,26.83,32.58,36.26,20.35,32.16,2,1,36.79
4,2012-01-12,43.10,25.12,6.77,4.98,98.605,306.521,50.732,332.985,266.820,...,47.83,45.77,47.91,26.83,32.58,36.26,20.35,3,1,40.04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4581,2024-07-24,66.61,225.04,3.47,17.54,110.007,43.469,85.857,199.246,194.291,...,79.62,88.75,58.45,59.32,86.47,90.75,76.79,2,7,75.71
4582,2024-07-25,78.34,272.71,2.12,17.85,110.410,50.676,82.632,195.983,209.610,...,66.61,79.62,88.75,58.45,59.32,86.47,90.75,3,7,73.94
4583,2024-07-26,93.04,172.33,2.60,19.09,110.852,42.333,79.531,205.273,205.773,...,78.34,66.61,79.62,88.75,58.45,59.32,86.47,4,7,74.88
4584,2024-07-27,80.74,176.67,2.05,19.63,110.479,33.307,74.958,184.012,216.412,...,93.04,78.34,66.61,79.62,88.75,58.45,59.32,5,7,77.94


In [110]:
# Merge df_old and merged_df DataFrames
final_df = pd.concat([df_old, merged_df], ignore_index=True)
final_df

Unnamed: 0,Date,Day_ahead_price (€/MWh),Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),...,Lag_3_days,Lag_4_days,Lag_5_days,Lag_6_days,Lag_7_days,Day_of_week,Month,Rolling_mean_7,TTF_gas_price (EUR/MWh),Nuclear_energy (MWh)
0,2012-01-08,26.83,17.54,5.21,3.74,98.605,189.718,48.467,354.178,256.892,...,20.35,32.16,35.03,33.82,18.19,6.0,1.0,31.00,,
1,2012-01-09,47.91,13.04,4.24,3.80,98.605,344.154,49.054,382.756,282.438,...,36.26,20.35,32.16,35.03,33.82,0.0,1.0,33.02,,
2,2012-01-10,45.77,28.71,4.30,4.81,98.605,360.126,51.143,334.267,267.311,...,32.58,36.26,20.35,32.16,35.03,1.0,1.0,34.55,,
3,2012-01-11,47.83,21.58,4.08,5.14,98.605,360.330,50.693,385.000,277.343,...,26.83,32.58,36.26,20.35,32.16,2.0,1.0,36.79,,
4,2012-01-12,43.10,25.12,6.77,4.98,98.605,306.521,50.732,332.985,266.820,...,47.91,26.83,32.58,36.26,20.35,3.0,1.0,40.04,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5311,2026-07-24,,234.49,2.71,19.84,112.930,51.480,54.430,225.660,161.530,...,,,,,,,,,42.07,0.0
5312,2026-07-25,,224.19,3.07,20.31,112.210,59.800,55.060,225.070,170.040,...,,,,,,,,,41.92,0.0
5313,2026-07-26,,236.87,3.31,20.41,113.570,76.650,56.300,241.450,188.150,...,,,,,,,,,41.95,0.0
5314,2026-07-27,,189.39,2.77,18.77,113.340,70.460,52.740,204.210,148.320,...,,,,,,,,,42.69,0.0


In [111]:
# Drop the last ten columns of the DataFrame
final_df = final_df.iloc[:, :-12]
final_df

Unnamed: 0,Date,Day_ahead_price (€/MWh),Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),Other (GWh),Pumped_storage_generation (GWh),Solar_energy (GWh),Wind_offshore (GWh),Wind_onshore (GWh),Net_total_export_import (GWh),BEV_vehicles,Oil_price (EUR),TTF_gas_price (€/MWh),Nuclear_energy (GWh)
0,2012-01-08,26.83,17.54,5.21,3.74,98.605,189.718,48.467,354.178,256.892,52.178,16.006,8.039,3.639,251.722,26.066,6.00,103.71,21.33,286.206
1,2012-01-09,47.91,13.04,4.24,3.80,98.605,344.154,49.054,382.756,282.438,60.752,25.507,4.942,2.225,153.948,-59.712,6.00,103.64,22.45,294.593
2,2012-01-10,45.77,28.71,4.30,4.81,98.605,360.126,51.143,334.267,267.311,62.106,18.835,17.101,2.892,200.075,-31.384,6.00,104.22,21.95,293.534
3,2012-01-11,47.83,21.58,4.08,5.14,98.605,360.330,50.693,385.000,277.343,60.862,18.164,10.254,3.059,211.599,-2.110,6.00,103.93,21.95,295.340
4,2012-01-12,43.10,25.12,6.77,4.98,98.605,306.521,50.732,332.985,266.820,56.922,19.129,17.576,6.901,477.412,32.997,6.00,102.26,22.08,294.394
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5311,2026-07-24,,234.49,2.71,19.84,112.930,51.480,54.430,225.660,161.530,57.040,18.640,269.280,46.570,177.560,-154.090,952.94,,,
5312,2026-07-25,,224.19,3.07,20.31,112.210,59.800,55.060,225.070,170.040,59.700,17.150,289.550,62.880,153.700,-215.400,972.59,,,
5313,2026-07-26,,236.87,3.31,20.41,113.570,76.650,56.300,241.450,188.150,60.300,19.540,260.940,63.620,206.550,-243.090,1004.75,,,
5314,2026-07-27,,189.39,2.77,18.77,113.340,70.460,52.740,204.210,148.320,56.760,13.880,178.880,34.570,126.880,-215.890,918.40,,,


In [112]:
final_df.columns

Index(['Date', 'Day_ahead_price (€/MWh)', 'Solar_radiation (W/m2)',
       'Wind_speed (m/s)', 'Temperature (°C)', 'Biomass (GWh)',
       'Hard_coal (GWh)', 'Hydro (GWh)', 'Lignite (GWh)', 'Natural_gas (GWh)',
       'Other (GWh)', 'Pumped_storage_generation (GWh)', 'Solar_energy (GWh)',
       'Wind_offshore (GWh)', 'Wind_onshore (GWh)',
       'Net_total_export_import (GWh)', 'BEV_vehicles', 'Oil_price (EUR)',
       'TTF_gas_price (€/MWh)', 'Nuclear_energy (GWh)'],
      dtype='object')

# Implementation of DeepAR model due to best performance 

In [113]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch
from darts import TimeSeries
from darts.models import RNNModel
from darts.dataprocessing.transformers import Scaler
from darts.utils.likelihood_models import GaussianLikelihood

# Load your dataset and set the target column
data = final_df.copy()
target_column = 'Day_ahead_price (€/MWh)'

# List of covariates available for future data (excluding lag features)
covariates = ['Solar_radiation (W/m2)',
              'Wind_speed (m/s)', 'Temperature (°C)', 'Biomass (GWh)',
              'Hard_coal (GWh)', 'Hydro (GWh)', 'Lignite (GWh)', 'Natural_gas (GWh)',
              'Other (GWh)', 'Pumped_storage_generation (GWh)', 'Solar_energy (GWh)',
              'Wind_offshore (GWh)', 'Wind_onshore (GWh)',
              'Net_total_export_import (GWh)', 'BEV_vehicles', 'Oil_price (EUR)',
              'TTF_gas_price (€/MWh)', 'Nuclear_energy (GWh)']

# Convert target column and covariates to float32
data[target_column] = data[target_column].astype(np.float32)
data[covariates] = data[covariates].astype(np.float32)

data['Date'] = pd.to_datetime(data['Date'], errors='coerce')

# Split the data after the 28 of july 2024  in train and prediction data
train_data = data[data['Date'] < '2024-07-29']
prediction_data = data[data['Date'] >= '2024-07-29']

In [114]:
train_data

Unnamed: 0,Date,Day_ahead_price (€/MWh),Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),Other (GWh),Pumped_storage_generation (GWh),Solar_energy (GWh),Wind_offshore (GWh),Wind_onshore (GWh),Net_total_export_import (GWh),BEV_vehicles,Oil_price (EUR),TTF_gas_price (€/MWh),Nuclear_energy (GWh)
0,2012-01-08,26.830000,17.540001,5.21,3.740000,98.605003,189.718002,48.466999,354.178009,256.891998,52.178001,16.006001,8.039000,3.639000,251.722000,26.066000,6.0,103.709999,21.330000,286.205994
1,2012-01-09,47.910000,13.040000,4.24,3.800000,98.605003,344.153992,49.054001,382.756012,282.437988,60.751999,25.507000,4.942000,2.225000,153.947998,-59.712002,6.0,103.639999,22.450001,294.592987
2,2012-01-10,45.770000,28.709999,4.30,4.810000,98.605003,360.126007,51.143002,334.266998,267.311005,62.105999,18.834999,17.101000,2.892000,200.074997,-31.384001,6.0,104.220001,21.950001,293.533997
3,2012-01-11,47.830002,21.580000,4.08,5.140000,98.605003,360.329987,50.693001,385.000000,277.342987,60.862000,18.164000,10.254000,3.059000,211.598999,-2.110000,6.0,103.930000,21.950001,295.339996
4,2012-01-12,43.099998,25.120001,6.77,4.980000,98.605003,306.520996,50.731998,332.984985,266.820007,56.922001,19.129000,17.576000,6.901000,477.411987,32.997002,6.0,102.260002,22.080000,294.394012
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4581,2024-07-24,66.610001,225.039993,3.47,17.540001,110.007004,43.469002,85.857002,199.246002,194.291000,54.026001,20.934000,325.285004,49.360001,179.921005,-168.705002,992.0,75.750000,32.630001,0.000000
4582,2024-07-25,78.339996,272.709991,2.12,17.850000,110.410004,50.675999,82.632004,195.983002,209.610001,52.963001,18.766001,394.115997,51.053001,42.884998,-194.496002,992.0,76.360001,31.700001,0.000000
4583,2024-07-26,93.040001,172.330002,2.60,19.090000,110.851997,42.333000,79.530998,205.272995,205.772995,52.616001,19.080999,256.246002,40.449001,129.266998,-241.785995,993.0,75.209999,32.200001,0.000000
4584,2024-07-27,80.739998,176.669998,2.05,19.629999,110.478996,33.306999,74.958000,184.011993,216.412003,50.926998,18.856001,244.050995,2.180000,32.000999,-251.654999,992.0,74.790001,32.900002,0.000000


In [115]:
prediction_data

Unnamed: 0,Date,Day_ahead_price (€/MWh),Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),Other (GWh),Pumped_storage_generation (GWh),Solar_energy (GWh),Wind_offshore (GWh),Wind_onshore (GWh),Net_total_export_import (GWh),BEV_vehicles,Oil_price (EUR),TTF_gas_price (€/MWh),Nuclear_energy (GWh)
4586,2024-07-29,,217.050003,3.76,19.680000,114.470001,69.089996,51.160000,223.360001,161.729996,60.590000,20.969999,247.250000,96.650002,317.230011,-191.470001,1013.460022,,,
4587,2024-07-30,,219.399994,3.34,18.780001,113.739998,59.970001,50.560001,218.070007,135.789993,66.760002,17.660000,230.690002,49.419998,180.389999,1.480000,951.900024,,,
4588,2024-07-31,,242.179993,3.52,20.270000,114.279999,56.810001,50.430000,237.020004,168.199997,60.189999,20.120001,240.270004,51.970001,323.000000,-150.740005,1039.479980,,,
4589,2024-08-01,,271.880005,3.56,21.290001,110.040001,77.019997,50.150002,266.670013,198.699997,63.040001,22.170000,270.450012,76.320000,309.549988,-175.679993,1443.619995,,,
4590,2024-08-02,,194.750000,2.59,18.290001,108.610001,71.379997,48.400002,223.110001,134.619995,58.139999,17.950001,210.149994,52.430000,171.300003,-160.080002,1223.709961,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5311,2026-07-24,,234.490005,2.71,19.840000,112.930000,51.480000,54.430000,225.660004,161.529999,57.040001,18.639999,269.279999,46.570000,177.559998,-154.089996,952.940002,,,
5312,2026-07-25,,224.190002,3.07,20.309999,112.209999,59.799999,55.060001,225.070007,170.039993,59.700001,17.150000,289.549988,62.880001,153.699997,-215.399994,972.590027,,,
5313,2026-07-26,,236.869995,3.31,20.410000,113.570000,76.650002,56.299999,241.449997,188.149994,60.299999,19.540001,260.940002,63.619999,206.550003,-243.089996,1004.750000,,,
5314,2026-07-27,,189.389999,2.77,18.770000,113.339996,70.459999,52.740002,204.210007,148.320007,56.759998,13.880000,178.880005,34.570000,126.879997,-215.889999,918.400024,,,


In [117]:
prediction_data.columns

Index(['Date', 'Day_ahead_price (€/MWh)', 'Solar_radiation (W/m2)',
       'Wind_speed (m/s)', 'Temperature (°C)', 'Biomass (GWh)',
       'Hard_coal (GWh)', 'Hydro (GWh)', 'Lignite (GWh)', 'Natural_gas (GWh)',
       'Other (GWh)', 'Pumped_storage_generation (GWh)', 'Solar_energy (GWh)',
       'Wind_offshore (GWh)', 'Wind_onshore (GWh)',
       'Net_total_export_import (GWh)', 'BEV_vehicles', 'Oil_price (EUR)',
       'TTF_gas_price (€/MWh)', 'Nuclear_energy (GWh)'],
      dtype='object')

In [116]:
import pandas as pd
import numpy as np
import torch
from darts import TimeSeries
from darts.models import RNNModel
from darts.dataprocessing.transformers import Scaler
from darts.utils.likelihood_models import GaussianLikelihood
import matplotlib.pyplot as plt

# Assume `data` is your full historical dataset
# Set the target column and covariates
target_column = 'Day_ahead_price (€/MWh)'
covariates = ['Solar_radiation (W/m2)', 'Wind_speed (m/s)', 'Temperature (°C)', 'Biomass (GWh)',
              'Hard_coal (GWh)', 'Hydro (GWh)', 'Lignite (GWh)', 'Natural_gas (GWh)',
              'Other (GWh)', 'Pumped_storage_generation (GWh)', 'Solar_energy (GWh)',
              'Wind_offshore (GWh)', 'Wind_onshore (GWh)',
              'Net_total_export_import (GWh)', 'BEV_vehicles', 'Oil_price (EUR)',
              'TTF_gas_price (€/MWh)', 'Nuclear_energy (GWh)']

# Convert target column and covariates to float32
data[target_column] = data[target_column].astype(np.float32)
data[covariates] = data[covariates].astype(np.float32)

# Create TimeSeries objects
target_series = TimeSeries.from_dataframe(
    data, time_col='Date', value_cols=target_column)
covariate_series = TimeSeries.from_dataframe(
    data, time_col='Date', value_cols=covariates)

# Separate scalers for target and covariates
target_scaler = Scaler()
covariate_scaler = Scaler()

# Scale the target and covariates separately
target_series_scaled = target_scaler.fit_transform(target_series)
covariate_series_scaled = covariate_scaler.fit_transform(covariate_series)

# Define model with previously found hyperparameters
model = RNNModel(
    model='LSTM',
    input_chunk_length=39,
    training_length=500,
    optimizer_kwargs={'lr': 0.0034907220631375884},
    n_rnn_layers=1,
    n_epochs=1,
    likelihood=GaussianLikelihood(),
    batch_size=16,
    hidden_dim=153,
    dropout=0,
    random_state=42
)

# Train the model
model.fit(
    series=target_series_scaled,
    future_covariates=covariate_series_scaled
)

# Generate future covariates TimeSeries for the forecast period
# Set up the start and end dates as Timestamp objects
forecast_start_date = pd.to_datetime(prediction_data['Date'].iloc[0])
required_start_date = forecast_start_date - \
    pd.Timedelta(days=model.input_chunk_length)
forecast_end_date = pd.to_datetime(prediction_data['Date'].iloc[-1])

# Select the required time range from 'data'
covariates_df = data.loc[
    (data['Date'] >= required_start_date) & (
        data['Date'] <= forecast_end_date),
    ['Date'] + covariates
]

# Convert this DataFrame to a TimeSeries object
combined_covariate_series = TimeSeries.from_dataframe(
    covariates_df, time_col='Date', value_cols=covariates)

# Scale the covariates
combined_covariate_series_scaled = covariate_scaler.transform(
    combined_covariate_series)

# Forecast with the scaled covariates
forecast = model.predict(
    n=730, future_covariates=combined_covariate_series_scaled)

# Inverse scaling for only the target series forecast
forecast_original_scale = target_scaler.inverse_transform(forecast)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name            | Type             | Params | Mode 
-------------------------------------------------------------
0 | criterion       | MSELoss          | 0      | train
1 | train_criterion | MSELoss          | 0      | train
2 | val_criterion   | MSELoss          | 0      | train
3 | train_metrics   | MetricCollection | 0      | train
4 | val_metrics     | MetricCollection | 0      | train
5 | rnn             | LSTM             | 101 K  | train
6 | V               | Linear           | 308    | train
-------------------------------------------------------------
101 K     Trainable params
0         Non-trainable params
101 K     Total params
0.408     Total estimated model params size (MB)
7         Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
ValueError: For the given forecasting horizon `n=730`, the provided future covariates at dataset index `0` do not extend far enough into the future. As `n > output_chunk_length` the future covariates must end at time step `2028-07-27 00:00:00`, whereas now they end at time step `2026-07-28 00:00:00`.


ValueError: For the given forecasting horizon `n=730`, the provided future covariates at dataset index `0` do not extend far enough into the future. As `n > output_chunk_length` the future covariates must end at time step `2028-07-27 00:00:00`, whereas now they end at time step `2026-07-28 00:00:00`.

In [None]:
# Plot the forecast
plt.figure(figsize=(12, 6))
target_series.plot(label='Actual', color='blue')
forecast_original_scale.plot(label='Forecast', color='orange')
plt.title('Day Ahead Price Forecast')
plt.xlabel('Date')
plt.ylabel('Day Ahead Price (EUR/MWh)')
plt.legend()
plt.show()

Error Metrics on Test Set:
  MAPE: 39.18%
  MAE: 14.42
  RMSE: 23.62
  MSE: 557.98

Best hyperparameters:
  n_layers: 1
  input_chunk_length: 39
  hidden_dim: 153
  learning_rate: 0.0034907220631375884
  batch_size: 16

In [103]:
covariates_df.columns

Index(['Date', 'Solar_radiation (W/m2)', 'Wind_speed (m/s)',
       'Temperature (°C)', 'Biomass (GWh)', 'Hard_coal (GWh)', 'Hydro (GWh)',
       'Lignite (GWh)', 'Natural_gas (GWh)', 'Other (GWh)',
       'Pumped_storage_generation (GWh)'],
      dtype='object')