## Data Simulation for future covariates to predict energy prices 


In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import STL
from darts import TimeSeries
from darts.models import ExponentialSmoothing


In [4]:
# Import the data 
def load_and_prepare_data(file_path):
    """
    Load energy prices data from a CSV file, ensure chronological order, and convert 'Date' to datetime.
    """
    df = pd.read_csv(file_path)
    df.sort_values('Date', inplace=True)
    df.set_index('Date', inplace=True)
    df = pd.DataFrame(df)
    return df

In [21]:
# Import the data
df = load_and_prepare_data('../../data/Final_data/final_data_july.csv')

# Reset the index
df.reset_index(inplace=True)
# Ensure the 'Date' column is properly parsed and set as the index
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# Ensure the data has a daily frequency (Filling missing dates if necessary)
df = df.asfreq('D')  # This makes sure the data has a daily frequency

In [22]:
df

Unnamed: 0_level_0,Day_ahead_price (€/MWh),Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),Other (GWh),Pumped_storage_generation (GWh),Solar_energy (GWh),Wind_offshore (GWh),Wind_onshore (GWh),Net_total_export_import (GWh),BEV_vehicles,Oil_price (EUR),TTF_gas_price (€/MWh),Nuclear_energy (GWh)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2012-01-01,18.19,14.75,4.95,8.39,98.605,108.454,51.011,325.337,188.811,54.040,19.314,6.263,3.404,235.467,54.662,6,99.64,21.10,250.979
2012-01-02,33.82,15.12,5.00,7.41,98.605,222.656,51.862,343.168,229.293,54.166,28.892,6.312,3.350,231.772,-64.477,6,100.04,20.00,258.671
2012-01-03,35.03,31.88,7.77,5.23,98.605,162.204,48.851,336.773,241.297,53.518,21.072,24.226,7.292,504.484,-35.078,6,100.44,20.90,271.495
2012-01-04,32.16,25.21,8.04,4.78,98.605,189.633,47.101,323.976,252.289,52.194,28.300,14.157,7.828,541.528,22.924,6,103.15,21.40,270.613
2012-01-05,20.35,13.46,9.98,4.23,98.605,175.733,45.854,327.502,259.018,52.179,31.887,4.728,8.280,572.819,35.618,6,103.92,21.30,287.555
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-07-24,66.61,225.04,3.47,17.54,110.007,43.469,85.857,199.246,194.291,54.026,20.934,325.285,49.360,179.921,-168.705,992,75.75,32.63,0.000
2024-07-25,78.34,272.71,2.12,17.85,110.410,50.676,82.632,195.983,209.610,52.963,18.766,394.116,51.053,42.885,-194.496,992,76.36,31.70,0.000
2024-07-26,93.04,172.33,2.60,19.09,110.852,42.333,79.531,205.273,205.773,52.616,19.081,256.246,40.449,129.267,-241.786,993,75.21,32.20,0.000
2024-07-27,80.74,176.67,2.05,19.63,110.479,33.307,74.958,184.012,216.412,50.927,18.856,244.051,2.180,32.001,-251.655,992,74.79,32.90,0.000


## Check if additive or mulitplicative model works better for predictions

In [24]:
from prophet import Prophet
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error

# Function to evaluate model performance
def evaluate_model(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    return mse, mae, mape

# Split the data into 80% for training and 20% for backtesting
train_size = int(len(df) * 0.8)
train_df = df[:train_size]
test_df = df[train_size:]

train_df = train_df.reset_index().rename(columns={'Date': 'ds', 'Solar_radiation (W/m2)': 'y'})
test_df = test_df.reset_index().rename(columns={'Date': 'ds', 'Solar_radiation (W/m2)': 'y'})

# Create a future DataFrame for backtesting (to predict the next 20%)
future_test_dates = test_df['ds']

# Train an additive model
additive_model = Prophet(
    seasonality_mode='additive', 
    yearly_seasonality=20,
    weekly_seasonality=False,
    daily_seasonality=False )
additive_model.fit(train_df)

# Make future predictions for the backtest period
forecast_additive = additive_model.predict(test_df)

# Evaluate the additive model
additive_mse, additive_mae, additive_mape = evaluate_model(test_df['y'], forecast_additive['yhat'])
print(f"Additive Model: MSE = {additive_mse}, MAE = {additive_mae}, MAPE = {additive_mape}")

# Train a multiplicative model
multiplicative_model = Prophet(
    seasonality_mode='multiplicative', 
    yearly_seasonality=20,
    weekly_seasonality=False,  
    daily_seasonality=False )
multiplicative_model.fit(train_df)

# Make future predictions for the backtest period
forecast_multiplicative = multiplicative_model.predict(test_df)

# Evaluate the multiplicative model
multiplicative_mse, multiplicative_mae, multiplicative_mape = evaluate_model(test_df['y'], forecast_multiplicative['yhat'])
print(f"Multiplicative Model: MSE = {multiplicative_mse}, MAE = {multiplicative_mae}, MAPE = {multiplicative_mape}")

# Compare the results and decide which is better
if additive_mse < multiplicative_mse:
    print("Additive model performs better.")
else:
    print("Multiplicative model performs better.")


09:33:36 - cmdstanpy - INFO - Chain [1] start processing
09:33:37 - cmdstanpy - INFO - Chain [1] done processing
09:33:37 - cmdstanpy - INFO - Chain [1] start processing


Additive Model: MSE = 1734.6298192391282, MAE = 31.498838368119518, MAPE = 0.2568660630639314


09:33:37 - cmdstanpy - INFO - Chain [1] done processing


Multiplicative Model: MSE = 1878.1001067329864, MAE = 32.39687451206886, MAPE = 0.25838165906311944
Additive model performs better.


# Hyperparametertuning

In [8]:
# Install needed packages
import pandas as pd
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly, plot_cross_validation_metric
import plotly.graph_objects as go
import numpy as np
import matplotlib.pyplot as plt
import itertools
from prophet.diagnostics import cross_validation, performance_metrics
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
import time

In [9]:
# Import the data
df = load_and_prepare_data('../../data/Final_data/final_data.csv')

# Reset the index
df = df.reset_index()

#rename date and target column to ds and y
df = df.rename(columns={'Date': 'ds', 'Temperature (°C)': 'y'})


In [None]:
import itertools
from tqdm import tqdm
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import time

# Define the hyperparameter grid
param_dist = {  
    'changepoint_prior_scale': [0.01, 0.05, 0.1, 0.25, 0.5],
    'seasonality_prior_scale': [1, 5, 10],
    'holidays_prior_scale': [1, 5, 10],
    'seasonality_mode': ['additive', 'multiplicative'],
}


# Generate all combinations of parameters
all_params = [dict(zip(param_dist.keys(), v)) for v in itertools.product(*param_dist.values())]

# Function to run Prophet with a specific parameter set
def evaluate_params(params):
    try:
        # Fit model with given params
        m = Prophet(**params).fit(df)
        df_cv = cross_validation(m, initial='730 days', period='180 days', horizon='365 days')
        df_p = performance_metrics(df_cv, rolling_window=0.1)

        # Calculate the average RMSE across all horizons
        avg_rmse = df_p['rmse'].mean()

        return params, avg_rmse
    except Exception as e:
        # Handle any exceptions that occur during evaluation
        print(f"Error occurred with parameters {params}: {e}")
        return params, float('inf'), pd.Timedelta(seconds=0)

# Start the timer
start_time = time.time()

# Use ThreadPoolExecutor to parallelize the evaluation
with ThreadPoolExecutor() as executor:
    results = list(tqdm(executor.map(evaluate_params, all_params), total=len(all_params)))

# Stop the timer
end_time = time.time()

# Calculate the elapsed time
elapsed_time = end_time - start_time
print(f"Total execution time: {elapsed_time:.2f} seconds")

# Extract the RMSEs, horizons, and corresponding params
rmses = [result[1] for result in results]
tuning_results = pd.DataFrame([result[0] for result in results])
tuning_results['average_rmse'] = rmses

# Output the tuning results
print(tuning_results)


In [11]:
# Python
best_params = all_params[np.argmin(rmses)]
print(best_params)


{'changepoint_prior_scale': 0.01, 'seasonality_prior_scale': 10, 'holidays_prior_scale': 1, 'seasonality_mode': 'multiplicative'}


## Applying the best parameter configuration to the model

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import STL
from darts import TimeSeries
from darts.models import ExponentialSmoothing


In [5]:
# Import the data
df = load_and_prepare_data('../../data/Final_data/final_data_july.csv')

# Reset the index
df.reset_index(inplace=True)
# Ensure the 'Date' column is properly parsed and set as the index
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)

# Ensure the data has a daily frequency (Filling missing dates if necessary)
df = df.asfreq('D')  # This makes sure the data has a daily frequency

In [6]:
df

Unnamed: 0_level_0,Day_ahead_price (€/MWh),Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),Other (GWh),Pumped_storage_generation (GWh),Solar_energy (GWh),Wind_offshore (GWh),Wind_onshore (GWh),Net_total_export_import (GWh),BEV_vehicles,Oil_price (EUR),TTF_gas_price (€/MWh),Nuclear_energy (GWh)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2012-01-01,18.19,14.75,4.95,8.39,98.605,108.454,51.011,325.337,188.811,54.040,19.314,6.263,3.404,235.467,54.662,6,99.64,21.10,250.979
2012-01-02,33.82,15.12,5.00,7.41,98.605,222.656,51.862,343.168,229.293,54.166,28.892,6.312,3.350,231.772,-64.477,6,100.04,20.00,258.671
2012-01-03,35.03,31.88,7.77,5.23,98.605,162.204,48.851,336.773,241.297,53.518,21.072,24.226,7.292,504.484,-35.078,6,100.44,20.90,271.495
2012-01-04,32.16,25.21,8.04,4.78,98.605,189.633,47.101,323.976,252.289,52.194,28.300,14.157,7.828,541.528,22.924,6,103.15,21.40,270.613
2012-01-05,20.35,13.46,9.98,4.23,98.605,175.733,45.854,327.502,259.018,52.179,31.887,4.728,8.280,572.819,35.618,6,103.92,21.30,287.555
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-07-24,66.61,225.04,3.47,17.54,110.007,43.469,85.857,199.246,194.291,54.026,20.934,325.285,49.360,179.921,-168.705,992,75.75,32.63,0.000
2024-07-25,78.34,272.71,2.12,17.85,110.410,50.676,82.632,195.983,209.610,52.963,18.766,394.116,51.053,42.885,-194.496,992,76.36,31.70,0.000
2024-07-26,93.04,172.33,2.60,19.09,110.852,42.333,79.531,205.273,205.773,52.616,19.081,256.246,40.449,129.267,-241.786,993,75.21,32.20,0.000
2024-07-27,80.74,176.67,2.05,19.63,110.479,33.307,74.958,184.012,216.412,50.927,18.856,244.051,2.180,32.001,-251.655,992,74.79,32.90,0.000


In [7]:
import pandas as pd
from prophet import Prophet
from prophet.plot import plot_plotly
import plotly.graph_objs as go

# Load your data (adjust the path and data columns)
# Prepare data for Prophet
df_prophet = pd.DataFrame({
    'ds': df.index, 
    'y': df['Temperature (°C)']
})

# Initialize the Prophet model
m = Prophet(
    seasonality_mode='multiplicative',
    changepoint_prior_scale = 0.01,
    holidays_prior_scale=1, 
    seasonality_prior_scale=10, 
    yearly_seasonality=20,
    weekly_seasonality=False,  
    daily_seasonality=False,
    scaling="absmax", 
    interval_width=0.9,   
)

# Add holidays for Germany to the data 
m.add_country_holidays(country_name='DE')

# Fit the model
m.fit(df_prophet)

# Make future predictions for the next 2 years (730 days)
future = m.make_future_dataframe(periods=730)
forecast = m.predict(future)

# Use Plotly for visualization 
fig = plot_plotly(m, forecast)

# Add additional layout elements if necessary
fig.update_layout(
    title="Forecasted Temperature",
    xaxis_title="Date",
    yaxis_title="Temperature (°C)",
    legend=dict(
        x=1, y=1,
        xanchor="right",
        yanchor="top"
    ),
    template="plotly_white"
)

# Show the Plotly plot
fig.show()


09:59:30 - cmdstanpy - INFO - Chain [1] start processing
09:59:31 - cmdstanpy - INFO - Chain [1] done processing


In [8]:
forecast

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,Ascension Day,Ascension Day_lower,Ascension Day_upper,Christmas Day,...,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yearly,yearly_lower,yearly_upper,additive_terms,additive_terms_lower,additive_terms_upper,yhat
0,2012-01-01,9.429553,-1.548650,8.594670,9.429553,9.429553,0.0,0.0,0.0,0.0,...,-0.619627,-0.619627,-0.619627,-0.646099,-0.646099,-0.646099,0.0,0.0,0.0,3.586745
1,2012-01-02,9.429775,-1.867668,8.324358,9.429775,9.429775,0.0,0.0,0.0,0.0,...,-0.658823,-0.658823,-0.658823,-0.658823,-0.658823,-0.658823,0.0,0.0,0.0,3.217218
2,2012-01-03,9.429996,-1.819404,7.822208,9.429996,9.429996,0.0,0.0,0.0,0.0,...,-0.669293,-0.669293,-0.669293,-0.669293,-0.669293,-0.669293,0.0,0.0,0.0,3.118563
3,2012-01-04,9.430218,-1.732018,8.454835,9.430218,9.430218,0.0,0.0,0.0,0.0,...,-0.677882,-0.677882,-0.677882,-0.677882,-0.677882,-0.677882,0.0,0.0,0.0,3.037641
4,2012-01-05,9.430439,-2.022917,8.152880,9.430439,9.430439,0.0,0.0,0.0,0.0,...,-0.685362,-0.685362,-0.685362,-0.685362,-0.685362,-0.685362,0.0,0.0,0.0,2.967178
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5318,2026-07-24,10.482117,15.859893,25.983882,10.478929,10.485607,0.0,0.0,0.0,0.0,...,0.984396,0.984396,0.984396,0.984396,0.984396,0.984396,0.0,0.0,0.0,20.800674
5319,2026-07-25,10.482297,15.064381,25.881518,10.479102,10.485792,0.0,0.0,0.0,0.0,...,0.986965,0.986965,0.986965,0.986965,0.986965,0.986965,0.0,0.0,0.0,20.827956
5320,2026-07-26,10.482477,15.978305,26.096927,10.479271,10.485977,0.0,0.0,0.0,0.0,...,0.987958,0.987958,0.987958,0.987958,0.987958,0.987958,0.0,0.0,0.0,20.838725
5321,2026-07-27,10.482657,15.806537,26.279160,10.479441,10.486162,0.0,0.0,0.0,0.0,...,0.987631,0.987631,0.987631,0.987631,0.987631,0.987631,0.0,0.0,0.0,20.835656


In [10]:
# select column ds and yhat into a new dataframe and create a copy to avoid the warning
forecast_final = forecast[['ds', 'yhat']].copy()

# rename ds to Date and yhat to Temperature (°C)
forecast_final.rename(columns={'ds': 'Date', 'yhat': 'Temperature (°C)'}, inplace=True)

# reduce the number of decimal places
forecast_final = forecast_final.round(2)

# set Date as index
forecast_final.set_index('Date', inplace=True)

# Select the data after the 28.07.2024
forecast_final = forecast_final.loc['2024-07-29':]

# save the results to a csv file
forecast_final.to_csv('../../data/Future_data/forecasted_temperature.csv')
