## Data Simulation for future covariates to predict energy prices 


In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import STL
from darts import TimeSeries
from darts.models import ExponentialSmoothing


In [20]:
# Helper that loads the prepared energy price data
def load_and_prepare_data(file_path):
    """
    Load energy prices data from a CSV file and return it indexed by date.

    The 'Date' column is converted to datetime *before* sorting, so the rows
    are ordered chronologically rather than lexicographically (a plain string
    sort is only chronological for ISO formatted dates).

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file. It must contain a 'Date' column.

    Returns
    -------
    pandas.DataFrame
        The file's contents sorted by date, with 'Date' as a datetime index.

    Raises
    ------
    KeyError
        If the file has no 'Date' column.
    """
    df = pd.read_csv(file_path)
    # pd.to_datetime raises on unparseable values instead of silently leaving
    # the column as strings.
    df['Date'] = pd.to_datetime(df['Date'])
    return df.sort_values('Date').set_index('Date')

In [21]:
# Load the data, make sure 'Date' is a datetime index and put the frame on a
# daily grid (asfreq inserts NaN rows for any calendar day that is missing)
df = (
    load_and_prepare_data('../../../data/Final_data/final_data_july.csv')
    .reset_index()
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
    .asfreq('D')
)

In [22]:
# Show the prepared frame (indexed by 'Date' at daily frequency) as the cell output
df

Unnamed: 0_level_0,Day_ahead_price (€/MWh),Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),Other (GWh),...,Lag_1_day,Lag_2_days,Lag_3_days,Lag_4_days,Lag_5_days,Lag_6_days,Lag_7_days,Day_of_week,Month,Rolling_mean_7
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-01-08,26.83,17.54,5.21,3.74,98.605,189.718,48.467,354.178,256.892,52.178,...,32.58,36.26,20.35,32.16,35.03,33.82,18.19,6,1,31.00
2012-01-09,47.91,13.04,4.24,3.80,98.605,344.154,49.054,382.756,282.438,60.752,...,26.83,32.58,36.26,20.35,32.16,35.03,33.82,0,1,33.02
2012-01-10,45.77,28.71,4.30,4.81,98.605,360.126,51.143,334.267,267.311,62.106,...,47.91,26.83,32.58,36.26,20.35,32.16,35.03,1,1,34.55
2012-01-11,47.83,21.58,4.08,5.14,98.605,360.330,50.693,385.000,277.343,60.862,...,45.77,47.91,26.83,32.58,36.26,20.35,32.16,2,1,36.79
2012-01-12,43.10,25.12,6.77,4.98,98.605,306.521,50.732,332.985,266.820,56.922,...,47.83,45.77,47.91,26.83,32.58,36.26,20.35,3,1,40.04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-07-24,66.61,225.04,3.47,17.54,110.007,43.469,85.857,199.246,194.291,54.026,...,79.62,88.75,58.45,59.32,86.47,90.75,76.79,2,7,75.71
2024-07-25,78.34,272.71,2.12,17.85,110.410,50.676,82.632,195.983,209.610,52.963,...,66.61,79.62,88.75,58.45,59.32,86.47,90.75,3,7,73.94
2024-07-26,93.04,172.33,2.60,19.09,110.852,42.333,79.531,205.273,205.773,52.616,...,78.34,66.61,79.62,88.75,58.45,59.32,86.47,4,7,74.88
2024-07-27,80.74,176.67,2.05,19.63,110.479,33.307,74.958,184.012,216.412,50.927,...,93.04,78.34,66.61,79.62,88.75,58.45,59.32,5,7,77.94


In [23]:
# Names of the columns that serve as future covariates: weather measurements
# first, then the generation / exchange figures reported in GWh
covariates = [
    # Weather
    'Solar_radiation (W/m2)',
    'Wind_speed (m/s)',
    'Temperature (°C)',
    # Generation and exchange
    'Biomass (GWh)',
    'Hard_coal (GWh)',
    'Hydro (GWh)',
    'Lignite (GWh)',
    'Natural_gas (GWh)',
    'Other (GWh)',
    'Pumped_storage_generation (GWh)',
    'Solar_energy (GWh)',
    'Wind_offshore (GWh)',
    'Wind_onshore (GWh)',
    'Net_total_export_import (GWh)',
]

# Sub-frame holding only the covariate columns
future_covariates_columns = df[covariates]

In [24]:
import pandas as pd
from prophet import Prophet
from prophet.plot import plot_plotly
import plotly.graph_objs as go

# Prophet expects a frame with a 'ds' (timestamp) and a 'y' (target) column;
# the target here is the TTF gas price
df_prophet = pd.DataFrame(dict(ds=df.index, y=df["TTF_gas_price (€/MWh)"]))

# Trend-only model: every built-in seasonality is switched off, and the
# changepoint prior is kept small so the trend stays stiff
m = Prophet(
    changepoint_prior_scale=0.01,
    seasonality_prior_scale=10,
    holidays_prior_scale=1,
    seasonality_mode='additive',
    yearly_seasonality=False,
    weekly_seasonality=False,
    daily_seasonality=False,
).fit(df_prophet)

# Extend the history by 730 daily steps (about two years) and predict over
# the whole range
future = m.make_future_dataframe(periods=730)
forecast = m.predict(future)

# Interactive Plotly figure of the fit and the forecast
fig = plot_plotly(m, forecast)
fig.update_layout(
    template="plotly_white",
    title="Forecasted TTF gas price using Prophet",
    xaxis_title="Date",
    yaxis_title="TTF_gas_price (€/MWh)",
    # Pin the legend to the top-right corner of the plot area
    legend={"x": 1, "y": 1, "xanchor": "right", "yanchor": "top"},
)
fig.show()


14:42:03 - cmdstanpy - INFO - Chain [1] start processing
14:42:04 - cmdstanpy - INFO - Chain [1] done processing


In [25]:
# Inspect the frame returned by m.predict(future): fitted history plus the 730 appended future days
forecast

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2012-01-08,26.641230,0.233890,53.328416,26.641230,26.641230,0.0,0.0,0.0,0.0,0.0,0.0,26.641230
1,2012-01-09,26.636275,0.914660,52.938507,26.636275,26.636275,0.0,0.0,0.0,0.0,0.0,0.0,26.636275
2,2012-01-10,26.631321,-0.553231,53.814047,26.631321,26.631321,0.0,0.0,0.0,0.0,0.0,0.0,26.631321
3,2012-01-11,26.626366,-1.244640,53.600462,26.626366,26.626366,0.0,0.0,0.0,0.0,0.0,0.0,26.626366
4,2012-01-12,26.621412,-0.587914,53.791910,26.621412,26.621412,0.0,0.0,0.0,0.0,0.0,0.0,26.621412
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5311,2026-07-24,-36.231012,-75.418920,4.458749,-64.560973,-11.219613,0.0,0.0,0.0,0.0,0.0,0.0,-36.231012
5312,2026-07-25,-36.314965,-73.452480,1.328851,-64.721540,-11.256004,0.0,0.0,0.0,0.0,0.0,0.0,-36.314965
5313,2026-07-26,-36.398918,-76.887621,1.354074,-64.882106,-11.292395,0.0,0.0,0.0,0.0,0.0,0.0,-36.398918
5314,2026-07-27,-36.482871,-73.520183,2.399319,-65.042673,-11.328786,0.0,0.0,0.0,0.0,0.0,0.0,-36.482871


# Hyperparameter tuning

In [14]:
# Import needed packages
import pandas as pd
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly, plot_cross_validation_metric
import plotly.graph_objects as go
import numpy as np
import matplotlib.pyplot as plt
import itertools
from prophet.diagnostics import cross_validation, performance_metrics
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
import time

In [15]:
# Import the data.
# Uses the same repository-relative path as the first load cell of this
# notebook rather than a machine-specific absolute path, so the notebook can
# be re-run from any checkout.
# NOTE(review): assumes both paths point at the same final_data_july.csv - confirm.
df = load_and_prepare_data('../../../data/Final_data/final_data_july.csv')

# Turn the 'Date' index back into a regular column
df = df.reset_index()

# Prophet requires the timestamp column to be called 'ds' and the target 'y';
# the target for this tuning run is the TTF gas price
df = df.rename(columns={'Date': 'ds', "TTF_gas_price (€/MWh)": 'y'})


In [None]:
import itertools
from tqdm import tqdm
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import time

# Search space for the Prophet grid search (5 * 3 * 3 * 2 = 90 combinations)
param_dist = {
    'changepoint_prior_scale': [0.01, 0.05, 0.1, 0.25, 0.5],
    'seasonality_prior_scale': [1, 5, 10],
    'holidays_prior_scale': [1, 5, 10],
    'seasonality_mode': ['additive', 'multiplicative'],
}


# Generate all combinations of parameters, one keyword-argument dict each.
# Keys and values must come from the same dict: the earlier reference to
# `param_grid` is not defined anywhere in this notebook and raised a NameError
# on a fresh kernel.
all_params = [dict(zip(param_dist.keys(), v)) for v in itertools.product(*param_dist.values())]

# Function to run Prophet with a specific parameter set
def evaluate_params(params):
    """
    Fit Prophet with one parameter set and score it by cross-validated RMSE.

    The model is fitted on the module-level frame `df` and evaluated with
    Prophet's `cross_validation` (initial window 730 days, a new cutoff every
    180 days, 365-day horizon). The score is the mean of the 'rmse' column
    returned by `performance_metrics`.

    Parameters
    ----------
    params : dict
        Keyword arguments forwarded to the `Prophet` constructor.

    Returns
    -------
    tuple
        `(params, avg_rmse)`. If fitting or evaluation fails, the error is
        printed and `(params, float('inf'))` is returned, so the failed
        combination ranks last and the grid search keeps running. Both paths
        return a 2-tuple.
    """
    try:
        # Fit model with given params
        m = Prophet(**params).fit(df)
        df_cv = cross_validation(m, initial='730 days', period='180 days', horizon='365 days')
        df_p = performance_metrics(df_cv, rolling_window=0.1)

        # Calculate the average RMSE across all horizons
        avg_rmse = df_p['rmse'].mean()

        return params, avg_rmse
    except Exception as e:
        # Deliberate best-effort: one failing combination must not abort the
        # whole search, so report it and give it the worst possible score.
        print(f"Error occurred with parameters {params}: {e}")
        return params, float('inf')

# Wall-clock timestamp taken right before the search starts
start_time = time.time()

# Score every parameter set on a thread pool; tqdm reports how many results
# have been collected so far
with ThreadPoolExecutor() as executor:
    results = list(
        tqdm(
            executor.map(evaluate_params, all_params),
            total=len(all_params),
        )
    )

# Wall-clock timestamp after the last result has arrived
end_time = time.time()

# Report how long the whole search took
elapsed_time = end_time - start_time
print(f"Total execution time: {elapsed_time:.2f} seconds")

# Each result starts with (params, average RMSE): collect the scores and
# build one table row per parameter set
rmses = [outcome[1] for outcome in results]
tuning_results = pd.DataFrame([outcome[0] for outcome in results]).assign(average_rmse=rmses)

# Output the tuning results
print(tuning_results)


In [17]:
# Pick the parameter set at the position of the smallest average RMSE
best_params = all_params[np.asarray(rmses).argmin()]
print(best_params)


{'changepoint_prior_scale': 0.01, 'seasonality_prior_scale': 10, 'holidays_prior_scale': 1, 'seasonality_mode': 'additive'}
