## Applying the facebook prophet model to the validation data 

In [17]:
# Load the saved model and make predictions on the test data    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
from prophet.serialize import model_to_json, model_from_json
from prophet.utilities import regressor_coefficients
from sklearn.metrics import mean_squared_error, mean_absolute_error


In [18]:
# Load the data
with open('prophet_model.json', 'r') as fin:
   model = model_from_json(fin.read())  # Load model

## Load the validation data

In [19]:
def load_and_prepare_data(file_path):
    df = pd.read_csv(file_path, parse_dates=['Date'])
    df.sort_values('Date', inplace=True)
    df.rename(columns={'Date': 'ds', 'Day_ahead_price (€/MWh)': 'y'}, inplace=True)
    df.ffill(inplace=True)
    df.set_index('ds', inplace=True)
    return df

In [20]:
# Import the data
valid_data = load_and_prepare_data('./../../data/Application_data/Final_application_data/final_application_data.csv')
valid_data

# Delete the last ten row of the data
valid_data = valid_data[:-10]



DataFrame.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.



In [21]:
# Search for NAs in the data frame
valid_data.isnull().sum()

y                                  0
Solar_radiation (W/m2)             0
Wind_speed (m/s)                   0
Temperature (°C)                   0
Biomass (GWh)                      0
Hard_coal (GWh)                    0
Hydro (GWh)                        0
Lignite (GWh)                      0
Natural_gas (GWh)                  0
Other (GWh)                        0
Pumped_storage_generation (GWh)    0
Solar_energy (GWh)                 0
Wind_offshore (GWh)                0
Wind_onshore (GWh)                 0
Net_total_export_import (GWh)      0
BEV_vehicles                       0
Oil_price (EUR)                    0
TTF_gas_price (€/MWh)              0
Nuclear_energy (GWh)               0
dtype: int64

In [22]:
# Create a future dataframe covering the validation period
future_dates = valid_data.reset_index().copy()
future_dates = future_dates[future_dates['ds'] >= model.history['ds'].max()]
future_dates = future_dates.set_index('ds')
future_dates

Unnamed: 0_level_0,y,Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),Other (GWh),Pumped_storage_generation (GWh),Solar_energy (GWh),Wind_offshore (GWh),Wind_onshore (GWh),Net_total_export_import (GWh),BEV_vehicles,Oil_price (EUR),TTF_gas_price (€/MWh),Nuclear_energy (GWh)
ds,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2024-03-01,70.93,65.50,3.14,5.81,124.839,68.840,55.295,252.736,256.269,61.390,15.691,99.970,109.303,258.820,-168.740,1012,77.45,25.5000,0
2024-03-02,63.99,101.71,3.25,7.75,124.742,48.551,53.621,192.488,222.065,55.669,12.299,178.801,98.946,264.598,-20.455,1012,77.22,26.3250,0
2024-03-03,61.08,111.42,3.31,8.15,124.469,56.956,50.715,206.578,209.388,52.162,8.209,202.653,101.406,235.637,43.071,1012,76.99,26.7375,0
2024-03-04,75.59,81.88,2.79,6.42,124.460,110.860,53.916,373.782,277.610,56.478,20.788,101.805,49.602,126.337,-149.007,1013,76.76,27.1500,0
2024-03-05,74.98,52.29,3.10,5.66,125.129,134.758,53.633,399.568,307.345,60.802,11.454,72.640,121.064,179.406,-74.175,1012,76.05,27.3700,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-07-14,24.11,262.62,2.84,17.84,108.523,13.148,72.425,109.751,152.700,52.337,16.552,373.534,103.933,130.989,-61.474,992,78.71,31.1300,0
2024-07-15,77.45,304.71,2.58,20.75,104.809,22.465,85.336,170.017,199.304,51.962,28.313,427.314,56.057,105.741,-135.508,993,78.66,31.0400,0
2024-07-16,51.14,198.54,3.88,19.53,103.756,26.754,82.035,169.305,173.874,50.752,17.974,305.022,72.066,314.640,-112.108,993,77.62,32.5300,0
2024-07-17,76.79,209.54,3.37,18.23,106.761,23.216,78.904,179.681,185.801,53.000,21.230,294.246,71.815,257.865,-157.414,993,78.87,31.5200,0


In [23]:
# Predict the future
forecast = model.predict(future_dates.reset_index())

# Filter forecast to only include dates in the validation period
forecast_filtered = forecast[forecast['ds'].isin(valid_data.index)]

In [24]:
forecast

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,Ascension Day,Ascension Day_lower,Ascension Day_upper,BEV_vehicles,...,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,yhat
0,2024-03-01,89.763944,82.432513,97.264789,89.763944,89.763944,0.0,0.0,0.0,-0.042727,...,0.005908,0.005908,0.005908,1.736441,1.736441,1.736441,-2.106076,-2.106076,-2.106076,90.151362
1,2024-03-02,89.791692,66.850323,81.754505,89.791692,89.791692,0.0,0.0,0.0,-0.042727,...,-0.113884,-0.113884,-0.113884,-2.527389,-2.527389,-2.527389,-2.119554,-2.119554,-2.119554,74.641035
2,2024-03-03,89.819441,60.264147,75.123538,89.819441,89.819441,0.0,0.0,0.0,-0.042727,...,-0.133129,-0.133129,-0.133129,-7.488977,-7.488977,-7.488977,-2.093197,-2.093197,-2.093197,67.680391
3,2024-03-04,89.847189,95.915408,110.127094,89.847189,89.847189,0.0,0.0,0.0,-0.042777,...,0.162973,0.162973,0.162973,1.395656,1.395656,1.395656,-2.031157,-2.031157,-2.031157,103.159305
4,2024-03-05,89.874938,94.903803,109.795884,89.874938,89.874938,0.0,0.0,0.0,-0.042727,...,0.151179,0.151179,0.151179,2.096164,2.096164,2.096164,-1.943521,-1.943521,-1.943521,102.946382
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135,2024-07-14,93.510003,71.291815,86.428215,92.904219,94.180786,0.0,0.0,0.0,-0.041717,...,-0.107894,-0.107894,-0.107894,-7.488977,-7.488977,-7.488977,2.896555,2.896555,2.896555,78.780431
136,2024-07-15,93.537752,85.432926,99.882936,92.924160,94.216875,0.0,0.0,0.0,-0.041767,...,-0.057247,-0.057247,-0.057247,1.395656,1.395656,1.395656,2.991016,2.991016,2.991016,92.782407
137,2024-07-16,93.565500,80.891656,95.815385,92.945674,94.260504,0.0,0.0,0.0,-0.041767,...,-0.118686,-0.118686,-0.118686,2.096164,2.096164,2.096164,3.152027,3.152027,3.152027,88.141302
138,2024-07-17,93.593249,87.046705,101.490997,92.966254,94.297156,0.0,0.0,0.0,-0.041767,...,-0.061967,-0.061967,-0.061967,2.434467,2.434467,2.434467,3.346681,3.346681,3.346681,94.115924


In [25]:
# Calculate error metrics
if len(valid_data) == len(forecast):
    mse = mean_squared_error(valid_data['y'], forecast['yhat'])
    mae = mean_absolute_error(valid_data['y'], forecast['yhat'])
    rmse = np.sqrt(mse)
    mape = np.mean(np.abs((forecast['yhat'] - valid_data['y']) / valid_data['y'])) * 100
    
    print(f"MSE: {mse:.2f}, MAE: {mae:.2f}, RMSE: {rmse:.2f}, MAPE: {mape:.2f}%")
else:
    raise ValueError("The lengths of valid_data and forecast_filtered do not match after filtering.")

MSE: 578.87, MAE: 19.19, RMSE: 24.06, MAPE: nan%



'<' not supported between instances of 'Timestamp' and 'int', sort order is undefined for incomparable objects.



In [26]:
forecast_filtered['yhat'] = pd.to_numeric(forecast_filtered['yhat'], errors='coerce')
valid_data['y'] = pd.to_numeric(valid_data['y'], errors='coerce')


In [27]:
# Ensure columns are numeric
forecast_filtered['yhat'] = pd.to_numeric(forecast_filtered['yhat'], errors='coerce')
valid_data['y'] = pd.to_numeric(valid_data['y'], errors='coerce')

# Handle zero values in 'y'
valid_data.loc[valid_data['y'] == 0, 'y'] = np.nan

# Drop NaNs before MAPE calculation
valid_data.dropna(subset=['y'], inplace=True)

# Calculate error metrics
if len(valid_data) == len(forecast_filtered):
    mse = mean_squared_error(valid_data['y'], forecast_filtered['yhat'])
    mae = mean_absolute_error(valid_data['y'], forecast_filtered['yhat'])
    rmse = np.sqrt(mse)
    mape = np.mean(np.abs((forecast_filtered['yhat'] - valid_data['y']) / valid_data['y'])) * 100
    
    print(f"MSE: {mse:.2f}, MAE: {mae:.2f}, RMSE: {rmse:.2f}, MAPE: {mape:.2f}%")
else:
    raise ValueError("The lengths of valid_data and forecast_filtered do not match after filtering.")


MSE: 578.87, MAE: 19.19, RMSE: 24.06, MAPE: nan%



'<' not supported between instances of 'Timestamp' and 'int', sort order is undefined for incomparable objects.



In [28]:
forecast['yhat']

0       90.151362
1       74.641035
2       67.680391
3      103.159305
4      102.946382
          ...    
135     78.780431
136     92.782407
137     88.141302
138     94.115924
139    107.173474
Name: yhat, Length: 140, dtype: float64

In [29]:
valid_data['y']

ds
2024-03-01    70.93
2024-03-02    63.99
2024-03-03    61.08
2024-03-04    75.59
2024-03-05    74.98
              ...  
2024-07-14    24.11
2024-07-15    77.45
2024-07-16    51.14
2024-07-17    76.79
2024-07-18    90.75
Name: y, Length: 140, dtype: float64

In [30]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

# Define the validation period start and end dates
valid_data.reset_index(inplace=True)
validation_start = valid_data['ds'].min()
validation_end = valid_data['ds'].max()

# Filter forecast to validation period
validation_forecast = forecast[(forecast['ds'] >= validation_start) & (forecast['ds'] <= validation_end)]

# Filter historical data to validation period
validation_actuals = valid_data[(valid_data['ds'] >= validation_start) & (valid_data['ds'] <= validation_end)]

# Create a figure with subplots
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces for forecast and actual data
fig.add_trace(
    go.Scatter(x=validation_forecast['ds'], y=validation_forecast['yhat'], name='Forecast', mode='lines', line=dict(color='blue')),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=validation_forecast['ds'], y=validation_forecast['yhat_lower'], name='Lower Confidence', mode='lines', line=dict(color='gray', dash='dot')),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=validation_forecast['ds'], y=validation_forecast['yhat_upper'], name='Upper Confidence', mode='lines', line=dict(color='gray', dash='dot'), fill='tonexty'),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=validation_actuals['ds'], y=validation_actuals['y'], name='Actual', mode='markers', marker=dict(color='black', size=3)),
    secondary_y=False,
)

# Set graph title and axis labels
fig.update_layout(
    title='Forecast vs Actuals for Validation Period',
    xaxis_title='Date',
    yaxis_title='Day-Ahead Energy Price (EUR/MWh)',
    legend=dict(x=0.01, y=0.99, bordercolor="Black", borderwidth=1)
)

# Show plot
fig.show()
