# Facebook prophet model #####

The facebook prophet model is known for its capabilities in time series forecasting. The documentation can be found here: 
https://facebook.github.io/prophet/

## 1. Import packages 


In [62]:
# Install needed packages
import pandas as pd
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import numpy as np
import matplotlib.pyplot as plt
from darts.metrics import mape, mae, rmse, r2_score, mse
from prophet.serialize import model_to_json, model_from_json
from prophet.utilities import regressor_coefficients

In [63]:
# Define a function to load and prepare the dataset
def load_and_prepare_data(file_path):
    """
    Loads and prepares energy price data from the specified CSV file.
    Ensures chronological order and converts the 'Date' column to datetime format.

    Args:
        file_path (str): Path to the CSV file.

    Returns:
        pd.DataFrame: A DataFrame with the processed energy price data.
    """
    df = pd.read_csv(file_path, parse_dates=['Date'])
    df['Date'] = pd.to_datetime(df['Date'])
    df.sort_values('Date', inplace=True)
    df.set_index('Date', inplace=True)
    return df


## 2. Load and Prepare Data

In [64]:
# Load the full dataset
df = load_and_prepare_data('../../data/Final_data/final_data_no_lags.csv')

# Reset the index and rename columns for Prophet compatibility
df = df.reset_index().rename(
    columns={'Date': 'ds', 'Day_ahead_price (€/MWh)': 'y'})

# Move the 'y' column to the second position in the DataFrame
cols = list(df.columns)
cols.remove('y')
cols.insert(1, 'y')
df = df[cols]

# Preview the data
df.head()

Unnamed: 0,ds,y,Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),...,Solar_energy (GWh),Wind_offshore (GWh),Wind_onshore (GWh),Net_total_export_import (GWh),BEV_vehicles,Oil_price (EUR),TTF_gas_price (€/MWh),Nuclear_energy (GWh),Day_of_week,Month
0,2012-01-08,26.83,17.54,5.21,3.74,98.605,189.718,48.467,354.178,256.892,...,8.039,3.639,251.722,26.066,6,103.71,21.33,286.206,6,1
1,2012-01-09,47.91,13.04,4.24,3.8,98.605,344.154,49.054,382.756,282.438,...,4.942,2.225,153.948,-59.712,6,103.64,22.45,294.593,0,1
2,2012-01-10,45.77,28.71,4.3,4.81,98.605,360.126,51.143,334.267,267.311,...,17.101,2.892,200.075,-31.384,6,104.22,21.95,293.534,1,1
3,2012-01-11,47.83,21.58,4.08,5.14,98.605,360.33,50.693,385.0,277.343,...,10.254,3.059,211.599,-2.11,6,103.93,21.95,295.34,2,1
4,2012-01-12,43.1,25.12,6.77,4.98,98.605,306.521,50.732,332.985,266.82,...,17.576,6.901,477.412,32.997,6,102.26,22.08,294.394,3,1


## 3. Load Training and Test Sets

In [65]:
# Load training and testing datasets
train_df = load_and_prepare_data('../../data/Final_data/train_df_no_lags.csv')
test_df = load_and_prepare_data('../../data/Final_data/test_df_no_lags.csv')

# Reset the index and rename columns for Prophet compatibility
train_df = train_df.reset_index().rename(
    columns={'Date': 'ds', 'Day_ahead_price (€/MWh)': 'y'})
test_df = test_df.reset_index().rename(
    columns={'Date': 'ds', 'Day_ahead_price (€/MWh)': 'y'})

# Reorder columns in both training and test sets
cols_train = list(train_df.columns)
cols_test = list(test_df.columns)

cols_train.remove('y')
cols_test.remove('y')

cols_train.insert(1, 'y')
cols_test.insert(1, 'y')

train_df = train_df[cols_train]
test_df = test_df[cols_test]

# Preview training data
train_df.head()

Unnamed: 0,ds,y,Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),...,Solar_energy (GWh),Wind_offshore (GWh),Wind_onshore (GWh),Net_total_export_import (GWh),BEV_vehicles,Oil_price (EUR),TTF_gas_price (€/MWh),Nuclear_energy (GWh),Day_of_week,Month
0,2012-01-08,26.83,17.54,5.21,3.74,98.605,189.718,48.467,354.178,256.892,...,8.039,3.639,251.722,26.066,6,103.71,21.33,286.206,6,1
1,2012-01-09,47.91,13.04,4.24,3.8,98.605,344.154,49.054,382.756,282.438,...,4.942,2.225,153.948,-59.712,6,103.64,22.45,294.593,0,1
2,2012-01-10,45.77,28.71,4.3,4.81,98.605,360.126,51.143,334.267,267.311,...,17.101,2.892,200.075,-31.384,6,104.22,21.95,293.534,1,1
3,2012-01-11,47.83,21.58,4.08,5.14,98.605,360.33,50.693,385.0,277.343,...,10.254,3.059,211.599,-2.11,6,103.93,21.95,295.34,2,1
4,2012-01-12,43.1,25.12,6.77,4.98,98.605,306.521,50.732,332.985,266.82,...,17.576,6.901,477.412,32.997,6,102.26,22.08,294.394,3,1


## 4. Instantiate the Prophet model using prophet python package

In [66]:
# Create the prophet model with the applicable seasonality and holidays
m = Prophet(
    seasonality_mode='additive',
    yearly_seasonality=25,
    weekly_seasonality=3,
    daily_seasonality=False,
    seasonality_prior_scale=1,
    holidays_prior_scale=1,
    changepoint_prior_scale=0.01,
    scaling="absmax",
    interval_width=0.9,
)

# Add monthly seasonality
m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
m.add_seasonality(name='weekly', period=7, fourier_order=3, prior_scale=0.1)

# Add holidays for Germany to the data
m.add_country_holidays(country_name='DE')

# Add regressors for all columns except ds and y
for column in train_df.columns:
    if column not in ['ds', 'y']:
        m.add_regressor(column, prior_scale=0.7, mode='additive')

# Fit the model
m.fit(train_df)

13:01:56 - cmdstanpy - INFO - Chain [1] start processing
13:01:58 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x14be3c8b0>

In [67]:
# Create the future dataframe
future = m.make_future_dataframe(periods=test_df.shape[0], freq='1D')

# Check the alignment and number of rows
print(future['ds'].equals(df['ds']))  # Should return True if perfectly aligned
print(future.shape[0] == df.shape[0])  # Also should return True

True
True


In [68]:
# Add columns from df to future
for column in df.columns:
    if column != 'ds':
        future[column] = df[column]

In [69]:
future

Unnamed: 0,ds,y,Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),...,Solar_energy (GWh),Wind_offshore (GWh),Wind_onshore (GWh),Net_total_export_import (GWh),BEV_vehicles,Oil_price (EUR),TTF_gas_price (€/MWh),Nuclear_energy (GWh),Day_of_week,Month
0,2012-01-08,26.83,17.54,5.21,3.74,98.605,189.718,48.467,354.178,256.892,...,8.039,3.639,251.722,26.066,6,103.71,21.33,286.206,6,1
1,2012-01-09,47.91,13.04,4.24,3.80,98.605,344.154,49.054,382.756,282.438,...,4.942,2.225,153.948,-59.712,6,103.64,22.45,294.593,0,1
2,2012-01-10,45.77,28.71,4.30,4.81,98.605,360.126,51.143,334.267,267.311,...,17.101,2.892,200.075,-31.384,6,104.22,21.95,293.534,1,1
3,2012-01-11,47.83,21.58,4.08,5.14,98.605,360.330,50.693,385.000,277.343,...,10.254,3.059,211.599,-2.110,6,103.93,21.95,295.340,2,1
4,2012-01-12,43.10,25.12,6.77,4.98,98.605,306.521,50.732,332.985,266.820,...,17.576,6.901,477.412,32.997,6,102.26,22.08,294.394,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4581,2024-07-24,66.61,225.04,3.47,17.54,110.007,43.469,85.857,199.246,194.291,...,325.285,49.360,179.921,-168.705,992,75.75,32.63,0.000,2,7
4582,2024-07-25,78.34,272.71,2.12,17.85,110.410,50.676,82.632,195.983,209.610,...,394.116,51.053,42.885,-194.496,992,76.36,31.70,0.000,3,7
4583,2024-07-26,93.04,172.33,2.60,19.09,110.852,42.333,79.531,205.273,205.773,...,256.246,40.449,129.267,-241.786,993,75.21,32.20,0.000,4,7
4584,2024-07-27,80.74,176.67,2.05,19.63,110.479,33.307,74.958,184.012,216.412,...,244.051,2.180,32.001,-251.655,992,74.79,32.90,0.000,5,7


In [70]:
# Forecast the future
forecast = m.predict(future)
# Show results
print(forecast.tail())

             ds      trend  yhat_lower  yhat_upper  trend_lower  trend_upper  \
4581 2024-07-24  57.438672   52.230489   96.132784    54.861899    60.086772   
4582 2024-07-25  57.439754   58.368000  100.207130    54.854663    60.091357   
4583 2024-07-26  57.440836   55.949304   98.873515    54.847426    60.095941   
4584 2024-07-27  57.441918   59.108378  103.389385    54.840190    60.100526   
4585 2024-07-28  57.443001   33.284338   76.500557    54.832953    60.105110   

      Ascension Day  Ascension Day_lower  Ascension Day_upper  BEV_vehicles  \
4581            0.0                  0.0                  0.0      1.652028   
4582            0.0                  0.0                  0.0      1.652028   
4583            0.0                  0.0                  0.0      1.654198   
4584            0.0                  0.0                  0.0      1.652028   
4585            0.0                  0.0                  0.0      1.652028   

      ...    weekly  weekly_lower  weekly_up

In [71]:
forecast

Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,Ascension Day,Ascension Day_lower,Ascension Day_upper,BEV_vehicles,...,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,multiplicative_terms,multiplicative_terms_lower,multiplicative_terms_upper,yhat
0,2012-01-08,44.473356,1.777224,44.670430,44.473356,44.473356,0.0,0.0,0.0,-0.487723,...,-8.460044,-8.460044,-8.460044,-2.195779,-2.195779,-2.195779,0.0,0.0,0.0,24.269358
1,2012-01-09,44.461334,26.404931,71.723560,44.461334,44.461334,0.0,0.0,0.0,-0.487723,...,-2.072336,-2.072336,-2.072336,-0.193293,-0.193293,-0.193293,0.0,0.0,0.0,49.730266
2,2012-01-10,44.449312,26.261198,71.277174,44.449312,44.449312,0.0,0.0,0.0,-0.487723,...,1.544570,1.544570,1.544570,1.508039,1.508039,1.508039,0.0,0.0,0.0,48.406519
3,2012-01-11,44.437290,26.461089,69.688606,44.437290,44.437290,0.0,0.0,0.0,-0.487723,...,3.074512,3.074512,3.074512,2.771092,2.771092,2.771092,0.0,0.0,0.0,48.535462
4,2012-01-12,44.425268,10.828414,53.263525,44.425268,44.425268,0.0,0.0,0.0,-0.487723,...,4.486737,4.486737,4.486737,3.519615,3.519615,3.519615,0.0,0.0,0.0,31.417086
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4581,2024-07-24,57.438672,52.230489,96.132784,54.861899,60.086772,0.0,0.0,0.0,1.652028,...,3.074512,3.074512,3.074512,0.229974,0.229974,0.229974,0.0,0.0,0.0,73.593046
4582,2024-07-25,57.439754,58.368000,100.207130,54.854663,60.091357,0.0,0.0,0.0,1.652028,...,4.486737,4.486737,4.486737,0.127016,0.127016,0.127016,0.0,0.0,0.0,77.619899
4583,2024-07-26,57.440836,55.949304,98.873515,54.847426,60.095941,0.0,0.0,0.0,1.654198,...,4.146121,4.146121,4.146121,-0.107288,-0.107288,-0.107288,0.0,0.0,0.0,78.529431
4584,2024-07-27,57.441918,59.108378,103.389385,54.840190,60.100526,0.0,0.0,0.0,1.652028,...,-2.719560,-2.719560,-2.719560,-0.474771,-0.474771,-0.474771,0.0,0.0,0.0,81.263928


In [72]:
columns = forecast.columns
columns = list(columns)
columns

['ds',
 'trend',
 'yhat_lower',
 'yhat_upper',
 'trend_lower',
 'trend_upper',
 'Ascension Day',
 'Ascension Day_lower',
 'Ascension Day_upper',
 'BEV_vehicles',
 'BEV_vehicles_lower',
 'BEV_vehicles_upper',
 'Biomass (GWh)',
 'Biomass (GWh)_lower',
 'Biomass (GWh)_upper',
 'Christmas Day',
 'Christmas Day_lower',
 'Christmas Day_upper',
 'Day_of_week',
 'Day_of_week_lower',
 'Day_of_week_upper',
 'Easter Monday',
 'Easter Monday_lower',
 'Easter Monday_upper',
 'German Unity Day',
 'German Unity Day_lower',
 'German Unity Day_upper',
 'Good Friday',
 'Good Friday_lower',
 'Good Friday_upper',
 'Hard_coal (GWh)',
 'Hard_coal (GWh)_lower',
 'Hard_coal (GWh)_upper',
 'Hydro (GWh)',
 'Hydro (GWh)_lower',
 'Hydro (GWh)_upper',
 'Labor Day',
 'Labor Day_lower',
 'Labor Day_upper',
 'Lignite (GWh)',
 'Lignite (GWh)_lower',
 'Lignite (GWh)_upper',
 'Month',
 'Month_lower',
 'Month_upper',
 'Natural_gas (GWh)',
 'Natural_gas (GWh)_lower',
 'Natural_gas (GWh)_upper',
 'Net_total_export_import (

## 5. Plot the predictions vs. the actual data for the test period

In [73]:
# Define the test period start and end dates
test_start = test_df['ds'].min()
test_end = test_df['ds'].max()

# Filter forecast to test period
test_forecast = forecast[(forecast['ds'] >= test_start)
                         & (forecast['ds'] <= test_end)]

# Filter historical data to test period
test_actuals = df[(df['ds'] >= test_start) & (df['ds'] <= test_end)]

# Create a figure with subplots
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces for forecast and actual data
fig.add_trace(
    go.Scatter(x=test_forecast['ds'], y=test_forecast['yhat'],
               name='Forecast', mode='lines', line=dict(color='blue')),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=test_forecast['ds'], y=test_forecast['yhat_lower'],
               name='Lower Confidence', mode='lines', line=dict(color='gray', dash='dot')),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=test_forecast['ds'], y=test_forecast['yhat_upper'], name='Upper Confidence',
               mode='lines', line=dict(color='gray', dash='dot'), fill='tonexty'),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=test_actuals['ds'], y=test_actuals['y'], name='Actual',
               mode='markers', marker=dict(color='black', size=3)),
    secondary_y=False,
)

# Set graph title and axis labels
fig.update_layout(
    title='Forecast vs Actuals for Test Period',
    xaxis_title='Date',
    yaxis_title='Day-Ahead Energy Price (EUR/MWh)',
    legend=dict(x=0.01, y=0.99, bordercolor="Black", borderwidth=1)
)

# Update layout
fig.update_layout(
    title='Forecast vs Actuals for Test Period',
    xaxis_title='Date',
    yaxis_title='Day-Ahead Energy Price (EUR/MWh)',
    legend=dict(
        x=0.94,   # Set x position to 1 (far right)
        y=1,   # Set y position to 1 (top)
        xanchor='right',  # Anchor the legend's x position to the right
        yanchor='top',    # Anchor the legend's y position to the top
        bordercolor='black',  # Optional: Add a border around the legend
        borderwidth=1        # Optional: Set the border width
    ),
    template='plotly'  # Changed to plotly_white for better visibility
)

# Save the figure as a PNG image
fig.write_image("forecast_vs_actuals_without_lags.png")

# Show plot
fig.show()

In [74]:
# Python
plot_components_plotly(m, forecast)


Discarding nonzero nanoseconds in conversion.



In [75]:
# Calculate the regression coefficients
coefficients = regressor_coefficients(m)
print(coefficients)

                          regressor regressor_mode      center  coef_lower  \
0            Solar_radiation (W/m2)       additive  131.412279    0.006702   
1                  Wind_speed (m/s)       additive    3.637902    0.047655   
2                  Temperature (°C)       additive    9.676783    0.563152   
3                     Biomass (GWh)       additive  118.868339    0.262122   
4                   Hard_coal (GWh)       additive  228.228327   -0.027719   
5                       Hydro (GWh)       additive   53.631400   -0.045944   
6                     Lignite (GWh)       additive  347.651960   -0.018549   
7                 Natural_gas (GWh)       additive  210.594330    0.074687   
8                       Other (GWh)       additive   61.825732    0.232829   
9   Pumped_storage_generation (GWh)       additive   20.860455   -0.078844   
10               Solar_energy (GWh)       additive  112.216697   -0.061131   
11              Wind_offshore (GWh)       additive   38.716651  

### Error metrics

In [76]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error
from darts import TimeSeries
from darts.metrics import mape, smape, mse, rmse, mae

# Ensuring that test_df and forecast are aligned by date and filter the forecast to the test period
test_forecast = forecast[(forecast['ds'] >= test_df['ds'].min()) & (
    forecast['ds'] <= test_df['ds'].max())]

# Making sure the lengths are the same and they are in the same order
if len(test_forecast) == len(test_df) and all(test_forecast['ds'].values == test_df['ds'].values):
    # Convert to TimeSeries objects for Darts metrics
    y_actual_ts = TimeSeries.from_dataframe(
        test_df, time_col='ds', value_cols='y')
    y_pred_ts = TimeSeries.from_dataframe(
        test_forecast, time_col='ds', value_cols='yhat')

    # Sklearn metrics to calculate MSE and MAE
    mse = mse(y_actual_ts, y_pred_ts)
    mae = mae(y_actual_ts, y_pred_ts)
    rmse = rmse(y_actual_ts, y_pred_ts)  # RMSE is just the square root of MSE

    # MAPE and SMAPE using Darts metrics
    mape_value = mape(y_actual_ts, y_pred_ts)
    smape_value = smape(y_actual_ts, y_pred_ts)

else:
    # Raise Error
    raise ValueError(
        "Dataframes are not aligned or of different lengths. Please check and try again.")

# Print the metrics
evaluation_metrics = {
    'MSE': mse,
    'MAE': mae,
    'RMSE': rmse,
    'MAPE': mape_value,
    'SMAPE': smape_value
}

# Convert the dictionary to a pandas dataframe for better visualization
metrics_df = pd.DataFrame([evaluation_metrics])

# Save metrics as CSV
metrics_df.to_csv('metrics_without_lags.csv', index=False)
print(metrics_df)

           MSE        MAE       RMSE       MAPE      SMAPE
0  1450.722637  25.323661  38.088353  80.012948  23.703571


## 6. Save model for further applications


In [77]:
with open('prophet_model.json', 'w') as fout:
    fout.write(model_to_json(m))  # Save model

# with open('prophet_model.json', 'r') as fin:
   # m = model_from_json(fin.read())  # Load model