# Facebook Prophet model

The Facebook Prophet model is known for its capabilities in time series forecasting. The documentation can be found here:
https://facebook.github.io/prophet/

### Import packages and load the data


In [77]:
# Import the required packages
import pandas as pd
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import utils
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from prophet.serialize import model_to_json, model_from_json
from prophet.utilities import regressor_coefficients

In [78]:
# Import the data and prepare for further processing
def load_and_prepare_data(file_path):
    """
    Read the energy price CSV and return it as a date-indexed DataFrame.

    The 'Date' column is converted to datetime, the rows are put into
    chronological order, and 'Date' becomes the index of the returned frame.
    """
    raw = pd.read_csv(file_path, parse_dates=['Date'])
    return (
        raw.assign(Date=pd.to_datetime(raw['Date']))
        .sort_values('Date')
        .set_index('Date')
    )

In [79]:
# Load the prepared data, turn the date index back into a regular column and
# rename the date and day-ahead price columns to "ds" and "y"
df = (
    load_and_prepare_data('../../data/Final_data/final_data.csv')
    .reset_index()
    .rename(columns={'Date': 'ds', 'Day_ahead_price (€/MWh)': 'y'})
)

# Move "y" to the second position (index 1); all other columns keep their relative order
cols = df.columns.tolist()
cols.insert(1, cols.pop(cols.index('y')))

# Reorder the DataFrame columns
df = df[cols]


In [80]:
df

Unnamed: 0,ds,y,Solar_radiation (W/m2),Wind_speed (m/s),Temperature (°C),Biomass (GWh),Hard_coal (GWh),Hydro (GWh),Lignite (GWh),Natural_gas (GWh),Other (GWh),Pumped_storage_generation (GWh),Solar_energy (GWh),Wind_offshore (GWh),Wind_onshore (GWh),Net_total_export_import (GWh),BEV_vehicles,Oil_price (EUR),TTF_gas_price (€/MWh),Nuclear_energy (GWh)
0,2012-01-01,18.19,14.75,4.95,8.39,98.605,108.454,51.011,325.337,188.811,54.040,19.314,6.263,3.404,235.467,54.662,6,99.64,21.1000,250.979
1,2012-01-02,33.82,15.12,5.00,7.41,98.605,222.656,51.862,343.168,229.293,54.166,28.892,6.312,3.350,231.772,-64.477,6,100.04,20.0000,258.671
2,2012-01-03,35.03,31.88,7.77,5.23,98.605,162.204,48.851,336.773,241.297,53.518,21.072,24.226,7.292,504.484,-35.078,6,100.44,20.9000,271.495
3,2012-01-04,32.16,25.21,8.04,4.78,98.605,189.633,47.101,323.976,252.289,52.194,28.300,14.157,7.828,541.528,22.924,6,103.15,21.4000,270.613
4,2012-01-05,20.35,13.46,9.98,4.23,98.605,175.733,45.854,327.502,259.018,52.179,31.887,4.728,8.280,572.819,35.618,6,103.92,21.3000,287.555
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4438,2024-02-25,61.09,97.58,3.43,4.52,123.683,48.501,58.671,193.697,255.124,60.627,13.565,169.216,29.879,278.787,-36.930,947,75.22,23.7625,0.000
4439,2024-02-26,66.27,73.25,3.12,4.96,124.810,69.146,58.444,281.177,289.764,59.810,12.231,110.504,62.336,239.555,-198.686,947,75.09,23.9000,0.000
4440,2024-02-27,73.84,58.12,3.11,4.53,124.989,103.379,59.181,351.355,354.042,67.170,23.753,85.584,16.951,131.761,-209.332,947,76.11,24.8300,0.000
4441,2024-02-28,71.82,66.00,2.46,3.69,125.068,93.416,58.160,350.348,338.216,65.375,19.042,106.330,68.585,76.355,-206.956,947,76.57,24.8000,0.000


### Train/test split

In [81]:
def train_test_split_sequential(data, test_size=0.2):
    """
    Split an ordered dataset into a leading training part and a trailing test part.

    No shuffling takes place: the first ``1 - test_size`` share of ``data``
    becomes the training set and the remainder becomes the test set, so the
    original (e.g. chronological) order is preserved.

    Parameters:
    - data: Any object that supports ``len()`` and slicing with ``[:i]`` / ``[i:]``
      (e.g. a pandas DataFrame, a list or a NumPy array).
    - test_size: Fraction of the dataset (between 0 and 1) to be used as test data.

    Returns:
    - train_data: The first part of ``data``.
    - test_data: The remaining, trailing part of ``data``.

    Raises:
    - ValueError: If ``test_size`` is not within [0, 1].
    """
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size!r}")

    # Round away floating-point noise before truncating. Without this,
    # 10 * (1 - 0.9) evaluates to 0.9999999999999998 and truncates to 0,
    # which would leave the training set empty.
    split_idx = int(round(len(data) * (1 - test_size), 9))
    train_data = data[:split_idx]
    test_data = data[split_idx:]
    return train_data, test_data

# Sequential split without shuffling: the leading rows of df become the
# training set and the trailing 20% of rows are held out as the test set
train_df, test_df= train_test_split_sequential(df, test_size=0.2)

### Instantiate the Prophet model using prophet python package

In [82]:
# Settings for the Prophet model (seasonalities, prior scales and scaling),
# collected in one place so they are easy to review and tune
prophet_settings = dict(
    seasonality_mode='additive',
    yearly_seasonality=25,
    weekly_seasonality=3,
    daily_seasonality=False,
    seasonality_prior_scale=5,
    holidays_prior_scale=5,
    changepoint_prior_scale=0.05,
    scaling="absmax",
)
m = Prophet(**prophet_settings)

# Add a custom monthly seasonality
m.add_seasonality(name='monthly', period=30.5, fourier_order=5)

# Add the German public holidays
m.add_country_holidays(country_name='DE')

# Every column other than "ds" and "y" is registered as an additional regressor
regressor_columns = [name for name in df.columns if name not in ('ds', 'y')]
for regressor_name in regressor_columns:
    m.add_regressor(regressor_name, prior_scale=0.5, mode='multiplicative')

# Fit the model on the training data only
m.fit(train_df)


14:58:06 - cmdstanpy - INFO - Chain [1] start processing
14:58:13 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x1682446d0>

In [83]:
# Create the future dataframe: one additional daily step per row of the test set
future = m.make_future_dataframe(periods=test_df.shape[0], freq='1D')

# The regressor columns are copied from df into future in the next step, which
# only works if both frames hold the same dates in the same rows. Report both
# checks and stop right here if either fails, instead of silently forecasting
# on misaligned regressor values.
dates_match = future['ds'].equals(df['ds'])
rows_match = future.shape[0] == df.shape[0]
print(dates_match)  # Should return True if perfectly aligned
print(rows_match)  # Also should return True
if not (dates_match and rows_match):
    raise ValueError("The future dataframe is not aligned with df; regressor values cannot be copied over.")


True
True


In [84]:
# Attach every column of df except "ds" to the future frame (values are matched on the index)
future = future.assign(**{name: df[name] for name in df.columns if name != 'ds'})

# Run the fitted model over the complete future frame
forecast = m.predict(future)

# Show the last rows of the result
print(forecast.tail())

             ds      trend  yhat_lower  yhat_upper  trend_lower  trend_upper  \
4438 2024-02-25  89.625201   51.616497   82.434110    73.134686   106.071105   
4439 2024-02-26  89.652949   70.725222  106.006214    73.132114   106.128006   
4440 2024-02-27  89.680698   85.398366  125.921272    73.130015   106.184879   
4441 2024-02-28  89.708446   85.908219  127.495185    73.124196   106.238720   
4442 2024-02-29  89.736195   61.859753   93.309018    73.121682   106.292560   

      Ascension Day  Ascension Day_lower  Ascension Day_upper  BEV_vehicles  \
4438            0.0                  0.0                  0.0     -0.039445   
4439            0.0                  0.0                  0.0     -0.039445   
4440            0.0                  0.0                  0.0     -0.039445   
4441            0.0                  0.0                  0.0     -0.039445   
4442            0.0                  0.0                  0.0     -0.040253   

      ...  multiplicative_terms  multiplicat

In [85]:
# Names of all columns in the forecast frame, as a plain Python list
columns = forecast.columns.tolist()
columns

['ds',
 'trend',
 'yhat_lower',
 'yhat_upper',
 'trend_lower',
 'trend_upper',
 'Ascension Day',
 'Ascension Day_lower',
 'Ascension Day_upper',
 'BEV_vehicles',
 'BEV_vehicles_lower',
 'BEV_vehicles_upper',
 'Biomass (GWh)',
 'Biomass (GWh)_lower',
 'Biomass (GWh)_upper',
 'Christmas Day',
 'Christmas Day_lower',
 'Christmas Day_upper',
 'Easter Monday',
 'Easter Monday_lower',
 'Easter Monday_upper',
 'German Unity Day',
 'German Unity Day_lower',
 'German Unity Day_upper',
 'Good Friday',
 'Good Friday_lower',
 'Good Friday_upper',
 'Hard_coal (GWh)',
 'Hard_coal (GWh)_lower',
 'Hard_coal (GWh)_upper',
 'Hydro (GWh)',
 'Hydro (GWh)_lower',
 'Hydro (GWh)_upper',
 'Labor Day',
 'Labor Day_lower',
 'Labor Day_upper',
 'Lignite (GWh)',
 'Lignite (GWh)_lower',
 'Lignite (GWh)_upper',
 'Natural_gas (GWh)',
 'Natural_gas (GWh)_lower',
 'Natural_gas (GWh)_upper',
 'Net_total_export_import (GWh)',
 'Net_total_export_import (GWh)_lower',
 'Net_total_export_import (GWh)_upper',
 "New Year's Da

### Plot the predictions vs. the actual data for the test period

In [86]:
# The test period runs from the first to the last date of the hold-out set
test_start, test_end = test_df['ds'].min(), test_df['ds'].max()

# Restrict the forecast and the observed data to the test period (both bounds inclusive)
test_forecast = forecast[forecast['ds'].between(test_start, test_end)]
test_actuals = df[df['ds'].between(test_start, test_end)]

# Traces in drawing order. The upper bound uses fill='tonexty', which shades the
# area down to the trace added immediately before it, so the lower bound must precede it.
traces = [
    go.Scatter(
        x=test_forecast['ds'], y=test_forecast['yhat'],
        name='Forecast', mode='lines', line=dict(color='blue'),
    ),
    go.Scatter(
        x=test_forecast['ds'], y=test_forecast['yhat_lower'],
        name='Lower Confidence', mode='lines', line=dict(color='gray', dash='dot'),
    ),
    go.Scatter(
        x=test_forecast['ds'], y=test_forecast['yhat_upper'],
        name='Upper Confidence', mode='lines', line=dict(color='gray', dash='dot'),
        fill='tonexty',
    ),
    go.Scatter(
        x=test_actuals['ds'], y=test_actuals['y'],
        name='Actual', mode='markers', marker=dict(color='black', size=3),
    ),
]

# Create the figure and put every trace on the primary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])
for trace in traces:
    fig.add_trace(trace, secondary_y=False)

# Title, axis labels and legend placement
fig.update_layout(
    title='Forecast vs Actuals for Test Period',
    xaxis_title='Date',
    yaxis_title='Day-Ahead Energy Price (EUR/MWh)',
    legend=dict(x=0.01, y=0.99, bordercolor="Black", borderwidth=1),
)

# Show plot
fig.show()


In [87]:
# Interactive Plotly figure of the forecast components of the fitted model
plot_components_plotly(m, forecast)

In [88]:
# Extract the coefficients of the additional regressors from the fitted model.
# The printed table has one row per regressor, including its mode, its center
# value and the bounds of its coefficient.
coefficients = regressor_coefficients(m)
print(coefficients)

                          regressor  regressor_mode      center  coef_lower  \
0            Solar_radiation (W/m2)  multiplicative  132.124361    0.000012   
1                  Wind_speed (m/s)  multiplicative    3.633467   -0.055259   
2                  Temperature (°C)  multiplicative    9.811882    0.001755   
3                     Biomass (GWh)  multiplicative  118.679972   -0.001919   
4                   Hard_coal (GWh)  multiplicative  233.268332    0.000636   
5                       Hydro (GWh)  multiplicative   54.066345   -0.001550   
6                     Lignite (GWh)  multiplicative  351.084776    0.000184   
7                 Natural_gas (GWh)  multiplicative  209.257293    0.000403   
8                       Other (GWh)  multiplicative   60.894726    0.002639   
9   Pumped_storage_generation (GWh)  multiplicative   21.322286    0.001111   
10               Solar_energy (GWh)  multiplicative  110.255912   -0.000645   
11              Wind_offshore (GWh)  multiplicative 

### Error metrics

In [89]:
# Restrict the forecast to the dates covered by the test set
test_forecast = forecast[(forecast['ds'] >= test_df['ds'].min()) & (forecast['ds'] <= test_df['ds'].max())]

# Stop early unless forecast and test set contain exactly the same dates in the same order
if len(test_forecast) != len(test_df) or not np.array_equal(test_forecast['ds'].values, test_df['ds'].values):
    raise ValueError("Dataframes are not aligned or of different lengths. Please check and try again.")

# Use plain arrays so that every metric pairs actual and predicted values by
# position. Series arithmetic would match on index labels instead, which could
# make MAPE disagree with the sklearn metrics if the two indexes ever differ.
y_true = test_df['y'].to_numpy(dtype=float)
y_pred = test_forecast['yhat'].to_numpy(dtype=float)

# Sklearn metrics to calculate MSE and MAE; RMSE is the square root of MSE
mse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mse)

# MAPE in percent. The percentage error is undefined where the actual value is
# exactly 0, so those observations are excluded instead of producing inf.
nonzero = y_true != 0
if not nonzero.all():
    print(f"MAPE: skipped {int((~nonzero).sum())} observation(s) with an actual value of 0")
if nonzero.any():
    mape = np.mean(np.abs((y_pred[nonzero] - y_true[nonzero]) / y_true[nonzero])) * 100
else:
    mape = np.nan

# Collect the metrics
evaluation_metrics = {
    'MSE': mse,
    'MAE': mae,
    'RMSE': rmse,
    'MAPE': mape
}

# Convert the dictionary to a pandas dataframe for better visualization
metrics_df = pd.DataFrame([evaluation_metrics])
print(metrics_df)


           MSE        MAE       RMSE      MAPE
0  2024.761123  31.042558  44.997346  54.75146


### Save model for further applications


In [90]:
# Serialize before opening the file: opening with mode 'w' truncates the file
# immediately, so a failure during serialization would otherwise wipe out a
# previously saved model.
model_json = model_to_json(m)
with open('prophet_model.json', 'w') as fout:
    fout.write(model_json)  # Save model

# with open('prophet_model.json', 'r') as fin:
   # m = model_from_json(fin.read())  # Load model
