# Facebook Prophet model

The Facebook Prophet model is known for its time series forecasting capabilities. The documentation can be found here:
https://facebook.github.io/prophet/

### Import packages and load in the data 


In [4]:
# Import the required packages
import pandas as pd
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import utils
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from prophet.serialize import model_to_json, model_from_json
from prophet.utilities import regressor_coefficients

In [5]:
# Import the data and prepare for further processing
def load_and_prepare_data(file_path):
    """
    Read the energy price CSV and return it as a DataFrame indexed by date.

    The 'Date' column is converted to datetime, the rows are sorted by it in
    ascending order, and the column is then moved into the index.
    """
    prices = pd.read_csv(file_path, parse_dates=['Date'])
    # Explicit conversion in addition to parse_dates, as in the original pipeline
    prices['Date'] = pd.to_datetime(prices['Date'])
    return prices.sort_values('Date').set_index('Date')

In [6]:
# Load the prepared data, turn the date index back into a regular column,
# and rename the date and day-ahead price columns to ds and y
df = (
    load_and_prepare_data('./../../data/Final_data/final_data.csv')
    .reset_index()
    .rename(columns={'Date': 'ds', 'Day_ahead_price (€/MWh)': 'y'})
)

ValueError: Missing column provided to 'parse_dates': 'date'

### Train/test split

In [None]:
def train_test_split_sequential(data, test_size=0.2):
    """
    Split ordered data into a leading training part and a trailing test part.

    No shuffling takes place: the first elements go to the training set and
    the last elements go to the test set.

    Parameters:
    - data: Any sized object that can be sliced by integer position, e.g. a
      list of (sequence, label) tuples or a pandas DataFrame.
    - test_size: Fraction of the dataset to be used as test data. Must lie
      between 0 and 1 (inclusive).

    Returns:
    - train_data: The first int(len(data) * (1 - test_size)) elements.
    - test_data: All remaining elements.

    Raises:
    - ValueError: If test_size is not within [0, 1].
    """
    # A fraction above 1 would yield a negative index and therefore a silently
    # wrong split; a negative fraction (or NaN) is equally meaningless.
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size!r}")

    # The small tolerance keeps floating-point error from dragging an exact
    # product just below an integer (10 * (1 - 0.9) == 0.9999999999999998),
    # which plain truncation would turn into an off-by-one split.
    split_idx = int(len(data) * (1 - test_size) + 1e-9)
    train_data = data[:split_idx]
    test_data = data[split_idx:]
    return train_data, test_data

# Keep the first 80 % of the rows for training and the last 20 % for testing
train_df, test_df = train_test_split_sequential(df, test_size=0.2)

### Instantiate the Prophet model using prophet python package

In [None]:
# Prophet settings: additive seasonality with custom yearly/weekly Fourier
# orders, no daily component, and the prior scales used for this model
prophet_params = dict(
    seasonality_mode='additive',
    yearly_seasonality=25,
    weekly_seasonality=3,
    daily_seasonality=False,
    seasonality_prior_scale=5,
    holidays_prior_scale=5,
    changepoint_prior_scale=0.05,
    scaling="absmax",
)
m = Prophet(**prophet_params)

# Extra seasonal component with a period of 30.5 days (monthly)
m.add_seasonality(name='monthly', period=30.5, fourier_order=5)

# German public holidays
m.add_country_holidays(country_name='DE')

# Every column other than the date (ds) and the target (y) becomes a regressor
regressor_columns = [name for name in df.columns if name not in ['ds', 'y']]
for column in regressor_columns:
    m.add_regressor(column, prior_scale=0.5, mode='multiplicative')

# Train on the training split only
m.fit(train_df)


12:09:05 - cmdstanpy - INFO - Chain [1] start processing
12:09:07 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x164b4b880>

In [None]:
# Extend the model's date range by one daily step per test observation
n_test_rows = len(test_df)
future = m.make_future_dataframe(periods=n_test_rows, freq='1D')

# Sanity checks: both values should print True when future lines up with df
dates_match = future['ds'].equals(df['ds'])
row_counts_match = len(future) == len(df)
print(dates_match)
print(row_counts_match)


True
True


In [None]:
# Copy every column of df except the date into the future frame
# (assignment matches rows via the index)
for column in (name for name in df.columns if name != 'ds'):
    future[column] = df[column]

# Run the fitted model over the whole future frame
forecast = m.predict(future)

# Show the last rows of the result
print(forecast.tail())

             ds      trend  yhat_lower  yhat_upper  trend_lower  trend_upper  \
4434 2024-02-25  93.573566   41.994735   69.723213    74.767173   112.249499   
4435 2024-02-26  93.603819   68.155763  106.939303    74.763705   112.310515   
4436 2024-02-27  93.634072   82.398827  126.629673    74.760237   112.371531   
4437 2024-02-28  93.664325   83.634472  130.031130    74.756768   112.432547   
4438 2024-02-29  93.694578   63.148350   98.147902    74.753300   112.493563   

      Ascension Day  Ascension Day_lower  Ascension Day_upper  BEV_vehicles  \
4434            0.0                  0.0                  0.0     -0.028155   
4435            0.0                  0.0                  0.0     -0.028155   
4436            0.0                  0.0                  0.0     -0.028155   
4437            0.0                  0.0                  0.0     -0.028155   
4438            0.0                  0.0                  0.0     -0.028732   

      ...    weekly  weekly_lower  weekly_up

In [None]:
# List the names of all columns in the forecast frame
columns = list(forecast.columns)
columns

['ds',
 'trend',
 'yhat_lower',
 'yhat_upper',
 'trend_lower',
 'trend_upper',
 'Ascension Day',
 'Ascension Day_lower',
 'Ascension Day_upper',
 'BEV_vehicles',
 'BEV_vehicles_lower',
 'BEV_vehicles_upper',
 'Biomass (GWh)',
 'Biomass (GWh)_lower',
 'Biomass (GWh)_upper',
 'Christmas Day',
 'Christmas Day_lower',
 'Christmas Day_upper',
 'Easter Monday',
 'Easter Monday_lower',
 'Easter Monday_upper',
 'German Unity Day',
 'German Unity Day_lower',
 'German Unity Day_upper',
 'Good Friday',
 'Good Friday_lower',
 'Good Friday_upper',
 'Hard Coal (GWh)',
 'Hard Coal (GWh)_lower',
 'Hard Coal (GWh)_upper',
 'Hydro (GWh)',
 'Hydro (GWh)_lower',
 'Hydro (GWh)_upper',
 'Labor Day',
 'Labor Day_lower',
 'Labor Day_upper',
 'Lignite (GWh)',
 'Lignite (GWh)_lower',
 'Lignite (GWh)_upper',
 'Natural Gas (GWh)',
 'Natural Gas (GWh)_lower',
 'Natural Gas (GWh)_upper',
 "New Year's Day",
 "New Year's Day_lower",
 "New Year's Day_upper",
 'Nuclear (GWh)',
 'Nuclear (GWh)_lower',
 'Nuclear (GWh)_up

### Plot the predictions vs. the actual data for the test period

In [None]:
# Date range covered by the test split
test_start, test_end = test_df['ds'].min(), test_df['ds'].max()

# Keep only the forecast rows and the observed rows inside that range (bounds inclusive)
test_forecast = forecast[forecast['ds'].between(test_start, test_end)]
test_actuals = df[df['ds'].between(test_start, test_end)]

# Single-panel figure created with secondary-axis support
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Traces in drawing order: the upper bound must directly follow the lower bound
# because fill='tonexty' shades the area down to the previously added trace
interval_line = dict(color='gray', dash='dot')
traces = [
    go.Scatter(
        x=test_forecast['ds'], y=test_forecast['yhat'],
        name='Forecast', mode='lines', line=dict(color='blue'),
    ),
    go.Scatter(
        x=test_forecast['ds'], y=test_forecast['yhat_lower'],
        name='Lower Confidence', mode='lines', line=interval_line,
    ),
    go.Scatter(
        x=test_forecast['ds'], y=test_forecast['yhat_upper'],
        name='Upper Confidence', mode='lines', line=interval_line, fill='tonexty',
    ),
    go.Scatter(
        x=test_actuals['ds'], y=test_actuals['y'],
        name='Actual', mode='markers', marker=dict(color='black', size=3),
    ),
]
for trace in traces:
    fig.add_trace(trace, secondary_y=False)

# Title, axis labels and legend placement
fig.update_layout(
    title='Forecast vs Actuals for Test Period',
    xaxis_title='Date',
    yaxis_title='Day-Ahead Energy Price (EUR/MWh)',
    legend=dict(x=0.01, y=0.99, bordercolor="Black", borderwidth=1),
)

# Render the figure
fig.show()


In [None]:
# Plot the individual components of the forecast (interactive plotly figure)
plot_components_plotly(m, forecast)

In [None]:
# Extract the fitted coefficients of the extra regressors from the model
# (one row per regressor) and print the resulting table
coefficients = regressor_coefficients(m)
print(coefficients)

                          regressor  regressor_mode      center  coef_lower  \
0                      BEV_vehicles  multiplicative  166.072092   -0.000036   
1                     Biomass (GWh)  multiplicative  118.673371    0.000355   
2                   Hard Coal (GWh)  multiplicative  233.336564    0.000840   
3                       Hydro (GWh)  multiplicative   54.102830   -0.001532   
4                     Lignite (GWh)  multiplicative  351.082655    0.000146   
5                 Natural Gas (GWh)  multiplicative  209.264213    0.000191   
6                     Nuclear (GWh)  multiplicative  216.970264    0.000211   
7                   Oil_price (EUR)  multiplicative   65.588854    0.002434   
8                       Other (GWh)  multiplicative   60.911058    0.002867   
9   Pumped storage generation (GWh)  multiplicative   21.321836    0.001178   
10                      Solar (GWh)  multiplicative  110.368485   -0.000408   
11                  Solar_radiation  multiplicative 

### Error metrics

In [None]:
# Restrict the forecast to the date range covered by the test set
test_start, test_end = test_df['ds'].min(), test_df['ds'].max()
test_forecast = forecast[(forecast['ds'] >= test_start) & (forecast['ds'] <= test_end)]

# Refuse to score unless forecast and test rows match one-to-one and in order
same_length = len(test_forecast) == len(test_df)
if not (same_length and all(test_forecast['ds'].values == test_df['ds'].values)):
    raise ValueError("Dataframes are not aligned or of different lengths. Please check and try again.")

# Use plain arrays so that every metric pairs the rows by position, exactly as
# the check above does. Series arithmetic would instead pair rows by index
# label and could silently produce NaNs if the two indexes ever differed.
y_true = test_df['y'].values
y_pred = test_forecast['yhat'].values

# Sklearn metrics for MSE and MAE; RMSE is the square root of MSE
mse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mse)

# MAPE in percent
# NOTE(review): the ratio is undefined when an actual value is exactly 0 —
# confirm the test period contains no zero prices before relying on this figure.
mape = np.mean(np.abs((y_pred - y_true) / y_true)) * 100

# Collect the metrics
evaluation_metrics = {
    'MSE': mse,
    'MAE': mae,
    'RMSE': rmse,
    'MAPE': mape
}

# Convert the dictionary to a one-row pandas dataframe for better visualization
metrics_df = pd.DataFrame([evaluation_metrics])
print(metrics_df)


           MSE        MAE       RMSE       MAPE
0  2087.774294  31.032051  45.692169  45.427395


### Save model for further applications


In [None]:
# Write the fitted model as JSON to prophet_model.json in the working directory
with open('prophet_model.json', 'w') as model_file:
    model_json = model_to_json(m)
    model_file.write(model_json)

# To load the saved model again:
# with open('prophet_model.json', 'r') as fin:
#     m = model_from_json(fin.read())
