<a href="https://colab.research.google.com/github/sdikici/Demand_Forecasting_Prophet_DeepAR/blob/main/prophet_hyperparameter_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
!kaggle datasets download -d sercandikici/merged-dataset-electricty-weather-for-modelling
! unzip merged-dataset-electricty-weather-for-modelling.zip

In [None]:
import itertools

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from prophet import Prophet

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    '''Return the Mean Absolute Percentage Error (MAPE) of y_pred against y_true.

    Both inputs are converted to NumPy arrays, so values are compared by position.
    The result is the mean of |y_true - y_pred| / |y_true| as a fraction
    (it is not multiplied by 100).
    '''
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = (actual - predicted) / actual
    return np.mean(np.abs(relative_error))

In [None]:
'''
Load "merged_data.csv" into the DataFrame df_merged with pd.read_csv and
convert its 'settlement_date' column to datetime with pd.to_datetime.
The cell ends by displaying df_merged.
'''

df_merged = pd.read_csv("merged_data.csv").assign(
    settlement_date=lambda frame: pd.to_datetime(frame['settlement_date'])
)
df_merged

In [None]:
'''
Forecasting configuration.

- freq: frequency string of the time series, "2H" (one observation every 2 hours, i.e. 12 per day).
- periods_per_day: number of observations per day at that frequency.
- split_from: number of trailing observations held out for testing (90 days * periods_per_day).
- days_to_predict / periods: forecast horizon in days and in 2-hour steps (days_to_predict * periods_per_day).
- mcmc_samples: number of MCMC samples passed to Prophet.
'''
freq = "2H"
periods_per_day = 12  # 24 hours / 2-hour sampling interval; must stay in sync with freq
split_from = 90 * periods_per_day  # train/test split: hold out the last 90 days
days_to_predict = 7
periods = days_to_predict * periods_per_day
mcmc_samples = 50

In [None]:
'''
Build df_model_3 from the 'tsd', 'settlement_date' and 'temp' columns of df_merged,
renaming 'tsd' to 'y' and 'settlement_date' to 'ds' ('temp' keeps its name).
The cell ends by displaying df_model_3.
'''
df_model_3 = df_merged[["tsd", "settlement_date", "temp"]].rename(
    columns={"tsd": "y", "settlement_date": "ds"}
)
df_model_3

In [None]:
'''
Hold out the tail of df_model_3 for evaluation:
- train_data_3: every row of df_model_3 except the last split_from rows.
- test_data_3: the last split_from rows of df_model_3, used to score the model on data it was not fitted on.
'''
train_data_3 = df_model_3.iloc[:-split_from]
test_data_3 = df_model_3.iloc[-split_from:]

In [None]:
'''
Grid-search Prophet hyperparameters against a single hold-out window.

param_grid lists candidate values for changepoint_prior_scale and seasonality_prior_scale;
itertools.product expands it into every combination (all_params).

For each parameter combination in all_params:
    - Initialize a Prophet model with the current parameters and mcmc_samples.
    - Add UK country holidays and the additive "temp" regressor.
    - Fit the model to train_data_3.
    - Create a future dataframe extending `periods` steps at frequency `freq`.
    - Fill in the regressor values for the training rows and the forecast rows.
    - Make predictions using the model.
    - Calculate the MAPE between predicted and actual values over the forecast window.
    - Append the MAPE to the list mapes.

tuning_results combines each parameter combination with its MAPE and is displayed at the end of the cell.
'''

param_grid = {
    'changepoint_prior_scale': [0.001, 0.01, 0.05, 0.1, 0.5],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
}

# Generate all combinations of parameters
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
mapes = []  # Store the MAPEs for each params here
reg = ["temp"]  # Extra regressors that must be present in the future dataframe

# Score every parameter combination on the same hold-out window
# (one train/test split; this is not cross-validation).
for params in all_params:
    # Initialize Prophet model with given params
    m = Prophet(**params, mcmc_samples=mcmc_samples)
    m.add_country_holidays(country_name="UK")
    m.add_regressor("temp", mode="additive")

    # Fit the model
    m.fit(train_data_3)

    # Make future dataframe
    future = m.make_future_dataframe(periods=periods, freq=freq)

    # Add regressors to future dataframe.
    # Values are assigned by position, so this relies on the rows of `future` that match
    # training timestamps being in the same order and number as train_data_3, and on the
    # remaining rows lining up with the first `periods` rows of test_data_3.
    train_idx = future["ds"].isin(train_data_3["ds"])
    test_idx = ~train_idx
    for r in reg:
        future.loc[train_idx, r] = train_data_3[r].to_list()
        future.loc[test_idx, r] = test_data_3.iloc[:periods][r].to_list()

    # Predict and calculate MAPE over the forecast window only
    forecast = m.predict(future)
    forecast_days = forecast[forecast["ds"] >= test_data_3["ds"].iloc[0]]
    test_days = test_data_3[(test_data_3["ds"] >= test_data_3["ds"].iloc[0]) & (test_data_3["ds"] <= forecast_days["ds"].iloc[-1])]
    mape = mean_absolute_percentage_error(test_days["y"], forecast_days["yhat"])
    mapes.append(mape)

# Combine parameters and corresponding MAPEs into a DataFrame
tuning_results = pd.DataFrame(all_params)
tuning_results['mape'] = mapes
# Bare last expression so the notebook renders the table instead of plain-text print output
tuning_results
