# Hyperparameter optimization 


In [None]:
# Import the required packages
import pandas as pd
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly, plot_cross_validation_metric
import plotly.graph_objects as go
import utils
import numpy as np
import matplotlib.pyplot as plt
import itertools
from prophet.diagnostics import cross_validation, performance_metrics

In [1]:
# Import the data and prepare for further processing
def load_and_prepare_data(file_path):
    """
    Read a CSV file into a DataFrame indexed by its 'date' column.

    The 'date' column is parsed as datetimes, the rows are sorted by ascending 'date',
    and 'date' is then moved into the index of the returned DataFrame.
    """
    raw = pd.read_csv(file_path, parse_dates=['date'])
    # Explicit conversion in addition to parse_dates, as in the original pipeline
    raw['date'] = pd.to_datetime(raw['date'])
    ordered = raw.sort_values('date').set_index('date')
    return pd.DataFrame(ordered)

In [None]:
# Load the prepared data, turn the 'date' index back into a regular column,
# and rename 'date' -> 'ds' and 'Day_ahead_price' -> 'y' (the column names
# Prophet expects for the timestamp and the target)
df = (
    load_and_prepare_data('../../data/Final_data/final_data.csv')
    .reset_index()
    .rename(columns={'date': 'ds', 'Day_ahead_price': 'y'})
)

In [None]:
def train_test_split_sequential(data, test_size=0.2):
    """
    Split ordered data into a leading training part and a trailing test part.

    No shuffling is performed: the first ``1 - test_size`` fraction of ``data``
    becomes the training set and the remainder becomes the test set.

    Parameters:
    - data: Any object supporting ``len()`` and slicing (e.g. a list, or a
      DataFrame, which is sliced row-wise).
    - test_size: Fraction of the dataset to be used as test data, between 0 and 1 inclusive.

    Returns:
    - train_data: The first ``floor(len(data) * (1 - test_size))`` items.
    - test_data: The remaining items.

    Raises:
    - ValueError: If ``test_size`` is outside the range [0, 1].
    """
    # A fraction above 1 would yield a negative split index, which slices from
    # the end and silently produces overlapping/misplaced train and test sets.
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size!r}")

    # Round before truncating so binary floating-point error cannot drop a row:
    # 10 * (1 - 0.9) evaluates to 0.9999999999999998, which int() alone turns into 0.
    split_idx = int(round(len(data) * (1 - test_size), 9))
    train_data = data[:split_idx]
    test_data = data[split_idx:]
    return train_data, test_data

# Hold out the final 20% of the rows as the test set
train_df, test_df = train_test_split_sequential(df, test_size=0.2)

In [None]:
# Define the hyperparameter grid
# NOTE(review): no holidays are passed to Prophet in this cell, so
# 'holidays_prior_scale' presumably has no effect on the fit while multiplying
# the number of combinations by five — confirm and consider dropping it.
param_grid = {
    'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 5, 10.0],
    'holidays_prior_scale': [0.01, 0.1, 1.0, 5, 10.0],
    'seasonality_mode': ['additive', 'multiplicative'],
}

# Generate all combinations of parameters (one dict of keyword arguments each)
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
rmses = []  # Cross-validated RMSE of each combination, in the same order as all_params

# Use cross validation to evaluate all parameters
# NOTE(review): the models are fitted on the full `df`, not on `train_df`, so
# the held-out rows also influence the parameter choice — confirm this is intended.
for params in all_params:
    m = Prophet(**params).fit(df)  # Fit model with given params
    df_cv = cross_validation(m, initial='730 days', period='180 days', horizon='365 days', parallel="processes")
    # rolling_window=1 aggregates the metric over the whole horizon into a single
    # row, so values[0] is the overall RMSE rather than the RMSE of only the
    # shortest-horizon window.
    df_p = performance_metrics(df_cv, rolling_window=1)
    rmses.append(df_p['rmse'].values[0])

# Collect the RMSE of every parameter combination in one table
tuning_results = pd.DataFrame(all_params)
tuning_results['rmse'] = rmses
print(tuning_results)



In [None]:
# Plot the RMSE computed from the cross-validation results held in df_cv.
# NOTE(review): df_cv is reassigned on every iteration of the tuning loop, so
# this presumably shows the last parameter combination tried rather than the
# best one — confirm that is intended.
fig = plot_cross_validation_metric(df_cv, metric='rmse')
# Bare expression so the notebook displays the figure as the cell output
fig

In [None]:
# Select the parameter combination with the lowest cross-validated RMSE
best_index = np.argmin(rmses)
best_params = all_params[best_index]
print(best_params)
