# In [None]:
import pandas as pd
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import KFold
from prophet import Prophet
import numpy as np
import matplotlib.pyplot as plt

# Load the dataset
file_path = '../../../dataset/data.csv'
data = pd.read_csv(file_path)

# Prepare the data for Prophet (for Average Dew): the model is fitted on a
# frame holding the timestamp in 'ds' and the target value in 'y'.
data['ds'] = pd.to_datetime(data['datetime'], format='%m/%d/%Y')
# Rename Average Dew for Prophet. rename() without inplace returns a new
# frame, so the column subset of `data` is never mutated in place (the
# original inplace rename on the subset triggers SettingWithCopyWarning).
dew_data = data[['ds', 'dew']].rename(columns={'dew': 'y'})

# Updated parameter grid.
# The grids are rounded because np.arange with a fractional step can leave
# floating-point noise in the generated values, which then leaks into the
# printed hyperparameters.
param_grid = {
    # Trend flexibility: 0.01, 0.02, ..., 0.10
    'changepoint_prior_scale': np.round(np.arange(0.01, 0.11, 0.01), 2).tolist(),
    # Seasonality flexibility: 0.1, 1.1, ..., 10.1 (start 0.1, step 1.0)
    'seasonality_prior_scale': np.round(np.arange(0.1, 11, 1.0), 1).tolist(),
    # Additive vs multiplicative seasonality
    'seasonality_mode': ['additive', 'multiplicative'],
}

# Define number of folds for cross-validation
# NOTE(review): KFold without shuffling yields contiguous folds, so for the
# earlier folds the model is trained on rows that come after the held-out
# rows. If the rows are in chronological order, a forward-chaining splitter
# (e.g. sklearn's TimeSeriesSplit) may give a more honest estimate - confirm.
kf = KFold(n_splits=5)

# Function to evaluate model with given parameters and cross-validation
def evaluate_model_cv(changepoint_prior_scale, seasonality_prior_scale, seasonality_mode, data):
    """Cross-validate one Prophet configuration and return its mean scores.

    For every fold produced by the module-level ``kf`` splitter, a fresh
    Prophet model (yearly seasonality enabled) is fitted on the training rows
    and scored on the held-out rows.

    Args:
        changepoint_prior_scale: Forwarded to ``Prophet`` unchanged.
        seasonality_prior_scale: Forwarded to ``Prophet`` unchanged.
        seasonality_mode: Forwarded to ``Prophet`` unchanged.
        data: DataFrame with a ``ds`` column (dates) and a ``y`` column
            (observed values).

    Returns:
        Tuple ``(avg_r2, avg_rmse, avg_mae)`` holding the mean R², RMSE and
        MAE over all folds.
    """
    r2_scores = []
    rmse_scores = []
    mae_scores = []

    for train_index, test_index in kf.split(data):
        # Split the data into train and test sets for each fold
        train_data = data.iloc[train_index]
        test_data = data.iloc[test_index]

        # Initialize the Prophet model with specified hyperparameters
        model = Prophet(
            yearly_seasonality=True,
            changepoint_prior_scale=changepoint_prior_scale,
            seasonality_prior_scale=seasonality_prior_scale,
            seasonality_mode=seasonality_mode,
        )

        # Fit the model
        model.fit(train_data)

        # Predict on the test set (validation set); only the dates are passed
        test_forecast = model.predict(test_data[['ds']])

        # Calculate R², RMSE, and MAE
        y_true = test_data['y'].values  # Actual Average Dews
        y_pred = test_forecast['yhat'].values  # Predicted Average Dews

        r2_scores.append(r2_score(y_true, y_pred))
        # RMSE is taken as sqrt(MSE) instead of passing ``squared=False``:
        # that keyword is deprecated and removed in recent scikit-learn
        # releases, while this form works on every version.
        rmse_scores.append(np.sqrt(mean_squared_error(y_true, y_pred)))
        mae_scores.append(mean_absolute_error(y_true, y_pred))

    # Calculate average scores across the folds
    avg_r2 = np.mean(r2_scores)
    avg_rmse = np.mean(rmse_scores)
    avg_mae = np.mean(mae_scores)

    return avg_r2, avg_rmse, avg_mae

# Iterate through the parameter grid and evaluate each combination using K-Fold CV
best_score = -float('inf')
best_params = None
best_rmse = float('inf')
best_mae = float('inf')

# Flatten the grid into (changepoint, seasonality_prior, seasonality_mode)
# triples; the order matches the original triple-nested loop.
param_combinations = [
    (changepoint, seasonality_prior, seasonality_mode)
    for changepoint in param_grid['changepoint_prior_scale']
    for seasonality_prior in param_grid['seasonality_prior_scale']
    for seasonality_mode in param_grid['seasonality_mode']
]

for changepoint, seasonality_prior, seasonality_mode in param_combinations:
    print(f"Evaluating: changepoint_prior_scale={changepoint}, seasonality_prior_scale={seasonality_prior}, seasonality_mode={seasonality_mode}")
    r2, rmse, mae = evaluate_model_cv(changepoint, seasonality_prior, seasonality_mode, dew_data)
    print(f"R² Score: {r2:.3f}, RMSE: {rmse:.3f}, MAE: {mae:.3f}\n")

    # Keep track of the best model: prioritize highest R², then lowest RMSE,
    # then lowest MAE. Negating the error metrics lets a single lexicographic
    # tuple comparison express that priority order. A NaN score never
    # compares greater, so it can never become the best candidate.
    if (r2, -rmse, -mae) > (best_score, -best_rmse, -best_mae):
        best_score = r2
        best_rmse = rmse
        best_mae = mae
        best_params = (changepoint, seasonality_prior, seasonality_mode)

# Fail with a clear message instead of an opaque "NoneType is not
# subscriptable" below when no combination ever beat the initial score
# (empty grid, or every R² came back as NaN).
if best_params is None:
    raise RuntimeError("Grid search did not produce any valid score; cannot select hyperparameters.")

# Output the best hyperparameters
print(f"Best R² Score: {best_score:.3f}")
print(f"Best RMSE: {best_rmse:.3f}")
print(f"Best MAE: {best_mae:.3f}")
print(f"Best Hyperparameters: changepoint_prior_scale={best_params[0]}, seasonality_prior_scale={best_params[1]}, seasonality_mode={best_params[2]}")

# Re-train the best model using the entire dataset and best hyperparameters
best_model = Prophet(
    yearly_seasonality=True,
    changepoint_prior_scale=best_params[0],
    seasonality_prior_scale=best_params[1],
    seasonality_mode=best_params[2]
)

best_model.fit(dew_data)

# Generate future dates up to 31/12/2024. The horizon is derived from the
# last observed date instead of assuming the data ends on 20/08/2024; for
# data ending on that day this gives the same 133 daily steps as before.
# It is clamped at 0 so data that already reaches past the end date does not
# produce a negative period count.
forecast_end = pd.Timestamp('2024-12-31')
last_observed = dew_data['ds'].max()
horizon_days = max((forecast_end - last_observed).days, 0)
future_dates = best_model.make_future_dataframe(periods=horizon_days, freq='D')

# Predict future Average Dews
dew_forecast = best_model.predict(future_dates)

# Display and save the predicted Average Dews.
# print() is required: a bare expression is only echoed by a notebook when it
# is the last statement of a cell, so the original .tail() showed nothing.
print(dew_forecast[['ds', 'yhat']].tail())
dew_forecast[['ds', 'yhat']].to_csv('dew_forecast_finetuning_2024.csv', index=False)

# Plot the forecasted Average Dews with a title
forecast_plot = best_model.plot(dew_forecast)
plt.title("Average Dew Forecast (2024)")
plt.xlabel("Date")
plt.ylabel("Average Dew (°C)")
plt.show()

# Plot seasonal components (trend, yearly, etc.) with titles
components_plot = best_model.plot_components(dew_forecast)

# Adding titles to individual component plots
ax = components_plot.get_axes()

# Titles for each component, keyed by component name rather than by position.
component_titles = {
    'trend': 'Trend Component: Average Dew',
    'weekly': 'Weekly Seasonality: Average Dew',
    'yearly': 'Yearly Seasonality: Average Dew',
}

# Prophet draws the trend first, then weekly, then yearly seasonality, and
# only the seasonalities the fitted model actually has. Building the list
# from the model keeps titles on the right panels and avoids indexing a
# panel that does not exist.
plotted_components = ['trend'] + [
    name for name in ('weekly', 'yearly') if name in best_model.seasonalities
]
for component_axis, component_name in zip(ax, plotted_components):
    component_axis.set_title(component_titles[component_name])
plt.show()
