In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [4]:
def calculate_error_metrics(df, prediction_features, output_features):
    """Compute MSE, MAE, MAPE and RMSE for paired prediction/actual columns.

    The i-th entry of ``prediction_features`` is compared against the i-th
    entry of ``output_features``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both the predicted and the actual columns.
    prediction_features : iterable of str
        Names of the columns containing the predictions.
    output_features : iterable of str
        Names of the columns containing the actual values, in the same
        order as ``prediction_features``.

    Returns
    -------
    dict
        ``{output_feature: {"MSE": ..., "MAE": ..., "MAPE": ..., "RMSE": ...}}``.
        The result is keyed by the actual column name, so listing the same
        actual column twice keeps only the last pairing.

    Raises
    ------
    ValueError
        If the two feature lists do not have the same length.
    """
    prediction_features = list(prediction_features)
    output_features = list(output_features)
    # zip() would silently drop the unmatched columns, hiding a caller mistake.
    if len(prediction_features) != len(output_features):
        raise ValueError(
            "prediction_features and output_features must have the same length "
            f"(got {len(prediction_features)} and {len(output_features)})"
        )

    metrics = {}

    for pred_feature, output_feature in zip(prediction_features, output_features):
        actual = df[output_feature]
        predicted = df[pred_feature]

        mse = mean_squared_error(actual, predicted)
        mae = mean_absolute_error(actual, predicted)

        # The percentage error is undefined where the actual value is 0;
        # dividing there would yield inf and poison the mean, so those rows
        # are left out of the MAPE (MSE/MAE/RMSE still use every row).
        nonzero = actual != 0
        if nonzero.any():
            mape = (
                np.mean(
                    np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])
                )
                * 100
            )
        else:
            mape = float("nan")

        rmse = np.sqrt(mse)

        # Store the results under the name of the actual column
        metrics[output_feature] = {
            "MSE": mse,
            "MAE": mae,
            "MAPE": mape,
            "RMSE": rmse,
        }

    return metrics

In [5]:
# Load the hourly prediction results of each model
data1 = pd.read_csv('../results/imbalance_predictions_smard_real_lstm_seq2seq_additive_corrected_hour.csv')
data2 = pd.read_csv('../results/imbalance_predictions_smard_real_mse_lstm_naive_hour.csv')
data3 = pd.read_csv('../results/imbalance_predictions_smard_real_mse_lstm_seq2seq_additive_regu_31_07_hour.csv')
data4 = pd.read_csv('../results/imbalance_predictions_smard_real_mse_lstm_stacked_hour.csv')

# Pair every model name with its own frame explicitly, instead of relying on
# the key order of one dict lining up with a separately maintained list.
model_frames = {
    'M_Seq2Seq': data1,
    'M_Naive': data2,
    'M_RegSeq2Seq': data3,
    'M_Stacked': data4,
}

# Convert 'start_time' to datetime (in place, so data1..data4 are updated too)
for frame in model_frames.values():
    frame['start_time'] = pd.to_datetime(frame['start_time'])

# Prediction columns per model: [production, consumption]
predicted_features = {
    'M_Seq2Seq': ['pred_total_production_mwh', 'pred_total_load_mwh'],
    'M_Naive': ['pred_total_production_mwh', 'pred_total_load_mwh'],
    'M_RegSeq2Seq': ['pred_total_production_mwh', 'pred_total_load_mwh'],
    'M_Stacked': ['pred_total_production_mwh', 'pred_total_load_mwh']
}

# Actual columns the predictions are compared against
output_features_production = ['total_production_mwh']
output_features_consumption = ['total_load_mwh']

# Quarter boundaries; consecutive pairs form the half-open windows [start, end)
quarters = pd.date_range(start='2023-01-01', end='2024-07-01', freq='QS')
quarter_labels = [start.strftime('%Y-%m') for start in quarters[:-1]]

reported_metrics = ['MAE', 'MAPE', 'RMSE']
missing_metrics = {metric: float('nan') for metric in reported_metrics}

# Collected as {column name: {quarter label: value}} and turned into a
# DataFrame once, rather than enlarging a DataFrame one cell at a time.
production_columns = {}
consumption_columns = {}

for model, frame in model_frames.items():
    pred_production, pred_consumption = predicted_features[model]

    for label, start, end in zip(quarter_labels, quarters[:-1], quarters[1:]):
        in_quarter = (frame['start_time'] >= start) & (frame['start_time'] < end)
        quarterly_data = frame.loc[in_quarter]

        if quarterly_data.empty:
            # An empty slice would otherwise make the sklearn metric calls
            # fail; record NaN so the missing quarter stays visible.
            prod_values = missing_metrics
            cons_values = missing_metrics
        else:
            prod_values = calculate_error_metrics(
                quarterly_data, [pred_production], output_features_production
            )[output_features_production[0]]
            cons_values = calculate_error_metrics(
                quarterly_data, [pred_consumption], output_features_consumption
            )[output_features_consumption[0]]

        for metric in reported_metrics:
            production_columns.setdefault(f'{model}_{metric}', {})[label] = prod_values[metric]
            consumption_columns.setdefault(f'{model}_{metric}', {})[label] = cons_values[metric]

# Rows: quarter start ('YYYY-MM'); columns: '<model>_<metric>'
metrics_production = pd.DataFrame(production_columns, index=quarter_labels)
metrics_consumption = pd.DataFrame(consumption_columns, index=quarter_labels)


In [6]:
# Print to verify results
print(metrics_production.head())
print(metrics_consumption.head())

         M_Seq2Seq_MAE  M_Seq2Seq_MAPE  M_Seq2Seq_RMSE  M_Naive_MAE  \
2023-01    4209.034563        7.164219     5537.243157  4374.866351   
2023-04    3715.540501        8.735432     4746.238347  3729.262916   
2023-07    3998.430597        9.771898     5029.028049  3815.762295   
2023-10    4303.194186        8.253955     5509.544339  4757.105981   
2024-01    4120.710006        7.972712     5348.507382  4563.111359   

         M_Naive_MAPE  M_Naive_RMSE  M_RegSeq2Seq_MAE  M_RegSeq2Seq_MAPE  \
2023-01      7.210105   5498.126733       4175.051455           6.962711   
2023-04      8.135363   4811.858010       3514.387535           8.047494   
2023-07      8.671644   5047.384581       3893.839137           9.522977   
2023-10      8.859621   6126.717627       4491.224954           8.385954   
2024-01      8.720259   5740.468621       4205.264685           7.886142   

         M_RegSeq2Seq_RMSE  M_Stacked_MAE  M_Stacked_MAPE  M_Stacked_RMSE  
2023-01        5358.213094    5294.53364

In [9]:
def calculate_combined_error_metrics(df, pred_col, actual_col):
    """Compute MAE, MAPE and RMSE of one prediction column against its actuals.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both columns.
    pred_col : str
        Name of the column containing the predictions.
    actual_col : str
        Name of the column containing the actual values.

    Returns
    -------
    dict
        ``{"MAE": ..., "MAPE": ..., "RMSE": ...}`` with MAPE in percent.
    """
    actual = df[actual_col]
    predicted = df[pred_col]

    # MSE is only needed as the basis for RMSE; it is not returned.
    mse = mean_squared_error(actual, predicted)
    mae = mean_absolute_error(actual, predicted)

    # The percentage error is undefined where the actual value is 0;
    # dividing there would yield inf and poison the mean, so those rows
    # are left out of the MAPE (MAE/RMSE still use every row).
    nonzero = actual != 0
    if nonzero.any():
        mape = np.mean(np.abs((actual[nonzero] - predicted[nonzero]) / actual[nonzero])) * 100
    else:
        mape = float("nan")

    rmse = np.sqrt(mse)
    return {"MAE": mae, "MAPE": mape, "RMSE": rmse}
# Aggregated quarterly metrics
# One prediction CSV per model
prediction_csvs = {
    'M_Seq2Seq': '../results/imbalance_predictions_smard_real_lstm_seq2seq_additive_corrected_hour.csv',
    'M_Naive': '../results/imbalance_predictions_smard_real_mse_lstm_naive_hour.csv',
    'M_RegSeq2Seq': '../results/imbalance_predictions_smard_real_mse_lstm_seq2seq_additive_regu_31_07_hour.csv',
    'M_Stacked': '../results/imbalance_predictions_smard_real_mse_lstm_stacked_hour.csv',
}
data_frames = {name: pd.read_csv(csv_path) for name, csv_path in prediction_csvs.items()}

# Parse the timestamps and derive the calendar year / quarter used for filtering
for df in data_frames.values():
    timestamps = pd.to_datetime(df['start_time'])
    df['start_time'] = timestamps
    df['Year'] = timestamps.dt.year
    df['Quarter'] = timestamps.dt.quarter

# Quarters to report and the years that may contribute to Q1
quarters = ['Q1', 'Q2', 'Q3', 'Q4']
years = [2023, 2024]
metrics_results = {}

for model, df in data_frames.items():
    per_quarter = {}
    metrics_results[model] = per_quarter

    for quarter in quarters:
        quarter_mask = df['Quarter'] == int(quarter[1])
        if quarter == 'Q1':
            # Q1 pools the first quarter of every year listed in `years`
            year_mask = df['Year'].isin(years)
        else:
            # Q2, Q3 and Q4 use 2023 data only
            year_mask = df['Year'] == 2023

        filtered_data = df[quarter_mask & year_mask]
        if filtered_data.empty:
            # No rows for this quarter: leave it out of the results
            continue

        combined_metrics = calculate_combined_error_metrics(
            filtered_data,
            'pred_total_production_mwh',  # Example prediction column
            'total_production_mwh',  # Example actual column
        )
        per_quarter[quarter] = combined_metrics

# Display the combined metrics to check them before generating the LaTeX table
metrics_results

KeyError: 't'