In [97]:
import glob # type: ignore
import pprint # type: ignore
from pathlib import Path

import pandas as pd # type: ignore

In [98]:
folder_path = 'forecasts'

# Get a list of all CSV files in the folder.
# glob returns matches in whatever order the file system yields them, so the
# list is sorted to make positional access (csv_files[0], csv_files[1:])
# reproducible across machines and runs.
csv_files = sorted(glob.glob(folder_path + '/*.csv'))
csv_files

['forecasts\\actuals.csv',
 'forecasts\\forecast_GBR.csv',
 'forecasts\\forecast_GRU.csv',
 'forecasts\\forecast_LTSM_1.csv',
 'forecasts\\forecast_LTSM_2.csv',
 'forecasts\\forecast_Prophet.csv',
 'forecasts\\forecast_Prophet_holidays.csv']

In [99]:
# Load the first CSV in the listing and name its value column after the file.
actuals_path = csv_files[0]

# Path.stem drops both the directory part and the ".csv" suffix using the host
# OS's own separator rules, so the column name is the bare file name on
# Windows and POSIX alike (splitting on "\\" only worked on Windows).
name = Path(actuals_path).stem

# Only 'consumption' needs renaming; 'date' keeps its name.
df = pd.read_csv(actuals_path).rename(columns={'consumption': name})
df.head()

Unnamed: 0,date,actuals
0,2024-10-17 00:00:00,1141.0
1,2024-10-17 01:00:00,1097.0
2,2024-10-17 02:00:00,1062.0
3,2024-10-17 03:00:00,1083.0
4,2024-10-17 04:00:00,1101.0


In [100]:
# Add one column per remaining CSV to df, aligned on the 'date' column.
for file in csv_files[1:]:
	# Bare file name without directory or ".csv"; portable across OS path separators.
	name = Path(file).stem
	forecast = pd.read_csv(file).rename(columns={'consumption': name})

	# Merge on 'date' explicitly instead of on every shared column, and bring in
	# only the key plus this file's value column. Dropping an existing column of
	# the same name first lets the cell be re-run without creating _x/_y duplicates.
	df = df.drop(columns=name, errors='ignore').merge(forecast[['date', name]], on='date', how='outer')

df.head()

Unnamed: 0,date,actuals,forecast_GBR,forecast_GRU,forecast_LTSM_1,forecast_LTSM_2,forecast_Prophet,forecast_Prophet_holidays
0,2024-10-17 00:00:00,1141.0,1113.662512,1072.0228,1135.1686,1132.7897,1208.334691,1216.73999
1,2024-10-17 01:00:00,1097.0,1057.909867,1007.8444,1077.1505,1067.0319,1154.371306,1162.828869
2,2024-10-17 02:00:00,1062.0,1055.821729,1001.2846,1050.8236,1027.4688,1107.693352,1116.208419
3,2024-10-17 03:00:00,1083.0,1057.020375,1050.6017,1088.7871,1032.9064,1092.396852,1100.94928
4,2024-10-17 04:00:00,1101.0,1057.679346,1082.2162,1134.3145,1087.9597,1140.244134,1148.792224


In [105]:
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, mean_absolute_percentage_error # type: ignore
from sktime.performance_metrics.forecasting import mean_squared_percentage_error # type: ignore

def calculate_metrics(actuals, forecasts, name):
	"""Build one row of error metrics for a forecast series.

	Args:
		actuals: Observed values, passed as the first argument to every metric.
		forecasts: Predicted values, passed as the second argument to every metric.
		name: Label placed in the first position of the returned row.

	Returns:
		list: ``[name, MAE, RMSE, MAPE * 100, RMSPE * 100]``.
	"""
	mae = mean_absolute_error(actuals, forecasts)
	rmse = root_mean_squared_error(actuals, forecasts)
	mape = mean_absolute_percentage_error(actuals, forecasts)
	# square_root=True turns the mean squared percentage error into its root (RMSPE).
	rmspe = mean_squared_percentage_error(actuals, forecasts, square_root=True)

	# The two percentage metrics are scaled by 100 so they read as percentages.
	return [name, mae, rmse, mape * 100, rmspe * 100]

In [121]:
# Score every value column against the actuals, one metrics row per column.
# Columns are chosen by name rather than by position, so the result does not
# depend on 'date' being the first column. The 'actuals' column is scored
# against itself as well, which yields an all-zero row.
value_columns = [column for column in df.columns if column != 'date']
metric_rows = [calculate_metrics(df['actuals'], df[column], column) for column in value_columns]

# Build the table in one step instead of appending row by row, then order it
# from lowest to highest RMSE.
metrics_df = pd.DataFrame(metric_rows, columns=['name', 'MAE', 'RMSE', 'MAPE', 'RMSPE']).sort_values('RMSE')

# Note: this changes the display precision for every later cell too.
pd.set_option('display.precision', 1)
metrics_df

                        name    MAE   RMSE  MAPE  RMSPE
0                    actuals    0.0    0.0   0.0    0.0
1               forecast_GBR   72.1  109.3   6.2   10.7
5           forecast_Prophet  110.9  147.1   9.5   13.9
6  forecast_Prophet_holidays  111.6  150.1   9.7   14.6
4            forecast_LTSM_2  112.1  178.9   9.0   14.5
2               forecast_GRU  108.9  180.2   8.8   14.5
3            forecast_LTSM_1  113.8  184.9   9.4   15.6
