Import packages.

In [15]:
import pandas as pd

from pathlib import Path

Read the aggregated results for the two configs.

In [16]:
# The two ensemble configurations whose aggregated results are compared.
cfg1 = "SLSQPEnsemble_4-models_opt-mse_1-windows"
cfg2 = "SLSQPEnsemble_4-models_opt-mae_1-windows"

# Directory holding one sub-folder per configuration.
input_path = Path("../results/all_results")

# Raw metric column names -> short labels used in the rest of the notebook.
rename_columns = {
    "eval_metrics/MASE[0.5]": "MASE",
    "eval_metrics/mean_weighted_sum_quantile_loss": "CRPS",
}

keep_columns = ["dataset", "MASE", "CRPS"]


def _load_results(cfg):
    """Load ``all_results.csv`` for one configuration.

    Reads ``input_path / cfg / "all_results.csv"``, renames the metric columns
    according to ``rename_columns``, keeps only ``keep_columns`` and returns the
    rows sorted by the "dataset" column.
    """
    results = pd.read_csv(input_path / cfg / "all_results.csv")
    results = results.rename(columns=rename_columns)
    return results[keep_columns].sort_values(by="dataset")


# Load both configurations first, then show a preview of each.
df1 = _load_results(cfg1)
df2 = _load_results(cfg2)

print(f"{cfg1} results (shape: {df1.shape})")
display(df1.head())

print(f"{cfg2} results (shape: {df2.shape})")
display(df2.head())

SLSQPEnsemble_4-models_opt-mse_1-windows results (shape: (96, 3))


Unnamed: 0,dataset,MASE,CRPS
38,bitbrains_fast_storage/5T/long,0.912993,0.773663
37,bitbrains_fast_storage/5T/medium,0.999749,0.640577
36,bitbrains_fast_storage/5T/short,0.677921,0.389961
39,bitbrains_fast_storage/H/short,1.040435,0.649417
78,bitbrains_rnd/5T/long,3.358888,0.626335


SLSQPEnsemble_4-models_opt-mae_1-windows results (shape: (96, 3))


Unnamed: 0,dataset,MASE,CRPS
35,bitbrains_fast_storage/5T/long,0.912993,0.773663
34,bitbrains_fast_storage/5T/medium,0.999749,0.640577
36,bitbrains_fast_storage/5T/short,0.677921,0.389961
33,bitbrains_fast_storage/H/short,1.040435,0.649417
9,bitbrains_rnd/5T/long,3.358888,0.626335


Join the two DataFrames on the "dataset" column.

In [17]:
# Inner join (the pandas default) on "dataset"; metric columns coming from df1
# get the "_mse" suffix and those coming from df2 get the "_mae" suffix.
merged_df = df1.merge(df2, on="dataset", suffixes=("_mse", "_mae"))
print(f"Merged results (shape: {merged_df.shape})")
display(merged_df.head())

Merged results (shape: (96, 5))


Unnamed: 0,dataset,MASE_mse,CRPS_mse,MASE_mae,CRPS_mae
0,bitbrains_fast_storage/5T/long,0.912993,0.773663,0.912993,0.773663
1,bitbrains_fast_storage/5T/medium,0.999749,0.640577,0.999749,0.640577
2,bitbrains_fast_storage/5T/short,0.677921,0.389961,0.677921,0.389961
3,bitbrains_fast_storage/H/short,1.040435,0.649417,1.040435,0.649417
4,bitbrains_rnd/5T/long,3.358888,0.626335,3.358888,0.626335


Count the number of rows that have identical MASE or CRPS values.

In [18]:
# Row-wise boolean masks: True where both configs report exactly the same value.
# NOTE: this is exact floating-point equality; NaN never compares equal to NaN.
mase_match = merged_df["MASE_mse"] == merged_df["MASE_mae"]
crps_match = merged_df["CRPS_mse"] == merged_df["CRPS_mae"]

# Individual counts
identical_mase = mase_match.sum()
identical_crps = crps_match.sum()

# Combine the masks row by row *before* summing. AND-ing the two already-summed
# counts would be a bitwise AND of two integers (e.g. 64 & 32 == 0), which is
# not the number of rows where both metrics match.
both_identical = (mase_match & crps_match).sum()

# Either identical (using inclusion-exclusion principle)
either_identical = identical_mase + identical_crps - both_identical

n_rows = len(merged_df)
counts = {
    "MASE identical": identical_mase,
    "CRPS identical": identical_crps,
    "Both identical": both_identical,
    "Either identical": either_identical,
}

# Absolute counts first ...
for label, count in counts.items():
    print(f"{label}: {count}")

# ... then the same counts as a percentage of all merged rows.
for label, count in counts.items():
    print(f"{label}: {count / n_rows:.2%}")

MASE identical: 96
CRPS identical: 96
Both identical: 96
Either identical: 96
MASE identical: 100.00%
CRPS identical: 100.00%
Both identical: 100.00%
Either identical: 100.00%
