# Results

Import packages.

In [1]:
import pandas as pd
from pathlib import Path

Specify the configuration to read results for.

In [2]:
# * Model choice
alias = "SLSQPEnsemble_3-models_chronos-flowstate-timesfm_opt-mae_1-windows"

# * Random metric
# alias = "SLSQPEnsemble_3-models_moirai-sundial-toto_opt-random_1-windows"

# * Additional models
# alias = "SLSQPEnsemble_4-models_chronos-moirai-sundial-toto_opt-mae_1-windows"

Read all of the configuration's CSV files into a single DataFrame and save the aggregated results.

In [3]:
input_path = Path(f"../results/{alias}")
csv_files = list(input_path.rglob("*.csv"))

df = pd.concat((pd.read_csv(f).tail(1) for f in csv_files), ignore_index=True)
print(f"df shape: {df.shape}")

output_path = Path(f"../results/all_results/{alias}") / "all_results.csv"
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)
print(f"Saved aggregatted results to {output_path}")

df.head()

df shape: (71, 15)
Saved aggregatted results to ../results/all_results/SLSQPEnsemble_3-models_chronos-flowstate-timesfm_opt-mae_1-windows/all_results.csv


Unnamed: 0,dataset,model,eval_metrics/MSE[mean],eval_metrics/MSE[0.5],eval_metrics/MAE[0.5],eval_metrics/MASE[0.5],eval_metrics/MAPE[0.5],eval_metrics/sMAPE[0.5],eval_metrics/MSIS,eval_metrics/RMSE[mean],eval_metrics/NRMSE[mean],eval_metrics/ND[0.5],eval_metrics/mean_weighted_sum_quantile_loss,domain,num_variates
0,us_births/M/short,SLSQPEnsemble_3-models_chronos-flowstate-times...,39726600.0,39726600.0,5216.296998,0.588763,0.016061,0.016235,4.171646,6302.904331,0.019577,0.016202,0.01227,Healthcare,1
1,us_births/W/short,SLSQPEnsemble_3-models_chronos-flowstate-times...,1930535.0,1930535.0,1081.783271,0.983535,0.014695,0.014715,5.57085,1389.436852,0.018861,0.014685,0.011468,Healthcare,1
2,us_births/D/short,SLSQPEnsemble_3-models_chronos-flowstate-times...,136143.7,136143.7,221.82,0.326542,0.02123,0.021208,2.635652,368.976612,0.034589,0.020794,0.016536,Healthcare,1
3,kdd_cup_2018/D/short,SLSQPEnsemble_3-models_chronos-flowstate-times...,2988.72,2988.72,21.386729,1.203104,0.499154,0.470891,9.264702,54.669189,1.224648,0.479085,0.378754,Nature,1
4,kdd_cup_2018/H/short,SLSQPEnsemble_3-models_chronos-flowstate-times...,4275.291,4275.291,22.440467,0.940784,0.891211,0.494234,7.542641,65.385712,1.368641,0.46972,0.375839,Nature,1


View the number of datasets for each term.

In [4]:
num_short_datasets = df['dataset'].str.contains('short', case=False, na=False).sum()
print(f"Number of short datasets: {num_short_datasets}")

Number of short datasets: 53


View the number of datasets for each domain.

In [5]:
domain_counts = (
    df.groupby("domain")
      .size()
      .reset_index(name="num_evaluated_datasets")
)

display(domain_counts)

Unnamed: 0,domain,num_evaluated_datasets
0,Econ/Fin,6
1,Energy,22
2,Healthcare,5
3,Nature,11
4,Sales,4
5,Transport,9
6,Web/CloudOps,14


Display all of the results and only show the MASE and CRPS columns.

In [6]:
pd.set_option('display.max_rows', None)

df = df.rename(columns={
    "eval_metrics/MASE[0.5]": "MASE",
    "eval_metrics/mean_weighted_sum_quantile_loss": "CRPS",
},
errors="ignore",
)

keep_cols = [
    "dataset",
    "MASE",
    "CRPS",
    "domain",
    "num_variates",
]
df = df[keep_cols].sort_values(by="CRPS", ascending=False).reset_index(drop=True)
display(df)

Unnamed: 0,dataset,MASE,CRPS,domain,num_variates
0,car_parts/M/short,0.835387,0.944829,Sales,1
1,bitbrains_fast_storage/H/short,1.107796,0.69761,Web/CloudOps,2
2,bitbrains_rnd/H/short,5.857974,0.605985,Web/CloudOps,2
3,hierarchical_sales/D/short,0.738195,0.566412,Sales,1
4,temperature_rain/D/short,1.326381,0.541326,Nature,1
5,solar/10T/short,0.794058,0.400759,Energy,1
6,kdd_cup_2018/D/short,1.203104,0.378754,Nature,1
7,kdd_cup_2018/H/short,0.940784,0.375839,Nature,1
8,saugeen/W/short,1.17362,0.357284,Nature,1
9,saugeen/D/short,2.802534,0.342358,Nature,1
