# Model Choice

This notebook compares the SLSQP ensemble's performance between two groups of models:
- **Group 1:** Moirai 2.0, Sundial, and Toto
- **Group 2:** Chronos-2, FlowState, and TimesFM 2.5

Import packages.

In [8]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from matplotlib.ticker import FormatStrFormatter
from src.utils.path import resolve_results_path
from tqdm import tqdm

Load Group 1's results.

In [9]:
# Locate Group 1's aggregated results file under the project results directory.
alias_1 = "SLSQPEnsemble_3-models_moirai-sundial-toto_opt-mae_1-windows"
all_results_dir, file_name = "all_results", "all_results.csv"
alias_1_results_path = resolve_results_path() / all_results_dir / alias_1 / file_name

# Columns kept for the comparison; a later cell reuses this list for Group 2.
keep_cols = ["dataset", "model", "MASE", "CRPS", "domain", "num_variates"]

# Read the CSV, give the two metric columns short names, then drop everything
# that is not in `keep_cols`.
df_1 = pd.read_csv(alias_1_results_path).rename(
    columns={
        "eval_metrics/MASE[0.5]": "MASE",
        "eval_metrics/mean_weighted_sum_quantile_loss": "CRPS",
    }
)[keep_cols]

print(f"Loaded {len(df_1)} rows for {alias_1}")
df_1.head()

Loaded 97 rows for SLSQPEnsemble_3-models_moirai-sundial-toto_opt-mae_1-windows


Unnamed: 0,dataset,model,MASE,CRPS,domain,num_variates
0,bitbrains_fast_storage/5T/long,TSOrchestra,0.940343,0.825926,Web/CloudOps,2
1,bitbrains_fast_storage/5T/medium,TSOrchestra,1.023058,0.655812,Web/CloudOps,2
2,bitbrains_fast_storage/5T/short,TSOrchestra,0.700117,0.419518,Web/CloudOps,2
3,bitbrains_fast_storage/H/short,TSOrchestra,1.085526,0.645681,Web/CloudOps,2
4,bitbrains_rnd/5T/long,TSOrchestra,3.393987,0.626281,Web/CloudOps,2


Load Group 2's results.

In [11]:
# Locate Group 2's aggregated results file under the project results directory.
alias_2 = "SLSQPEnsemble_3-models_chronos-flowstate-timesfm_opt-mae_1-windows"
all_results_dir, file_name = "all_results", "all_results.csv"
alias_2_results_path = resolve_results_path() / all_results_dir / alias_2 / file_name

# Read the CSV, give the two metric columns short names, then keep only the
# columns listed in `keep_cols` (defined in the Group 1 loading cell).
df_2 = pd.read_csv(alias_2_results_path).rename(
    columns={
        "eval_metrics/MASE[0.5]": "MASE",
        "eval_metrics/mean_weighted_sum_quantile_loss": "CRPS",
    }
)[keep_cols]

print(f"Loaded {len(df_2)} rows for {alias_2}")
df_2.head()

Loaded 71 rows for SLSQPEnsemble_3-models_chronos-flowstate-timesfm_opt-mae_1-windows


Unnamed: 0,dataset,model,MASE,CRPS,domain,num_variates
0,us_births/M/short,SLSQPEnsemble_3-models_chronos-flowstate-times...,0.588763,0.01227,Healthcare,1
1,us_births/W/short,SLSQPEnsemble_3-models_chronos-flowstate-times...,0.983535,0.011468,Healthcare,1
2,us_births/D/short,SLSQPEnsemble_3-models_chronos-flowstate-times...,0.326542,0.016536,Healthcare,1
3,kdd_cup_2018/D/short,SLSQPEnsemble_3-models_chronos-flowstate-times...,1.203104,0.378754,Nature,1
4,kdd_cup_2018/H/short,SLSQPEnsemble_3-models_chronos-flowstate-times...,0.940784,0.375839,Nature,1


Merge the results so we only compare datasets that both groups were evaluated on, then keep only the short-term datasets.

In [12]:
# Suffixes appended to the overlapping columns (model, MASE, CRPS) of each group.
suffixes = ["_1", "_2"]

# Inner-join the two groups on the dataset identity columns so only datasets
# present in both survive, then keep the rows whose dataset name contains
# "short" (case-insensitive), i.e. the short-term datasets.
merged_df = df_1.merge(
    df_2,
    on=["dataset", "domain", "num_variates"],
    how="inner",
    suffixes=suffixes,
).loc[lambda frame: frame["dataset"].str.contains("short", case=False)]

print(f"Number of rows: {len(merged_df)}")
merged_df.head()

Number of rows: 53


Unnamed: 0,dataset,model_1,MASE_1,CRPS_1,domain,num_variates,model_2,MASE_2,CRPS_2
0,bitbrains_fast_storage/H/short,TSOrchestra,1.085526,0.645681,Web/CloudOps,2,SLSQPEnsemble_3-models_chronos-flowstate-times...,1.107796,0.69761
1,bitbrains_rnd/H/short,TSOrchestra,5.871847,0.664922,Web/CloudOps,2,SLSQPEnsemble_3-models_chronos-flowstate-times...,5.857974,0.605985
4,bizitobs_application/10S/short,TSOrchestra,1.359036,0.013357,Web/CloudOps,2,SLSQPEnsemble_3-models_chronos-flowstate-times...,0.996941,0.009216
7,bizitobs_l2c/5T/short,TSOrchestra,0.24871,0.067188,Web/CloudOps,7,SLSQPEnsemble_3-models_chronos-flowstate-times...,0.266656,0.070616
10,bizitobs_l2c/H/short,TSOrchestra,0.476446,0.220888,Web/CloudOps,7,SLSQPEnsemble_3-models_chronos-flowstate-times...,0.421841,0.177849
