In [1]:
import pandas as pd
from pathlib import Path

REPO_ROOT = Path.cwd().parent.parent.parent.parent
RESULTS_PATH = REPO_ROOT / "gift_eval" / "submission" / "all_results.csv"
EXT_RESULTS_ROOT_DIR = REPO_ROOT / "tabpfn_time_series" / "experimental" / "visualization" / "gift-eval-ext-results"

df = pd.read_csv(RESULTS_PATH)

In [2]:
df.head()

Unnamed: 0,dataset,model,eval_metrics/MSE[mean],eval_metrics/MSE[0.5],eval_metrics/MAE[0.5],eval_metrics/MASE[0.5],eval_metrics/MAPE[0.5],eval_metrics/sMAPE[0.5],eval_metrics/MSIS,eval_metrics/RMSE[mean],eval_metrics/NRMSE[mean],eval_metrics/ND[0.5],eval_metrics/mean_weighted_sum_quantile_loss,domain,num_variates
0,bitbrains_fast_storage/5T/long,TabPFN-TS,5094671.0,5094671.0,483.162473,1.152664,6.841835,0.805314,70.219287,2257.13783,5.964941,1.276854,0.885016,Web/CloudOps,2
1,bitbrains_fast_storage/5T/medium,TabPFN-TS,4622063.0,4622063.0,441.637621,1.307634,6.80794,0.805071,98.995552,2149.898456,6.531985,1.341817,0.948632,Web/CloudOps,2
2,bitbrains_fast_storage/5T/short,TabPFN-TS,2246919.0,2246919.0,263.976427,0.998051,4.243834,0.746523,83.52859,1498.972668,4.706403,0.828821,0.661819,Web/CloudOps,2
3,bitbrains_fast_storage/H/short,TabPFN-TS,3140360.0,3140360.0,318.895975,1.184095,4.487001,0.558861,23.623996,1772.106184,5.051078,0.908957,0.669886,Web/CloudOps,2
4,bitbrains_rnd/5T/long,TabPFN-TS,2963662.0,2963662.0,265.860295,3.874638,4.921696,0.749694,253.212313,1721.528843,6.594789,1.018451,0.819124,Web/CloudOps,2


In [3]:
# Get all dataset names
import pandas as pd

# Recursively find all "all_results.csv" files under the EXTERNAL_RESULTS_ROOT_DIR
all_results_files = list(EXT_RESULTS_ROOT_DIR.glob("**/all_results.csv"))

# Read and concatenate all the CSV files
all_results_dfs = []
for file_path in all_results_files:
    single_df = pd.read_csv(file_path)
    all_results_dfs.append(single_df)

# Combine all dataframes if any were successfully read
if all_results_dfs:
    combined_results_df = pd.concat(all_results_dfs, ignore_index=True)
    print(f"Found and combined {len(all_results_dfs)} 'all_results.csv' files")
else:
    combined_results_df = pd.DataFrame()
    print("No 'all_results.csv' files found")

print("combined_results_df.shape", combined_results_df.shape)

Found and combined 10 'all_results.csv' files
combined_results_df.shape (970, 15)


In [4]:
all_df = pd.concat([df, combined_results_df])

In [5]:
all_df.head()

Unnamed: 0,dataset,model,eval_metrics/MSE[mean],eval_metrics/MSE[0.5],eval_metrics/MAE[0.5],eval_metrics/MASE[0.5],eval_metrics/MAPE[0.5],eval_metrics/sMAPE[0.5],eval_metrics/MSIS,eval_metrics/RMSE[mean],eval_metrics/NRMSE[mean],eval_metrics/ND[0.5],eval_metrics/mean_weighted_sum_quantile_loss,domain,num_variates
0,bitbrains_fast_storage/5T/long,TabPFN-TS,5094671.0,5094671.0,483.162473,1.152664,6.841835,0.805314,70.219287,2257.13783,5.964941,1.276854,0.885016,Web/CloudOps,2.0
1,bitbrains_fast_storage/5T/medium,TabPFN-TS,4622063.0,4622063.0,441.637621,1.307634,6.80794,0.805071,98.995552,2149.898456,6.531985,1.341817,0.948632,Web/CloudOps,2.0
2,bitbrains_fast_storage/5T/short,TabPFN-TS,2246919.0,2246919.0,263.976427,0.998051,4.243834,0.746523,83.52859,1498.972668,4.706403,0.828821,0.661819,Web/CloudOps,2.0
3,bitbrains_fast_storage/H/short,TabPFN-TS,3140360.0,3140360.0,318.895975,1.184095,4.487001,0.558861,23.623996,1772.106184,5.051078,0.908957,0.669886,Web/CloudOps,2.0
4,bitbrains_rnd/5T/long,TabPFN-TS,2963662.0,2963662.0,265.860295,3.874638,4.921696,0.749694,253.212313,1721.528843,6.594789,1.018451,0.819124,Web/CloudOps,2.0


In [6]:
processed_df = all_df.copy()
processed_df["dataset_name"] = processed_df["dataset"].apply(lambda x: x.split("/")[0])
processed_df["freq"] = processed_df["dataset"].apply(lambda x: x.split("/")[1])
processed_df["term"] = processed_df["dataset"].apply(lambda x: x.split("/")[2])

model_type_mapping = {}
for model_type, models in {
    "Tabular Foundation Model": ["TabPFN-TS"],
    "Time-Series Foundation Model": ["timesfm_2_0_500m", "chronos_bolt_base", "chronos-bolt-small", "chronos-bolt-tiny"],
    "Deep Learning Time-Series Model": ["DeepAR", "TFT", "PatchTST"],
    "Statistical Time-Series Model": ["Auto_Theta", "Auto_Arima", "Seasonal_Naive"],
}.items():
    for model in models:
        model_type_mapping[model] = model_type

processed_df["model_type"] = processed_df["model"].map(model_type_mapping)


for col in [
    "dataset_name",
    "freq",
    "term",
]:
    assert processed_df[col].notna().all(), f"Found NaN values in the {col} column"


In [7]:
processed_df.head()

Unnamed: 0,dataset,model,eval_metrics/MSE[mean],eval_metrics/MSE[0.5],eval_metrics/MAE[0.5],eval_metrics/MASE[0.5],eval_metrics/MAPE[0.5],eval_metrics/sMAPE[0.5],eval_metrics/MSIS,eval_metrics/RMSE[mean],eval_metrics/NRMSE[mean],eval_metrics/ND[0.5],eval_metrics/mean_weighted_sum_quantile_loss,domain,num_variates,dataset_name,freq,term,model_type
0,bitbrains_fast_storage/5T/long,TabPFN-TS,5094671.0,5094671.0,483.162473,1.152664,6.841835,0.805314,70.219287,2257.13783,5.964941,1.276854,0.885016,Web/CloudOps,2.0,bitbrains_fast_storage,5T,long,Tabular Foundation Model
1,bitbrains_fast_storage/5T/medium,TabPFN-TS,4622063.0,4622063.0,441.637621,1.307634,6.80794,0.805071,98.995552,2149.898456,6.531985,1.341817,0.948632,Web/CloudOps,2.0,bitbrains_fast_storage,5T,medium,Tabular Foundation Model
2,bitbrains_fast_storage/5T/short,TabPFN-TS,2246919.0,2246919.0,263.976427,0.998051,4.243834,0.746523,83.52859,1498.972668,4.706403,0.828821,0.661819,Web/CloudOps,2.0,bitbrains_fast_storage,5T,short,Tabular Foundation Model
3,bitbrains_fast_storage/H/short,TabPFN-TS,3140360.0,3140360.0,318.895975,1.184095,4.487001,0.558861,23.623996,1772.106184,5.051078,0.908957,0.669886,Web/CloudOps,2.0,bitbrains_fast_storage,H,short,Tabular Foundation Model
4,bitbrains_rnd/5T/long,TabPFN-TS,2963662.0,2963662.0,265.860295,3.874638,4.921696,0.749694,253.212313,1721.528843,6.594789,1.018451,0.819124,Web/CloudOps,2.0,bitbrains_rnd,5T,long,Tabular Foundation Model


In [8]:
processed_df.model.unique()

array(['TabPFN-TS', 'Seasonal_Naive', 'TFT', 'timesfm_2_0_500m',
       'PatchTST', 'Auto_Theta', 'Auto_Arima', 'DeepAR',
       'chronos-bolt-tiny', 'chronos-bolt-small', 'chronos_bolt_base'],
      dtype=object)

In [9]:
model_name_order = [
    ("TabPFN-TS", "TabPFN-TS"),
    ("timesfm_2_0_500m", "TimesFM2.0-500M"),
    ("chronos_bolt_base", "Chronos-Bolt Base"),
    ("chronos-bolt-small", "Chronos-Bolt Small"),
    ("chronos-bolt-tiny", "Chronos-Bolt Tiny"),
    ("DeepAR", "DeepAR"),
    ("PatchTST", "PatchTST"),
    ("TFT", "TFT"),
    ("Auto_Arima", "AutoARIMA"),
    ("Auto_Theta", "AutoTheta"),
    ("Seasonal_Naive", "Seasonal Naive"),
]

model_type_order = [
    "Tabular Foundation Model",
    "Time-Series Foundation Model",
    "Deep Learning Time-Series Model",
    "Statistical Time-Series Model",
]

In [10]:
# Group by dataset_name, freq, term and model
# Calculate mean values for MASE and wQL metrics
grouped_df = processed_df.groupby(['dataset_name', 'freq', 'term', 'model']).agg({
    'eval_metrics/MASE[0.5]': 'mean',
    'eval_metrics/mean_weighted_sum_quantile_loss': 'mean',
    'model_type': 'first'  # Get the model_type for each group
}).reset_index()

# Create a mapping dictionary from model_name_order
model_mapping = {old: new for old, new in model_name_order}

# Apply the mapping to the model column
grouped_df['display_model'] = grouped_df['model'].map(model_mapping)

# Create a hierarchical column structure with model_type as the top level
model_type_groups = {}
for model_name, display_name in model_name_order:
    model_type = model_type_mapping.get(model_name)
    if model_type not in model_type_groups:
        model_type_groups[model_type] = []
    model_type_groups[model_type].append(display_name)

# Create a categorical type for model_type with the specified order
grouped_df['model_type'] = pd.Categorical(
    grouped_df['model_type'], 
    categories=model_type_order, 
    ordered=True
)

# Sort the DataFrame by model_type to ensure consistent ordering
grouped_df = grouped_df.sort_values(['model_type', 'model'])

# Rename columns for display
grouped_df = grouped_df.rename(columns={
    'dataset_name': 'Dataset',
    'freq': 'Freq.',
    'term': 'Term'
})

# Pivot the table to get models as columns
mase_table = grouped_df.pivot_table(
    index=['Dataset', 'Freq.', 'Term'],
    columns=['model_type', 'display_model'],
    values='eval_metrics/MASE[0.5]'
)

wql_table = grouped_df.pivot_table(
    index=['Dataset', 'Freq.', 'Term'],
    columns=['model_type', 'display_model'],
    values='eval_metrics/mean_weighted_sum_quantile_loss'
)

# Function to format and bold minimum values in each row
def format_with_bold_min(df):
    formatted_df = df.copy()
    for idx in formatted_df.index:
        row = formatted_df.loc[idx]
        min_val = row.min()
        for col in formatted_df.columns:
            val = row[col]
            if val == min_val:
                formatted_df.loc[idx, col] = f"\\textbf{{{val:.3f}}}"
            else:
                formatted_df.loc[idx, col] = f"{val:.3f}"
    return formatted_df

# Apply formatting with bold minimum values
mase_table_formatted = format_with_bold_min(mase_table)
wql_table_formatted = format_with_bold_min(wql_table)

# Modify column names to include rotatebox for LaTeX
def add_rotatebox_to_columns(df):
    new_columns = df.columns.to_flat_index()
    rotated_columns = []
    
    for col in new_columns:
        model_type, model_name = col
        # Apply rotatebox to the model name (second level)
        rotated_model_name = f"\\rotatebox{{45}}{{{model_name}}}"
        rotated_columns.append((model_type, rotated_model_name))
    
    return pd.MultiIndex.from_tuples(rotated_columns, names=df.columns.names)

# Apply rotatebox to column names
mase_table_formatted.columns = add_rotatebox_to_columns(mase_table_formatted)
wql_table_formatted.columns = add_rotatebox_to_columns(wql_table_formatted)

# Generate LaTeX tables
mase_latex = mase_table_formatted.to_latex(
    multicolumn=True,
    multicolumn_format='c',
    bold_rows=False,
    longtable=False,
    escape=False,
)

wql_latex = wql_table_formatted.to_latex(
    multicolumn=True,
    multicolumn_format='c',
    bold_rows=False,
    longtable=False,
    escape=False,
)

# print("MASE Table LaTeX:")
# print(mase_latex)
# print("\nwQL Table LaTeX:")
print(wql_latex)

# For display in the notebook, we need a different approach
def highlight_min_display(s):
    is_min = s == s.min()
    return ['font-weight: bold' if v else '' for v in is_min]

# Custom CSS to rotate only the model names (second level headers) by 45 degrees
custom_styles = [
    # Target only the second level of column headers (model names)
    {'selector': 'th.col_heading.level1', 'props': [('transform', 'rotate(-45deg)'), 
                                                   ('vertical-align', 'bottom'),
                                                   ('padding-left', '10px'),
                                                   ('padding-right', '10px'),
                                                   ('height', '80px')]},
    # Keep the first level headers (model_type) horizontal
    {'selector': 'th.col_heading.level0', 'props': [('text-align', 'center')]},
    # Keep the row headers (Dataset, Freq, Term) normal
    {'selector': 'th.row_heading', 'props': [('transform', 'none')]}
]

# Display the tables in the notebook with bold minimum values and rotated model names
# print("MASE Table:")
display(mase_table.style.format("{:.3f}")
       .apply(highlight_min_display, axis=1)
       .set_table_styles(custom_styles))

# print("\nwQL Table:")
display(wql_table.style.format("{:.3f}")
       .apply(highlight_min_display, axis=1)
       .set_table_styles(custom_styles))


  formatted_df.loc[idx, col] = f"{val:.3f}"
  formatted_df.loc[idx, col] = f"\\textbf{{{val:.3f}}}"
  formatted_df.loc[idx, col] = f"{val:.3f}"
  formatted_df.loc[idx, col] = f"{val:.3f}"
  formatted_df.loc[idx, col] = f"{val:.3f}"
  formatted_df.loc[idx, col] = f"{val:.3f}"
  formatted_df.loc[idx, col] = f"{val:.3f}"
  formatted_df.loc[idx, col] = f"{val:.3f}"
  formatted_df.loc[idx, col] = f"{val:.3f}"
  formatted_df.loc[idx, col] = f"{val:.3f}"
  formatted_df.loc[idx, col] = f"{val:.3f}"
  formatted_df.loc[idx, col] = f"{val:.3f}"
  formatted_df.loc[idx, col] = f"{val:.3f}"
  formatted_df.loc[idx, col] = f"{val:.3f}"
  formatted_df.loc[idx, col] = f"{val:.3f}"
  formatted_df.loc[idx, col] = f"\\textbf{{{val:.3f}}}"
  formatted_df.loc[idx, col] = f"{val:.3f}"
  formatted_df.loc[idx, col] = f"{val:.3f}"
  formatted_df.loc[idx, col] = f"{val:.3f}"


\begin{tabular}{llllllllllllll}
\toprule
 &  & model_type & Tabular Foundation Model & \multicolumn{4}{c}{Time-Series Foundation Model} & \multicolumn{3}{c}{Deep Learning Time-Series Model} & \multicolumn{3}{c}{Statistical Time-Series Model} \\
 &  & display_model & \rotatebox{45}{TabPFN-TS} & \rotatebox{45}{Chronos-Bolt Base} & \rotatebox{45}{Chronos-Bolt Small} & \rotatebox{45}{Chronos-Bolt Tiny} & \rotatebox{45}{TimesFM2.0-500M} & \rotatebox{45}{DeepAR} & \rotatebox{45}{PatchTST} & \rotatebox{45}{TFT} & \rotatebox{45}{AutoARIMA} & \rotatebox{45}{AutoTheta} & \rotatebox{45}{Seasonal Naive} \\
Dataset & Freq. & Term &  &  &  &  &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{4}{*}{bitbrains_fast_storage} & \multirow[t]{3}{*}{5T} & long & 0.885 & 0.748 & 0.753 & 0.750 & 0.908 & 1.010 & \textbf{0.669} & 0.734 & 1.290 & 1.360 & 1.290 \\
 &  & medium & 0.949 & 0.755 & 0.867 & 0.814 & 0.881 & 0.990 & 0.642 & \textbf{0.610} & 1.270 & 1.450 & 1.270 \\
 &  & short & 0.662 & 0.454 & 0.435 & \tex

Unnamed: 0_level_0,Unnamed: 1_level_0,model_type,Tabular Foundation Model,Time-Series Foundation Model,Time-Series Foundation Model,Time-Series Foundation Model,Time-Series Foundation Model,Deep Learning Time-Series Model,Deep Learning Time-Series Model,Deep Learning Time-Series Model,Statistical Time-Series Model,Statistical Time-Series Model,Statistical Time-Series Model
Unnamed: 0_level_1,Unnamed: 1_level_1,display_model,TabPFN-TS,Chronos-Bolt Base,Chronos-Bolt Small,Chronos-Bolt Tiny,TimesFM2.0-500M,DeepAR,PatchTST,TFT,AutoARIMA,AutoTheta,Seasonal Naive
Dataset,Freq.,Term,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
bitbrains_fast_storage,5T,long,1.153,0.948,0.953,0.994,0.98,7.33,1.14,1.21,1.14,1.61,1.14
bitbrains_fast_storage,5T,medium,1.308,1.062,1.06,1.115,1.075,8.5,1.2,1.38,1.22,1.42,1.22
bitbrains_fast_storage,5T,short,0.998,0.752,0.77,0.864,0.731,0.945,0.973,0.996,1.14,1.15,1.14
bitbrains_fast_storage,H,short,1.184,1.07,1.08,1.15,1.095,6.06,1.34,1.73,1.43,1.35,1.3
bitbrains_rnd,5T,long,3.875,3.397,3.413,3.488,3.6,4.44,3.72,3.71,3.5,4.11,3.5
bitbrains_rnd,5T,medium,4.831,4.449,4.474,4.519,4.595,4.89,4.65,4.81,4.54,4.88,4.54
bitbrains_rnd,5T,short,2.031,1.705,1.72,1.755,1.769,2.1,1.98,2.27,1.97,2.07,1.97
bitbrains_rnd,H,short,6.68,5.897,5.88,5.951,5.987,6.06,6.11,6.19,6.08,5.75,6.04
bizitobs_application,10S,long,3.094,10.484,9.648,12.835,4.075,4.47,3.19,14.8,36400.0,2.93,36400.0
bizitobs_application,10S,medium,2.49,9.72,9.147,11.417,3.082,3.22,2.77,13.8,2.69,1.78,2.69


Unnamed: 0_level_0,Unnamed: 1_level_0,model_type,Tabular Foundation Model,Time-Series Foundation Model,Time-Series Foundation Model,Time-Series Foundation Model,Time-Series Foundation Model,Deep Learning Time-Series Model,Deep Learning Time-Series Model,Deep Learning Time-Series Model,Statistical Time-Series Model,Statistical Time-Series Model,Statistical Time-Series Model
Unnamed: 0_level_1,Unnamed: 1_level_1,display_model,TabPFN-TS,Chronos-Bolt Base,Chronos-Bolt Small,Chronos-Bolt Tiny,TimesFM2.0-500M,DeepAR,PatchTST,TFT,AutoARIMA,AutoTheta,Seasonal Naive
Dataset,Freq.,Term,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
bitbrains_fast_storage,5T,long,0.885,0.748,0.753,0.75,0.908,1.01,0.669,0.734,1.29,1.36,1.29
bitbrains_fast_storage,5T,medium,0.949,0.755,0.867,0.814,0.881,0.99,0.642,0.61,1.27,1.45,1.27
bitbrains_fast_storage,5T,short,0.662,0.454,0.435,0.42,0.447,0.493,0.471,0.451,1.21,0.731,1.21
bitbrains_fast_storage,H,short,0.67,0.774,0.589,0.593,0.688,0.778,0.549,0.595,0.844,1.15,1.08
bitbrains_rnd,5T,long,0.819,0.756,0.756,0.917,0.706,0.672,0.664,0.624,1.29,1.6,1.29
bitbrains_rnd,5T,medium,0.819,0.605,0.792,0.697,0.727,0.647,0.62,0.628,1.26,1.47,1.26
bitbrains_rnd,5T,short,0.608,0.438,0.453,0.482,0.461,0.557,0.474,0.486,1.1,0.741,1.1
bitbrains_rnd,H,short,0.742,0.624,0.623,0.604,0.649,0.585,0.603,0.65,0.874,1.38,1.3
bizitobs_application,10S,long,0.049,0.109,0.092,0.137,0.057,0.083,0.054,0.056,0.973,0.035,0.973
bizitobs_application,10S,medium,0.041,0.104,0.085,0.115,0.033,0.053,0.047,0.047,0.042,0.024,0.042
