In [38]:
# !pip install --upgrade pip setuptools wheel
# !pip install 'critdd @ git+https://github.com/mirkobunse/critdd'

In [39]:
import numpy as np
import pandas as pd
from critdd import Diagram

In [40]:
# Load the experiment results and replace every missing value, in every column,
# with the literal string "None".
# NOTE(review): the fill is not restricted to key columns; if a numeric metric column
# ever contains NaN it would receive the string too - confirm the cleaned CSV has none.
res_df = pd.read_csv("results_normalized_True_cleaned.csv").fillna("None")

In [None]:
# Transformer variants that are left out of the comparison.
excluded_transformers = [
    "LastKnownNormalizer_wo_standardscaler",
    "DifferenceNormalizer_wo_standardscaler",
]
# Columns that identify one experiment configuration (everything except the transformer).
config_keys = ["model", "strategy_time", "datetime", "id", "mode"]

# Drop the excluded transformers, then keep the first row for each
# (configuration, transformer) pair so the pivot below has unique cells.
keep_mask = ~res_df["transformer_name"].isin(excluded_transformers)
current_df = res_df[keep_mask].drop_duplicates(subset=config_keys + ["transformer_name"])

# Wide layout: one row per configuration, one column per transformer, values = test MAE.
df = current_df.pivot(index=config_keys, columns="transformer_name", values="mae_test")

# Transformers

In [None]:
# Start from the raw results and drop the two `*_wo_standardscaler` transformer variants.
current_df = res_df.copy()
current_df = current_df[
    (current_df["transformer_name"] != "LastKnownNormalizer_wo_standardscaler")
    & (current_df["transformer_name"] != "DifferenceNormalizer_wo_standardscaler")
]
# Keep a single row per (configuration, transformer) pair; `pivot` raises on duplicates.
current_df = current_df.drop_duplicates(
    subset=["model", "strategy_time", "datetime", "id", "transformer_name", "mode"]
)

# One row per experiment configuration, one column per transformer, values = test MAE.
df = current_df.pivot(
    index=["model", "strategy_time", "datetime", "id", "mode"],
    columns="transformer_name",
    values="mae_test",
)

# Only configurations evaluated with every transformer can be ranked against each other.
df = df.dropna()

# NOTE(review): Diagram is used with its default outcome direction; for MAE lower is
# better - confirm this matches critdd's default (`maximize_outcome`).
diagram = Diagram(
    df.to_numpy(),
    treatment_names=df.columns,
)

# A bare expression in the middle of a cell is evaluated and thrown away, so the
# ranks and the significance groups are displayed explicitly.
display(diagram.average_ranks)
display(diagram.get_groups(alpha=0.05, adjustment="holm"))

# NOTE(review): the axis title "critdd" looks like a placeholder - confirm it is the
# intended title of the figure.
diagram.to_file(
    "transformer_CD.tex",
    alpha=0.05,
    adjustment="holm",
    reverse_x=True,
    axis_options={"title": "critdd"},
)

In [None]:
def get_model_type(model_name: str) -> str:
    """Classify a model by its name.

    Returns ``"NN"`` when the upper-cased name contains the substring ``"NN"``
    and ``"Boosting"`` for every other name.
    """
    return "NN" if "NN" in model_name.upper() else "Boosting"

In [None]:
def calc_avg_rank_median_mae_for_param(
    df_subset: pd.DataFrame, param_group: str, metric_col: str = "mae_test"
) -> pd.DataFrame:
    """Summarise how each value of one parameter performs.

    Every column of ``df_subset`` other than ``param_group``, ``metric_col`` and the
    known metric/timing columns is treated as part of the experiment key. For each
    key the values of ``param_group`` are ranked against each other by the mean of
    ``metric_col`` (rank 1 = smallest, ties share the lowest rank); keys that lack a
    result for any value are ignored for the ranking.

    Args:
        df_subset: Long-format results, one row per experiment run.
        param_group: Name of the column whose values are compared.
        metric_col: Name of the metric column to rank on.

    Returns:
        A frame with one row per parameter value and the columns ``param_value``,
        ``avg_rank`` (mean rank over the complete keys) and ``median_mae`` (median of
        ``metric_col`` over all rows of ``df_subset`` with that value, not only the
        rows that took part in the ranking).
    """
    non_key_columns = {
        param_group,
        metric_col,
        "rmse_test",
        "fit_time_test",
        "forecast_time_test",
        "mae_val",
        "rmse_val",
        "fit_time_val",
        "forecast_time_val",
    }
    key_columns = [name for name in df_subset.columns if name not in non_key_columns]

    # Wide layout (key x parameter value), restricted to keys with a result for every value.
    complete_cases = df_subset.pivot_table(
        index=key_columns, columns=param_group, values=metric_col, aggfunc="mean"
    ).dropna(axis=0, how="any")

    # Rank within each key, then average the ranks per parameter value.
    mean_rank = complete_cases.rank(axis=1, ascending=True, method="min").mean(axis=0)

    median_lookup = df_subset.groupby(param_group)[metric_col].median()
    medians = [median_lookup.get(value, np.nan) for value in mean_rank.index]

    return pd.DataFrame(
        {
            "param_value": mean_rank.index,
            "avg_rank": mean_rank.values,
            "median_mae": medians,
        }
    )

In [None]:
def build_hparams_comparison_table(df: pd.DataFrame) -> pd.DataFrame:
    """Compare hyperparameter values for NN models, boosting models and all models.

    Each model is tagged via ``get_model_type``; boosting rows whose ``mode`` is
    ``"multivariate CI"`` are removed. For each of the parameter columns ``mode``,
    ``strategy_time``, ``datetime`` and ``id`` the average rank and the median test
    MAE of every parameter value are computed three times (NN rows, boosting rows,
    all remaining rows) with ``calc_avg_rank_median_mae_for_param``.

    Args:
        df: Long-format results containing at least ``model``, ``mode``,
            ``strategy_time``, ``datetime``, ``id`` and ``mae_test``. Not modified.

    Returns:
        A frame indexed by ``(param_group, param_value)`` and sorted by that index,
        whose six columns are labelled with the flat tuples ``("NN", "Rank")``,
        ``("NN", "Median_MAE")``, ``("Boosting", ...)`` and ``("Overall", ...)``.
        A value that is missing for one subset shows up as NaN.
    """
    tagged = df.copy()
    tagged["model_type"] = tagged["model"].apply(get_model_type)
    unsupported = (tagged["model_type"] == "Boosting") & (tagged["mode"] == "multivariate CI")
    tagged = tagged[~unsupported]

    # The three row subsets do not depend on the parameter being analysed.
    subsets = {
        "nn": tagged[tagged["model_type"] == "NN"],
        "boost": tagged[tagged["model_type"] == "Boosting"],
        "all": tagged,
    }

    # (column of the merged per-parameter frame, label in the final table)
    column_spec = [
        ("avg_rank_nn", ("NN", "Rank")),
        ("median_mae_nn", ("NN", "Median_MAE")),
        ("avg_rank_boost", ("Boosting", "Rank")),
        ("median_mae_boost", ("Boosting", "Median_MAE")),
        ("avg_rank_all", ("Overall", "Rank")),
        ("median_mae_all", ("Overall", "Median_MAE")),
    ]

    records = []
    for param_group in ("mode", "strategy_time", "datetime", "id"):
        stats = {
            tag: calc_avg_rank_median_mae_for_param(rows, param_group, metric_col="mae_test")
            for tag, rows in subsets.items()
        }

        # Outer merges keep parameter values that exist for only one subset.
        merged = stats["nn"].merge(
            stats["boost"], on="param_value", how="outer", suffixes=("_nn", "_boost")
        )
        merged = merged.merge(stats["all"], on="param_value", how="outer").rename(
            columns={"avg_rank": "avg_rank_all", "median_mae": "median_mae_all"}
        )

        for row in merged.itertuples(index=False):
            record = {"param_group": param_group, "param_value": row.param_value}
            record.update((label, getattr(row, source)) for source, label in column_spec)
            records.append(record)

    table = (
        pd.DataFrame(records)
        .sort_values(by=["param_group", "param_value"])
        .set_index(["param_group", "param_value"])
    )
    return table[[label for _, label in column_spec]]

In [48]:
# Build the hyperparameter comparison from `current_df` as left by the transformer
# cell above (excluded transformers removed, duplicates dropped) and render it.
comparison_table = build_hparams_comparison_table(current_df)
display(comparison_table)

Unnamed: 0_level_0,Unnamed: 1_level_0,"(NN, Rank)","(NN, Median_MAE)","(Boosting, Rank)","(Boosting, Median_MAE)","(Overall, Rank)","(Overall, Median_MAE)"
param_group,param_value,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
datetime,False,1.381944,1.008674,1.333333,1.604974,1.374269,1.044819
datetime,with_normalization_over_all,1.618056,1.132314,1.666667,1.617397,1.625731,1.178527
id,False,1.72619,1.078031,1.571429,1.617397,1.695238,1.131925
id,with_le_normalization_over_all,1.27381,1.002403,1.428571,1.589798,1.304762,1.061101
mode,global,1.547619,1.005592,1.095238,1.564849,1.547619,1.073479
mode,multivariate CI,2.261905,1.12174,,,2.261905,1.12174
mode,multivariate CM,2.190476,1.131925,1.904762,1.624808,2.190476,1.212908
strategy_time,FlatWideMIMOStrategy__model_horizon_NaN,3.9375,1.307954,2.888889,1.620753,3.77193,1.354324
strategy_time,MIMOStrategy__model_horizon_NaN,1.75,1.027963,2.444444,1.607232,1.859649,1.06211
strategy_time,RecursiveStrategy__model_horizon_1.0,2.416667,1.031449,2.777778,1.606648,2.473684,1.076293


In [None]:
# The table comes back with flat (model, metric) tuples as column labels; turn them
# into a named two-level column index before exporting.
model_metric_columns = pd.MultiIndex.from_tuples(
    comparison_table.columns, names=["Model", "Metric"]
)
comparison_table.columns = model_metric_columns

# Export options for the hyperparameter table.
hparams_latex_options = {
    "multirow": True,
    "multicolumn": True,
    "caption": "Comparison of hyperparameters.",
    "label": "tab:hparams",
    "float_format": "%.4f",
}
comparison_table.to_latex("hparams_comparison.tex", **hparams_latex_options)

---

In [None]:
def build_top10_test_val_table(df: pd.DataFrame, top_n: int = 10) -> pd.DataFrame:
    """
    Build a side-by-side leaderboard of (model, strategy) pairs.

    For every ``(model, strategy_time)`` pair the minimum ``mae_test`` and the
    minimum ``mae_val`` over all rows are taken. The pairs are then ranked twice,
    once by test MAE and once by validation MAE, and the two rankings are placed
    next to each other so that row ``k`` holds the k-th best pair of each ranking.

    Args:
        df: Results with the columns ``model``, ``strategy_time``, ``mae_test``
            and ``mae_val``.
        top_n: Number of rows to keep from each ranking (default 10). Fewer rows
            are returned when there are fewer pairs.

    Returns:
        A frame indexed ``1..k`` (index name ``"rank"``) with EIGHT columns. Both
        ranked blocks carry both metrics, so the labels ``mae_test`` and
        ``mae_val`` each occur twice. By position:

        0 ``model_test``, 1 ``strategy_test``, 2 ``mae_test`` of the test-ranked pair,
        3 ``mae_test`` of the validation-ranked pair,
        4 ``model_val``, 5 ``strategy_val``, 6 ``mae_val`` of the test-ranked pair,
        7 ``mae_val`` of the validation-ranked pair.

        This layout is kept unchanged because callers select columns by position.

    Raises:
        ValueError: If ``top_n`` is smaller than 1.
    """
    if top_n < 1:
        raise ValueError(f"top_n must be a positive integer, got {top_n!r}")

    # Best score of each (model, strategy) pair; "median" is a possible alternative.
    best_per_pair = df.groupby(["model", "strategy_time"], as_index=False).agg(
        {"mae_test": "min", "mae_val": "min"}
    )

    def _ranked_block(sort_col: str, suffix: str) -> pd.DataFrame:
        # Top rows by `sort_col`, re-indexed 1..k, with suffixed identifier columns.
        block = (
            best_per_pair.sort_values(sort_col, ascending=True)
            .head(top_n)
            .rename(columns={"model": f"model_{suffix}", "strategy_time": f"strategy_{suffix}"})
        )
        block.index = range(1, len(block) + 1)
        return block

    # Both blocks share the index 1..k, so the concat aligns them rank by rank.
    side_by_side = pd.concat(
        [_ranked_block("mae_test", "test"), _ranked_block("mae_val", "val")], axis=1
    )

    # Selecting a duplicated label returns every column carrying it (see Returns).
    cols = ["model_test", "strategy_test", "mae_test", "model_val", "strategy_val", "mae_val"]
    df_final = side_by_side[cols]
    df_final.index.name = "rank"

    return df_final

In [None]:
# Work on a fresh copy of the full results (no transformer filtering here).
current_df = res_df.copy()

# Raw strategy identifiers -> short labels used in the exported table.
strategy_time_replacements = {
    "FlatWideMIMOStrategy__model_horizon_NaN": "FlatWideMIMO",
    "MIMOStrategy__model_horizon_NaN": "MIMO",
    "RecursiveStrategy__model_horizon_1.0": "Recursive (horizon=1)",
    "RecursiveStrategy__model_horizon_6.0": "Recursive (horizon=6)",
}

# Exact-match substitution of whole cell values; anything not listed stays unchanged.
current_df["strategy_time"] = current_df["strategy_time"].replace(strategy_time_replacements)

In [57]:
# build_top10_test_val_table returns eight columns: both ranked blocks carry
# `mae_test` and `mae_val`, so each of those labels appears twice.
comparison_table = build_top10_test_val_table(current_df)
# Keep model/strategy/mae_test of the test ranking (positions 0, 1, 2) and
# model/strategy/mae_val of the validation ranking (positions 4, 5, 7); positions
# 3 and 6 hold the other block's copy of each metric and are dropped.
comparison_table = comparison_table.iloc[:, [0, 1, 2, 4, 5, 7]]

In [58]:
# Show the leaderboard: test ranking in the left three columns, validation ranking
# in the right three.
comparison_table

Unnamed: 0_level_0,model_test,strategy_test,mae_test,model_val,strategy_val,mae_val
rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,GPT4TS_NN,Recursive (horizon=6),0.780371,GPT4TS_NN,MIMO,0.271333
2,GPT4TS_NN,Recursive (horizon=1),0.782212,GPT4TS_NN,Recursive (horizon=6),0.283282
3,PyBoost,FlatWideMIMO,0.792144,GPT4TS_NN,Recursive (horizon=1),0.293814
4,GPT4TS_NN,MIMO,0.792593,PatchTST_NN,MIMO,0.300529
5,PatchTST_NN,Recursive (horizon=6),0.85475,PatchTST_NN,Recursive (horizon=6),0.30498
6,PatchTST_NN,Recursive (horizon=1),0.857175,DLinear_NN,Recursive (horizon=6),0.316898
7,DLinear_NN,Recursive (horizon=6),0.874276,PatchTST_NN,Recursive (horizon=1),0.317978
8,PatchTST_NN,MIMO,0.876885,DLinear_NN,MIMO,0.320469
9,DLinear_NN,Recursive (horizon=1),0.897668,PyBoost,FlatWideMIMO,0.323934
10,DLinear_NN,MIMO,0.921975,DLinear_NN,Recursive (horizon=1),0.33125


In [None]:
# Export the model leaderboard. It gets its own LaTeX label: "tab:hparams" is
# already used by the table written to hparams_comparison.tex, and reusing it would
# make the label multiply defined when both tables are included in one document.
comparison_table.to_latex(
    "comparison of models.tex",
    multirow=True,
    multicolumn=True,
    caption="Comparison of models.",
    label="tab:models",
    float_format="%.4f",
)