In [14]:
from src.reproduce_utils import Results
import pandas as pd

In [24]:
def get_autopytorch_results_df(path):
    """Read a results CSV into a frame with multi-level rows and columns.

    The first two lines of the file are parsed as column header levels and the
    first five columns as row index levels. The level names listed below only
    determine *how many* levels are read; they are not passed to pandas.

    Args:
        path: Anything ``pd.read_csv`` accepts as its first argument.

    Returns:
        The parsed ``pd.DataFrame``.
    """
    column_level_names = ("metric", "dataset")
    row_level_names = (
        "method",
        "optimization_metric",
        "optimization_time",
        "eval_position",
        "split",
    )
    header_rows = [position for position, _ in enumerate(column_level_names)]
    index_columns = [position for position, _ in enumerate(row_level_names)]
    return pd.read_csv(path, index_col=index_columns, header=header_rows)



def get_result(methods, metrics, dataset_names, result_df):
    """Reshape a per-dataset score table into a (method, seed) x (metric, dataset) frame.

    Args:
        methods: Method names; each becomes a row (with the seed level fixed to 1).
            A name containing "logistic" is read from the ``score_linear`` column,
            every other name ``m`` from ``score_<m>``.
        metrics: Metric names used to build the column index. Only the "acc"
            columns are ever filled in.
        dataset_names: Dataset ids to keep; rows of ``result_df`` whose
            ``dataset_id`` (cast to int) is not among them are ignored.
        result_df: Frame with a ``dataset_id`` column and one ``score_*`` column
            per requested method.

    Returns:
        ``Results`` constructed from the filled frame.
    """
    column_levels = ["metric", "dataset"]
    row_levels = ["method", "seed"]
    table = pd.DataFrame(
        columns=pd.MultiIndex.from_product([metrics, dataset_names], names=column_levels),
        index=pd.MultiIndex.from_product([methods, [1]], names=row_levels),
    )
    table.sort_index(inplace=True)

    # Resolve each method's source column once instead of once per input row.
    score_columns = []
    for method in methods:
        source = "linear" if "logistic" in method else method
        score_columns.append((method, f"score_{source}"))

    for _, record in result_df.iterrows():
        for method, score_column in score_columns:
            # The membership test does not depend on the method, so leaving the
            # inner loop here skips the whole row.
            if int(record["dataset_id"]) not in dataset_names:
                break
            table.loc[(method, 1), ("acc", record["dataset_id"])] = record[score_column]
    return Results(df=table)

In [54]:
# Load the cocktails-default runs, average all rows that share a 'method' index
# value, transpose, and keep the 'score' block. Note that ('score') is just a
# parenthesised string, not a 1-tuple. The remaining index is cast to int
# (presumably dataset ids -- TODO confirm against the CSV header).
cocktails_default_df = get_autopytorch_results_df("csv_files/results_autopytorch_default_remaining.csv")
scores_cocktails_default = cocktails_default_df.groupby('method').mean().T.loc[('score')]
scores_cocktails_default.index = scores_cocktails_default.index.astype("int")

In [55]:
# Same processing as for the main cocktails-default file, applied to the "custom"
# CSV: mean per method, transpose, keep the 'score' block, cast the index to int.
# NOTE(review): the `_failed` suffix suggests these are datasets that had to be
# run separately -- confirm.
cocktails_default_df_failed = get_autopytorch_results_df("csv_files/results_autopytorch_default_custom.csv")
scores_cocktails_default_failed = cocktails_default_df_failed.groupby('method').mean().T.loc[('score')]
scores_cocktails_default_failed.index = scores_cocktails_default_failed.index.astype("int")

In [58]:
# Copy the 'autopytorch_default' score of every dataset present in
# scores_cocktails_default_failed into scores_cocktails_default, overwriting any
# value already there. The loop variable is called dataset_id (not id) so the
# builtin id() stays usable in the cells that follow.
for dataset_id in scores_cocktails_default_failed.index:
    scores_cocktails_default.loc[dataset_id, 'autopytorch_default'] = (
        scores_cocktails_default_failed.loc[dataset_id, 'autopytorch_default']
    )

In [17]:
# Load the autopytorch-master default runs and reduce them the same way as the
# cocktails runs: mean per method, transpose, keep the 'score' block, and cast
# the remaining index to int.
autopytorch_default_df = get_autopytorch_results_df("csv_files/results_autopytorch_master_default_remaining.csv")
scores_autopytorch_default = autopytorch_default_df.groupby('method').mean().T.loc[('score')]
scores_autopytorch_default.index = scores_autopytorch_default.index.astype("int")

In [60]:
# Baseline results table, indexed by dataset_id and sorted by that index.
all_my_results = pd.read_csv("csv_files/all_my_results_remaining.csv", index_col="dataset_id").sort_index()

In [61]:
# Attach the two AutoPyTorch score series as new columns. pandas aligns the
# assigned Series on the frame's index, so rows without a matching entry get NaN.
# This mutates all_my_results in place.
all_my_results["score_cocktails_default"] = scores_cocktails_default['autopytorch_default']
all_my_results["score_autopytorch_default"] = scores_autopytorch_default['autopytorch_master_default']


In [63]:
# Show the merged table, including the two score columns added above.
all_my_results

Unnamed: 0_level_0,dataset_name,original_n_samples,original_n_features,num_categorical_columns,num_pseudo_categorical_columns,num_columns_missing,num_rows_missing,score_resnet,score_linear,score_hgbt,score_tree,score_mlp,score_rf,heterogeneous,n_samples,too_small,too_easy,score_cocktails_default,score_autopytorch_default
dataset_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
44,spambase,4601.0,57.0,0.0,0.0,0.0,0.0,0.923897,0.917096,0.947794,0.902757,0.933272,0.942096,,3626.0,False,,0.933211,0.920956
60,waveform-5000,5000.0,40.0,0.0,0.0,0.0,0.0,0.900504,0.927694,0.919033,0.865055,0.912588,0.921450,,3310.0,False,,0.881562,0.869751
151,electricity,45312.0,8.0,1.0,0.0,0.0,0.0,0.789512,0.740939,0.866601,0.834464,0.787723,0.862358,,38474.0,False,,0.812402,0.800914
279,meta_stream_intervals.arff,45164.0,74.0,0.0,1.0,0.0,0.0,0.990750,0.972645,0.996898,0.995431,0.991935,0.996560,,11818.0,False,,0.993702,0.991540
293,covertype,581012.0,54.0,0.0,0.0,0.0,0.0,0.824347,0.616487,0.818973,0.764407,0.784513,0.827880,,566602.0,False,,0.841367,0.814311
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42477,default-of-credit-card-clients,30000.0,23.0,0.0,2.0,0.0,0.0,0.699397,0.669312,0.695178,0.620743,0.705927,0.699347,,13272.0,False,,0.676544,0.695798
42742,porto-seguro,595212.0,57.0,31.0,6.0,0.0,142164.0,0.534245,0.563424,0.565413,0.514056,0.534193,0.557282,,34308.0,False,,0.527761,0.547511
42746,KDDCup99,4898431.0,41.0,9.0,5.0,0.0,0.0,0.999767,0.999993,1.000000,0.999967,0.999967,1.000000,,2144034.0,False,,0.999978,0.999922
42769,Higgs,1000000.0,28.0,0.0,4.0,0.0,0.0,0.676173,0.635553,0.711640,0.616840,0.683793,0.705680,,940160.0,False,,0.664689,0.674767


In [64]:
def get_average_rank_table(results: Results):
    """Summarise mean rank and mean score per method across datasets.

    For every metric whose name does not contain "time", each dataset column is
    averaged per method and ranked (rank 1 = highest mean). Those ranks are then
    averaged over datasets, and the raw scores are averaged per method over all
    datasets.

    Args:
        results: Object exposing ``datasets``, ``metrics`` and ``df``; ``df`` is
            indexed by a level named "method" and has (metric, dataset) columns.

    Returns:
        A frame with one column per method and, per metric, the rows
        "<METRIC> Mean Rank" and "<METRIC> Mean Score".
    """
    dataset_names = results.datasets
    metric_names = sorted(results.metrics, reverse=True)
    frame = results.df

    rank_by_metric = {}
    score_by_metric = {}
    for metric in metric_names:
        if "time" in metric:
            continue
        per_metric = frame[metric]
        # Datasets without a column for this metric are silently skipped.
        present = [name for name in dataset_names if name in per_metric.columns]
        per_dataset_ranks = [
            per_metric[name].groupby('method').mean().rank(ascending=False)
            for name in present
        ]
        per_dataset_scores = [per_metric[name] for name in present]

        label = metric.upper()
        rank_by_metric[label] = pd.concat(per_dataset_ranks).groupby("method").mean()
        score_by_metric[label] = pd.concat(per_dataset_scores).groupby("method").mean()

    score_df = pd.DataFrame(score_by_metric).reset_index()
    rank_df = pd.DataFrame(rank_by_metric).reset_index()
    merged = rank_df.merge(score_df, on="method", suffixes=[" Mean Rank", " Mean Score"])

    # After transposing, the first row holds the method names: promote it to
    # the column header and drop it from the body.
    transposed = merged.T
    transposed.columns = transposed.iloc[0]
    return transposed.iloc[1:]

def pprint(df):
    """Round every column to 4 decimals and print the frame as a markdown table.

    The frame is modified in place: each column is cast to float and rounded, so
    the caller's object holds the rounded values afterwards. Returns None.
    Note that the name is the same as the standard-library ``pprint`` module.
    """
    for name in df:
        as_float = df[name].astype('float')
        df[name] = as_float.round(decimals=4)

    rendered = df.to_markdown()
    print(rendered)

In [None]:
def get_too_easy_select_acc_to_difference(df: pd.DataFrame, methods: list):
    """Unfinished draft: compute the per-row standard deviation and discard it.

    Always returns None. A function with the same name is defined again further
    down in this notebook with a full implementation; once that cell runs, it
    replaces this definition.
    """
    method_scores = df[methods]
    method_scores.std(axis=1)  # computed but neither stored nor returned
    return None

In [65]:
def get_too_easy_select_acc_to_criteria(
    df: pd.DataFrame,
    better_methods: list,
    worse_methods: list,
    margin: float = 1.05,
):
    """Split the rows of ``df`` into "too easy" and "select" by a score margin.

    A row is "too easy" when the best score among ``better_methods`` is strictly
    below ``margin`` times the best score among ``worse_methods``. Every other
    row, including rows where the comparison involves NaN, is "select".

    Args:
        df: Frame with one row per dataset and one column per score.
        better_methods: Non-empty list of column names; their row-wise maximum
            is the left-hand side of the comparison.
        worse_methods: Non-empty list of column names; their row-wise maximum
            is the right-hand side of the comparison.
        margin: Multiplier applied to the ``worse_methods`` score. Defaults to
            1.05.

    Returns:
        ``(too_easy_ids, select_ids)``: two lists of index labels of ``df``.
    """

    def _best_score(columns):
        # A single column is used as-is; several columns are reduced row-wise.
        return df[columns].max(axis=1) if len(columns) > 1 else df[columns[0]]

    is_too_easy = _best_score(better_methods) < margin * _best_score(worse_methods)
    too_easy_on_selection_criteria = df.loc[is_too_easy].index.to_list()
    select_on_selection_criteria = df.loc[~is_too_easy].index.to_list()
    return too_easy_on_selection_criteria, select_on_selection_criteria

In [66]:
# Partition the dataset ids of all_my_results once per pairing of better_methods
# against worse_methods. Every call returns (too_easy_ids, select_ids). The
# variable names encode the pairing and are referenced by name when ranks_df is
# built in the next cell, so they must not be renamed here alone.
too_easy_autopytorch_default_hgbt_linear_dids, select_autopytorch_default_hgbt_linear_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt", "score_autopytorch_default"], worse_methods=["score_linear"])
too_easy_cocktails_default_hgbt_linear_dids, select_cocktails_default_hgbt_linear_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt", "score_cocktails_default"], worse_methods=["score_linear"])
too_easy_autopytorch_default_hgbt_tree_dids, select_autopytorch_default_hgbt_tree_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt", "score_autopytorch_default"], worse_methods=["score_tree"])
too_easy_cocktails_default_hgbt_tree_dids, select_cocktails_default_hgbt_tree_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt", "score_cocktails_default"], worse_methods=["score_tree"])
too_easy_cocktails_default_hgbt_combined_dids, select_cocktails_default_hgbt_combined_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt", "score_cocktails_default"], worse_methods=["score_tree", "score_linear"])
too_easy_autopytorch_default_hgbt_combined_dids, select_autopytorch_default_hgbt_combined_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt", "score_autopytorch_default"], worse_methods=["score_tree", "score_linear"])
# HGBT alone against the baselines.
too_easy_hgbt_tree_dids, select_hgbt_tree_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt"], worse_methods=["score_tree"])
too_easy_hgbt_linear_dids, select_hgbt_linear_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt"], worse_methods=["score_linear"])
too_easy_hgbt_combined_dids, select_hgbt_combined_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt"], worse_methods=["score_tree", "score_linear"])
# Single neural methods against the tree baseline.
too_easy_resnet_tree_dids, select_resnet_tree_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_resnet"], worse_methods=["score_tree"])
too_easy_mlp_tree_dids, select_mlp_tree_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_mlp"], worse_methods=["score_tree"])
too_easy_cocktails_default_tree_dids, select_cocktails_default_tree_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_cocktails_default"], worse_methods=["score_tree"])
too_easy_autopytorch_default_tree_dids, select_autopytorch_default_tree_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_autopytorch_default"], worse_methods=["score_tree"])
# All three better methods against both baselines.
too_easy_autopytorch_default_cocktails_default_hgbt_combined_dids, select_autopytorch_default_cocktails_default_hgbt_combined_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt", "score_cocktails_default", "score_autopytorch_default"], worse_methods=["score_linear", "score_tree"])
# Single AutoPyTorch variants against the linear baseline.
too_easy_cocktails_default_linear_dids, select_cocktails_default_linear_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_cocktails_default"], worse_methods=["score_linear"])
too_easy_autopytorch_default_linear_dids, select_autopytorch_default_linear_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_autopytorch_default"], worse_methods=["score_linear"])


In [67]:


# Registry of dataset groups to report on. Despite the name this is a plain dict,
# not a DataFrame. Each key is a label (underscores become spaces and the text is
# upper-cased when printed by the reporting loops). Each value holds the group's
# dataset ids under "dids" and a "ranks" slot that starts as None and is filled
# in by the reporting loops. Insertion order is the print order.
ranks_df = {
    # results vs logreg
    "too_easy_(HGBT,_Autopytorch_default)_vs_(Logreg)_on_my": {
        "ranks": None,
        "dids": too_easy_autopytorch_default_hgbt_linear_dids},
    "select_(HGBT,_Autopytorch_default)_vs_(Logreg)_on_my": {
        "ranks": None,
        "dids": select_autopytorch_default_hgbt_linear_dids},
    "too_easy_(HGBT,_Cocktails_default)_vs_(Logreg)_on_my": {
        "ranks": None,
        "dids": too_easy_cocktails_default_hgbt_linear_dids},
    "select_(HGBT,_Cocktails_default)_vs_(Logreg)_on_my": {
        "ranks": None,
        "dids": select_cocktails_default_hgbt_linear_dids},

    # results vs tree
    "too_easy_(HGBT,_Autopytorch_default)_vs_(tree)_on_my": {
        "ranks": None,
        "dids": too_easy_autopytorch_default_hgbt_tree_dids},
    "select_(HGBT,_Autopytorch_default)_vs_(tree)_on_my": {
        "ranks": None,
        "dids": select_autopytorch_default_hgbt_tree_dids},
    "too_easy_(HGBT,_Cocktails_default)_vs_(tree)_on_my": {
        "ranks": None,
        "dids": too_easy_cocktails_default_hgbt_tree_dids},
    "select_(HGBT,_Cocktails_default)_vs_(tree)_on_my": {
        "ranks": None,
        "dids": select_cocktails_default_hgbt_tree_dids},
    
    # results vs both
    "too_easy_(HGBT,_Autopytorch_default)_vs_(tree,Logreg)_on_my": {
        "ranks": None,
        "dids": too_easy_autopytorch_default_hgbt_combined_dids},
    "select_(HGBT,_Autopytorch_default)_vs_(tree,Logreg)_on_my": {
        "ranks": None,
        "dids": select_autopytorch_default_hgbt_combined_dids},
    "too_easy_(HGBT,_Cocktails_default)_vs_(tree,Logreg)_on_my": {
        "ranks": None,
        "dids": too_easy_cocktails_default_hgbt_combined_dids},
    "select_(HGBT,_Cocktails_default)_vs_(tree,Logreg)_on_my": {
        "ranks": None,
        "dids": select_cocktails_default_hgbt_combined_dids},

    # results individual
    # vs tree
    "too_easy_(_Autopytorch_default)_vs_(tree)_on_my": {
        "ranks": None,
        "dids": too_easy_autopytorch_default_tree_dids},
    "select_(_Autopytorch_default)_vs_(tree)_on_my": {
        "ranks": None,
        "dids": select_autopytorch_default_tree_dids},
    "too_easy_(_Cocktails_default)_vs_(tree)_on_my": {
        "ranks": None,
        "dids": too_easy_cocktails_default_tree_dids},
    "select_(_Cocktails_default)_vs_(tree)_on_my": {
        "ranks": None,
        "dids": select_cocktails_default_tree_dids},
    # vs logreg
    "too_easy_(_Autopytorch_default)_vs_(Logreg)_on_my": {
        "ranks": None,
        "dids": too_easy_autopytorch_default_linear_dids},
    "select_(_Autopytorch_default)_vs_(Logreg)_on_my": {
        "ranks": None,
        "dids": select_autopytorch_default_linear_dids},
    "too_easy_(_Cocktails_default)_vs_(Logreg)_on_my": {
        "ranks": None,
        "dids": too_easy_cocktails_default_linear_dids},
    "select_(_Cocktails_default)_vs_(Logreg)_on_my": {
        "ranks": None,
        "dids": select_cocktails_default_linear_dids},

    # all combined
    "too_easy_(HGBT,_Autopytorch_default,_Cocktails_default)_vs_(tree,Logreg)_on_my": {
        "ranks": None,
        "dids": too_easy_autopytorch_default_cocktails_default_hgbt_combined_dids},
    "select_(HGBT,_Autopytorch_default,_Cocktails_default)_vs_(tree,Logreg)_on_my": {
        "ranks": None,
        "dids": select_autopytorch_default_cocktails_default_hgbt_combined_dids},

    # from previous tables
    "too_easy_(HGBT)_vs_(tree)_on_my": {
        "ranks": None,
        "dids": too_easy_hgbt_tree_dids},
    "select_(HGBT)_vs_(tree)_on_my": {
        "ranks": None,
        "dids": select_hgbt_tree_dids},
    "too_easy_(HGBT)_vs_(Logreg)_on_my": {
        "ranks": None,
        "dids": too_easy_hgbt_linear_dids},
    "select_(HGBT)_vs_(Logreg)_on_my": {
        "ranks": None,
        "dids": select_hgbt_linear_dids},
    "too_easy_(HGBT)_vs_(tree,Logreg)_on_my": {
        "ranks": None,
        "dids": too_easy_hgbt_combined_dids},
    "select_(HGBT)_vs_(tree,Logreg)_on_my": {
        "ranks": None,
        "dids": select_hgbt_combined_dids},
        }


In [68]:
# Report HGBT against logistic regression for every dataset group in ranks_df.
# "logistic" is read from the score_linear column by get_result. For each group
# the rank/score table is built, stored under 'ranks', and printed below a
# heading that shows the group label and its number of dataset ids.
methods = [ "hgbt", "logistic"] # , "resnet", "rf", "tree", "mlp",]
metrics = ["acc"]
for key in ranks_df:
    current_result = get_result(methods, metrics, ranks_df[key]['dids'], all_my_results.reset_index())
    current_ranks = get_average_rank_table(current_result)
    ranks_df[key]['ranks'] = current_ranks
    print(f"\n\n{key.upper().replace('_', ' ')} ({len(ranks_df[key]['dids'])})")
    # pprint rounds the frame in place, so the table stored above is rounded too.
    pprint(current_ranks)



TOO EASY (HGBT, AUTOPYTORCH DEFAULT) VS (LOGREG) ON MY (53)
|                |   hgbt |   logistic |
|:---------------|-------:|-----------:|
| ACC Mean Rank  | 1.3208 |     1.6792 |
| ACC Mean Score | 0.8614 |     0.8541 |


SELECT (HGBT, AUTOPYTORCH DEFAULT) VS (LOGREG) ON MY (18)
|                |   hgbt |   logistic |
|:---------------|-------:|-----------:|
| ACC Mean Rank  | 1      |     2      |
| ACC Mean Score | 0.7866 |     0.6984 |


TOO EASY (HGBT, COCKTAILS DEFAULT) VS (LOGREG) ON MY (55)
|                |   hgbt |   logistic |
|:---------------|-------:|-----------:|
| ACC Mean Rank  | 1.3091 |     1.6909 |
| ACC Mean Score | 0.8581 |     0.8502 |


SELECT (HGBT, COCKTAILS DEFAULT) VS (LOGREG) ON MY (16)
|                |   hgbt |   logistic |
|:---------------|-------:|-----------:|
| ACC Mean Rank  | 1      |     2      |
| ACC Mean Score | 0.7886 |     0.6924 |


TOO EASY (HGBT, AUTOPYTORCH DEFAULT) VS (TREE) ON MY (30)
|                |   hgbt |   logistic |
|:-

In [69]:
# Same report as the previous cell, now comparing all eight methods. This
# overwrites the 'ranks' entries stored by the two-method run.
methods = [ "hgbt", "logistic", "resnet", "rf", "tree", "mlp","autopytorch_default", "cocktails_default"]
metrics = ["acc"]
for key in ranks_df:
    current_result = get_result(methods, metrics, ranks_df[key]['dids'], all_my_results.reset_index())
    current_ranks = get_average_rank_table(current_result)
    ranks_df[key]['ranks'] = current_ranks
    print(f"\n\n{key.upper().replace('_', ' ')} ({len(ranks_df[key]['dids'])})")
    # pprint rounds the frame in place, so the table stored above is rounded too.
    pprint(current_ranks)



TOO EASY (HGBT, AUTOPYTORCH DEFAULT) VS (LOGREG) ON MY (53)
|                |   autopytorch_default |   cocktails_default |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|----------------------:|--------------------:|-------:|-----------:|-------:|---------:|-------:|-------:|
| ACC Mean Rank  |                4.2264 |              4.566  | 3.1132 |     4.5943 | 3.6509 |   5      | 3.5189 | 7.3302 |
| ACC Mean Score |                0.8566 |              0.8544 | 0.8614 |     0.8541 | 0.8578 |   0.8544 | 0.8579 | 0.8166 |


SELECT (HGBT, AUTOPYTORCH DEFAULT) VS (LOGREG) ON MY (18)
|                |   autopytorch_default |   cocktails_default |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|----------------------:|--------------------:|-------:|-----------:|-------:|---------:|-------:|-------:|
| ACC Mean Rank  |                4.0556 |              4.1111 | 2.1667 |     7.2778 | 3.9444 |   5.0556 | 2.5    | 6.8889 |
| A

In [70]:


def get_too_easy_select_acc_to_difference(df: pd.DataFrame, methods: list, stddev: float = 0.05):
    """Split the rows of ``df`` by how much the methods' scores differ.

    The standard deviation of the ``score_<method>`` columns is computed per row.
    Rows whose deviation is strictly below ``stddev`` are "too easy"; all other
    rows, including those whose deviation is NaN, are "select".

    Args:
        df: Frame with one row per dataset and a ``score_<method>`` column for
            every entry of ``methods``.
        methods: Method names without the ``score_`` prefix.
        stddev: Threshold on the per-row standard deviation.

    Returns:
        ``(too_easy_ids, select_ids)``: two lists of index labels of ``df``.
    """
    score_columns = ["score_" + name for name in methods]
    spread = df[score_columns].std(axis=1)
    is_too_easy = spread < stddev
    too_easy_ids = df.loc[is_too_easy].index.to_list()
    select_ids = df.loc[~is_too_easy].index.to_list()
    return too_easy_ids, select_ids


In [72]:
# Split the datasets by the per-row standard deviation across all eight score
# columns. The names here use "linear" (not "logistic") because the helper
# prefixes each one with "score_" to pick the column.
methods = [ "hgbt", "linear", "resnet", "rf", "tree", "mlp","autopytorch_default", "cocktails_default"]

too_easy_std_methods_dids, select_std_methods_dids = get_too_easy_select_acc_to_difference(all_my_results, methods=methods)
# Disabled experiment with a reduced method list, kept for reference:
# subset_methods = [ "hgbt", "linear", "rf", "tree", "mlp","autopytorch_default", "cocktails_default"]

# too_easy_autopytorch_default_linear_dids, select_autopytorch_default_linear_dids = get_too_easy_select_acc_to_difference(all_my_results, methods=methods)


In [74]:
# Step 1: split the datasets by the per-row standard deviation across all eight
# score columns (default threshold 0.05, which the labels below hard-code).
methods = [ "hgbt", "linear", "resnet", "rf", "tree", "mlp","autopytorch_default", "cocktails_default"]

too_easy_std_methods_dids, select_std_methods_dids = get_too_easy_select_acc_to_difference(all_my_results, methods=methods)

# Step 2: replace ranks_df with the two groups produced by the split.
ranks_df = {
    "std_<_0.05,_methods=all_(too_easy)": {
        "ranks": None,
        "dids": too_easy_std_methods_dids},
    "std_>_0.05,_methods=all_(select)": {
        "ranks": None,
        "dids": select_std_methods_dids},
        }
# Step 3: rank all methods within each group. `methods` is rebound here to the
# naming expected by get_result, which maps "logistic" to the score_linear column.
methods = [ "hgbt", "logistic", "resnet", "rf", "tree", "mlp","autopytorch_default", "cocktails_default"]
metrics = ["acc"]
for key in ranks_df:
    current_result = get_result(methods, metrics, ranks_df[key]['dids'], all_my_results.reset_index())
    current_ranks = get_average_rank_table(current_result)
    ranks_df[key]['ranks'] = current_ranks
    print(f"\n\n{key.upper().replace('_', ' ')} ({len(ranks_df[key]['dids'])})")
    # pprint rounds the frame in place, so the table stored above is rounded too.
    pprint(current_ranks)



STD < 0.05, METHODS=ALL (TOO EASY) (66)
|                |   autopytorch_default |   cocktails_default |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|----------------------:|--------------------:|-------:|-----------:|-------:|---------:|-------:|-------:|
| ACC Mean Rank  |                4.2121 |              4.6364 | 2.8788 |     5.2045 | 3.6591 |   5.0152 | 3.1742 | 7.2197 |
| ACC Mean Score |                0.8332 |              0.8308 | 0.8411 |     0.8214 | 0.834  |   0.8304 | 0.8386 | 0.7954 |


STD > 0.05, METHODS=ALL (SELECT) (5)
|                |   autopytorch_default |   cocktails_default |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|----------------------:|--------------------:|-------:|-----------:|-------:|---------:|-------:|-------:|
| ACC Mean Rank  |                3.8    |               2     | 2.8    |     6.2    | 4.6    |   5      | 4.4    | 7.2    |
| ACC Mean Score |                0.8426 |  

In [75]:
# Step 1: split the datasets by the per-row standard deviation across four score
# columns only: hgbt, linear, tree and autopytorch_default.
methods = [ "hgbt", "linear", "tree", "autopytorch_default"] # , "cocktails_default"]

too_easy_std_methods_dids, select_std_methods_dids = get_too_easy_select_acc_to_difference(all_my_results, methods=methods)

# Step 2: replace ranks_df with the two groups produced by the split.
ranks_df = {
    "std_<_0.05,_methods=hgbt,linear,tree,autopytorch_default_(too_easy)": {
        "ranks": None,
        "dids": too_easy_std_methods_dids},
    "std_>_0.05,_methods=hgbt,linear,tree,autopytorch_default_(select)": {
        "ranks": None,
        "dids": select_std_methods_dids},
        }
# Step 3: rank all eight methods within each group. `methods` is rebound to the
# naming expected by get_result, which maps "logistic" to the score_linear column.
methods = [ "hgbt", "logistic", "resnet", "rf", "tree", "mlp","autopytorch_default", "cocktails_default"]
metrics = ["acc"]
for key in ranks_df:
    current_result = get_result(methods, metrics, ranks_df[key]['dids'], all_my_results.reset_index())
    current_ranks = get_average_rank_table(current_result)
    ranks_df[key]['ranks'] = current_ranks
    print(f"\n\n{key.upper().replace('_', ' ')} ({len(ranks_df[key]['dids'])})")
    # pprint rounds the frame in place, so the table stored above is rounded too.
    pprint(current_ranks)



STD < 0.05, METHODS=HGBT,LINEAR,TREE,AUTOPYTORCH DEFAULT (TOO EASY) (62)
|                |   autopytorch_default |   cocktails_default |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|----------------------:|--------------------:|-------:|-----------:|-------:|---------:|-------:|-------:|
| ACC Mean Rank  |                4.2097 |              4.7097 | 2.8548 |     5.2016 | 3.6048 |   5.0161 | 3.1371 | 7.2661 |
| ACC Mean Score |                0.8305 |              0.8278 | 0.8378 |     0.8208 | 0.8318 |   0.8278 | 0.8356 | 0.794  |


STD > 0.05, METHODS=HGBT,LINEAR,TREE,AUTOPYTORCH DEFAULT (SELECT) (9)
|                |   autopytorch_default |   cocktails_default |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|----------------------:|--------------------:|-------:|-----------:|-------:|---------:|-------:|-------:|
| ACC Mean Rank  |                4      |              2.6667 | 3      |     5.7778 | 4.5556 |   5    

In [78]:
# Step 1: split the datasets by the per-row standard deviation across four score
# columns only: hgbt, linear, tree and cocktails_default.
subset_methods = [ "hgbt", "linear", "tree", "cocktails_default"]

too_easy_std_subset_methods_dids, select_std_subset_methods_dids = get_too_easy_select_acc_to_difference(all_my_results, methods=subset_methods)

# Step 2: replace ranks_df with the two groups produced by the split.
ranks_df = {
    "std_<_0.05,_methods=hgbt,linear,tree,cocktails_default_(too_easy)": {
        "ranks": None,
        "dids": too_easy_std_subset_methods_dids},
    "std_>_0.05,_methods=hgbt,linear,tree,cocktails_default_(select)": {
        "ranks": None,
        "dids": select_std_subset_methods_dids},
        }
# Step 3: rank all eight methods within each group ("logistic" is read from the
# score_linear column by get_result).
methods = [ "hgbt", "logistic", "resnet", "rf", "tree", "mlp","autopytorch_default", "cocktails_default"]
metrics = ["acc"]
for key in ranks_df:
    current_result = get_result(methods, metrics, ranks_df[key]['dids'], all_my_results.reset_index())
    current_ranks = get_average_rank_table(current_result)
    ranks_df[key]['ranks'] = current_ranks
    print(f"\n\n{key.upper().replace('_', ' ')} ({len(ranks_df[key]['dids'])})")
    # pprint rounds the frame in place, so the table stored above is rounded too.
    pprint(current_ranks)



STD < 0.05, METHODS=HGBT,LINEAR,TREE,COCKTAILS DEFAULT (TOO EASY) (62)
|                |   autopytorch_default |   cocktails_default |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|----------------------:|--------------------:|-------:|-----------:|-------:|---------:|-------:|-------:|
| ACC Mean Rank  |                4.2097 |              4.7097 | 2.8548 |     5.2016 | 3.6048 |   5.0161 | 3.1371 | 7.2661 |
| ACC Mean Score |                0.8305 |              0.8278 | 0.8378 |     0.8208 | 0.8318 |   0.8278 | 0.8356 | 0.794  |


STD > 0.05, METHODS=HGBT,LINEAR,TREE,COCKTAILS DEFAULT (SELECT) (9)
|                |   autopytorch_default |   cocktails_default |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|----------------------:|--------------------:|-------:|-----------:|-------:|---------:|-------:|-------:|
| ACC Mean Rank  |                4      |              2.6667 | 3      |     5.7778 | 4.5556 |   5      | 

In [None]:
# # full_datasets_info[~full_datasets_info["dataset_id"].isin(real_too_easy_dataset_ids)]["dataset_id"].astype(int).to_numpy()
# # full_datasets_info[~full_datasets_info["dataset_id"].isin(real_too_easy_dataset_ids)]["dataset_id"].astype(int).to_numpy()
# my_results = pd.read_csv("csv_files/new_too_easy_without_resnet_numerical.csv").fillna(0)

# full_datasets_info["score_hgbt"] = full_datasets_info["score_hbgt"].str.replace(',', '.').astype(float)
# full_datasets_info["score_linear"] = full_datasets_info["score_logistic"].str.replace(',', '.').astype(float)
# leo_results_required = full_datasets_info[full_datasets_info["dataset_id"].isin(real_too_easy_dataset_ids)][["score_hgbt", "score_linear"]].astype(float)
# leo_results_required = leo_results_required.reindex(range(leo_results_required.shape[0]))
# my_results_required = my_results[my_results["dataset_id"].isin(real_too_easy_dataset_ids)][["score_hgbt", "score_linear"]].astype(float)
# my_results_required = my_results_required.reindex(range(leo_results_required.shape[0]))
# (my_results_required['score_hgbt'] - 1.05 * my_results_required['score_linear'] > 0)
# diff = my_results_required - leo_results_required 
# leo_results_required.plot(kind="box")



# diff.median(axis=0)
# diff.plot(kind="box")
# plt.grid()