In [13]:
from src.reproduce_utils import Results
import pandas as pd

In [14]:
cocktails_default_df = pd.read_csv("csv_files/default_cocktails_results.csv").set_index('dataset_id')

In [17]:
def get_autopytorch_results_df(path):
    headers = ["metric", "dataset"]
    indices = [
    "method",

    "optimization_metric",
    "optimization_time",
    "eval_position",
    "split"
    ]
    return pd.read_csv(
        path,
        index_col=list(range(len(indices))),
        header=list(range(len(headers))),
    )



def get_result(methods, metrics, dataset_names, result_df):
    headers = ["metric", "dataset"]
    indices = [
        "method",
        "seed",
    ]
    columns = pd.MultiIndex.from_product([metrics, dataset_names], names=headers)
    index = pd.MultiIndex.from_product([methods, [1]], names=indices)
    df = pd.DataFrame(columns=columns, index=index)
    df.sort_index(inplace=True)
    for index, row in result_df.iterrows():
        for method in methods:
            if int(row["dataset_id"]) not in dataset_names:
                continue
            if "logistic" in method:
                score_method = "linear"
            else:
                score_method = method
            row_id = (method, 1)
            col = ("acc", row["dataset_id"])
            df.loc[row_id, col] = row[f"score_{score_method}"]
    return Results(df=df)

In [29]:
all_my_results = pd.read_csv("csv_files/all_my_results_remaining.csv", index_col="dataset_id").sort_index()

In [45]:
all_my_results["score_cocktails_default"] = cocktails_default_df['test_score']
# all_my_results["score_cocktails"] = cocktails_df['test_score']


In [33]:
def get_average_rank_table(results: Results):
    datasets = results.datasets
    metrics = sorted(results.metrics, reverse=True)
    # print(results.methods)
    df = results.df
    results_rank = {}
    results_score = {}
    for metric in metrics:
        if "time" in metric:
            continue
        metric_df = df[metric]
        dataset_rank_dfs = []
        dataset_mean_dfs = []
        for dataset in datasets:
            if dataset not in metric_df.columns:
                continue
            dataset_rank_df = metric_df[dataset].groupby('method').mean().rank(ascending=False)
            dataset_rank_dfs.append(dataset_rank_df)
            dataset_mean_dfs.append(metric_df[dataset])

        results_rank[metric.upper()] = pd.concat(dataset_rank_dfs).groupby("method").mean()
        
        results_score[metric.upper()] = pd.concat(dataset_mean_dfs).groupby("method").mean()
    score_df = pd.DataFrame(results_score).reset_index()
    rank_df = pd.DataFrame(results_rank).reset_index()
    final_table = rank_df.merge(score_df, on="method", suffixes=[" Mean Rank", " Mean Score"]).T
    final_table.columns = final_table.iloc[0]
    final_table = final_table.iloc[1:]
    return final_table

def pprint(df):
    for column in df:
        df[column] = df[column].astype('float').round(decimals=4)

    print(df.to_markdown())

In [34]:
def get_too_easy_select_acc_to_difference(df: pd.DataFrame, methods: list):
    std_datasets = df[methods].std(axis=1)

In [35]:
def get_too_easy_select_acc_to_criteria(df: pd.DataFrame, better_methods: list, worse_methods: list):
        
    lhs = df[better_methods].max(axis=1) if len(better_methods) > 1 else df[better_methods[0]]
    rhs = df[worse_methods].max(axis=1) if len(worse_methods) > 1 else df[worse_methods[0]]
    selection_criteria = lhs < 1.05 * rhs
    too_easy_on_selection_criteria = df.loc[selection_criteria].index.to_list()
    select_on_selection_criteria = df.loc[list(map(lambda x: not x, selection_criteria))].index.to_list()
    return too_easy_on_selection_criteria, select_on_selection_criteria

In [46]:
too_easy_cocktails_default_hgbt_linear_dids, select_cocktails_default_hgbt_linear_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt", "score_cocktails_default"], worse_methods=["score_linear"])
too_easy_cocktails_default_hgbt_tree_dids, select_cocktails_default_hgbt_tree_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt", "score_cocktails_default"], worse_methods=["score_tree"])
too_easy_cocktails_default_hgbt_combined_dids, select_cocktails_default_hgbt_combined_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt", "score_cocktails_default"], worse_methods=["score_tree", "score_linear"])
too_easy_hgbt_tree_dids, select_hgbt_tree_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt"], worse_methods=["score_tree"])
too_easy_hgbt_linear_dids, select_hgbt_linear_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt"], worse_methods=["score_linear"])
too_easy_hgbt_combined_dids, select_hgbt_combined_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt"], worse_methods=["score_tree", "score_linear"])
too_easy_resnet_tree_dids, select_resnet_tree_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_resnet"], worse_methods=["score_tree"])
too_easy_mlp_tree_dids, select_mlp_tree_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_mlp"], worse_methods=["score_tree"])
too_easy_cocktails_default_tree_dids, select_cocktails_default_tree_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_cocktails_default"], worse_methods=["score_tree"])
too_easy_cocktails_random_cocktails_default_hgbt_combined_dids, select_autopytorch_default_cocktails_default_hgbt_combined_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt", "score_cocktails_default", "score_cocktails_random"], worse_methods=["score_linear", "score_tree"])
too_easy_cocktails_default_linear_dids, select_cocktails_default_linear_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_cocktails_default"], worse_methods=["score_linear"])


In [39]:
local_vars = locals().copy()
all_dids = []
for local in local_vars:
    if 'select_' in local and '_dids' in local:
        all_dids.append(set(local_vars[local]))

In [40]:
to_run_dids = set().union(*all_dids) #- set(select_hgbt_linear_dids_their)

In [41]:
to_run_dids_other = {60, 151, 293, 354, 357, 720, 722, 734, 735, 737, 816, 819, 821, 833, 846, 847, 979, 993, 1044, 1053, 1119, 1120, 1222, 1242, 1461, 1489, 1507, 1590, 4134, 4541, 41147, 41150, 41162, 41168, 41671, 41972, 42206, 42343, 42395, 42468, 42477, 42742, 42769, 43489}


In [47]:
selected_datasets = [151, 293, 722, 821, 993, 1044, 1120, 1461, 1489, 41150, 41168, 42769, 44089, 44090, 44091]
final_benchmark_dataset_ids = [44120, 44121, 44122, 44123, 44124, 44125, 44126, 44127, 44128, 44129, 44130, 44131, 44089, 44090, 44091]

In [45]:
all_my_results.index

Int64Index([   44,    60,   151,   279,   293,   351,   354,   357,   720,
              722,   725,   734,   735,   737,   761,   803,   816,   819,
              821,   823,   833,   846,   847,   871,   976,   979,   993,
             1044,  1053,  1110,  1113,  1119,  1120,  1222,  1241,  1242,
             1461,  1476,  1477,  1478,  1486,  1489,  1503,  1507,  1526,
             1590,  4134,  4541, 23517, 40685, 40923, 41146, 41147, 41150,
            41162, 41163, 41164, 41166, 41168, 41169, 41671, 41972, 42206,
            42343, 42395, 42468, 42477, 42742, 42746, 42769, 43489, 44089,
            44090, 44091],
           dtype='int64', name='dataset_id')

In [48]:
autopytorch_cocktails_random_df = pd.read_csv('csv_files/refit_results.csv', index_col=0)

In [49]:
autopytorch_cocktails_random_df['dataset_id'] = autopytorch_cocktails_random_df['dataset_id'].astype(int).replace(dict(zip(final_benchmark_dataset_ids, selected_datasets)))
autopytorch_cocktails_random_df = autopytorch_cocktails_random_df.set_index('dataset_id')

In [50]:
all_my_results['score_cocktails_random'] = autopytorch_cocktails_random_df['test_score']


In [51]:
all_my_results.to_csv('csv_files/all_results_with_cocktail_random.csv')

In [52]:


ranks_df = {
    # results vs logreg
    # "too_easy_(HGBT,_Autopytorch_default)_vs_(Logreg)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_autopytorch_default_hgbt_linear_dids},
    "select_(HGBT,_Autopytorch_default)_vs_(Logreg)_on_my": {
        "ranks": None,
        "dids": select_autopytorch_default_hgbt_linear_dids},
    # "too_easy_(HGBT,_Cocktails_default)_vs_(Logreg)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_cocktails_default_hgbt_linear_dids},
    "select_(HGBT,_Cocktails_default)_vs_(Logreg)_on_my": {
        "ranks": None,
        "dids": select_cocktails_default_hgbt_linear_dids},

    # results vs tree
    # "too_easy_(HGBT,_Autopytorch_default)_vs_(tree)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_autopytorch_default_hgbt_tree_dids},
    "select_(HGBT,_Autopytorch_default)_vs_(tree)_on_my": {
        "ranks": None,
        "dids": select_autopytorch_default_hgbt_tree_dids},
    # "too_easy_(HGBT,_Cocktails_default)_vs_(tree)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_cocktails_default_hgbt_tree_dids},
    "select_(HGBT,_Cocktails_default)_vs_(tree)_on_my": {
        "ranks": None,
        "dids": select_cocktails_default_hgbt_tree_dids},
    
    # results vs both
    # "too_easy_(HGBT,_Autopytorch_default)_vs_(tree,Logreg)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_autopytorch_default_hgbt_combined_dids},
    "select_(HGBT,_Autopytorch_default)_vs_(tree,Logreg)_on_my": {
        "ranks": None,
        "dids": select_autopytorch_default_hgbt_combined_dids},
    # "too_easy_(HGBT,_Cocktails_default)_vs_(tree,Logreg)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_cocktails_default_hgbt_combined_dids},
    "select_(HGBT,_Cocktails_default)_vs_(tree,Logreg)_on_my": {
        "ranks": None,
        "dids": select_cocktails_default_hgbt_combined_dids},

    # results individual
    # vs tree
    # "too_easy_(_Autopytorch_default)_vs_(tree)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_autopytorch_default_tree_dids},
    "select_(_Autopytorch_default)_vs_(tree)_on_my": {
        "ranks": None,
        "dids": select_autopytorch_default_tree_dids},
    # "too_easy_(_Cocktails_default)_vs_(tree)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_cocktails_default_tree_dids},
    "select_(_Cocktails_default)_vs_(tree)_on_my": {
        "ranks": None,
        "dids": select_cocktails_default_tree_dids},
    # vs logreg
    # "too_easy_(_Autopytorch_default)_vs_(Logreg)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_autopytorch_default_linear_dids},
    "select_(_Autopytorch_default)_vs_(Logreg)_on_my": {
        "ranks": None,
        "dids": select_autopytorch_default_linear_dids},
    # "too_easy_(_Cocktails_default)_vs_(Logreg)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_cocktails_default_linear_dids},
    "select_(_Cocktails_default)_vs_(Logreg)_on_my": {
        "ranks": None,
        "dids": select_cocktails_default_linear_dids},

    # all combined
    # "too_easy_(HGBT,_Autopytorch_default,_Cocktails_default)_vs_(tree,Logreg)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_autopytorch_default_cocktails_default_hgbt_combined_dids},
    "select_(HGBT,_Autopytorch_default,_Cocktails_default)_vs_(tree,Logreg)_on_my": {
        "ranks": None,
        "dids": select_autopytorch_default_cocktails_default_hgbt_combined_dids},

    # from previous tables
    # "too_easy_(HGBT)_vs_(tree)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_hgbt_tree_dids},
    "select_(HGBT)_vs_(tree)_on_my": {
        "ranks": None,
        "dids": select_hgbt_tree_dids},
    # "too_easy_(HGBT)_vs_(Logreg)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_hgbt_linear_dids},
    "select_(HGBT)_vs_(Logreg)_on_my": {
        "ranks": None,
        "dids": select_hgbt_linear_dids},
    # "too_easy_(HGBT)_vs_(tree,Logreg)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_hgbt_combined_dids},
    "select_(HGBT)_vs_(tree,Logreg)_on_my": {
        "ranks": None,
        "dids": select_hgbt_combined_dids},
        }


In [53]:
methods = [ "hgbt", "logistic", "cocktails_random"] # , "resnet", "rf", "tree", "mlp",]
metrics = ["acc"]
for key in ranks_df:
    current_result = get_result(methods, metrics, ranks_df[key]['dids'], all_my_results.reset_index())
    current_ranks = get_average_rank_table(current_result)
    ranks_df[key]['ranks'] = current_ranks
    print(f"\n\n{key.upper().replace('_', ' ')} ({len(ranks_df[key]['dids'])})")
    pprint(current_ranks)



SELECT (HGBT, AUTOPYTORCH DEFAULT) VS (LOGREG) ON MY (21)
|                |   cocktails_random |   hgbt |   logistic |
|:---------------|-------------------:|-------:|-----------:|
| ACC Mean Rank  |             1.85   | 1.2857 |     2.8095 |
| ACC Mean Score |             0.7779 | 0.7929 |     0.7083 |


SELECT (HGBT, COCKTAILS DEFAULT) VS (LOGREG) ON MY (19)
|                |   cocktails_random |   hgbt |   logistic |
|:---------------|-------------------:|-------:|-----------:|
| ACC Mean Rank  |             1.8333 | 1.2632 |     2.8421 |
| ACC Mean Score |             0.7788 | 0.7952 |     0.7042 |


SELECT (HGBT, AUTOPYTORCH DEFAULT) VS (TREE) ON MY (44)
|                |   cocktails_random |   hgbt |   logistic |
|:---------------|-------------------:|-------:|-----------:|
| ACC Mean Rank  |             1.878  | 1.5909 |     2.4545 |
| ACC Mean Score |             0.7782 | 0.7781 |     0.7441 |


SELECT (HGBT, COCKTAILS DEFAULT) VS (TREE) ON MY (44)
|                |   coc

In [55]:
methods = [ "hgbt", "logistic", "resnet", "rf", "tree", "mlp","autopytorch_default", "cocktails_default", "cocktails_random"]
metrics = ["acc"]
for key in ranks_df:
    current_result = get_result(methods, metrics, ranks_df[key]['dids'], all_my_results.reset_index())
    current_ranks = get_average_rank_table(current_result)
    ranks_df[key]['ranks'] = current_ranks
    print(f"\n\n{key.upper().replace('_', ' ')} ({len(ranks_df[key]['dids'])})")
    pprint(current_ranks)



SELECT (HGBT, AUTOPYTORCH DEFAULT) VS (LOGREG) ON MY (21)
|                |   autopytorch_default |   cocktails_default |   cocktails_random |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|----------------------:|--------------------:|-------------------:|-------:|-----------:|-------:|---------:|-------:|-------:|
| ACC Mean Rank  |                4.5    |              4.9048 |             4.6    | 2.2857 |     8      | 4.3333 |   5.4762 | 2.6667 | 7.381  |
| ACC Mean Score |                0.7668 |              0.7764 |             0.7779 | 0.7929 |     0.7083 | 0.7683 |   0.7552 | 0.787  | 0.7253 |


SELECT (HGBT, COCKTAILS DEFAULT) VS (LOGREG) ON MY (19)
|                |   autopytorch_default |   cocktails_default |   cocktails_random |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|----------------------:|--------------------:|-------------------:|-------:|-----------:|-------:|---------:|-------:|-------:|
| ACC 

In [68]:


def get_too_easy_select_acc_to_difference(df: pd.DataFrame, methods: list, stddev: float = 0.05):
    std_datasets = df[[f"score_{method}" for method in methods]].std(axis=1)
    selection_criteria = std_datasets < stddev
    too_easy_on_selection_criteria = df.loc[selection_criteria].index.to_list()
    select_on_selection_criteria = df.loc[list(map(lambda x: not x, selection_criteria))].index.to_list()
    return too_easy_on_selection_criteria, select_on_selection_criteria


In [69]:
methods = [ "hgbt", "linear", "resnet", "rf", "tree", "mlp","autopytorch_default", "cocktails_default", "cocktails_random"]

too_easy_std_methods_dids, select_std_methods_dids = get_too_easy_select_acc_to_difference(all_my_results, methods=methods)
# subset_methods = [ "hgbt", "linear", "rf", "tree", "mlp","autopytorch_default", "cocktails_default"]

# too_easy_autopytorch_default_linear_dids, select_autopytorch_default_linear_dids = get_too_easy_select_acc_to_difference(all_my_results, methods=methods)


In [72]:
methods = [ "hgbt", "linear", "resnet", "rf", "tree", "mlp","autopytorch_default", "cocktails_default", "cocktails_random"]

too_easy_std_methods_dids, select_std_methods_dids = get_too_easy_select_acc_to_difference(all_my_results, methods=methods)

ranks_df = {
    "std_<_0.05,_methods=all_(too_easy)": {
        "ranks": None,
        "dids": too_easy_std_methods_dids},
    "std_>_0.05,_methods=all_(select)": {
        "ranks": None,
        "dids": select_std_methods_dids},
        }
methods = [ "hgbt", "logistic", "resnet", "rf", "tree", "mlp","autopytorch_default", "cocktails_default", "cocktails_random"]
metrics = ["acc"]
for key in ranks_df:
    current_result = get_result(methods, metrics, ranks_df[key]['dids'], all_my_results.reset_index())
    current_ranks = get_average_rank_table(current_result)
    ranks_df[key]['ranks'] = current_ranks
    print(f"\n\n{key.upper().replace('_', ' ')} ({len(ranks_df[key]['dids'])})")
    pprint(current_ranks)



STD < 0.05, METHODS=ALL (TOO EASY) (66)
|                |   autopytorch_default |   cocktails_default |   cocktails_random |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|----------------------:|--------------------:|-------------------:|-------:|-----------:|-------:|---------:|-------:|-------:|
| ACC Mean Rank  |                4.4921 |              5.0159 |             4.2703 | 3.0909 |     5.5682 | 3.8409 |   5.2576 | 3.4015 | 7.6439 |
| ACC Mean Score |                0.8373 |              0.8348 |             0.7812 | 0.8429 |     0.824  | 0.8367 |   0.8331 | 0.8407 | 0.7978 |


STD > 0.05, METHODS=ALL (SELECT) (8)
|                |   autopytorch_default |   cocktails_default |   cocktails_random |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|----------------------:|--------------------:|-------------------:|-------:|-----------:|-------:|---------:|-------:|-------:|
| ACC Mean Rank  |                4.5    | 

In [73]:
methods = [ "hgbt", "linear", "tree", "autopytorch_default"] # , "cocktails_default"]

too_easy_std_methods_dids, select_std_methods_dids = get_too_easy_select_acc_to_difference(all_my_results, methods=methods)

ranks_df = {
    "std_<_0.05,_methods=hgbt,linear,tree,autopytorch_default_(too_easy)": {
        "ranks": None,
        "dids": too_easy_std_methods_dids},
    "std_>_0.05,_methods=hgbt,linear,tree,autopytorch_default_(select)": {
        "ranks": None,
        "dids": select_std_methods_dids},
        }
methods = [ "hgbt", "logistic", "resnet", "rf", "tree", "mlp","autopytorch_default", "cocktails_default", "cocktails_random"]
metrics = ["acc"]
for key in ranks_df:
    current_result = get_result(methods, metrics, ranks_df[key]['dids'], all_my_results.reset_index())
    current_ranks = get_average_rank_table(current_result)
    ranks_df[key]['ranks'] = current_ranks
    print(f"\n\n{key.upper().replace('_', ' ')} ({len(ranks_df[key]['dids'])})")
    pprint(current_ranks)



STD < 0.05, METHODS=HGBT,LINEAR,TREE,AUTOPYTORCH DEFAULT (TOO EASY) (65)
|                |   autopytorch_default |   cocktails_default |   cocktails_random |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|----------------------:|--------------------:|-------------------:|-------:|-----------:|-------:|---------:|-------:|-------:|
| ACC Mean Rank  |                4.4839 |              5.0484 |             4.5    | 2.9692 |     5.6231 | 3.8077 |   5.2308 | 3.3    | 7.6846 |
| ACC Mean Score |                0.8305 |              0.8278 |             0.7669 | 0.8374 |     0.8183 | 0.8305 |   0.8266 | 0.8352 | 0.7924 |


STD > 0.05, METHODS=HGBT,LINEAR,TREE,AUTOPYTORCH DEFAULT (SELECT) (9)
|                |   autopytorch_default |   cocktails_default |   cocktails_random |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|----------------------:|--------------------:|-------------------:|-------:|-----------:|-------:|-------

In [74]:
def get_average_rank_table(results: Results):
    datasets = results.datasets
    metrics = sorted(results.metrics, reverse=True)
    # print(results.methods)
    df = results.df
    results_rank = {}
    results_score = {}
    for metric in metrics:
        if "time" in metric:
            continue
        metric_df = df[metric]
        dataset_rank_dfs = []
        dataset_mean_dfs = []
        for dataset in datasets:
            if dataset not in metric_df.columns:
                continue
            dataset_rank_df = metric_df[dataset].groupby('method').mean().rank(ascending=False)
            dataset_rank_dfs.append(dataset_rank_df)
            dataset_mean_dfs.append(metric_df[dataset])

        results_rank[metric.upper()] = pd.concat(dataset_rank_dfs).groupby("method").mean()
        
        results_score[metric.upper()] = pd.concat(dataset_mean_dfs).groupby("method").mean()
    score_df = pd.DataFrame(results_score).reset_index()
    rank_df = pd.DataFrame(results_rank).reset_index()
    final_table = rank_df.merge(score_df, on="method", suffixes=[" Mean Rank", " Mean Score"]).T
    final_table.columns = final_table.iloc[0]
    final_table = final_table.iloc[1:]
    return final_table

In [75]:
current_result.df

Unnamed: 0_level_0,metric,acc,acc,acc,acc,acc,acc,acc,acc,acc
Unnamed: 0_level_1,dataset,151,293,354,722,846,1489,1507,41972,42395
method,seed,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
autopytorch_default,1,0.800914,0.814311,0.734622,0.97708,0.879615,0.855392,0.96545,0.937861,0.749352
cocktails_default,1,0.812402,0.841367,0.946122,0.986116,0.869222,0.858894,0.970708,0.915446,0.7407
cocktails_random,1,0.818778,0.849633,0.804133,0.909421,0.880156,0.770533,0.973862,0.937417,0.69511
hgbt,1,0.866601,0.818973,0.795073,0.98519,0.861059,0.873739,0.969356,0.911851,0.787133
logistic,1,0.740939,0.616487,0.4995,0.856595,0.869698,0.737395,0.976476,0.878562,0.77498
mlp,1,0.787723,0.784513,0.729213,0.985785,0.877363,0.839496,0.969716,0.918509,0.719106
resnet,1,0.789512,0.824347,0.77698,0.695339,0.872686,0.85105,0.966381,0.907324,0.730068
rf,1,0.862358,0.82788,0.75254,0.981686,0.826892,0.883193,0.968905,0.866445,0.740284
tree,1,0.834464,0.764407,0.653913,0.972231,0.753751,0.833613,0.84146,0.766977,0.580037


In [77]:
cocktails_success_dataset_ids = [151, 293, 722, 41150, 993, 1120, 1489, 821, 1461, 1044, 41168]
methods = [ "hgbt", "logistic", "resnet", "rf", "tree", "mlp","autopytorch_default", "cocktails_default", "cocktails_random"]
metrics = ["acc"]
current_result = get_result(methods, metrics, cocktails_success_dataset_ids, all_my_results.reset_index())
# current_ranks = get_average_rank_table(current_result)
# print(f"\n\n{key.upper().replace('_', ' ')} ({len(ranks_df[key]['dids'])})")
print(current_ranks)

method          autopytorch_default  cocktails_default  cocktails_random  \
ACC Mean Rank                4.5556             3.2222            3.8889   
ACC Mean Score               0.8572             0.8823            0.8488   

method            hgbt  logistic     mlp  resnet      rf    tree  
ACC Mean Rank   3.5556    6.5556  5.2222  5.7778  4.6667  7.5556  
ACC Mean Score  0.8743    0.7723  0.8457  0.8237  0.8567  0.7779  


In [79]:
subset_methods = [ "hgbt", "linear", "tree", "cocktails_default"]

too_easy_std_subset_methods_dids, select_std_subset_methods_dids = get_too_easy_select_acc_to_difference(all_my_results, methods=subset_methods)

ranks_df = {
    "std_<_0.05,_methods=hgbt,linear,tree,cocktails_default_(too_easy)": {
        "ranks": None,
        "dids": too_easy_std_subset_methods_dids},
    "std_>_0.05,_methods=hgbt,linear,tree,cocktails_default_(select)": {
        "ranks": None,
        "dids": select_std_subset_methods_dids},
        }
methods = [ "hgbt", "logistic", "resnet", "rf", "tree", "mlp","autopytorch_default", "cocktails_default", "cocktails_random"]
metrics = ["acc"]
for key in ranks_df:
    current_result = get_result(methods, metrics, ranks_df[key]['dids'], all_my_results.reset_index())
    current_ranks = get_average_rank_table(current_result)
    ranks_df[key]['ranks'] = current_ranks
    print(f"\n\n{key.upper().replace('_', ' ')} ({len(ranks_df[key]['dids'])})")
    pprint(current_ranks)



STD < 0.05, METHODS=HGBT,LINEAR,TREE,COCKTAILS DEFAULT (TOO EASY) (65)
|                |   autopytorch_default |   cocktails_default |   cocktails_random |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|----------------------:|--------------------:|-------------------:|-------:|-----------:|-------:|---------:|-------:|-------:|
| ACC Mean Rank  |                4.4839 |              5.0484 |             4.5    | 2.9692 |     5.6231 | 3.8077 |   5.2308 | 3.3    | 7.6846 |
| ACC Mean Score |                0.8305 |              0.8278 |             0.7669 | 0.8374 |     0.8183 | 0.8305 |   0.8266 | 0.8352 | 0.7924 |


STD > 0.05, METHODS=HGBT,LINEAR,TREE,COCKTAILS DEFAULT (SELECT) (9)
|                |   autopytorch_default |   cocktails_default |   cocktails_random |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|----------------------:|--------------------:|-------------------:|-------:|-----------:|-------:|---------:|

In [80]:
# # full_datasets_info[~full_datasets_info["dataset_id"].isin(real_too_easy_dataset_ids)]["dataset_id"].astype(int).to_numpy()
# # full_datasets_info[~full_datasets_info["dataset_id"].isin(real_too_easy_dataset_ids)]["dataset_id"].astype(int).to_numpy()
# my_results = pd.read_csv("csv_files/new_too_easy_without_resnet_numerical.csv").fillna(0)

# full_datasets_info["score_hgbt"] = full_datasets_info["score_hbgt"].str.replace(',', '.').astype(float)
# full_datasets_info["score_linear"] = full_datasets_info["score_logistic"].str.replace(',', '.').astype(float)
# leo_results_required = full_datasets_info[full_datasets_info["dataset_id"].isin(real_too_easy_dataset_ids)][["score_hgbt", "score_linear"]].astype(float)
# leo_results_required = leo_results_required.reindex(range(leo_results_required.shape[0]))
# my_results_required = my_results[my_results["dataset_id"].isin(real_too_easy_dataset_ids)][["score_hgbt", "score_linear"]].astype(float)
# my_results_required = my_results_required.reindex(range(leo_results_required.shape[0]))
# (my_results_required['score_hgbt'] - 1.05 * my_results_required['score_linear'] > 0)
# diff = my_results_required - leo_results_required 
# leo_results_required.plot(kind="box")



# diff.median(axis=0)
# diff.plot(kind="box")
# plt.grid()