In [1]:
import pandas as pd
import numpy as np
from src.reproduce_utils import Results

In [2]:
def get_result(methods, metrics, dataset_names, result_df):
    """Arrange per-dataset scores into the table layout wrapped by ``Results``.

    The table has one row per ``(method, seed)`` pair and one column per
    ``(metric, dataset)`` pair. Cells that are never filled stay NaN.

    Parameters
    ----------
    methods : list of str
        Row labels. Each method reads the ``score_<method>`` column of
        ``result_df``, except that a method whose name contains "logistic"
        reads ``score_linear``.
    metrics : list of str
        Labels for the first column level. Scores are always written under
        the "acc" metric.
    dataset_names : collection
        Dataset ids to keep; rows of ``result_df`` with another
        ``dataset_id`` are skipped.
    result_df : pandas.DataFrame
        One row per dataset, with a ``dataset_id`` column and the
        ``score_*`` columns.

    Returns
    -------
    Results
        Built as ``Results(df=table)``. Every row uses the single
        placeholder seed ``1``.
    """
    headers = ["metric", "dataset"]
    indices = ["method", "seed"]
    columns = pd.MultiIndex.from_product([metrics, dataset_names], names=headers)
    row_index = pd.MultiIndex.from_product([methods, [1]], names=indices)
    df = pd.DataFrame(columns=columns, index=row_index)
    df.sort_index(inplace=True)
    for _, row in result_df.iterrows():
        # Normalise the id once so the membership test and the column key
        # agree (iterrows can hand back a float for an integer column).
        dataset_id = int(row["dataset_id"])
        if dataset_id not in dataset_names:
            continue
        for method in methods:
            # Logistic regression scores are stored in the "score_linear" column.
            score_method = "linear" if "logistic" in method else method
            df.loc[(method, 1), ("acc", dataset_id)] = row[f"score_{score_method}"]
    return Results(df=df)

In [3]:
def get_average_rank_table(results: Results):
    """Summarise ``results`` as mean rank and mean score per method.

    For every metric whose name does not contain "time", each dataset column
    is averaged per method and ranked with the highest mean first. Those
    ranks are then averaged over datasets, and the raw scores are averaged
    per method over all datasets.

    Returns a DataFrame with one column per method and, for each metric, a
    "<METRIC> Mean Rank" row and a "<METRIC> Mean Score" row.
    """
    dataset_ids = results.datasets
    metric_names = sorted(results.metrics, reverse=True)
    score_table = results.df

    mean_rank_by_metric = {}
    mean_score_by_metric = {}
    for metric in metric_names:
        if "time" in metric:
            continue
        per_metric = score_table[metric]
        # Only datasets that actually have a column under this metric.
        available = [dataset for dataset in dataset_ids if dataset in per_metric.columns]
        per_dataset_ranks = [
            per_metric[dataset].groupby('method').mean().rank(ascending=False)
            for dataset in available
        ]
        per_dataset_scores = [per_metric[dataset] for dataset in available]

        label = metric.upper()
        mean_rank_by_metric[label] = pd.concat(per_dataset_ranks).groupby("method").mean()
        mean_score_by_metric[label] = pd.concat(per_dataset_scores).groupby("method").mean()

    score_df = pd.DataFrame(mean_score_by_metric).reset_index()
    rank_df = pd.DataFrame(mean_rank_by_metric).reset_index()
    merged = rank_df.merge(score_df, on="method", suffixes=[" Mean Rank", " Mean Score"])

    # Transpose so methods become columns; the first transposed row holds the
    # method names, so promote it to the header and drop it from the body.
    transposed = merged.T
    transposed.columns = transposed.iloc[0]
    return transposed.iloc[1:]

def pprint(df):
    """Round every column of ``df`` to 4 decimals in place and print it as a markdown table.

    Note that ``df`` itself is modified: each column is cast to float and rounded.
    """
    for name in df.columns:
        as_float = df[name].astype('float')
        df[name] = as_float.round(decimals=4)

    markdown_table = df.to_markdown()
    print(markdown_table)

In [10]:
def get_too_easy_select_acc_to_criteria(df: pd.DataFrame, better_methods: list, worse_methods: list,
                                        threshold: float = 1.05):
    """Split the rows of ``df`` into "too easy" and "select" by a score-ratio rule.

    A row is "too easy" when the best score among ``better_methods`` is
    strictly below ``threshold`` times the best score among ``worse_methods``.
    Every other row is "select"; this includes rows where the comparison is
    False because a score is NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per dataset; the index values are what gets returned.
    better_methods, worse_methods : list of str
        Non-empty lists of score column names. With more than one column the
        row-wise maximum is used.
    threshold : float, default 1.05
        Multiplier applied to the worse-method score.

    Returns
    -------
    tuple of (list, list)
        ``(too_easy_index_values, select_index_values)``, each in the row
        order of ``df``.
    """
    lhs = df[better_methods].max(axis=1) if len(better_methods) > 1 else df[better_methods[0]]
    rhs = df[worse_methods].max(axis=1) if len(worse_methods) > 1 else df[worse_methods[0]]
    is_too_easy = lhs < threshold * rhs
    too_easy_on_selection_criteria = df.loc[is_too_easy].index.to_list()
    # The comparison yields a boolean mask, so "~" is its exact complement.
    select_on_selection_criteria = df.loc[~is_too_easy].index.to_list()
    return too_easy_on_selection_criteria, select_on_selection_criteria

In [11]:
# Load the benchmark's dataset sheet. The two score columns are stored as text
# with a decimal comma, so convert them to floats under the names used later
# ("score_hbgt" is the sheet's own spelling).
full_datasets_info = pd.read_csv("csv_files/Datasets tabular data benchmark - numerical_classif-3.csv")
full_datasets_info["score_hgbt"] = full_datasets_info["score_hbgt"].str.replace(',', '.').astype(float)
full_datasets_info["score_linear"] = full_datasets_info["score_logistic"].str.replace(',', '.').astype(float)
full_datasets_info = full_datasets_info.fillna(0)

real_run_dataset_ids = []
too_easy_dids = []
not_too_easy_dids = []
prefix_to_skip = ["BNG", "RandomRBF", "GTSRB", "CovPokElec", "PCam"]
for index, row in full_datasets_info.iterrows():
    # Skip rows without an id and rows flagged as removed or redundant.
    if int(row["dataset_id"]) == 0 or int(row["Remove"]) == 1 or row["Redundant"] == 1:
        continue
    dataset_name = row["dataset_name"]
    # Skip datasets excluded by name prefix, by name keyword, or by the single hard-coded id.
    if (dataset_name.startswith(tuple(prefix_to_skip))
            or any(keyword in dataset_name.lower() for keyword in ("mnist", "image", "cifar"))
            or row["dataset_id"] == 1414):
        continue
    # A zero HGBT score means the value was missing (filled by fillna above) or is literally 0.
    if row["score_hgbt"] == 0:
        continue
    dataset_id = int(row["dataset_id"])
    real_run_dataset_ids.append(dataset_id)
    if int(row["too_easy"]) == 1:
        too_easy_dids.append(dataset_id)
    else:
        not_too_easy_dids.append(dataset_id)
        # print(row["dataset_id"])
# real_too_easy_dataset_ids


In [12]:
# Number of datasets that survived the filtering loop.
len(real_run_dataset_ids)

71

In [13]:

# Keep only the datasets selected for the run and key the table by integer dataset_id.
run_their_results = (
    full_datasets_info
    .loc[full_datasets_info["dataset_id"].isin(real_run_dataset_ids)]
    .astype({'dataset_id': int})
    .set_index("dataset_id")
)

In [14]:
# Split the datasets using the scores from the benchmark sheet: "too easy" when
# the HGBT score is below 1.05x the linear score, "select" otherwise.
too_easy_hgbt_linear_dids_their, select_hgbt_linear_dids_their = get_too_easy_select_acc_to_criteria(run_their_results, better_methods=["score_hgbt"], worse_methods=["score_linear"])


In [10]:
# Rank HGBT against logistic regression on the scores from the benchmark sheet,
# separately for the "too easy" and "select" dataset groups.
methods = ["hgbt", "logistic"]  # , "resnet", "rf", "tree", "mlp",]
metrics = ["acc"]
ranks_df_their = {
    "too_easy_(HGBT)_vs_(Logreg)_on_their": {"ranks": None, "dids": too_easy_hgbt_linear_dids_their},
    "select_(HGBT)_vs_(Logreg)_on_their": {"ranks": None, "dids": select_hgbt_linear_dids_their},
}
for key, entry in ranks_df_their.items():
    current_result = get_result(methods, metrics, entry['dids'], run_their_results.reset_index())
    current_ranks = get_average_rank_table(current_result)
    entry['ranks'] = current_ranks
    print(f"\n\n{key.upper().replace('_', ' ')} ({len(entry['dids'])})")
    pprint(current_ranks)



TOO EASY (HGBT) VS (LOGREG) ON THEIR (58)
|                |   hgbt |   logistic |
|:---------------|-------:|-----------:|
| ACC Mean Rank  | 1.3707 |     1.6293 |
| ACC Mean Score | 0.8391 |     0.8334 |


SELECT (HGBT) VS (LOGREG) ON THEIR (13)
|                |   hgbt |   logistic |
|:---------------|-------:|-----------:|
| ACC Mean Rank  | 1      |     2      |
| ACC Mean Score | 0.7966 |     0.7149 |


In [21]:
# Same dataset groups as derived from the benchmark sheet, but scored with
# `all_my_results`.
# NOTE(review): `all_my_results` is only assigned in a cell further down the
# notebook, so this cell fails on a fresh top-to-bottom run. It also rebinds
# `ranks_df_their`, discarding the table built from the sheet's own scores.
methods = ["hgbt", "logistic"]  # , "resnet", "rf", "tree", "mlp",]
metrics = ["acc"]
ranks_df_their = {
    "too_easy_(HGBT)_vs_(Logreg)_on_their_results": {"ranks": None, "dids": too_easy_hgbt_linear_dids_their},
    "select_(HGBT)_vs_(Logreg)_on_their_results": {"ranks": None, "dids": select_hgbt_linear_dids_their},
}
for key, entry in ranks_df_their.items():
    current_result = get_result(methods, metrics, entry['dids'], all_my_results.reset_index())
    current_ranks = get_average_rank_table(current_result)
    entry['ranks'] = current_ranks
    print(f"\n\n{key.upper().replace('_', ' ')} ({len(entry['dids'])})")
    pprint(current_ranks)



TOO EASY (HGBT) VS (LOGREG) ON THEIR RESULTS (58)
|                |   hgbt |   logistic |
|:---------------|-------:|-----------:|
| ACC Mean Rank  | 1.2931 |     1.7069 |
| ACC Mean Score | 0.8444 |     0.8354 |


SELECT (HGBT) VS (LOGREG) ON THEIR RESULTS (13)
|                |   hgbt |   logistic |
|:---------------|-------:|-----------:|
| ACC Mean Rank  | 1      |      2     |
| ACC Mean Score | 0.8337 |      0.722 |


In [6]:
# Load my own runs on the "too easy" datasets. The ResNet scores come in two
# files that are stacked, and the HGBT scores come from a separate file.
too_easy_without_resnet = pd.read_csv("csv_files/new_too_easy_without_resnet_numerical.csv", index_col=None)
too_easy_resnet = pd.read_csv("csv_files/new_too_easy_resnet_numerical_60_0.csv", index_col=None)
too_easy_resnet_2 = pd.read_csv("csv_files/new_too_easy_resnet_numerical_60_1.csv", index_col=None)
too_easy_resnet = pd.concat([too_easy_resnet, too_easy_resnet_2])
too_easy_hgbt = pd.read_csv("csv_files/too_easy_hgbt_numerical_with_preprocessing.csv")

# Key everything by dataset_id so the column assignments below align on it.
# NOTE(review): assumes dataset_id is unique within each source - confirm.
too_easy_results = too_easy_without_resnet.set_index("dataset_id").copy()
too_easy_results["score_resnet"] = too_easy_resnet.set_index("dataset_id")["score_resnet"]

# Sets score_hgbt from the dedicated HGBT file (replacing the column if it already exists).
too_easy_results["score_hgbt"] = too_easy_hgbt.set_index("dataset_id")["score_hgbt"]

# ResNet scores for the other datasets: drop rows without a score, stack both
# files and keep the first row per dataset_id (the "remaining" file wins).
remaining_resuls_resnet = pd.read_csv("csv_files/remaining_resnet_numerical_with_preprocessing.csv", index_col=None).dropna(subset="score_resnet")
not_too_easy_results_resnet = pd.read_csv("csv_files/not_too_easy_resnet_with_preprocesssing.csv", index_col=None).dropna(subset="score_resnet")
remaining_resnet_results = pd.concat([remaining_resuls_resnet, not_too_easy_results_resnet]).drop_duplicates(subset='dataset_id')


In [7]:
# Non-ResNet scores for the other datasets; rows without an HGBT score are dropped.
remaining_resuls = pd.read_csv("csv_files/remaining_without_resnet_numerical_with_preprocessing_.csv", index_col=None).dropna(subset="score_hgbt")
not_too_easy_results = pd.read_csv("csv_files/not_too_easy_without_resnet_numerical_with_preprocessing.csv", index_col=None).dropna(subset="score_hgbt")

In [8]:
# Discard ResNet rows for datasets that have no matching row in `remaining_resuls`.
drop_ids = set(remaining_resnet_results['dataset_id'].to_list()) - set(remaining_resuls['dataset_id'].to_list())
remaining_resnet_results = remaining_resnet_results[~remaining_resnet_results['dataset_id'].isin(drop_ids)]

In [9]:
# Re-key both tables by dataset_id and attach the ResNet score by index alignment.
# NOTE: these rebinding assignments are not re-runnable - after the first run
# 'dataset_id' is the index, so set_index('dataset_id') raises KeyError.
remaining_resnet_results = remaining_resnet_results.set_index('dataset_id')
remaining_resuls = remaining_resuls.set_index('dataset_id')
remaining_resuls['score_resnet'] = remaining_resnet_results['score_resnet']

In [12]:
# Combine my runs on both dataset groups, keep only datasets that have a score
# for every method, and restrict to datasets that are also in `run_their_results`.
methods = ["hgbt", "linear", "rf", "tree", "mlp", "resnet"]

subset = [f"score_{method}" for method in methods]
all_my_results = pd.concat([too_easy_results, remaining_resuls])
all_my_results = all_my_results.dropna(subset=subset)
# Passing a set to .loc is deprecated (and rejected by pandas 2.x), so
# materialise the shared ids as a list before indexing.
shared_dataset_ids = list(set(all_my_results.index) & set(run_their_results.index))
all_my_results = all_my_results.loc[shared_dataset_ids]


  all_my_results = pd.concat([too_easy_results, remaining_resuls])
  all_my_results = all_my_results.loc[set(all_my_results.index) & set(run_their_results.index)]


In [14]:
# Save the combined table without the "Unnamed: 0" column
# (presumably an index column saved into the source CSVs - confirm).
all_my_results.drop("Unnamed: 0", axis=1).to_csv("csv_files/all_my_results_remaining.csv", index=True)

In [31]:
# Split the datasets on my own results under several definitions of "too easy".
# Each call compares the best score among `better_methods` against 1.05x the
# best score among `worse_methods`.
too_easy_linear_dids, select_linear_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt", "score_resnet"], worse_methods=["score_linear"])
too_easy_tree_dids, select_tree_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt", "score_resnet"], worse_methods=["score_tree"])
too_easy_combined_dids, select_combined_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt", "score_resnet"], worse_methods=["score_tree", "score_linear"])
too_easy_hgbt_tree_dids, select_hgbt_tree_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt"], worse_methods=["score_tree"])
too_easy_hgbt_linear_dids, select_hgbt_linear_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt"], worse_methods=["score_linear"])
too_easy_hgbt_combined_dids, select_hgbt_combined_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_hgbt"], worse_methods=["score_tree", "score_linear"])
too_easy_resnet_tree_dids, select_resnet_tree_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_resnet"], worse_methods=["score_tree"])
too_easy_mlp_tree_dids, select_mlp_tree_dids = get_too_easy_select_acc_to_criteria(all_my_results, better_methods=["score_mlp"], worse_methods=["score_tree"])


In [17]:
# Full method list and the single metric for the ranking tables.
# NOTE: cells further down reassign `methods` before using it.
methods = [ "hgbt", "logistic", "resnet", "rf", "tree", "mlp",]
metrics = ["acc"]
# too_easy_linear_on_my_result = get_result(methods, metrics, too_easy_linear_dids, all_my_results.reset_index())
# too_easy_linear_on_my_ranks = get_average_rank_table(too_easy_linear_on_my_result)

In [35]:
# Dataset groups to build ranking tables for. Each entry holds the dataset ids
# ("dids") and a "ranks" slot that the ranking loops fill in. Only the
# (MLP) vs (tree) split is active; the other splits are kept commented out.
ranks_df = {
    # "too_easy_(HGBT,_Resnet)_vs_(Logreg)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_linear_dids},
    # "select_(HGBT,_Resnet)_vs_(Logreg)_on_my": {
    #     "ranks": None,
    #     "dids": select_linear_dids},
    # "too_easy_(HGBT,_Resnet)_vs_(tree)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_tree_dids},
    # "select_(HGBT,_Resnet)_vs_(tree)_on_my": {
    #     "ranks": None,
    #     "dids": select_tree_dids},
    # "too_easy_(HGBT,_Resnet)_vs_(tree,Logreg)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_combined_dids},
    # "select_(HGBT,_Resnet)_vs_(tree,Logreg)_on_my": {
    #     "ranks": None,
    #     "dids": select_combined_dids},
    # "too_easy_(Resnet)_vs_(tree)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_resnet_tree_dids},
    # "select_(Resnet)_vs_(tree)_on_my": {
    #     "ranks": None,
    #     "dids": select_resnet_tree_dids},
    "too_easy_(MLP)_vs_(tree)_on_my": {
        "ranks": None,
        "dids": too_easy_mlp_tree_dids},
    "select_(MLP)_vs_(tree)_on_my": {
        "ranks": None,
        "dids": select_mlp_tree_dids},
    # "too_easy_(HGBT)_vs_(tree)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_hgbt_tree_dids},
    # "select_(HGBT)_vs_(tree)_on_my": {
    #     "ranks": None,
    #     "dids": select_hgbt_tree_dids},
    # "too_easy_(HGBT)_vs_(Logreg)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_hgbt_linear_dids},
    # "select_(HGBT)_vs_(Logreg)_on_my": {
    #     "ranks": None,
    #     "dids": select_hgbt_linear_dids},
    # "too_easy_(HGBT)_vs_(tree,Logreg)_on_my": {
    #     "ranks": None,
    #     "dids": too_easy_hgbt_combined_dids},
    # "select_(HGBT)_vs_(tree,Logreg)_on_my": {
    #     "ranks": None,
    #     "dids": select_hgbt_combined_dids},
        }


In [36]:
# Rank only HGBT and logistic regression within each dataset group of `ranks_df`.
# The printed heading names the split that defined the group, not the methods ranked.
methods = ["hgbt", "logistic"]  # , "resnet", "rf", "tree", "mlp",]
metrics = ["acc"]
for key, entry in ranks_df.items():
    current_result = get_result(methods, metrics, entry['dids'], all_my_results.reset_index())
    current_ranks = get_average_rank_table(current_result)
    entry['ranks'] = current_ranks
    print(f"\n\n{key.upper().replace('_', ' ')} ({len(entry['dids'])})")
    pprint(current_ranks)



TOO EASY (MLP) VS (TREE) ON MY (36)
|                |   hgbt |   logistic |
|:---------------|-------:|-----------:|
| ACC Mean Rank  | 1.2222 |     1.7778 |
| ACC Mean Score | 0.8832 |     0.8543 |


SELECT (MLP) VS (TREE) ON MY (35)
|                |   hgbt |   logistic |
|:---------------|-------:|-----------:|
| ACC Mean Rank  | 1.2571 |     1.7429 |
| ACC Mean Score | 0.8005 |     0.7738 |


In [37]:
# Rank all six methods within each dataset group of `ranks_df`.
# This overwrites the "ranks" stored by the two-method run.
methods = ["hgbt", "logistic", "resnet", "rf", "tree", "mlp"]
metrics = ["acc"]
for key, entry in ranks_df.items():
    current_result = get_result(methods, metrics, entry['dids'], all_my_results.reset_index())
    current_ranks = get_average_rank_table(current_result)
    entry['ranks'] = current_ranks
    print(f"\n\n{key.upper().replace('_', ' ')} ({len(entry['dids'])})")
    pprint(current_ranks)



TOO EASY (MLP) VS (TREE) ON MY (36)
|                |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|-------:|-----------:|-------:|---------:|-------:|-------:|
| ACC Mean Rank  | 2.3056 |     4.1528 | 3.0417 |   4.1389 | 2.4306 | 4.9306 |
| ACC Mean Score | 0.8832 |     0.8543 | 0.8728 |   0.8628 | 0.8826 | 0.861  |


SELECT (MLP) VS (TREE) ON MY (35)
|                |   hgbt |   logistic |    mlp |   resnet |     rf |   tree |
|:---------------|-------:|-----------:|-------:|---------:|-------:|-------:|
| ACC Mean Rank  | 2.2571 |     3.8571 | 2.7429 |   3.4286 | 2.8    | 5.9143 |
| ACC Mean Score | 0.8005 |     0.7738 | 0.7932 |   0.7909 | 0.7927 | 0.721  |


In [18]:
# Print one launch command per "select" dataset (split on the benchmark sheet's scores).
# NOTE(review): the partition name and the absolute --exp_dir path are
# hard-coded; they look cluster-specific - adjust before reuse.
for dataset_id in select_hgbt_linear_dids_their:
    print(f'python run_autopytorch_random_experiment.py --dataset_id {dataset_id} --max_configs 400 --epochs 105 --device "cuda" --slurm --partition alldlc_gpu-rtx2080 --nr_workers 10 --slurm_job_time_secs 10000 --exp_dir "/work/dlclarge2/rkohli-results_tab-bench/autopytorch_cocktails_random_runs"')


python run_autopytorch_random_experiment.py --dataset_id 151 --max_configs 400 --epochs 105 --device "cuda" --slurm --partition alldlc_gpu-rtx2080 --nr_workers 10 --slurm_job_time_secs 10000 --exp_dir "/work/dlclarge2/rkohli-results_tab-bench/autopytorch_cocktails_random_runs"
python run_autopytorch_random_experiment.py --dataset_id 293 --max_configs 400 --epochs 105 --device "cuda" --slurm --partition alldlc_gpu-rtx2080 --nr_workers 10 --slurm_job_time_secs 10000 --exp_dir "/work/dlclarge2/rkohli-results_tab-bench/autopytorch_cocktails_random_runs"
python run_autopytorch_random_experiment.py --dataset_id 354 --max_configs 400 --epochs 105 --device "cuda" --slurm --partition alldlc_gpu-rtx2080 --nr_workers 10 --slurm_job_time_secs 10000 --exp_dir "/work/dlclarge2/rkohli-results_tab-bench/autopytorch_cocktails_random_runs"
python run_autopytorch_random_experiment.py --dataset_id 722 --max_configs 400 --epochs 105 --device "cuda" --slurm --partition alldlc_gpu-rtx2080 --nr_workers 10 --s