# Review Results
General notebook for reviewing results files

In [1]:
import pandas as pd

In [6]:
# Shell scripts that are parsed below for the algs and datasets expected in this batch.
algs_list_file = "../scripts/ALGS_GPU_1.sh"
datasets_list_file = "../scripts/DATASETS_B.sh"

# Rows of the metadataset are kept when their exp_name contains this string.
experiment_name = "algs-gpu-1-datasets-b-v3"

In [17]:
# parse the expected datasets and algs from the scripts


def _read_bash_array_entries(path, marker):
    """Return the stripped entry lines of the bash array found in ``path``.

    Args:
        path: shell script to read.
        marker: text identifying the line that opens the array. Any line
            containing it is skipped and switches collection on.

    Returns:
        The stripped, non-empty lines that follow the marker line, up to (but
        not including) the first such line containing ")". Blank lines and
        lines starting with "#" are skipped so that they are neither reported
        as entries nor mistaken for the end of the array.
    """
    entries = []
    inside_array = False
    with open(path, 'r') as f:
        for raw_line in f:
            if marker in raw_line:
                inside_array = True
                continue
            if not inside_array:
                continue
            line = raw_line.strip()
            if not line or line.startswith("#"):
                continue
            if ")" in line:
                # first ")" after the marker closes the array
                break
            entries.append(line)
    return entries


# for algs, keep only the text before the first ":" on each entry line
alg_list = [
    entry.split(":")[0].strip()
    for entry in _read_bash_array_entries(algs_list_file, "MODELS_ENVS")
]
dataset_list = _read_bash_array_entries(datasets_list_file, "DATASETS")

print(f"found {len(alg_list)} algs in {algs_list_file}:")
for position, alg_name in enumerate(alg_list, start=1):
    print(position, alg_name)
print()
print(f"found {len(dataset_list)} datasets in {datasets_list_file}: ")
for position, dataset_name in enumerate(dataset_list, start=1):
    print(position, dataset_name)



found 5 algs in ../scripts/ALGS_GPU_1.sh:
1 XGBoost
2 CatBoost
3 MLP
4 TabNet
5 VIME

found 82 datasets in ../scripts/DATASETS_B.sh: 
1 openml__acute-inflammations__10089
2 openml__ada_agnostic__3896
3 openml__aloi__12732
4 openml__analcatdata_boxing1__3540
5 openml__analcatdata_chlamydia__3739
6 openml__anneal__2867
7 openml__artificial-characters__14964
8 openml__audiology__7
9 openml__autos__9
10 openml__bodyfat__5514
11 openml__car-evaluation__146192
12 openml__cardiotocography__9979
13 openml__Census-Income__168340
14 openml__chess__3952
15 openml__chscase_foot__5012
16 openml__cjs__14967
17 openml__cleveland__2285
18 openml__Click_prediction_small__190408
19 openml__colic__25
20 openml__colic__27
21 openml__collins__3567
22 openml__cpu_small__4883
23 openml__dataset_sales__190418
24 openml__dermatology__35
25 openml__EgyptianSkulls__5040
26 openml__elevators__3711
27 openml__fertility__9984
28 openml__fl2000__3566
29 openml__fri_c0_100_5__3620
30 openml__fri_c3_100_5__3779
31 ope

In [23]:
# load the metadataset
df = pd.read_csv("../TabSurvey/metadataset.csv")

# filter by experiment name: match it as a literal substring (not as a regex),
# and treat rows with a missing exp_name as non-matching so the boolean mask
# never contains NaN (which .loc rejects)
df = df.loc[df["exp_name"].str.contains(experiment_name, regex=False, na=False)]

In [32]:
# for each alg-dataset combination, check for results in the metadataset
import itertools

alg_dataset_pairs = list(itertools.product(alg_list, dataset_list))

# Count the rows of every (alg_name, dataset_name) combination present in the
# metadataset in a single pass, instead of rescanning the frame once per pair.
results_per_pair = df.groupby(["alg_name", "dataset_name"]).size().to_dict()

# keep track of the number of results for each expected pair; pairs that never
# appear in the metadataset get an explicit count of 0
df_counts = pd.DataFrame(
    {
        "alg": [pair_alg for pair_alg, _pair_dataset in alg_dataset_pairs],
        "dataset": [pair_dataset for _pair_alg, pair_dataset in alg_dataset_pairs],
        "count": [results_per_pair.get(pair, 0) for pair in alg_dataset_pairs],
    }
)


In [33]:
# A pair counts as complete at 300 results; show every pair below that.

print("pairs with fewer than 300 results")
incomplete_mask = df_counts["count"].lt(300)
df_counts.loc[incomplete_mask]

pairs with fewer than 300 results


Unnamed: 0,alg,dataset,count
144,CatBoost,openml__pollen__3735,0
145,CatBoost,openml__postoperative-patient-data__146210,0
148,CatBoost,openml__Satellite__167211,0
149,CatBoost,openml__scene__3485,0
150,CatBoost,openml__socmob__3797,0
151,CatBoost,openml__solar-flare__2068,0
152,CatBoost,openml__sonar__39,0
166,MLP,openml__aloi__12732,130
176,MLP,openml__Census-Income__168340,70
209,MLP,openml__ldpa__9974,230


In [34]:
# pairs with exactly 0 results (complete fail)

# the filter below is `== 0`, so the message says "0 results" rather than
# the impossible "fewer than 0 results"
print("pairs with 0 results (complete fail)")
df_counts[df_counts["count"] == 0]

pairs with fewer than 0 results (complete fail)


Unnamed: 0,alg,dataset,count
144,CatBoost,openml__pollen__3735,0
145,CatBoost,openml__postoperative-patient-data__146210,0
148,CatBoost,openml__Satellite__167211,0
149,CatBoost,openml__scene__3485,0
150,CatBoost,openml__socmob__3797,0
151,CatBoost,openml__solar-flare__2068,0
152,CatBoost,openml__sonar__39,0
248,TabNet,openml__aloi__12732,0
258,TabNet,openml__Census-Income__168340,0
291,TabNet,openml__ldpa__9974,0


In [43]:
# for each dataset, what fraction of the algs have more than 200 results?
# (the comparison is strict: a pair with exactly 200 results is not counted)
above_threshold = df_counts["count"].gt(200)
dataset_coverage = above_threshold.groupby(df_counts["dataset"]).sum() / float(len(alg_list))
print(dataset_coverage.sort_values())

dataset
openml__Census-Income__168340             0.4
openml__poker-hand__9890                  0.4
openml__aloi__12732                       0.4
openml__walking-activity__9945            0.4
openml__Click_prediction_small__190408    0.6
                                         ... 
openml__colic__27                         1.0
openml__colic__25                         1.0
openml__cleveland__2285                   1.0
openml__fl2000__3566                      1.0
openml__yeast__145793                     1.0
Name: count, Length: 82, dtype: float64


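Again, a few datasets are problematic: Census-Income, poker-hand, aloi, and walking-activity have more than 200 results for only 40% of the algorithms. This is not a big issue, though.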

In [44]:
# for each alg, what fraction of the datasets have more than 200 results?
# (the comparison is strict: a pair with exactly 200 results is not counted)
above_threshold = df_counts["count"].gt(200)
alg_coverage = above_threshold.groupby(df_counts["alg"]).sum() / float(len(dataset_list))
print(alg_coverage.sort_values())

alg
TabNet      0.768293
VIME        0.890244
CatBoost    0.914634
MLP         0.951220
XGBoost     1.000000
Name: count, dtype: float64


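Nothing terrible: every algorithm has more than 200 results on at least ~77% of the datasets (TabNet is the lowest; XGBoost covers all of them). Great.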