# Review Results
General notebook for reviewing results files

In [1]:
import itertools

import pandas as pd

In [2]:
# Scripts that list the algs and datasets expected for this batch.
algs_list_file = "../scripts/ALGS_CPU_1.sh"
datasets_list_file = "../scripts/DATASETS_A.sh"

# Substring of exp_name used to select this experiment's rows from the metadataset.
experiment_name = "cpu-expt"

In [3]:
# parse the expected datasets and algs from the scripts

def _read_array_entries(script_path, array_marker):
    """Return the stripped entries listed after `array_marker` in a script file.

    Collection starts on the line after a line containing `array_marker` and
    stops at the first later line containing ")". Blank lines and lines that
    start with "#" are skipped so they are not mistaken for real entries.

    Parameters:
        script_path: path of the script file to read.
        array_marker: substring identifying the line that opens the list.

    Returns:
        list of str, one per entry line, in file order.
    """
    entries = []
    inside_array = False
    with open(script_path, 'r') as f:
        for line in f:
            if array_marker in line:
                inside_array = True
                continue
            if not inside_array:
                continue
            if ")" in line:
                # closing parenthesis of the list: nothing after it is an entry
                break
            entry = line.strip()
            if entry and not entry.startswith("#"):
                entries.append(entry)
    return entries


# alg entries keep only the text before the first ":"
# (presumably the part after ":" names the environment -- confirm against the script)
alg_list = [
    entry.split(":")[0].strip()
    for entry in _read_array_entries(algs_list_file, "MODELS_ENVS")
]
dataset_list = _read_array_entries(datasets_list_file, "DATASETS")

print(f"found {len(alg_list)} algs in {algs_list_file}:")
for position, alg_name in enumerate(alg_list, start=1):
    print(position, alg_name)
print()
print(f"found {len(dataset_list)} datasets in {datasets_list_file}: ")
for position, dataset_name in enumerate(dataset_list, start=1):
    print(position, dataset_name)



found 6 algs in ../scripts/ALGS_CPU_1.sh:
1 LinearModel
2 KNN
3 SVM
4 DecisionTree
5 RandomForest
6 LightGBM

found 114 datasets in ../scripts/DATASETS_A.sh: 
1 openml__sick__3021
2 openml__kr-vs-kp__3
3 openml__letter__6
4 openml__balance-scale__11
5 openml__mfeat-factors__12
6 openml__mfeat-fourier__14
7 openml__breast-w__15
8 openml__mfeat-karhunen__16
9 openml__mfeat-morphological__18
10 openml__mfeat-zernike__22
11 openml__cmc__23
12 openml__optdigits__28
13 openml__credit-approval__29
14 openml__credit-g__31
15 openml__pendigits__32
16 openml__diabetes__37
17 openml__spambase__43
18 openml__splice__45
19 openml__tic-tac-toe__49
20 openml__vehicle__53
21 openml__electricity__219
22 openml__satimage__2074
23 openml__eucalyptus__2079
24 openml__vowel__3022
25 openml__isolet__3481
26 openml__analcatdata_authorship__3549
27 openml__analcatdata_dmft__3560
28 openml__mnist_784__3573
29 openml__pc4__3902
30 openml__pc3__3903
31 openml__jm1__3904
32 openml__kc2__3913
33 openml__kc1__3917


In [4]:
# load the metadataset of results
df = pd.read_csv("../TabSurvey/metadataset.csv")

# filter by experiment name
# regex=False: experiment_name is matched as a literal substring, so characters
#   such as "." or "+" in a name are not interpreted as a pattern.
# na=False: rows with a missing exp_name are dropped instead of putting NaN in
#   the mask, which .loc cannot index with.
df = df.loc[df["exp_name"].str.contains(experiment_name, regex=False, na=False)]

In [5]:
# for each alg-dataset combination, check for results in the metadataset
alg_dataset_pairs = list(itertools.product(alg_list, dataset_list))

# Count the rows for every (alg_name, dataset_name) combination present in df
# in a single pass, instead of rescanning the whole frame once per expected pair.
results_per_pair = df.groupby(["alg_name", "dataset_name"]).size().to_dict()

# One row per expected pair, in itertools.product order; pairs that never
# appear in df get a count of 0.
df_counts = pd.DataFrame(
    {
        "alg": [pair[0] for pair in alg_dataset_pairs],
        "dataset": [pair[1] for pair in alg_dataset_pairs],
        "count": [results_per_pair.get(pair, 0) for pair in alg_dataset_pairs],
    }
)


In [6]:
# Pairs that fall short of 300 results; a pair with 300 results is complete.

print("pairs with fewer than 300 results")
df_counts.loc[df_counts["count"].lt(300)]

pairs with fewer than 300 results


Unnamed: 0,alg,dataset,count
63,LinearModel,openml__mfeat-pixel__146824,0
64,LinearModel,openml__Fashion-MNIST__146825,0
65,LinearModel,openml__jungle_chess_2pcs_raw_endgame_complete...,0
66,LinearModel,openml__numerai28.6__167120,0
67,LinearModel,openml__Devnagari-Script__167121,0
...,...,...,...
655,LightGBM,openml__riccardo__168338,10
657,LightGBM,openml__christine__168908,0
658,LightGBM,openml__dilbert__168909,0
663,LightGBM,openml__dionis__189355,0


In [7]:
# pairs with 0 results (complete fail)
# The filter is an equality test: a count can never be below zero, so the
# message says "0 results" rather than "fewer than 0".

print("pairs with 0 results (complete fail)")
df_counts[df_counts["count"] == 0]

pairs with fewer than 0 results (complete fail)


Unnamed: 0,alg,dataset,count
63,LinearModel,openml__mfeat-pixel__146824,0
64,LinearModel,openml__Fashion-MNIST__146825,0
65,LinearModel,openml__jungle_chess_2pcs_raw_endgame_complete...,0
66,LinearModel,openml__numerai28.6__167120,0
67,LinearModel,openml__Devnagari-Script__167121,0
...,...,...,...
654,LightGBM,openml__guillermo__168337,0
657,LightGBM,openml__christine__168908,0
658,LightGBM,openml__dilbert__168909,0
663,LightGBM,openml__dionis__189355,0


In [8]:
# for each dataset, what fraction of algorithms have more than 200 results?
# The comparison is strict: a pair with exactly 200 results is not counted.
# The mean of the boolean mask divides by the number of rows in the group
# itself, so the value stays within [0, 1] even if df_counts and alg_list
# have drifted apart (e.g. after re-running an earlier cell).
print(df_counts.groupby("dataset")["count"].apply(lambda x: x.gt(200).mean()).sort_values())

dataset
openml__dionis__189355              0.000000
openml__Devnagari-Script__167121    0.333333
openml__Fashion-MNIST__146825       0.333333
openml__helena__168329              0.333333
openml__albert__189356              0.500000
                                      ...   
openml__dresses-sales__125920       1.000000
openml__dna__167140                 1.000000
openml__diabetes__37                1.000000
openml__mfeat-fourier__14           1.000000
openml__wilt__146820                1.000000
Name: count, Length: 114, dtype: float64


There are a few problematic datasets, which is fine.

In [9]:
# for each alg, what fraction of datasets have more than 200 results?
# The comparison is strict: a pair with exactly 200 results is not counted.
# The mean of the boolean mask divides by the number of rows in the group
# itself, so the value stays within [0, 1] even if df_counts and dataset_list
# have drifted apart (e.g. after re-running an earlier cell).
print(df_counts.groupby("alg")["count"].apply(lambda x: x.gt(200).mean()).sort_values())

alg
SVM             0.605263
KNN             0.850877
LightGBM        0.903509
LinearModel     0.929825
RandomForest    0.973684
DecisionTree    0.991228
Name: count, dtype: float64


Not bad. SVM and KNN have the lowest coverage (about 61% and 85% of datasets, respectively), likely due to memory or runtime limits.