In [None]:
import collections
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.transforms
import getpass
import glob
import itertools
import json
import numpy as np
import os
import openml
import pandas as pd
import scipy.stats

from plot_utils.style import style_dc
from plot_utils.common_plots import rank, average_plot
from plot_utils.common_tables import collect_data_for_final_table, do_wilcoxon_test
from plot_utils.common_loading import load_from_openml

import sys
sys.path.append("/home/eggenspk/Work/Project/2020_PoSH_Autosklearn/2020_IEEE_Autosklearn_experiments/experiment_scripts/")
sys.path.append("/home/feurerm/sync_dir/projects/2020_posh/2020_IEEE_Autosklearn_experiments/experiment_scripts")
from utils import openml_automl_benchmark, get_normalization_constants

In [None]:
# Root directory of the experiment results, chosen by the login name of the
# person running the notebook. Any other user name raises a KeyError here.
_data_dir_by_user = {
    'eggenspk': "/home/eggenspk/Work/Project/2020_PoSH_Autosklearn/DATA/",
    'feurerm': "/home/feurerm/projects/2020_posh/",
}
username = getpass.getuser()
dir_ = _data_dir_by_user[username]
# One row per evaluated configuration:
#   (selector flag, use ensemble results, result-directory template, label).
# "{h}" is replaced by the horizon in minutes (10 or 60) and "{s}" by the
# number appearing in the ``dynamic/<s>/`` path component (60 for the 10MIN
# runs, 360 for the 60MIN runs). Row order is preserved in the resulting
# dictionaries, so it also fixes the order in which methods are iterated.
_mode_templates = (
    ("auto", True, "{h}MIN/RQ1_AutoAuto_simulate/dynamic/{s}/autoauto",
     "Auto-sklearn (2.0)"),
    (None, True, "{h}MIN/ASKL_automldata_baseline_full/RF/RF_None_holdout_full_es_nif",
     "Auto-sklearn (1.0)"),
    (None, True, "{h}MIN/ASKL_automldata_baseline_full_no_metalearning/RF/RF_None_holdout_full_es_nif",
     "Auto-sklearn (1.0, no MtL)"),
    (None, True, "{h}MIN/ASKL_automldata_baseline_full_random/None/None_None_holdout_full_es_nif",
     "Auto-sklearn (1.0, random search)"),
    (None, True, "{h}MIN/ASKL_automldata_baseline_iter/RF/RF_None_holdout_iterative_es_nif",
     "Auto-sklearn (1.0, iterative search space)"),
    (None, True, "{h}MIN/ASKL_automldata_baseline_iter_no_metalearning/RF/RF_None_holdout_iterative_es_nif",
     "Auto-sklearn (1.0, iterative search space, no MtL)"),
    (None, True, "{h}MIN/ASKL_automldata_baseline_iter_random/None/None_None_holdout_iterative_es_nif",
     "Auto-sklearn (1.0, random search, iterative search space)"),
    ("auto", False, "{h}MIN/RQ1_AutoAuto_simulate/dynamic/{s}/autoauto",
     "Selector (ens in sel)"),
    ("auto", False, "{h}MIN/AutoAuto_simulate/dynamic/{s}/autoauto",
     "Selector (no ens in sel)"),
    ("auto", False, "{h}MIN/AutoAuto_simulate_RQ1_target_dir/dynamic/{s}/autoauto",
     "Selector (no ens in sel) - other dir"),
    (None, False, "{h}MIN/ASKL_automldata_baseline_full/RF/RF_None_holdout_full_es_nif",
     "ASKL(full)"),
    (None, False, "{h}MIN/ASKL_automldata_baseline_full_random/None/None_None_holdout_full_es_nif",
     "ASKL(full,random)"),
    (None, False, "{h}MIN/ASKL_automldata_baseline_iter/RF/RF_None_holdout_iterative_es_nif",
     "ASKL(iter)"),
    (None, False, "{h}MIN/ASKL_automldata_baseline_iter_random/None/None_None_holdout_iterative_es_nif",
     "ASKL(iter,random)"),
)

# horizon (minutes) -> {(selector flag, ensemble flag, directory): label}
valid_pretty = {
    horizon: {
        (auto, ensemble, template.format(h=horizon, s=limit)): label
        for auto, ensemble, template, label in _mode_templates
    }
    for horizon, limit in ((10, 60), (60, 360))
}

task_ids = openml_automl_benchmark
# Individual tasks (e.g. 189873, 189874, 75193) can be excluded at this point
# by filtering `task_ids`; as written, every benchmark task is loaded.
print(task_ids, len(task_ids))


def _result_templates(auto, ensemble, model_name, tid, seed):
    """Return the ``(primary, regular)`` glob patterns for one run.

    ``primary`` is the ensemble results file when ``ensemble`` is truthy and
    the plain ``result.json`` otherwise; ``regular`` is always the plain
    ``result.json`` and serves as the fallback pattern.
    """
    # Selector ("auto") runs are stored in ``<model>_<tid>_<seed>``, all other
    # runs in ``<model>_<tid>_<seed>_*_0``.
    suffix = "_%d_%d" if auto == "auto" else "_%d_%d_*_0"
    run_dir = dir_ + "/" + model_name + suffix % (tid, seed)
    regular = run_dir + "/result.json"
    if ensemble:
        return run_dir + "/ensemble_results_0.000000thresh_50size_1.000000best", regular
    return regular, regular


def _load_trajectory(path):
    """Read one result file and return its trajectory as a dictionary.

    The trajectory is taken from the entry stored under key ``"50"`` when it
    exists and from key ``"0"`` otherwise. Each trajectory item contributes
    ``item[0] -> item[1]`` (presumably time stamp -> loss; verify against the
    code writing these files).

    The path is printed before re-raising if the file cannot be read or
    parsed, including when the repair attempt below fails as well.
    """
    try:
        with open(path, "r") as fh:
            content = fh.read()
        try:
            results = json.loads(content)
        except json.JSONDecodeError:
            # Some files have two closing brackets... Drop the last
            # non-whitespace character and retry; stripping first keeps the
            # repair working for files that end with a newline.
            results = json.loads(content.rstrip()[:-1])
    except Exception:
        print(path)
        raise
    entry = results["50"] if "50" in results else results["0"]
    return {item[0]: item[1] for item in entry["trajectory"]}


# res_dc[horizon][task id][mode] -> list with one trajectory dict per seed
res_dc = {}
miss = 0
fallback = 0
for horizon, modes in valid_pretty.items():
    res_dc[horizon] = {}
    for tid in task_ids:
        res_dc[horizon][tid] = {}
        for mode in modes:
            auto, ensemble, model_name = mode
            runs = res_dc[horizon][tid][mode] = []
            for seed in range(10):
                primary, regular = _result_templates(auto, ensemble, model_name, tid, seed)
                # glob returns matches in arbitrary order; sorting makes the
                # choice of fl[0] reproducible if the wildcard matches more
                # than one directory.
                fl = sorted(glob.glob(primary))
                if len(fl) == 0:
                    fl_tmpl = regular
                    fl = sorted(glob.glob(fl_tmpl))
                    if len(fl) == 0:
                        # Nothing on disk for this run: record a placeholder
                        # trajectory with value 1 at key 0.
                        miss += 1
                        runs.append({0: 1})
                        print('Missing', fl_tmpl)
                        continue
                    fallback += 1
                    print('Ensemble missing, falling back to regular', fl[0])
                runs.append(_load_trajectory(fl[0]))
print("Missing %d entries" % miss)
print("Fallback %d entries" % fallback)

In [None]:
# Load some stuff from disc and openml - takes some time
# `tasks` is later indexed as tasks.loc[tid, <column>] for the plot titles,
# `task_ids_sorted_by_num_features` fixes the dataset order of all plots and
# tables, and `min_diff_dc[tid]` supplies the two per-task constants used to
# normalize losses as (loss - min_diff_dc[tid][0]) / min_diff_dc[tid][1].
tasks, task_ids_sorted_by_num_features = load_from_openml(task_ids)
min_diff_dc = get_normalization_constants(dir_, load=True)
# Last expression of the cell: display the task table.
tasks

In [None]:
# Stand-alone legend for the RQ1 figures: one coloured patch per method,
# written to its own PDF so it can be placed next to the plots.
labels = (
    "Auto-sklearn (2.0)",
    "Auto-sklearn (1.0)",
    "Auto-sklearn (1.0) \nno KND",
    "Auto-sklearn (1.0) \nrandom search", 
    "Auto-sklearn (1.0) \niterative search space",
    "Auto-sklearn (1.0) \nno KND & iterative search space",
    "Auto-sklearn (1.0) \nrandom search & iterative search space",
)
fig = plt.figure(figsize=(15, 4))
ax = fig.add_subplot(111)

# Labels and style colours are paired by position.
patches = [
    mpatches.Patch(label=method, color=method_color)
    for method, method_color in zip(labels, style_dc['colors'])
]
fig.legend(
    patches, labels,
    loc='upper left',
    frameon=True,
    ncol=2,
    bbox_to_anchor=(0, 1.6, 0, 0),
    borderaxespad=1.,
    fontsize=style_dc["fontsize"]*2,
)

# Shrink the axes to a tenth of their size and shift them to the right.
box = ax.get_position()
ax.set_position([box.x0 + 1.6, box.y0, box.width * 0.1, box.height * 0.1])

# Save with an explicit bounding box instead of the figure extent.
legend_bbox = matplotlib.transforms.Bbox.from_bounds(0, 0, 22, 6)
plt.savefig('/tmp/RQ1_legend.pdf', bbox_inches=legend_bbox)

In [None]:
# Per-dataset plots: median trajectory over the seeds with the 25%-75%
# quantile band, one figure per task and one curve per method.
HORIZON = 60
for tid in task_ids_sorted_by_num_features:
    plt.figure(figsize=[16,12])
    colors = itertools.cycle(style_dc['colors'])
    for mode in valid_pretty[HORIZON]:
        c = next(colors)
        # Rows are seeds, columns the sorted trajectory keys; forward-fill so
        # every seed has a value at every key.
        tmp = pd.DataFrame(res_dc[HORIZON][tid][mode]).sort_index(axis=1).ffill(axis=1)
        med = tmp.median(axis=0)
        low = tmp.quantile(0.25, axis=0)
        up = tmp.quantile(0.75, axis=0)
        # Extend every curve to the end of the horizon (in seconds) by
        # repeating its last value.
        for curve in (med, low, up):
            curve.loc[HORIZON*60] = curve.iloc[-1]
        # Use the colour drawn from the style cycle for both the line and its
        # band; previously `c` was computed but never passed on.
        plt.plot(med.index, med.to_numpy(), label=valid_pretty[HORIZON][mode],
                 linewidth=3, color=c)
        plt.fill_between(med.index, low, up, alpha=0.3, color=c)
    plt.title('Name: %s (%d), #instances: %d, #attributes: %d' % (
        tasks.loc[tid, 'name'], tid, tasks.loc[tid, 'NumberOfInstances'], tasks.loc[tid, 'NumberOfFeatures']))
    plt.legend(fontsize=20)
    # Zoom into the lowest 30% of the automatically chosen y-range.
    y_min, y_max = plt.ylim()
    plt.ylim([y_min, y_min + 0.3*(y_max - y_min)])
    #plt.yscale("log")
    plt.show()

In [None]:
# Plot average BER across all datasets
#
# File-name tag per ensemble selection. The (False, ) and (True, False)
# selections used to share the tag "other", so the (False, ) figure was
# silently overwritten by the (True, False) one. "other" keeps naming the
# figure that survived before; the previously lost figure gets its own file.
_perf_file_tags = {
    (True, ): "ens",
    (False, ): "noens",
    (True, False): "other",
}
for use_ensemble, file_tag in _perf_file_tags.items():
    for HORIZON in (10, 60):
        # Keep the methods whose ensemble flag is part of the selection.
        model_list = [m for m in valid_pretty[HORIZON].keys() if m[1] in use_ensemble]

        average_plot(model_list=model_list, res_dc=res_dc, valid_pretty=valid_pretty,
                     horizon=HORIZON, task_ids_sorted_by_num_features=task_ids_sorted_by_num_features,
                     min_diff_dc=min_diff_dc, figsize=(10, 5), legend=False)
        plt.yscale("log")
        plt.tight_layout()
        plt.savefig('/tmp/RQ1_%sMIN_%s_perf.pdf' % (str(HORIZON), file_tag))
        plt.show()

In [None]:
# Assume we have either all or no models
# Aggregate every configured method for every horizon into the final table
# and run the significance test on the collected data.
horizon_list = sorted(valid_pretty)
model_list = {}
for h in horizon_list:
    model_list[h] = list(valid_pretty[h])

tab_data, stat_test_data = collect_data_for_final_table(
    model_list, res_dc, valid_pretty, horizon_list,
    task_ids_sorted_by_num_features, min_diff_dc,
)
not_different = do_wilcoxon_test(
    stat_test_data, model_list, horizon_list, valid_pretty, exclude=["Oracle",],
)

In [None]:
# Turn the collected table data into a DataFrame and print it as plain text
# and as LaTeX, followed by the result of the Wilcoxon test step.
df = pd.DataFrame(tab_data)
# Optional per-horizon rank columns (disabled):
#for horizon in horizon_list:
#    df['Rank_%s' % horizon] = df[horizon].rank(method='average', ascending=True)
print(df)
print(df.to_latex())
print(not_different)

In [None]:
# Assume we have either all or no models
# Same table and test as for the full comparison, restricted to the two
# methods selected by their pretty label.
_compared_labels = ("Auto-sklearn (2.0)", "Selector (no ens in sel) - other dir")
horizon_list = sorted(valid_pretty)
model_list = {}
for h in horizon_list:
    model_list[h] = [
        m for m, pretty_name in valid_pretty[h].items()
        if pretty_name in _compared_labels
    ]

tab_data, stat_test_data = collect_data_for_final_table(
    model_list, res_dc, valid_pretty, horizon_list,
    task_ids_sorted_by_num_features, min_diff_dc,
)
not_different = do_wilcoxon_test(
    stat_test_data, model_list, horizon_list, valid_pretty, exclude=["Oracle",],
)
df = pd.DataFrame(tab_data)
# Optional per-horizon rank columns (disabled):
#for horizon in horizon_list:
#    df['Rank_%s' % horizon] = df[horizon].rank(method='average', ascending=True)
print(df)
print(df.to_latex())
print(not_different)

In [None]:
# Rank plots over time for each ensemble selection and horizon.
#
# File-name tag per ensemble selection. The (True, False) and (False, )
# selections used to share the tag "other", so the (True, False) figure was
# silently overwritten by the (False, ) one. "other" keeps naming the figure
# that survived before; the previously lost figure gets its own file.
_rank_file_tags = {
    (True, ): "ens",
    (True, False): "all",
    (False, ): "other",
}
for use_ensemble, file_tag in _rank_file_tags.items():
    for HORIZON in (10, 60):
        # Keep the methods whose ensemble flag is part of the selection.
        model_list = [m for m in valid_pretty[HORIZON].keys() if m[1] in use_ensemble]
        rank(model_list, res_dc, valid_pretty, HORIZON, task_ids_sorted_by_num_features, n_iter=20, 
             steplength=int(HORIZON * 60 / 100), legend=False)
        # plt.legend(fontsize=style_dc["fontsize"], loc="upper right")
        plt.tight_layout()
        plt.savefig('/tmp/RQ1_%sMIN_%s_rank.pdf' % (str(HORIZON), file_tag))
        plt.show()

In [None]:
# Paired rank plots restricted to the selector ("auto") configurations.
for HORIZON in (10, 60):
    model_list = [m for m in valid_pretty[HORIZON] if m[0] == 'auto']
    rank(
        model_list, res_dc, valid_pretty, HORIZON, task_ids_sorted_by_num_features,
        n_iter=200,
        steplength=int(HORIZON * 60 / 100),
        paired=True,
        legend=False,
    )
    #plt.legend(fontsize=style_dc["fontsize"], loc="center right")
    plt.show()

In [None]:
def table_per_dataset(model_list, res_dc, valid_pretty, horizon,
                      task_ids_sorted_by_num_features, min_diff_dc, n_seeds=10):
    """Build a table of the mean normalized final loss per dataset and method.

    For every mode and task the per-seed trajectories are aligned on their
    sorted keys and forward-filled, the last column is taken as the final
    loss of each seed, normalized as
    ``(loss - min_diff_dc[tid][0]) / min_diff_dc[tid][1]`` and averaged over
    the seeds.

    Parameters
    ----------
    model_list : iterable
        Mode keys to include; each must be a key of ``valid_pretty[horizon]``.
    res_dc : dict
        ``res_dc[horizon][tid][mode]`` is a list with one trajectory dict per
        seed.
    valid_pretty : dict
        ``valid_pretty[horizon][mode]`` is the label used as column name.
    horizon : hashable
        Key selecting the horizon in ``res_dc`` and ``valid_pretty``.
    task_ids_sorted_by_num_features : iterable
        Task ids to include, in row order.
    min_diff_dc : dict
        ``min_diff_dc[tid]`` holds the offset (index 0) and the scale
        (index 1) used for normalization.
    n_seeds : int or None, optional
        Expected number of trajectories per task and mode (default 10, the
        value that used to be hard-coded). Pass ``None`` to skip the check.

    Returns
    -------
    pandas.DataFrame
        One row per task id and one column per label.

    Raises
    ------
    AssertionError
        If a mode is unknown for ``horizon`` or the number of trajectories
        differs from ``n_seeds``.
    """
    columns = {}

    for mode in model_list:
        assert mode in valid_pretty[horizon], (mode, valid_pretty[horizon].keys())
        # Use label, not actual key
        label = valid_pretty[horizon][mode]
        per_task = {}
        for tid in task_ids_sorted_by_num_features:
            runs = pd.DataFrame(res_dc[horizon][tid][mode]).sort_index(axis=1).ffill(axis=1)
            final = runs.iloc[:, -1]
            if n_seeds is not None:
                assert final.shape == (n_seeds, ), (mode, tid, final.shape)
            normalized = (final - min_diff_dc[tid][0]) / min_diff_dc[tid][1]
            per_task[tid] = normalized.mean()
        columns[label] = per_task
    return pd.DataFrame(columns)

# Per-dataset table for the 60 minute horizon, restricted to the
# configurations whose ensemble flag is set.
h = 60
horizon_list = sorted(valid_pretty)
model_list = [m for m in valid_pretty[h] if m[1]]

tab_data = table_per_dataset(
    model_list, res_dc, valid_pretty, h,
    task_ids_sorted_by_num_features, min_diff_dc,
)

In [None]:
# Column means over all datasets, then again after removing three tasks one
# after the other, to show how much each of them affects the averages.
tmp = tab_data.copy()
print(tmp.mean())
for excluded_tid in (189873, 189874, 75193):
    tmp = tmp.drop(excluded_tid)
    print(tmp.mean())

In [None]:
# Per-dataset difference between Auto-sklearn (1.0) and its random-search
# variant. Work on an explicit copy: adding a column to a column-subset of
# `tab_data` otherwise triggers pandas' SettingWithCopyWarning.
tmp = tab_data[["Auto-sklearn (1.0)", "Auto-sklearn (1.0, random search)"]].copy()
tmp['diff'] = tmp["Auto-sklearn (1.0)"] - tmp["Auto-sklearn (1.0, random search)"]
tmp

In [None]:
# Plot the per-dataset difference stored in tmp['diff'], i.e.
# Auto-sklearn (1.0) minus Auto-sklearn (1.0, random search). The axis label
# and title name exactly these two methods; they previously referred to an
# ASKLv1-vs-ASKLv2 comparison that is not what is plotted.
plt.figure(figsize=(10, 5))
plt.plot(tmp['diff'].to_numpy())
# Zero reference line spanning all plotted datasets (was hard-coded to 38).
plt.plot((0, len(tmp) - 1), (0, 0))
plt.xlabel('Dataset index (sorted from smallest to largest)')
plt.ylabel('loss(Auto-sklearn (1.0)) - loss(Auto-sklearn (1.0, random search))')
plt.title('Auto-sklearn (1.0) vs. Auto-sklearn (1.0, random search) for %d minutes' % h)