In [None]:
import collections
import matplotlib.pyplot as plt
import getpass
import glob
import itertools
import json
import numpy as np
import os
import openml
import pandas as pd
import scipy.stats

from plot_utils.style import style_dc
from plot_utils.common_plots import rank, average_plot
from plot_utils.common_tables import collect_data_for_final_table, do_wilcoxon_test
from plot_utils.common_loading import load_from_openml

import sys
sys.path.append("/home/eggenspk/Work/Project/2020_PoSH_Autosklearn/2020_IEEE_Autosklearn_experiments/experiment_scripts/")
sys.path.append("/home/feurerm/sync_dir/projects/2020_posh/2020_IEEE_Autosklearn_experiments/experiment_scripts")
from utils import openml_automl_benchmark, get_normalization_constants

In [None]:
# Resolve the root directory that holds all experiment results.
# The per-user defaults below are machine-specific absolute paths; setting the
# POSH_RESULTS_DIR environment variable overrides them, so the notebook can be
# run by another user / on another machine without editing this cell.
username = getpass.getuser()
_known_result_dirs = {
    'eggenspk': "/media/eggenspk/04a9389c-b7e2-474a-a9de-c66d5345f407/2020_posh/",
    'feurerm': "/home/feurerm/projects/2020_posh/",
}
dir_ = os.environ.get("POSH_RESULTS_DIR") or _known_result_dirs.get(username)
if dir_ is None:
    # Same exception type as the plain dict lookup raised before, but with a
    # message that tells the reader how to fix the configuration.
    raise KeyError(
        "No result directory configured for user %r; set the POSH_RESULTS_DIR "
        "environment variable or add an entry to _known_result_dirs." % username
    )

def _build_valid_pretty(minutes, path_budget):
    """Return the ordered ``{(auto, result path): pretty name}`` mapping for one horizon.

    The mappings for the individual horizons differ only in two path
    components, so they are generated here instead of being copy-pasted.

    Parameters
    ----------
    minutes : int
        Horizon in minutes; becomes the leading ``"<minutes>MIN"`` path part.
    path_budget : int
        Numeric path component that follows the experiment name
        (60 for the 10 minute runs, 360 for the 60 minute runs).
        NOTE(review): presumably a per-run time limit - confirm.

    Returns
    -------
    dict
        Keys are ``(auto, path)`` tuples where ``auto`` is ``"auto"`` for the
        AutoAuto selector results and ``None`` for the portfolio runs; values
        are the labels used in plots and tables. Insertion order is
        Selector, Single Best, then (plain, SH) pairs for holdout, 3CV, 5CV
        and 10CV - downstream cells iterate the keys in this order.
    """
    prefix = "%dMIN" % minutes
    selector_tmpl = "%s/AutoAuto_simulate_RQ1_target_dir/%s/%d/autoauto"
    portfolio_tmpl = "%s/ASKL_run_with_portfolio/%d/RF/RF_%s_%s_iterative_es_if"

    names = {}
    for kind, label in (("dynamic", "Selector"), ("static", "Single Best")):
        names[("auto", selector_tmpl % (prefix, kind, path_budget))] = label
    for resampling in ("holdout", "3CV", "5CV", "10CV"):
        for fidelity, label_tmpl in (("None", "%s"), ("SH-eta4-i", "SH; %s")):
            path = portfolio_tmpl % (prefix, path_budget, fidelity, resampling)
            names[(None, path)] = label_tmpl % resampling
    return names


# Horizon (in minutes) -> mapping from result location to display name.
valid_pretty = {
    10: _build_valid_pretty(10, 60),
    60: _build_valid_pretty(60, 360),
}

task_ids = openml_automl_benchmark

# Collect the stored trajectories:
#   res_dc[horizon][task id][mode] -> list with one dict per result file found,
#   each dict mapping the first to the second element of every trajectory entry.
# Seeds without a result file are counted, reported and skipped, so a list
# position does not necessarily equal the seed number.
res_dc = {}
miss = 0
for horizon, modes_for_horizon in list(valid_pretty.items()):
    res_dc[horizon] = {}
    for tid in task_ids:
        res_dc[horizon][tid] = {}
        for mode in list(modes_for_horizon.keys()):
            if type(mode) == tuple:
                auto, model_name = mode
            else:
                auto, model_name = None, mode

            # "auto" results are stored as "<model>_<tid>_<seed>", all other
            # results as "<model>_<tid>_<seed>_0_0".
            if auto == "auto":
                suffix_tmpl = "_%d_%d/result.json"
            else:
                suffix_tmpl = "_%d_%d_0_0/result.json"

            runs = res_dc[horizon][tid][mode] = []
            for seed in range(10):
                fl_tmpl = dir_ + "/" + model_name + suffix_tmpl % (tid, seed)
                matches = glob.glob(fl_tmpl)
                if not matches:
                    miss += 1
                    print(fl_tmpl)
                    continue
                with open(matches[0], "r") as fh:
                    trajectory = json.load(fh)["0"]["trajectory"]
                runs.append({entry[0]: entry[1] for entry in trajectory})
print("Missing %d entries" % miss)

In [None]:
# Artificially add oracle and random
#
# Two synthetic modes are derived from the loaded runs of every task:
#   * Random: for each run index, the trajectory of one uniformly drawn
#     non-"auto" mode (those whose key starts with None).
#   * Oracle: for each run index, the trajectory of the non-"auto" mode whose
#     value at its largest trajectory key is smallest.
# The mode keys are defined up front so that registering them in valid_pretty
# does not depend on variables leaking out of the task loop (which raised a
# NameError when task_ids was empty).
n_seeds = 10  # number of run slots per (task, mode); assumes every option has that many loaded runs
rand_mode = ("Random", "Random")
orac_mode = ("Oracle", "Oracle")

for horizon in res_dc:
    if horizon == 36000:
        continue
    # Re-seeded per horizon, so every horizon sees the same sequence of draws.
    rng = np.random.RandomState(1)
    for tid in task_ids:
        runs_by_mode = res_dc[horizon][tid]
        options = [k for k in runs_by_mode if k[0] is None]

        # random
        choices = rng.choice(len(options), n_seeds)
        runs_by_mode[rand_mode] = [
            runs_by_mode[options[c]][s] for s, c in enumerate(choices)
        ]

        # oracle
        oracle_runs = []
        for s in range(n_seeds):
            # Value stored under the largest key of each option's trajectory.
            final_vals = [
                runs_by_mode[o][s][max(runs_by_mode[o][s])] for o in options
            ]
            best = np.argmin(final_vals)
            oracle_runs.append(runs_by_mode[options[best]][s])
        runs_by_mode[orac_mode] = oracle_runs

    valid_pretty[horizon][orac_mode] = "Oracle"
    valid_pretty[horizon][rand_mode] = "Random"

In [None]:
# Load task metadata and normalization constants from disk and OpenML.
# This takes some time.
# `tasks` is later indexed as tasks.loc[tid, <column>], so it is presumably a
# DataFrame keyed by task id - TODO confirm against load_from_openml.
tasks, task_ids_sorted_by_num_features = load_from_openml(task_ids)
# NOTE(review): load=True presumably reads previously stored constants from
# below dir_ instead of recomputing them - confirm in experiment_scripts/utils.
min_diff_dc = get_normalization_constants(dir_, load=True)
# Bare expression: shown as the cell output.
tasks

In [None]:
# One figure per task: median trajectory of every mode with the 25%-75%
# quantile band across the loaded runs.
HORIZON = 10
# Right edge of the curves; the horizons are named "<n>MIN", so this is
# presumably the horizon in seconds.
horizon_end = HORIZON * 60
for tid in task_ids_sorted_by_num_features:
    fig, ax = plt.subplots(figsize=[16, 12])
    colors = itertools.cycle(style_dc["colors"])
    for mode in list(valid_pretty[HORIZON].keys()):
        # The colour was previously drawn but never used; apply it so that the
        # line and its band share the colour from the project style.
        c = next(colors)
        # Rows are runs, columns are time stamps; forward-fill so every run has
        # a value at every time stamp after its first entry.
        tmp = pd.DataFrame(res_dc[HORIZON][tid][mode]).sort_index(axis=1).ffill(axis=1)
        med = tmp.median(axis=0)
        low = tmp.quantile(0.25, axis=0)
        up = tmp.quantile(0.75, axis=0)
        # Extend every curve with its last value up to the end of the horizon.
        for curve in (med, low, up):
            curve.loc[horizon_end] = curve.iloc[-1]
        ax.plot(med.index, med.to_numpy(), label=valid_pretty[HORIZON][mode],
                linewidth=3, color=c)
        ax.fill_between(med.index, low, up, alpha=0.3, color=c)
    ax.set_title('Name: %s (%d), #instances: %d, #attributes: %d' % (
        tasks.loc[tid, 'name'], tid, tasks.loc[tid, 'NumberOfInstances'], tasks.loc[tid, 'NumberOfFeatures']))
    ax.legend(fontsize=20)
    # Zoom into the lowest 30% of the automatically chosen y-range.
    y_min, y_max = ax.get_ylim()
    ax.set_ylim([y_min, y_min + 0.3 * (y_max - y_min)])
    #ax.set_yscale("log")
    plt.show()

In [None]:
# Plot average BER across all datasets
# Only the "auto" modes and the synthetic Random mode are shown.
HORIZON = 60
model_list = [
    m for m in valid_pretty[HORIZON].keys()
    if m[0] in ("auto", "Random")
]

average_plot(
    model_list=model_list,
    res_dc=res_dc,
    valid_pretty=valid_pretty,
    horizon=HORIZON,
    task_ids_sorted_by_num_features=task_ids_sorted_by_num_features,
    min_diff_dc=min_diff_dc,
)
plt.yscale("log")
plt.show()

In [None]:
# Assume we have either all or no models
# Per horizon, keep the "auto" modes plus the synthetic Random and Oracle
# modes, then collect the table data and run the Wilcoxon test on it
# (Oracle is excluded from the test).
horizon_list = sorted(list(valid_pretty.keys()))
model_list = {
    h: [m for m in valid_pretty[h].keys() if m[0] in ("auto", "Random", "Oracle")]
    for h in horizon_list
}

tab_data, stat_test_data = collect_data_for_final_table(
    model_list,
    res_dc,
    valid_pretty,
    horizon_list,
    task_ids_sorted_by_num_features,
    min_diff_dc,
)
not_different = do_wilcoxon_test(
    stat_test_data,
    model_list,
    horizon_list,
    valid_pretty,
    exclude=["Oracle",],
)

In [None]:
# Show the final table as plain text and as LaTeX source, followed by the
# result of the Wilcoxon test.
df = pd.DataFrame(tab_data)
# Disabled: additional per-horizon rank columns.
#for horizon in horizon_list:
#    df['Rank_%s' % horizon] = df[horizon].rank(method='average', ascending=True)
print(df)
latex_table = df.to_latex()
print(latex_table)
print(not_different)

In [None]:
# Rank plot for the "auto" modes and the synthetic Random mode; the figure is
# also written to /tmp as a PDF.
HORIZON = 10
model_list = [
    m for m in valid_pretty[HORIZON].keys()
    if m[0] in ("auto", "Random")
]
rank(
    model_list,
    res_dc,
    valid_pretty,
    HORIZON,
    task_ids_sorted_by_num_features,
    n_iter=200,
    steplength=5,
)
plt.legend(fontsize=style_dc["fontsize"], loc="upper right")
plt.tight_layout()
rank_pdf_path = '/tmp/Improvement3_%sMIN_rank.pdf' % str(HORIZON)
plt.savefig(rank_pdf_path)
plt.show()