In [None]:
import collections
import matplotlib.pyplot as plt
import getpass
import glob
import itertools
import json
import numpy as np
import os
import openml
import pandas as pd
import scipy.stats

from plot_utils.style import style_dc
from plot_utils.common_plots import rank, average_plot
from plot_utils.common_tables import collect_data_for_final_table, do_wilcoxon_test
from plot_utils.common_loading import load_from_openml

import sys
sys.path.append("/home/eggenspk/Work/Project/2020_PoSH_Autosklearn/2020_IEEE_Autosklearn_experiments/experiment_scripts/")
sys.path.append("/home/feurerm/sync_dir/projects/2020_posh/2020_IEEE_Autosklearn_experiments/experiment_scripts")
from utils import openml_automl_benchmark, get_normalization_constants

In [None]:
# Hex colour palette; the per-dataset plotting cell cycles through it.
COLORS = [
    '#1b9e77',
    '#d95f02',
    '#7570b3',
    '#e7298a',
    '#66a61e',
    '#e6ab02',
    '#a6761d',
    '#666666',
]

# Shared font-size constant for the notebook's figures.
FONTSIZE = 20

In [None]:
# Resolve the per-user data directory. Only the users listed here are
# configured; anybody else gets an explanatory KeyError instead of a bare one.
username = getpass.getuser()
_data_dirs = {
    'eggenspk': "/home/eggenspk/Work/Project/2020_PoSH_Autosklearn/DATA/",
    'feurerm': "/home/feurerm/projects/2020_posh/",
}
if username not in _data_dirs:
    raise KeyError(
        "No data directory configured for user %r (known users: %s)"
        % (username, ", ".join(sorted(_data_dirs)))
    )
dir_ = _data_dirs[username]

# horizon -> {(flag, experiment sub-directory): label used in plots/tables}.
# A False flag selects the "*_only_portfolio" ensemble file (see loop below).
valid_pretty = {
    60: {
    (True, "60MIN/RQ1_AutoAuto_simulate/dynamic/360/"): "With Portfolio (dynamic)",
    (True, "60MIN/RQ1_AutoAuto_simulate/static/360/"): "With Portfolio (static)",
    #(False, "60MIN/RQ1_AutoAuto_simulate/dynamic/360/"): "Only Portfolio",
    (True, "60MIN/RQ2.2_AutoAuto_simulate/dynamic/360/"): "Without Portfolio (dynamic)",
    (True, "60MIN/RQ2.2_AutoAuto_simulate/static/360/"): "Without Portfolio (static)",
    },
}

task_ids = openml_automl_benchmark

# res_dc[horizon][task id][mode] is a list with one dict per seed that could be
# loaded; each dict maps the first element of a trajectory entry to the second
# (presumably time -> loss; confirm against the files' producer).
res_dc = {}
miss = 0       # runs with neither an ensemble file nor a result.json
fallback = 0   # runs where result.json was used because the ensemble file is missing
for horizon in valid_pretty:
    res_dc[horizon] = {}
    for tid in task_ids:
        res_dc[horizon][tid] = {}
        for mode in valid_pretty[horizon]:
            res_dc[horizon][tid][mode] = []
            for seed in range(10):
                run_dir = dir_ + mode[1] + "autoauto_%d_%d/" % (tid, seed)
                ensemble_name = "ensemble_results_0.000000thresh_50size_1.000000best"
                if not mode[0]:
                    ensemble_name += "_only_portfolio"

                # The paths contain no wildcards, so a plain existence check is
                # used rather than glob (which would interpret characters such
                # as "[" specially and would also match directories).
                fl = run_dir + ensemble_name
                if not os.path.isfile(fl):
                    fl = run_dir + "result.json"
                    if not os.path.isfile(fl):
                        miss += 1
                        print(fl)
                        continue
                    fallback += 1
                    print('Ensemble missing, falling back to regular', fl)

                with open(fl, "r") as fh:
                    line = json.load(fh)

                # Prefer the entry stored under "50" and fall back to "0" when
                # it is absent.
                if "50" in line:
                    trajectory = line["50"]["trajectory"]
                else:
                    trajectory = line["0"]["trajectory"]
                loss = {entry[0]: entry[1] for entry in trajectory}
                res_dc[horizon][tid][mode].append(loss)
print("Missing %d entries" % miss)

# There should be 7 fallbacks. These are all due to the static strategy being 3-fold CV, which does
# not work for the dionis dataset
print("Fallback %d entries" % fallback)

In [None]:
# Slow cell: pulls data from OpenML and from disk.
openml_meta = load_from_openml(task_ids)
tasks, task_ids_sorted_by_num_features = openml_meta
min_diff_dc = get_normalization_constants(dir_, load=True)
# Display the task table as the cell output.
tasks

In [None]:
# One figure per task: for every strategy, the median across seeds with the
# 25%-75% quantile band shaded, extended to the end of the time budget.
HORIZON = 60
for tid in task_ids_sorted_by_num_features:
    plt.figure(figsize=[16,12])
    colors = itertools.cycle(COLORS)
    for mode in valid_pretty[HORIZON]:
        # Advance the palette before any skip so that a strategy keeps the same
        # colour in every figure.
        c = next(colors)
        # Rows: seeds, columns: trajectory keys (sorted); forward-fill so every
        # seed has a value at every key seen in any seed.
        tmp = pd.DataFrame(res_dc[HORIZON][tid][mode]).sort_index(axis=1).ffill(axis=1)
        if tmp.empty:
            # No run could be loaded for this task/strategy; nothing to draw.
            print("No results for task %d, %s - skipping" % (tid, valid_pretty[HORIZON][mode]))
            continue
        # HORIZON * 60 is appended as the final x-value (presumably the budget
        # in seconds - confirm the unit of the trajectory keys).
        t_end = HORIZON*60
        med = tmp.median(axis=0)
        med.loc[t_end] = med.iloc[-1]
        low = tmp.quantile(0.25, axis=0)
        low.loc[t_end] = low.iloc[-1]
        up = tmp.quantile(0.75, axis=0)
        up.loc[t_end] = up.iloc[-1]
        # Use the palette colour for both the line and its band so they match.
        plt.plot(med.index, med.to_numpy(), label=valid_pretty[HORIZON][mode], linewidth=3, color=c)
        plt.fill_between(med.index, low, up, alpha=0.3, color=c)
    plt.title('Name: %s (%d), #instances: %d, #attributes: %d' % (
        tasks.loc[tid, 'name'], tid, tasks.loc[tid, 'NumberOfInstances'], tasks.loc[tid, 'NumberOfFeatures']))
    plt.legend(fontsize=FONTSIZE)
    # Zoom in on the lowest 30% of the automatically chosen y-range.
    y_lo, y_hi = plt.ylim()
    plt.ylim([y_lo, y_lo + 0.3*(y_hi - y_lo)])
    #plt.yscale("log")
    plt.show()

In [None]:
# Average BER over all datasets for the 60-minute horizon, on a log-scaled y-axis.
HORIZON = 60
model_list = list(valid_pretty[HORIZON])

average_plot(
    model_list=model_list,
    res_dc=res_dc,
    valid_pretty=valid_pretty,
    horizon=HORIZON,
    task_ids_sorted_by_num_features=task_ids_sorted_by_num_features,
    min_diff_dc=min_diff_dc,
)
plt.yscale("log")
plt.show()

In [None]:
# Assumption: for every horizon either all models are available or none.
horizon_list = sorted(valid_pretty)
# horizon -> list of mode keys, in the order they appear in valid_pretty.
model_list = {h: list(valid_pretty[h]) for h in horizon_list}

tab_data, stat_test_data = collect_data_for_final_table(
    model_list,
    res_dc,
    valid_pretty,
    horizon_list,
    task_ids_sorted_by_num_features,
    min_diff_dc,
)
not_different = do_wilcoxon_test(
    stat_test_data,
    model_list,
    horizon_list,
    valid_pretty,
    exclude=["Oracle"],
)

In [None]:
# Build the results table and print it as plain text and as LaTeX, followed by
# the output of the Wilcoxon test cell.
df = pd.DataFrame(tab_data)
# Disabled: per-horizon rank columns.
#for horizon in horizon_list:
#    df['Rank_%s' % horizon] = df[horizon].rank(method='average', ascending=True)
print(df, df.to_latex(), not_different, sep="\n")

In [None]:
# Rank plot over all datasets for the 60-minute horizon.
HORIZON = 60
model_list = [*valid_pretty[HORIZON]]
rank(
    model_list,
    res_dc,
    valid_pretty,
    HORIZON,
    task_ids_sorted_by_num_features,
    n_iter=200,
    steplength=5,
)
plt.legend(loc="upper right", fontsize=style_dc["fontsize"])