In [None]:
import glob
import os
import sys

# Resolve the benchmark root (relative to this notebook) and its `reports` folder.
automlbenchmark_path = ".."
amlb_dir = os.path.realpath(os.path.expanduser(automlbenchmark_path))
amlb_reports_dir = os.path.join(amlb_dir, 'reports')
# Prepend both folders to the import path; `reports` is inserted last so it ends up first.
sys.path.insert(0, amlb_dir)
sys.path.insert(0, amlb_reports_dir)

In [None]:
import numpy as np
import pandas as pd

In [None]:
%run ./reports_config.py

In [None]:
from report import draw_score_heatmap, draw_score_parallel_coord, draw_score_pointplot, draw_score_stripplot, draw_score_barplot\
                  ,prepare_results, render_leaderboard, render_metadata, render_summary
from report.config import *
from report.util import create_file, display
from report.visualizations.util import register_colormap, render_colormap, savefig

# Comparisons

## Compare custom runs

Here we compare results for one fold.

In [None]:
# Default report configuration; later cells may override any of these values.
constraint = "1h8c"
results_dir = output_dir = "."

# Framework selection (keys of `definitions`); an empty inclusion list keeps every framework.
included_frameworks, excluded_frameworks = [], []

# Optional callables passed to the plots to order the frameworks and to label them.
frameworks_sort_key = frameworks_labels = None
# frameworks_sort_key = lambda f: definitions[f]['key'] if 'key' in definitions[f] else f.lower()
# frameworks_labels = lambda l: definitions[l]['framework'].lower()

# Options forwarded to `prepare_results`.
duplicates_handling = 'fail'  # accepted values: 'fail', 'keep_first', 'keep_last', 'keep_none'
imputation = normalization = None
# normalization = (0, 'h2o', 'mean')

# Optional filter used by `results_as_df`: it is called with the whole results table
# (`res.results`) and must return a boolean mask selecting the rows to keep.
row_filter = None
# row_filter = lambda r: r.fold == 0

title_extra = ""
# register_colormap(config.colormap, ('colorblind', [1, 0, 2, 3, 4, 5]))

In [None]:
# Example cell: adapt the paths, options and framework definitions below to your own results.

results_dir = "../results"
output_dir = "./tmp"
duplicates_handling = 'keep_last'
normalization = (0, 'constantpredictor', 'mean')
row_filter = lambda r: ~r.task.isin(['kddcup09_appetency', 'colleges'])

# One entry per framework to compare:
#   - `framework`: the framework name that is renamed to the entry key when results are loaded,
#   - `results_files`: the result files collected for that framework,
#   - `ref` (optional): flags the entry as a reference run, loaded before the others.
definitions = {
    'constantpredictor': {
        'ref': True,
        'framework': 'constantpredictor_enc',
        'results_files': glob.glob(f"{results_dir}/constantpredictor*/scores/results.csv"),
    },
    'autogluon': {
        'framework': 'AutoGluon',
        'results_files': glob.glob(f"{results_dir}/autogluon*/scores/results.csv"),
    },
    'autosklearn': {
        'framework': 'autosklearn',
        'results_files': glob.glob(f"{results_dir}/autosklearn*/scores/results.csv"),
    },
    'h2oautoml': {
        'framework': 'H2OAutoML',
        'results_files': glob.glob(f"{results_dir}/h2oautoml*/scores/results.csv"),
    },
    'tpot': {
        'framework': 'TPOT',
        'results_files': glob.glob(f"{results_dir}/tpot*/scores/results.csv"),
    },
}

definitions

In [None]:
# Keep the definitions that are explicitly included (or all of them when no
# inclusion list is given) and that are not explicitly excluded.
runs = {
    name: definition
    for name, definition in definitions.items()
    if (not included_frameworks or name in included_frameworks)
    and name not in excluded_frameworks
}
runs

In [None]:
def results_as_df(results_dict, row_filter=None):
    """Concatenate the results of several runs into a single DataFrame.

    For each ``name -> res`` entry, only the rows of ``res.results`` whose
    ``framework`` column equals ``name`` are kept; entries whose value is
    ``None`` are skipped.

    Args:
        results_dict: mapping from a framework name to an object exposing a
            ``results`` DataFrame (with a ``framework`` column), or to ``None``.
        row_filter: optional callable receiving the whole ``results`` DataFrame
            and returning a boolean mask (or a scalar boolean) that is AND-ed
            with the framework selection.

    Returns:
        The concatenation of the selected rows, or an empty DataFrame when there
        is nothing to concatenate (empty mapping, or only ``None`` values).
    """
    selected = []
    for name, res in results_dict.items():
        if res is None:
            continue
        df = res.results
        mask = df.framework == name
        if row_filter is not None:
            mask = mask & row_filter(df)
        selected.append(df.loc[mask])

    # pd.concat raises ValueError on an empty list: return an empty frame instead,
    # so that e.g. a selection containing only reference runs can still be reported.
    if not selected:
        return pd.DataFrame()
    return pd.concat(selected)

In [None]:
# Load the runs flagged as reference (`ref=True`); each framework is renamed
# to the key of its definition.
ref_results = {
    name: prepare_results(
        run['results_files'],
        renamings={run['framework']: name},
        exclusions=excluded_frameworks,
        normalization=normalization,
        duplicates_handling=duplicates_handling,
    )
    for name, run in runs.items()
    if run.get('ref', False)
}

In [None]:
# Take the metadata from the first reference run (requires at least one run flagged `ref=True`).
metadata = next(iter(ref_results.values())).metadata

In [None]:
# Distinct values of the `type` attribute across the metadata entries;
# the plotting cells below test this list before drawing each figure.
problem_types = (
    pd.DataFrame([m.__dict__ for m in metadata.values()])['type']
    .unique()
    .tolist()
)

In [None]:
# Render the metadata, passing the path of the `metadata.csv` file to export.
render_metadata(
    metadata,
    filename=create_file(output_dir, "datasets", results_group, "metadata.csv"),
)

In [None]:
# Single DataFrame holding the (optionally filtered) results of the reference runs.
all_ref_res = results_as_df(ref_results, row_filter=row_filter)

In [None]:
# Load the remaining (non-reference) runs, handing them the reference results.
runs_results = {
    name: prepare_results(
        run['results_files'],
        renamings={run['framework']: name},
        exclusions=excluded_frameworks,
        imputation=imputation,
        normalization=normalization,
        ref_results=all_ref_res,
        duplicates_handling=duplicates_handling,
    )
    for name, run in runs.items()
    if name not in ref_results
}

In [None]:
# Reference results first, followed by the results of all the other runs.
all_res = pd.concat([all_ref_res, results_as_df(runs_results, row_filter)])

In [None]:
# Summary of the `result` column, exported as results_summary.csv.
res_summary = render_summary('result', results=all_res)
res_summary.to_csv(create_file(output_dir, "tables", "results_summary.csv"))

In [None]:
# Summary of the `score` column, exported as score_summary.csv.
score_summary = render_summary('score', results=all_res)
score_summary.to_csv(create_file(output_dir, "tables", "score_summary.csv"))

In [None]:
# Summary of the `models_count` column, exported as models_summary.csv.
models_summary = render_summary('models_count', results=all_res)
models_summary.to_csv(create_file(output_dir, "tables", "models_summary.csv"))

In [None]:
# The `norm_score` summary is only produced when a normalization is configured.
if normalization:
    norm_score_summary = render_summary('norm_score', results=all_res)
    norm_score_summary.to_csv(create_file(output_dir, "tables", "normalized_score_summary.csv"))

In [None]:
# Aggregated leaderboard on the `score` column, exported as benchmark_leaderboard.csv.
benchmark_leaderboard = render_leaderboard('score', results=all_res, aggregate=True)
benchmark_leaderboard.to_csv(create_file(output_dir, "tables", "benchmark_leaderboard.csv"))

In [None]:
# Binary classification: score heatmap, saved as binary_score_heat.png.
if 'binary' in problem_types:
    fig = draw_score_heatmap(
        'score',
        results=all_res,
        type_filter='binary',
        metadata=metadata,
        x_labels=frameworks_labels or True,
        x_sort_by=frameworks_sort_key,
        y_sort_by='nrows',
        title=f"Scores ({binary_score_label}) on {results_group} binary classification problems{title_extra}",
        center=0.5,
    )
    savefig(fig, create_file(output_dir, "visualizations", "binary_score_heat.png"))

In [None]:
# Multi-class classification: score heatmap, saved as multiclass_score_heat.png.
if 'multiclass' in problem_types:
    fig = draw_score_heatmap(
        'score',
        results=all_res,
        type_filter='multiclass',
        metadata=metadata,
        x_labels=frameworks_labels or True,
        x_sort_by=frameworks_sort_key,
        y_sort_by='nrows',
        title=f"Scores ({multiclass_score_label}) on {results_group} multi-class classification problems{title_extra}",
        center=0,
    )
    savefig(fig, create_file(output_dir, "visualizations", "multiclass_score_heat.png"))

In [None]:
# Regression: score heatmap, saved as regression_score_heat.png.
if 'regression' in problem_types:
    fig = draw_score_heatmap(
        'score',
        results=all_res,
        type_filter='regression',
        metadata=metadata,
        x_labels=frameworks_labels or True,
        x_sort_by=frameworks_sort_key,
        y_sort_by='nrows',
        title=f"Scores ({regression_score_label}) on {results_group} regression problems{title_extra}",
        center=0,
    )
    savefig(fig, create_file(output_dir, "visualizations", "regression_score_heat.png"))

In [None]:
# Render the colormap configured in `config.colormap`.
render_colormap(config.colormap)

In [None]:
# Binary classification: score bar plot, saved as binary_score_barplot.png.
if 'binary' in problem_types:
    fig = draw_score_barplot(
        'score',
        results=all_res,
        type_filter='binary',
        metadata=metadata,
        x_sort_by=tasks_sort_by,
        ylabel=binary_score_label,
        ylim=dict(bottom=.5),
        hue_sort_by=frameworks_sort_key,
        ci=95,
        title=f"Scores ({binary_score_label}) on {results_group} binary classification problems{title_extra}",
        legend_loc='lower center',
        legend_labels=frameworks_labels,
    )
    savefig(fig, create_file(output_dir, "visualizations", "binary_score_barplot.png"))

In [None]:
# Multiclass classification: score bar plot, saved as multiclass_score_barplot.png.
if 'multiclass' in problem_types:
    fig = draw_score_barplot(
        'score',
        results=all_res,
        type_filter='multiclass',
        metadata=metadata,
        x_sort_by=tasks_sort_by,
        ylabel=multiclass_score_label,
        ylim=dict(top=0.1),
        hue_sort_by=frameworks_sort_key,
        ci=95,
        title=f"Scores ({multiclass_score_label}) on {results_group} multiclass classification problems{title_extra}",
        legend_loc='lower center',
        legend_labels=frameworks_labels,
    )
    savefig(fig, create_file(output_dir, "visualizations", "multiclass_score_barplot.png"))

In [None]:
# Regression: score bar plot on a symlog y axis, saved as regression_score_barplot.png.
if 'regression' in problem_types:
    fig = draw_score_barplot(
        'score',
        results=all_res,
        type_filter='regression',
        metadata=metadata,
        x_sort_by=tasks_sort_by,
        yscale='symlog',
        ylabel=regression_score_label,
        ylim=dict(top=0.1),
        hue_sort_by=frameworks_sort_key,
        ci=95,
        # Regression tasks are not classification tasks: use the same wording
        # as the regression heatmap and strip plot titles.
        title=f"Scores ({regression_score_label}) on {results_group} regression problems{title_extra}",
        legend_loc='lower center',
        legend_labels=frameworks_labels,
        size=(8, 6),
    )
    savefig(fig, create_file(output_dir, "visualizations", "regression_score_barplot.png"))

In [None]:
# Binary classification: score point plot, saved as binary_score_pointplot.png.
if 'binary' in problem_types:
    fig = draw_score_pointplot(
        'score',
        results=all_res,
        type_filter='binary',
        metadata=metadata,
        x_sort_by=tasks_sort_by,
        ylabel=binary_score_label,
        ylim=dict(bottom=.5),
        hue_sort_by=frameworks_sort_key,
        join='none',
        marker='hline_xspaced',
        ci=95,
        title=f"Scores ({binary_score_label}) on {results_group} binary classification problems{title_extra}",
        legend_loc='lower center',
        legend_labels=frameworks_labels,
    )
    savefig(fig, create_file(output_dir, "visualizations", "binary_score_pointplot.png"))

In [None]:
# Multiclass classification: score point plot, saved as multiclass_score_pointplot.png.
if 'multiclass' in problem_types:
    fig = draw_score_pointplot(
        'score',
        results=all_res,
        type_filter='multiclass',
        metadata=metadata,
        x_sort_by=tasks_sort_by,
        ylabel=multiclass_score_label,
        hue_sort_by=frameworks_sort_key,
        join='none',
        marker='hline_xspaced',
        ci=95,
        title=f"Scores ({multiclass_score_label}) on {results_group} multiclass classification problems{title_extra}",
        legend_loc='lower center',
        legend_labels=frameworks_labels,
    )
    savefig(fig, create_file(output_dir, "visualizations", "multiclass_score_pointplot.png"))

In [None]:
# Regression: score point plot on a symlog y axis, saved as regression_score_pointplot.png.
if 'regression' in problem_types:
    fig = draw_score_pointplot(
        'score',
        results=all_res,
        type_filter='regression',
        metadata=metadata,
        x_sort_by=tasks_sort_by,
        ylabel=regression_score_label,
        yscale='symlog',
        ylim=dict(top=0.1),
        hue_sort_by=frameworks_sort_key,
        join='none',
        marker='hline_xspaced',
        ci=95,
        # Regression tasks are not classification tasks: use the same wording
        # as the regression heatmap and strip plot titles.
        title=f"Scores ({regression_score_label}) on {results_group} regression problems{title_extra}",
        legend_loc='lower center',
        legend_labels=frameworks_labels,
        size=(8, 6),
    )
    savefig(fig, create_file(output_dir, "visualizations", "regression_score_pointplot.png"))

In [None]:
# Binary classification: score strip plot (results sorted by framework first),
# saved as binary_score_stripplot.png.
if 'binary' in problem_types:
    fig = draw_score_stripplot(
        'score',
        results=all_res.sort_values(by=['framework']),
        type_filter='binary',
        metadata=metadata,
        xlabel=binary_score_label,
        y_sort_by=tasks_sort_by,
        hue_sort_by=frameworks_sort_key,
        title=f"Scores ({binary_score_label}) on {results_group} binary classification problems{title_extra}",
        legend_labels=frameworks_labels,
    )
    savefig(fig, create_file(output_dir, "visualizations", "binary_score_stripplot.png"))

In [None]:
# Multi-class classification: score strip plot on a symlog x axis (results sorted
# by framework first), saved as multiclass_score_stripplot.png.
if 'multiclass' in problem_types:
    fig = draw_score_stripplot(
        'score',
        results=all_res.sort_values(by=['framework']),
        type_filter='multiclass',
        metadata=metadata,
        xlabel=multiclass_score_label,
        xscale='symlog',
        y_sort_by=tasks_sort_by,
        hue_sort_by=frameworks_sort_key,
        title=f"Scores ({multiclass_score_label}) on {results_group} multi-class classification problems{title_extra}",
        legend_labels=frameworks_labels,
    )
    savefig(fig, create_file(output_dir, "visualizations", "multiclass_score_stripplot.png"))

In [None]:
# Regression: score strip plot on a symlog x axis, saved as regression_score_stripplot.png.
if 'regression' in problem_types:
    fig = draw_score_stripplot(
        'score',
        results=all_res,
        type_filter='regression',
        metadata=metadata,
        xlabel=regression_score_label,
        xscale='symlog',
        y_sort_by=tasks_sort_by,
        hue_sort_by=frameworks_sort_key,
        title=f"Scores ({regression_score_label}) on {results_group} regression problems{title_extra}",
        legend_labels=frameworks_labels,
    )
    savefig(fig, create_file(output_dir, "visualizations", "regression_score_stripplot.png"))

In [None]:
# Binary classification: strip plot of the `norm_score` column, drawn only when a
# normalization is configured; saved as binary_rel_score_stripplot.png.
if 'binary' in problem_types and normalization:
    fig = draw_score_stripplot(
        'norm_score',
        results=all_res,
        type_filter='binary',
        metadata=metadata,
        xlabel=f"rel. {binary_score_label}",
        y_sort_by='nrows',
        hue_sort_by=frameworks_sort_key,
        title=f"Relative scores ({binary_score_label}) on {results_group} binary classification problems{title_extra}",
        legend_labels=frameworks_labels,
    )
    savefig(fig, create_file(output_dir, "visualizations", "binary_rel_score_stripplot.png"))

In [None]:
# Multi-class classification: strip plot of the `norm_score` column on a symlog x axis,
# drawn only when a normalization is configured; saved as multiclass_rel_score_stripplot.png.
if 'multiclass' in problem_types and normalization:
    fig = draw_score_stripplot(
        'norm_score',
        results=all_res,
        type_filter='multiclass',
        metadata=metadata,
        xlabel=f"rel. {multiclass_score_label}",
        xscale='symlog',
        y_sort_by='nrows',
        hue_sort_by=frameworks_sort_key,
        title=f"Relative scores ({multiclass_score_label}) on {results_group} multi-class classification problems{title_extra}",
        legend_labels=frameworks_labels,
    )
    savefig(fig, create_file(output_dir, "visualizations", "multiclass_rel_score_stripplot.png"))

In [None]:
# Regression: strip plot of the `norm_score` column, drawn only when a
# normalization is configured; saved as regression_rel_score_stripplot.png.
if 'regression' in problem_types and normalization:
    fig = draw_score_stripplot(
        'norm_score',
        results=all_res,
        type_filter='regression',
        metadata=metadata,
        xlabel=f"rel. {regression_score_label}",
        y_sort_by='nrows',
        hue_sort_by=frameworks_sort_key,
        title=f"Relative scores ({regression_score_label}) on {results_group} regression problems{title_extra}",
        legend_labels=frameworks_labels,
    )
    savefig(fig, create_file(output_dir, "visualizations", "regression_rel_score_stripplot.png"))