In [None]:
import openml
import pandas as pd
import holoviews as hv
# Select Bokeh as the HoloViews plotting backend for the figures in this notebook.
hv.extension('bokeh')

In [None]:
# Human-readable classifier label -> OpenML flow id of the Weka flow to compare.
# The labels end up as the category names on the final plot.
weka_flows = {
    'SVM': 8666,
    'LogitBoost(REPTree)': 8676,  # label fixed: was misspelled 'LoginBoost'
    'REPTree': 8693,
    'Logistic': 8680,
    'Random Forest': 8690,
    'k-NN': 8682,
    # 'NaiveBayes': 8688,  # currently excluded from the comparison
}

# Reverse lookup (flow id -> label), used to name the flow of each evaluation.
flowid_flowname = {flow_id: flow_name
                   for flow_name, flow_id in weka_flows.items()}

# Benchmark suite whose tasks restrict the evaluation query.
study = openml.study.get_study('OpenML-CC18', 'tasks')

# Evaluation measures to download; each becomes a column after pivoting.
measures = ['predictive_accuracy']

In [None]:
# Query OpenML once per measure, restricted to the selected Weka flows and
# the tasks of the study; results are stored under the measure's name.
evaluations = {
    measure: openml.evaluations.list_evaluations(
        measure,
        flow=weka_flows.values(),
        task=study.tasks,
    )
    for measure in measures
}

In [None]:
columns = ['task_id', 'setup_id', 'flow_id', 'flow_name', 'measure', 'value']

# Flatten every downloaded evaluation into one long-format record:
# one row per (evaluation, measure) pair, labelled with the flow's name.
records = [
    {
        'task_id': evaluation.task_id,
        'setup_id': evaluation.setup_id,
        'flow_id': evaluation.flow_id,
        'flow_name': flowid_flowname[evaluation.flow_id],
        'measure': measure,
        'value': evaluation.value,
    }
    for measure in measures
    for evaluation in evaluations[measure].values()
]

# Long -> wide: one column per measure, indexed by task/setup/flow.
# pivot_table aggregates duplicate index entries with its default (mean).
df = pd.DataFrame(data=records, columns=columns).pivot_table(
    index=['task_id', 'setup_id', 'flow_id', 'flow_name'],
    columns='measure',
    values='value',
)
df

In [None]:
# Box-and-whisker plot of predictive accuracy, one box per Weka classifier.
title = "Performance of Weka classifiers"
boxplot = hv.BoxWhisker(df, 'flow_name', 'predictive_accuracy', label=title)
# .options() returns a new, customised object instead of modifying the
# element in place, so the result must be rebound for the settings to apply.
boxplot = boxplot.options(show_legend=False, width=800)

boxplot