In [1]:
!pip install openml tqdm
import openml
from tqdm import tqdm
import pickle




What we want is as following:
- Get a list of runs that were done with Multi Layer Perceptron
- For these runs, get the relative tasks
- Check the ensemble runs for each task
- Get average accuracy for ensemble and MLP
- Get dataset features
- Train a model having dataset features as input, best type of model as output

In [2]:
runs_with_mlp = openml.runs.list_runs(flow=[1820])
runs_with_mlp

OrderedDict([(502135,
              {'run_id': 502135,
               'task_id': 1,
               'setup_id': 2826,
               'flow_id': 1820,
               'uploader': 894,
               'task_type': <TaskType.SUPERVISED_CLASSIFICATION: 1>,
               'upload_time': '2015-12-20 18:03:19',
               'error_message': ''}),
             (1570518,
              {'run_id': 1570518,
               'task_id': 2,
               'setup_id': 2826,
               'flow_id': 1820,
               'uploader': 1,
               'task_type': <TaskType.SUPERVISED_CLASSIFICATION: 1>,
               'upload_time': '2016-07-18 17:51:13',
               'error_message': ''}),
             (505241,
              {'run_id': 505241,
               'task_id': 3,
               'setup_id': 2826,
               'flow_id': 1820,
               'uploader': 603,
               'task_type': <TaskType.SUPERVISED_CLASSIFICATION: 1>,
               'upload_time': '2016-01-11 21:46:34',
               

In [3]:
runs_with_mlp.values()

odict_values([{'run_id': 502135, 'task_id': 1, 'setup_id': 2826, 'flow_id': 1820, 'uploader': 894, 'task_type': <TaskType.SUPERVISED_CLASSIFICATION: 1>, 'upload_time': '2015-12-20 18:03:19', 'error_message': ''}, {'run_id': 1570518, 'task_id': 2, 'setup_id': 2826, 'flow_id': 1820, 'uploader': 1, 'task_type': <TaskType.SUPERVISED_CLASSIFICATION: 1>, 'upload_time': '2016-07-18 17:51:13', 'error_message': ''}, {'run_id': 505241, 'task_id': 3, 'setup_id': 2826, 'flow_id': 1820, 'uploader': 603, 'task_type': <TaskType.SUPERVISED_CLASSIFICATION: 1>, 'upload_time': '2016-01-11 21:46:34', 'error_message': ''}, {'run_id': 535757, 'task_id': 3, 'setup_id': 2826, 'flow_id': 1820, 'uploader': 939, 'task_type': <TaskType.SUPERVISED_CLASSIFICATION: 1>, 'upload_time': '2016-03-09 15:10:52', 'error_message': ''}, {'run_id': 578264, 'task_id': 3, 'setup_id': 2826, 'flow_id': 1820, 'uploader': 939, 'task_type': <TaskType.SUPERVISED_CLASSIFICATION: 1>, 'upload_time': '2016-06-03 17:05:16', 'error_message

In [4]:
with open('runs_with_mlp.pickle', "wb") as handle:
    pickle.dump(runs_with_mlp, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [5]:
with open('runs_with_mlp.pickle', "rb") as handle:
    a = pickle.load(handle)


In [6]:
tasks = []
for run in tqdm(runs_with_mlp.values()):
    try:
        tasks.append(openml.tasks.get_task(run['task_id']))
    except Exception as e:
        print(e)
tasks

 40%|███▉      | 1353/3387 [37:01<1:13:11,  2.16s/it] 

https://www.openml.org/api/v1/xml/data/features/1176 returned code 274: No features found. Additionally, dataset processed with error - None


 85%|████████▍ | 2866/3387 [41:11<09:21,  1.08s/it]  

https://www.openml.org/api/v1/xml/data/features/1176 returned code 274: No features found. Additionally, dataset processed with error - None


 91%|█████████▏| 3091/3387 [41:51<00:32,  9.02it/s]

https://www.openml.org/api/v1/xml/data/features/1176 returned code 274: No features found. Additionally, dataset processed with error - None


 98%|█████████▊| 3320/3387 [42:09<00:06,  9.67it/s]

https://www.openml.org/api/v1/xml/data/features/1176 returned code 274: No features found. Additionally, dataset processed with error - None


100%|██████████| 3387/3387 [42:18<00:00,  1.33it/s]


[OpenML Classification Task
 Task Type Description: https://www.openml.org/tt/TaskType.SUPERVISED_CLASSIFICATION
 Task ID..............: 1
 Task URL.............: https://www.openml.org/t/1
 Estimation Procedure.: crossvalidation
 Evaluation Measure...: predictive_accuracy
 Target Feature.......: class
 # of Classes.........: 6
 Cost Matrix..........: Available,
 OpenML Classification Task
 Task Type Description: https://www.openml.org/tt/TaskType.SUPERVISED_CLASSIFICATION
 Task ID..............: 2
 Task URL.............: https://www.openml.org/t/2
 Estimation Procedure.: crossvalidation
 Evaluation Measure...: predictive_accuracy
 Target Feature.......: class
 # of Classes.........: 6
 Cost Matrix..........: Available,
 OpenML Classification Task
 Task Type Description: https://www.openml.org/tt/TaskType.SUPERVISED_CLASSIFICATION
 Task ID..............: 3
 Task URL.............: https://www.openml.org/t/3
 Estimation Procedure.: crossvalidation
 Target Feature.......: class
 # of Clas

In [7]:
with open('tasks.pickle', "wb") as handle:
    pickle.dump(tasks, handle, protocol=pickle.HIGHEST_PROTOCOL)


We're only interested in ensemble methods and neural networks. Let's create a dictionary stating whether a method is an ensemble/NN. The value will be true for ensembles, false for NN, missing for other methods.

In [8]:
is_ensemble = {"boost": True, "ada": True, "forest": True,"ensemble": True,"bag":True, "nn": False, "nnet": False, "mlp": False, "multilayerperceptron": False}

In [None]:
from random import shuffle
from collections import defaultdict
runs_for_task = defaultdict(list)

checked_flows = {}
non_interesting_flows = set()

for task in tqdm(tasks):
    runs = 0
    list_runs = openml.runs.list_runs(task=[task.task_id]).items()
    for run_id, run in list_runs:
        if runs>2000: # Don't get more than 2000 runs as it's too many
            break
        if run['flow_id'] in non_interesting_flows: # Already checked this flow and it's not ensemble or mlp
            continue
        elif run['flow_id'] in checked_flows: # Already checked this flow and it's ensemble/mlp, so it's interesting
            runs_for_task[task.task_id].append((openml.runs.get_run(run_id).evaluations['predictive_accuracy'], checked_flows[run['flow_id']]))
        else:
            flow_name = openml.flows.get_flow(run['flow_id']).name.lower()
            ensemble_found = [ensemble for method, ensemble in is_ensemble.items() if method in flow_name]
            if ensemble_found: # if the method used is an ensemble or NN
                runs_for_task[task.task_id].append((openml.runs.get_run(run_id).evaluations['predictive_accuracy'], ensemble_found[0]))
                checked_flows[run['flow_id']] = ensemble_found[0]
            else:
                non_interesting_flows.add(run['flow_id'])
        runs += 1
runs_for_task

  3%|▎         | 110/3383 [4:06:18<47:38:37, 52.40s/it]  

In [None]:
with open('runs_for_task.pickle', "wb") as handle:
    pickle.dump(runs_for_task, handle, protocol=pickle.HIGHEST_PROTOCOL)


In [None]:
data_for_task = {}
for task in tqdm(tasks):
    data_for_task[task.task_id] = openml.datasets.get_dataset(task.dataset_id)

In [None]:
import pandas as pd
import numpy as np
rows = []
for task in tqdm(tasks):
    average_ensemble = np.mean([run for run,is_ensemble in runs_for_task[task.task_id] if is_ensemble])
    average_mlp = np.mean([run for run,is_ensemble in runs_for_task[task.task_id] if not is_ensemble])
    this_task = {"id": task.task_id,  "average_ensemble": average_ensemble, "average_mlp": average_mlp, "ensemble_mlp_diffn":average_ensemble-average_mlp}
    rows.append({**this_task,   ** data_for_task[task.task_id].qualities})

In [None]:
dataset = pd.DataFrame(rows).dropna(subset=["ensemble_mlp_diffn"]).interpolate()

In [None]:
dataset



We then get the datasets, and save a dataframe with:
features of dataset
average accuracy of MLP
average accuracy of ensembles

desired output: difference between the last two columns


In [None]:
#!pip install catboost
from catboost import CatBoostRegressor
from sklearn.model_selection import train_test_split

model = CatBoostRegressor()
X = dataset.drop(["average_ensemble", "average_mlp", "ensemble_mlp_diffn", "id"], axis=1)
y = dataset["ensemble_mlp_diffn"]
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)
model.fit(X_train, y_train)

In [None]:
model.score(X_test, y_test)