Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 35 additions & 21 deletions openml/datasets/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from oslo_concurrency import lockutils
import xmltodict

import openml.utils
from .dataset import OpenMLDataset
from ..exceptions import OpenMLCacheException, OpenMLServerNoResult
from .. import config
Expand Down Expand Up @@ -137,8 +138,10 @@ def _get_cached_dataset_arff(dataset_id):
"cached" % dataset_id)


def list_datasets(offset=None, size=None, status=None, **kwargs):
"""Return a list of all dataset which are on OpenML.
def list_datasets(offset=None, size=None, status=None, tag=None, **kwargs):

"""
Return a list of all datasets which are on OpenML. (Supports large amount of results)

Parameters
----------
Expand All @@ -150,9 +153,11 @@ def list_datasets(offset=None, size=None, status=None, **kwargs):
Should be {active, in_preparation, deactivated}. By
default active datasets are returned, but also datasets
from another status can be requested.
tag : str, optional
kwargs : dict, optional
Legal filter operators (keys in the dict):
{tag, status, limit, offset, data_name, data_version, number_instances, number_features, number_classes, number_missing_values}.
data_name, data_version, number_instances,
number_features, number_classes, number_missing_values.

Returns
-------
Expand All @@ -169,29 +174,38 @@ def list_datasets(offset=None, size=None, status=None, **kwargs):
If qualities are calculated for the dataset, some of
these are also returned.
"""
api_call = "data/list"
if offset is not None:
api_call += "/offset/%d" % int(offset)

if size is not None:
api_call += "/limit/%d" % int(size)
return openml.utils.list_all(_list_datasets, offset=offset, size=size, status=status, tag=tag, **kwargs)

if status is not None:
api_call += "/status/%s" %status

def _list_datasets(**kwargs):

"""
Perform the API call to return a list of all datasets.

Parameters
----------
kwargs : dict, optional
Legal filter operators (keys in the dict):
{tag, status, limit, offset, data_name, data_version, number_instances,
number_features, number_classes, number_missing_values}.

Returns
-------
datasets : dict of dicts
"""

api_call = "data/list"

if kwargs is not None:
for filter, value in kwargs.items():
api_call += "/%s/%s" % (filter, value)
for operator, value in kwargs.items():
api_call += "/%s/%s" % (operator, value)
return __list_datasets(api_call)

return _list_datasets(api_call)

def __list_datasets(api_call):

def _list_datasets(api_call):
# TODO add proper error handling here!
try:
xml_string = _perform_api_call(api_call)
except OpenMLServerNoResult:
return dict()
xml_string = _perform_api_call(api_call)
datasets_dict = xmltodict.parse(xml_string, force_list=('oml:dataset',))

# Minimalistic check if the XML is useful
Expand Down Expand Up @@ -224,7 +238,7 @@ def check_datasets_active(dataset_ids):

Parameters
----------
dataset_id : iterable
dataset_ids : iterable
Integers representing dataset ids.

Returns
Expand Down Expand Up @@ -279,7 +293,7 @@ def get_dataset(dataset_id):

Parameters
----------
ddataset_id : int
dataset_id : int
Dataset ID of the dataset to download

Returns
Expand Down
68 changes: 48 additions & 20 deletions openml/evaluations/functions.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
import xmltodict

from openml.exceptions import OpenMLServerNoResult
import openml.utils
from .._api_calls import _perform_api_call
from ..evaluations import OpenMLEvaluation


def list_evaluations(function, offset=None, size=None, id=None, task=None,
setup=None, flow=None, uploader=None, tag=None):
"""List all run-evaluation pairs matching all of the given filters.
"""
List all run-evaluation pairs matching all of the given filters.
(Supports large amount of results)

Perform API call ``/evaluation/list/function/{function}/{filters}``

Parameters
----------
function : str
function : str
the evaluation function. e.g., predictive_accuracy
offset : int, optional
the number of runs to skip, starting from the first
Expand All @@ -37,11 +38,45 @@ def list_evaluations(function, offset=None, size=None, id=None, task=None,
dict
"""

api_call = "evaluation/list/function/%s" %function
if offset is not None:
api_call += "/offset/%d" % int(offset)
if size is not None:
api_call += "/limit/%d" % int(size)
return openml.utils.list_all(_list_evaluations, function, offset=offset, size=size,
id=id, task=task, setup=setup, flow=flow, uploader=uploader, tag=tag)


def _list_evaluations(function, id=None, task=None,
setup=None, flow=None, uploader=None, **kwargs):
"""
Perform API call ``/evaluation/list/function/{function}/{filters}``

Parameters
----------
The arguments that are lists are separated from the single value
ones which are put into the kwargs.

function : str
the evaluation function. e.g., predictive_accuracy

id : list, optional

task : list, optional

setup: list, optional

flow : list, optional

uploader : list, optional

kwargs: dict, optional
Legal filter operators: tag, limit, offset.

Returns
-------
dict
"""

api_call = "evaluation/list/function/%s" % function
if kwargs is not None:
for operator, value in kwargs.items():
api_call += "/%s/%s" % (operator, value)
if id is not None:
api_call += "/run/%s" % ','.join([str(int(i)) for i in id])
if task is not None:
Expand All @@ -52,19 +87,13 @@ def list_evaluations(function, offset=None, size=None, id=None, task=None,
api_call += "/flow/%s" % ','.join([str(int(i)) for i in flow])
if uploader is not None:
api_call += "/uploader/%s" % ','.join([str(int(i)) for i in uploader])
if tag is not None:
api_call += "/tag/%s" % tag

return _list_evaluations(api_call)
return __list_evaluations(api_call)


def _list_evaluations(api_call):
def __list_evaluations(api_call):
"""Helper function to parse API calls which are lists of runs"""
try:
xml_string = _perform_api_call(api_call)
except OpenMLServerNoResult:
return dict()

xml_string = _perform_api_call(api_call)
evals_dict = xmltodict.parse(xml_string, force_list=('oml:evaluation',))
# Minimalistic check if the XML is useful
if 'oml:evaluations' not in evals_dict:
Expand All @@ -88,5 +117,4 @@ def _list_evaluations(api_call):
eval_['oml:upload_time'], float(eval_['oml:value']),
array_data)
evals[run_id] = evaluation
return evals

return evals
52 changes: 34 additions & 18 deletions openml/flows/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from openml._api_calls import _perform_api_call
from openml.exceptions import OpenMLServerNoResult
from . import OpenMLFlow
import openml.utils


def get_flow(flow_id):
Expand All @@ -30,8 +31,11 @@ def get_flow(flow_id):
return flow


def list_flows(offset=None, size=None, tag=None):
"""Return a list of all flows which are on OpenML.
def list_flows(offset=None, size=None, tag=None, **kwargs):

"""
Return a list of all flows which are on OpenML.
(Supports large amount of results)

Parameters
----------
Expand All @@ -41,6 +45,8 @@ def list_flows(offset=None, size=None, tag=None):
the maximum number of flows to return
tag : str, optional
the tag to include
kwargs: dict, optional
Legal filter operators: uploader.

Returns
-------
Expand All @@ -57,17 +63,29 @@ def list_flows(offset=None, size=None, tag=None):
- external version
- uploader
"""
api_call = "flow/list"
if offset is not None:
api_call += "/offset/%d" % int(offset)
return openml.utils.list_all(_list_flows, offset=offset, size=size, tag=tag, **kwargs)


def _list_flows(**kwargs):
"""
Perform the API call that returns a list of all flows.

Parameters
----------
kwargs: dict, optional
Legal filter operators: uploader, tag, limit, offset.

if size is not None:
api_call += "/limit/%d" % int(size)
Returns
-------
flows : dict
"""
api_call = "flow/list"

if tag is not None:
api_call += "/tag/%s" % tag
if kwargs is not None:
for operator, value in kwargs.items():
api_call += "/%s/%s" % (operator, value)

return _list_flows(api_call)
return __list_flows(api_call)


def flow_exists(name, external_version):
Expand All @@ -79,7 +97,7 @@ def flow_exists(name, external_version):
----------
name : string
Name of the flow
version : string
external_version : string
Version information associated with flow.

Returns
Expand Down Expand Up @@ -108,11 +126,9 @@ def flow_exists(name, external_version):
return False


def _list_flows(api_call):
try:
xml_string = _perform_api_call(api_call)
except OpenMLServerNoResult:
return dict()
def __list_flows(api_call):

xml_string = _perform_api_call(api_call)
flows_dict = xmltodict.parse(xml_string, force_list=('oml:flow',))

# Minimalistic check if the XML is useful
Expand Down Expand Up @@ -186,11 +202,11 @@ def assert_flows_equal(flow1, flow2,
# Tags aren't directly created by the server,
# but the uploader has no control over them!
'tags']
ignored_by_python_API = ['binary_url', 'binary_format', 'binary_md5',
ignored_by_python_api = ['binary_url', 'binary_format', 'binary_md5',
'model']

for key in set(flow1.__dict__.keys()).union(flow2.__dict__.keys()):
if key in generated_by_the_server + ignored_by_python_API:
if key in generated_by_the_server + ignored_by_python_api:
continue
attr1 = getattr(flow1, key, None)
attr2 = getattr(flow2, key, None)
Expand Down
Loading