openml · mfeurer · Mar 28, 2018 · Mar 21, 2018 · Mar 21, 2018 · Mar 22, 2018
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
@@ -8,6 +8,7 @@
 from oslo_concurrency import lockutils
 import xmltodict
 
+import openml.utils
 from .dataset import OpenMLDataset
 from ..exceptions import OpenMLCacheException, OpenMLServerNoResult
 from .. import config
@@ -137,8 +138,10 @@ def _get_cached_dataset_arff(dataset_id):
                                    "cached" % dataset_id)
 
 
-def list_datasets(offset=None, size=None, status=None, **kwargs):
-    """Return a list of all dataset which are on OpenML.
+def list_datasets(offset=None, size=None, status=None, tag=None, **kwargs):
+
+    """
+    Return a list of all datasets which are on OpenML. (Supports large amount of results)
 
     Parameters
     ----------
@@ -150,9 +153,11 @@ def list_datasets(offset=None, size=None, status=None, **kwargs):
         Should be {active, in_preparation, deactivated}. By
         default active datasets are returned, but also datasets
         from another status can be requested.
+    tag : str, optional
     kwargs : dict, optional
         Legal filter operators (keys in the dict):
-        {tag, status, limit, offset, data_name, data_version, number_instances, number_features, number_classes, number_missing_values}.
+        data_name, data_version, number_instances,
+        number_features, number_classes, number_missing_values.
 
     Returns
     -------
@@ -169,29 +174,38 @@ def list_datasets(offset=None, size=None, status=None, **kwargs):
         If qualities are calculated for the dataset, some of
         these are also returned.
     """
-    api_call = "data/list"
-    if offset is not None:
-        api_call += "/offset/%d" % int(offset)
 
-    if size is not None:
-        api_call += "/limit/%d" % int(size)
+    return openml.utils.list_all(_list_datasets, offset=offset, size=size, status=status, tag=tag, **kwargs)
 
-    if status is not None:
-        api_call += "/status/%s" %status
+
+def _list_datasets(**kwargs):
+
+    """
+    Perform api call to return a list of all datasets.
+
+    Parameters
+    ----------
+    kwargs : dict, optional
+        Legal filter operators (keys in the dict):
+        tag, status, limit, offset, data_name, data_version, number_instances,
+        number_features, number_classes, number_missing_values.
+
+    Returns
+    -------
+    datasets : dict of dicts
+    """
+
+    api_call = "data/list"
 
     if kwargs is not None:
-        for filter, value in kwargs.items():
-            api_call += "/%s/%s" % (filter, value)
+        for operator, value in kwargs.items():
+            api_call += "/%s/%s" % (operator, value)
+    return __list_datasets(api_call)
 
-    return _list_datasets(api_call)
 
+def __list_datasets(api_call):
 
-def _list_datasets(api_call):
-    # TODO add proper error handling here!
-    try:
-        xml_string = _perform_api_call(api_call)
-    except OpenMLServerNoResult:
-        return dict()
+    xml_string = _perform_api_call(api_call)
     datasets_dict = xmltodict.parse(xml_string, force_list=('oml:dataset',))
 
     # Minimalistic check if the XML is useful
@@ -224,7 +238,7 @@ def check_datasets_active(dataset_ids):
 
     Parameters
     ----------
-    dataset_id : iterable
+    dataset_ids : iterable
         Integers representing dataset ids.
 
     Returns
@@ -279,7 +293,7 @@ def get_dataset(dataset_id):
 
     Parameters
     ----------
-    ddataset_id : int
+    dataset_id : int
         Dataset ID of the dataset to download
 
     Returns

diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py
@@ -1,19 +1,20 @@
 import xmltodict
 
 from openml.exceptions import OpenMLServerNoResult
+import openml.utils
 from .._api_calls import _perform_api_call
 from ..evaluations import OpenMLEvaluation
 
 
 def list_evaluations(function, offset=None, size=None, id=None, task=None,
                      setup=None, flow=None, uploader=None, tag=None):
-    """List all run-evaluation pairs matching all of the given filters.
+    """
+    List all run-evaluation pairs matching all of the given filters.
+    (Supports large amount of results)
 
-    Perform API call ``/evaluation/function{function}/{filters}``
-
     Parameters
     ----------
-    function : str 
+    function : str
         the evaluation function. e.g., predictive_accuracy
     offset : int, optional
         the number of runs to skip, starting from the first
@@ -37,11 +38,45 @@ def list_evaluations(function, offset=None, size=None, id=None, task=None,
     dict
     """
 
-    api_call = "evaluation/list/function/%s" %function
-    if offset is not None:
-        api_call += "/offset/%d" % int(offset)
-    if size is not None:
-        api_call += "/limit/%d" % int(size)
+    return openml.utils.list_all(_list_evaluations, function, offset=offset, size=size,
+                                 id=id, task=task, setup=setup, flow=flow, uploader=uploader, tag=tag)
+
+
+def _list_evaluations(function, id=None, task=None,
+                      setup=None, flow=None, uploader=None, **kwargs):
+    """
+    Perform API call ``/evaluation/list/function/{function}/{filters}``
+
+    Parameters
+    ----------
+    The arguments that are lists are separated from the single value
+    ones which are put into the kwargs.
+
+    function : str
+        the evaluation function. e.g., predictive_accuracy
+
+    id : list, optional
+
+    task : list, optional
+
+    setup : list, optional
+
+    flow : list, optional
+
+    uploader : list, optional
+
+    kwargs : dict, optional
+        Legal filter operators: tag, limit, offset.
+
+    Returns
+    -------
+    dict
+    """
+
+    api_call = "evaluation/list/function/%s" % function
+    if kwargs is not None:
+        for operator, value in kwargs.items():
+            api_call += "/%s/%s" % (operator, value)
     if id is not None:
         api_call += "/run/%s" % ','.join([str(int(i)) for i in id])
     if task is not None:
@@ -52,19 +87,13 @@ def list_evaluations(function, offset=None, size=None, id=None, task=None,
         api_call += "/flow/%s" % ','.join([str(int(i)) for i in flow])
     if uploader is not None:
         api_call += "/uploader/%s" % ','.join([str(int(i)) for i in uploader])
-    if tag is not None:
-        api_call += "/tag/%s" % tag
 
-    return _list_evaluations(api_call)
+    return __list_evaluations(api_call)
 
 
-def _list_evaluations(api_call):
+def __list_evaluations(api_call):
     """Helper function to parse API calls which are lists of runs"""
-    try:
-        xml_string = _perform_api_call(api_call)
-    except OpenMLServerNoResult:
-        return dict()
-
+    xml_string = _perform_api_call(api_call)
     evals_dict = xmltodict.parse(xml_string, force_list=('oml:evaluation',))
     # Minimalistic check if the XML is useful
     if 'oml:evaluations' not in evals_dict:
@@ -88,5 +117,4 @@ def _list_evaluations(api_call):
                                       eval_['oml:upload_time'], float(eval_['oml:value']),
                                       array_data)
         evals[run_id] = evaluation
-    return evals
-
+    return evals
diff --git a/openml/flows/functions.py b/openml/flows/functions.py
@@ -6,6 +6,7 @@
 from openml._api_calls import _perform_api_call
 from openml.exceptions import OpenMLServerNoResult
 from . import OpenMLFlow
+import openml.utils
 
 
 def get_flow(flow_id):
@@ -30,8 +31,11 @@ def get_flow(flow_id):
     return flow
 
 
-def list_flows(offset=None, size=None, tag=None):
-    """Return a list of all flows which are on OpenML.
+def list_flows(offset=None, size=None, tag=None, **kwargs):
+
+    """
+    Return a list of all flows which are on OpenML.
+    (Supports large amount of results)
 
     Parameters
     ----------
@@ -41,6 +45,8 @@ def list_flows(offset=None, size=None, tag=None):
         the maximum number of flows to return
     tag : str, optional
         the tag to include
+    kwargs : dict, optional
+        Legal filter operators: uploader.
 
     Returns
     -------
@@ -57,17 +63,29 @@ def list_flows(offset=None, size=None, tag=None):
         - external version
         - uploader
     """
-    api_call = "flow/list"
-    if offset is not None:
-        api_call += "/offset/%d" % int(offset)
+    return openml.utils.list_all(_list_flows, offset=offset, size=size, tag=tag, **kwargs)
+
+
+def _list_flows(**kwargs):
+    """
+    Perform the api call that returns a list of all flows.
+
+    Parameters
+    ----------
+    kwargs : dict, optional
+        Legal filter operators: uploader, tag, limit, offset.
 
-    if size is not None:
-        api_call += "/limit/%d" % int(size)
+    Returns
+    -------
+    flows : dict
+    """
+    api_call = "flow/list"
 
-    if tag is not None:
-        api_call += "/tag/%s" % tag
+    if kwargs is not None:
+        for operator, value in kwargs.items():
+            api_call += "/%s/%s" % (operator, value)
 
-    return _list_flows(api_call)
+    return __list_flows(api_call)
 
 
 def flow_exists(name, external_version):
@@ -79,7 +97,7 @@ def flow_exists(name, external_version):
     ----------
     name : string
         Name of the flow
-    version : string
+    external_version : string
         Version information associated with flow.
 
     Returns
@@ -108,11 +126,9 @@ def flow_exists(name, external_version):
         return False
 
 
-def _list_flows(api_call):
-    try:
-        xml_string = _perform_api_call(api_call)
-    except OpenMLServerNoResult:
-        return dict()
+def __list_flows(api_call):
+
+    xml_string = _perform_api_call(api_call)
     flows_dict = xmltodict.parse(xml_string, force_list=('oml:flow',))
 
     # Minimalistic check if the XML is useful
@@ -186,11 +202,11 @@ def assert_flows_equal(flow1, flow2,
                                # Tags aren't directly created by the server,
                                # but the uploader has no control over them!
                                'tags']
-    ignored_by_python_API = ['binary_url', 'binary_format', 'binary_md5',
+    ignored_by_python_api = ['binary_url', 'binary_format', 'binary_md5',
                              'model']
 
     for key in set(flow1.__dict__.keys()).union(flow2.__dict__.keys()):
-        if key in generated_by_the_server + ignored_by_python_API:
+        if key in generated_by_the_server + ignored_by_python_api:
             continue
         attr1 = getattr(flow1, key, None)
         attr2 = getattr(flow2, key, None)