From 8646ef2d44676c2f58bc212f9641e9b7299b1739 Mon Sep 17 00:00:00 2001
From: janvanrijn <janvanrijn@gmail.com>
Date: Sat, 20 Oct 2018 12:21:38 -0400
Subject: [PATCH 1/3] Make listing calls issue the correct number of API calls
 when fewer results are available than requested

---
 openml/evaluations/functions.py |  4 ++--
 openml/utils.py                 |  9 ++++++---
 tests/test_utils/test_utils.py  | 33 ++++++++++++++++++++++++++-------
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py
index 543a1d768..a7691a72e 100644
--- a/openml/evaluations/functions.py
+++ b/openml/evaluations/functions.py
@@ -108,7 +108,7 @@ def __list_evaluations(api_call):
         run_id = int(eval_['oml:run_id'])
         array_data = None
         if 'oml:array_data' in eval_:
-            eval_['oml:array_data']
+            array_data = eval_['oml:array_data']
 
         evals[run_id] = OpenMLEvaluation(int(eval_['oml:run_id']), int(eval_['oml:task_id']),
                                       int(eval_['oml:setup_id']), int(eval_['oml:flow_id']),
@@ -117,4 +117,4 @@ def __list_evaluations(api_call):
                                       eval_['oml:upload_time'], float(eval_['oml:value']),
                                       array_data)
 
-    return evals
\ No newline at end of file
+    return evals
diff --git a/openml/utils.py b/openml/utils.py
index 39013d835..12c848264 100644
--- a/openml/utils.py
+++ b/openml/utils.py
@@ -126,7 +126,6 @@ def _list_all(listing_call, *args, **filters):
     if 'batch_size' in active_filters:
         BATCH_SIZE_ORIG = active_filters['batch_size']
         del active_filters['batch_size']
-    batch_size = BATCH_SIZE_ORIG
 
     # max number of results to be shown
     LIMIT = None
@@ -137,22 +136,26 @@ def _list_all(listing_call, *args, **filters):
     # check if the batch size is greater than the number of results that need to be returned.
     if LIMIT is not None:
         if BATCH_SIZE_ORIG > LIMIT:
-            batch_size = LIMIT
+            BATCH_SIZE_ORIG = min(LIMIT, BATCH_SIZE_ORIG)
     if 'offset' in active_filters:
         offset = active_filters['offset']
         del active_filters['offset']
+    batch_size = BATCH_SIZE_ORIG
     while True:
         try:
+            current_offset = offset + BATCH_SIZE_ORIG * page
             new_batch = listing_call(
                 *args,
                 limit=batch_size,
-                offset=offset + BATCH_SIZE_ORIG * page,
+                offset=current_offset,
                 **active_filters
             )
         except openml.exceptions.OpenMLServerNoResult:
             # we want to return an empty dict in this case
             break
         result.update(new_batch)
+        if len(new_batch) < batch_size:
+            break
         page += 1
         if LIMIT is not None:
             # check if the number of required results has been achieved
diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py
index e0c914acf..4e55a77fe 100644
--- a/tests/test_utils/test_utils.py
+++ b/tests/test_utils/test_utils.py
@@ -2,19 +2,38 @@
 import numpy as np
 import openml
 
+from unittest import mock
+
 
 class OpenMLTaskTest(TestBase):
     _multiprocess_can_split_ = True
     _batch_size = 25
 
+    def mocked_perform_api_call(call):
+        # TODO: JvR: Why is this not a staticmethod?
+        url = openml.config.server + '/' + call
+        return openml._api_calls._read_url(url)
+
     def test_list_all(self):
         openml.utils._list_all(openml.tasks.functions._list_tasks)
 
+    @mock.patch('openml._api_calls._perform_api_call', side_effect=mocked_perform_api_call)
+    def test_list_all_few_results_available(self, _perform_api_call):
+        # we want to make sure that the number of api calls is only 1.
+        # Although we have multiple versions of the iris dataset, there is only
+        # one with this name/version combination
+
+        datasets = openml.datasets.list_datasets(size=1000,
+                                                 data_name='iris',
+                                                 data_version=1)
+        self.assertEqual(len(datasets), 1)
+        self.assertEqual(_perform_api_call.call_count, 1)
+
     def test_list_all_for_datasets(self):
         required_size = 127  # default test server reset value
         datasets = openml.datasets.list_datasets(batch_size=self._batch_size, size=required_size)
 
-        self.assertEquals(len(datasets), required_size)
+        self.assertEqual(len(datasets), required_size)
         for did in datasets:
             self._check_dataset(datasets[did])
 
@@ -22,19 +41,19 @@ def test_list_datasets_with_high_size_parameter(self):
         datasets_a = openml.datasets.list_datasets()
         datasets_b = openml.datasets.list_datasets(size=np.inf)
 
-        self.assertEquals(len(datasets_a), len(datasets_b))
+        self.assertEqual(len(datasets_a), len(datasets_b))
 
     def test_list_all_for_tasks(self):
         required_size = 1068  # default test server reset value
         tasks = openml.tasks.list_tasks(batch_size=self._batch_size, size=required_size)
 
-        self.assertEquals(len(tasks), required_size)
+        self.assertEqual(len(tasks), required_size)
 
     def test_list_all_for_flows(self):
         required_size = 15  # default test server reset value
         flows = openml.flows.list_flows(batch_size=self._batch_size, size=required_size)
 
-        self.assertEquals(len(flows), required_size)
+        self.assertEqual(len(flows), required_size)
 
     def test_list_all_for_setups(self):
         required_size = 50
@@ -42,14 +61,14 @@ def test_list_all_for_setups(self):
         setups = openml.setups.list_setups(size=required_size)
 
         # might not be on test server after reset, please rerun test at least once if fails
-        self.assertEquals(len(setups), required_size)
+        self.assertEqual(len(setups), required_size)
 
     def test_list_all_for_runs(self):
         required_size = 48
         runs = openml.runs.list_runs(batch_size=self._batch_size, size=required_size)
 
         # might not be on test server after reset, please rerun test at least once if fails
-        self.assertEquals(len(runs), required_size)
+        self.assertEqual(len(runs), required_size)
 
     def test_list_all_for_evaluations(self):
         required_size = 57
@@ -58,4 +77,4 @@ def test_list_all_for_evaluations(self):
                                                           size=required_size)
 
         # might not be on test server after reset, please rerun test at least once if fails
-        self.assertEquals(len(evaluations), required_size)
+        self.assertEqual(len(evaluations), required_size)

From a283df8d5182413859f44bb024a1c84754f00f53 Mon Sep 17 00:00:00 2001
From: janvanrijn <janvanrijn@gmail.com>
Date: Sat, 20 Oct 2018 13:18:23 -0400
Subject: [PATCH 2/3] conditional mock import

---
 tests/test_utils/test_utils.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py
index 4e55a77fe..d42b1d18d 100644
--- a/tests/test_utils/test_utils.py
+++ b/tests/test_utils/test_utils.py
@@ -1,8 +1,12 @@
 from openml.testing import TestBase
 import numpy as np
 import openml
+import sys
 
-from unittest import mock
+if sys.version_info[0] >= 3:
+    from unittest import mock
+else:
+    import mock
 
 
 class OpenMLTaskTest(TestBase):

From c232ef21125250d9b1c8a4e12b775bdfefb24c28 Mon Sep 17 00:00:00 2001
From: Matthias Feurer <feurerm@informatik.uni-freiburg.de>
Date: Mon, 22 Oct 2018 12:51:01 +0200
Subject: [PATCH 3/3] Please flake8

---
 tests/test_utils/test_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py
index d42b1d18d..176622dbc 100644
--- a/tests/test_utils/test_utils.py
+++ b/tests/test_utils/test_utils.py
@@ -21,7 +21,8 @@ def mocked_perform_api_call(call):
     def test_list_all(self):
         openml.utils._list_all(openml.tasks.functions._list_tasks)
 
-    @mock.patch('openml._api_calls._perform_api_call', side_effect=mocked_perform_api_call)
+    @mock.patch('openml._api_calls._perform_api_call',
+                side_effect=mocked_perform_api_call)
     def test_list_all_few_results_available(self, _perform_api_call):
         # we want to make sure that the number of api calls is only 1.
         # Although we have multiple versions of the iris dataset, there is only