From 8646ef2d44676c2f58bc212f9641e9b7299b1739 Mon Sep 17 00:00:00 2001 From: janvanrijn Date: Sat, 20 Oct 2018 12:21:38 -0400 Subject: [PATCH 1/3] makes listing calls obtain correct amount of calls when not enough results are available --- openml/evaluations/functions.py | 4 ++-- openml/utils.py | 9 ++++++--- tests/test_utils/test_utils.py | 33 ++++++++++++++++++++++++++------- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/openml/evaluations/functions.py b/openml/evaluations/functions.py index 543a1d768..a7691a72e 100644 --- a/openml/evaluations/functions.py +++ b/openml/evaluations/functions.py @@ -108,7 +108,7 @@ def __list_evaluations(api_call): run_id = int(eval_['oml:run_id']) array_data = None if 'oml:array_data' in eval_: - eval_['oml:array_data'] + array_data = eval_['oml:array_data'] evals[run_id] = OpenMLEvaluation(int(eval_['oml:run_id']), int(eval_['oml:task_id']), int(eval_['oml:setup_id']), int(eval_['oml:flow_id']), @@ -117,4 +117,4 @@ def __list_evaluations(api_call): eval_['oml:upload_time'], float(eval_['oml:value']), array_data) - return evals \ No newline at end of file + return evals diff --git a/openml/utils.py b/openml/utils.py index 39013d835..12c848264 100644 --- a/openml/utils.py +++ b/openml/utils.py @@ -126,7 +126,6 @@ def _list_all(listing_call, *args, **filters): if 'batch_size' in active_filters: BATCH_SIZE_ORIG = active_filters['batch_size'] del active_filters['batch_size'] - batch_size = BATCH_SIZE_ORIG # max number of results to be shown LIMIT = None @@ -137,22 +136,26 @@ def _list_all(listing_call, *args, **filters): # check if the batch size is greater than the number of results that need to be returned. if LIMIT is not None: if BATCH_SIZE_ORIG > LIMIT: - batch_size = LIMIT + BATCH_SIZE_ORIG = min(LIMIT, BATCH_SIZE_ORIG) if 'offset' in active_filters: offset = active_filters['offset'] del active_filters['offset'] + batch_size = BATCH_SIZE_ORIG while True: try: + current_offset = offset + BATCH_SIZE_ORIG * page new_batch = listing_call( *args, limit=batch_size, - offset=offset + BATCH_SIZE_ORIG * page, + offset=current_offset, **active_filters ) except openml.exceptions.OpenMLServerNoResult: # we want to return an empty dict in this case break result.update(new_batch) + if len(new_batch) < batch_size: + break page += 1 if LIMIT is not None: # check if the number of required results has been achieved diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index e0c914acf..4e55a77fe 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -2,19 +2,38 @@ import numpy as np import openml +from unittest import mock + class OpenMLTaskTest(TestBase): _multiprocess_can_split_ = True _batch_size = 25 + def mocked_perform_api_call(call): + # TODO: JvR: Why is this not a staticmethod? + url = openml.config.server + '/' + call + return openml._api_calls._read_url(url) + def test_list_all(self): openml.utils._list_all(openml.tasks.functions._list_tasks) + @mock.patch('openml._api_calls._perform_api_call', side_effect=mocked_perform_api_call) + def test_list_all_few_results_available(self, _perform_api_call): + # we want to make sure that the number of api calls is only 1. + # Although we have multiple versions of the iris dataset, there is only + # one with this name/version combination + + datasets = openml.datasets.list_datasets(size=1000, + data_name='iris', + data_version=1) + self.assertEqual(len(datasets), 1) + self.assertEqual(_perform_api_call.call_count, 1) + def test_list_all_for_datasets(self): required_size = 127 # default test server reset value datasets = openml.datasets.list_datasets(batch_size=self._batch_size, size=required_size) - self.assertEquals(len(datasets), required_size) + self.assertEqual(len(datasets), required_size) for did in datasets: self._check_dataset(datasets[did]) @@ -22,19 +41,19 @@ def test_list_datasets_with_high_size_parameter(self): datasets_a = openml.datasets.list_datasets() datasets_b = openml.datasets.list_datasets(size=np.inf) - self.assertEquals(len(datasets_a), len(datasets_b)) + self.assertEqual(len(datasets_a), len(datasets_b)) def test_list_all_for_tasks(self): required_size = 1068 # default test server reset value tasks = openml.tasks.list_tasks(batch_size=self._batch_size, size=required_size) - self.assertEquals(len(tasks), required_size) + self.assertEqual(len(tasks), required_size) def test_list_all_for_flows(self): required_size = 15 # default test server reset value flows = openml.flows.list_flows(batch_size=self._batch_size, size=required_size) - self.assertEquals(len(flows), required_size) + self.assertEqual(len(flows), required_size) def test_list_all_for_setups(self): required_size = 50 @@ -42,14 +61,14 @@ def test_list_all_for_setups(self): setups = openml.setups.list_setups(size=required_size) # might not be on test server after reset, please rerun test at least once if fails - self.assertEquals(len(setups), required_size) + self.assertEqual(len(setups), required_size) def test_list_all_for_runs(self): required_size = 48 runs = openml.runs.list_runs(batch_size=self._batch_size, size=required_size) # might not be on test server after reset, please rerun test at least once if fails - self.assertEquals(len(runs), required_size) + self.assertEqual(len(runs), required_size) def test_list_all_for_evaluations(self): required_size = 57 @@ -58,4 +77,4 @@ def test_list_all_for_evaluations(self): size=required_size) # might not be on test server after reset, please rerun test at least once if fails - self.assertEquals(len(evaluations), required_size) + self.assertEqual(len(evaluations), required_size) From a283df8d5182413859f44bb024a1c84754f00f53 Mon Sep 17 00:00:00 2001 From: janvanrijn Date: Sat, 20 Oct 2018 13:18:23 -0400 Subject: [PATCH 2/3] conditional mock import --- tests/test_utils/test_utils.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index 4e55a77fe..d42b1d18d 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -1,8 +1,12 @@ from openml.testing import TestBase import numpy as np import openml +import sys -from unittest import mock +if sys.version_info[0] >= 3: + from unittest import mock +else: + import mock class OpenMLTaskTest(TestBase): From c232ef21125250d9b1c8a4e12b775bdfefb24c28 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Mon, 22 Oct 2018 12:51:01 +0200 Subject: [PATCH 3/3] Please flake8 --- tests/test_utils/test_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index d42b1d18d..176622dbc 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -21,7 +21,8 @@ def mocked_perform_api_call(call): def test_list_all(self): openml.utils._list_all(openml.tasks.functions._list_tasks) - @mock.patch('openml._api_calls._perform_api_call', side_effect=mocked_perform_api_call) + @mock.patch('openml._api_calls._perform_api_call', + side_effect=mocked_perform_api_call) def test_list_all_few_results_available(self, _perform_api_call): # we want to make sure that the number of api calls is only 1. # Although we have multiple versions of the iris dataset, there is only