4 changes: 2 additions & 2 deletions openml/evaluations/functions.py

@@ -108,7 +108,7 @@ def __list_evaluations(api_call):
         run_id = int(eval_['oml:run_id'])
         array_data = None
         if 'oml:array_data' in eval_:
-            eval_['oml:array_data']
+            array_data = eval_['oml:array_data']
 
         evals[run_id] = OpenMLEvaluation(int(eval_['oml:run_id']), int(eval_['oml:task_id']),
                                          int(eval_['oml:setup_id']), int(eval_['oml:flow_id']),
@@ -117,4 +117,4 @@ def __list_evaluations(api_call):
                                          eval_['oml:upload_time'], float(eval_['oml:value']),
                                          array_data)
 
-    return evals
+    return evals
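
A note on the first change: in Python, a bare expression statement such as eval_['oml:array_data'] is evaluated and its result immediately discarded, so the old code left array_data as None even when the key was present. A minimal sketch of the difference, using a made-up eval_ dict rather than a real parsed server response:

# Hypothetical record; a real one comes from the parsed XML of the
# OpenML evaluation listing, which is not reproduced here.
eval_ = {'oml:run_id': '1', 'oml:array_data': '[0.8, 0.9, 0.85]'}

array_data = None
if 'oml:array_data' in eval_:
    eval_['oml:array_data']                  # old code: value computed, then dropped
assert array_data is None                    # bug: array_data never populated

array_data = None
if 'oml:array_data' in eval_:
    array_data = eval_['oml:array_data']     # fixed: value actually stored
assert array_data == '[0.8, 0.9, 0.85]'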
9 changes: 6 additions & 3 deletions openml/utils.py

@@ -126,7 +126,6 @@ def _list_all(listing_call, *args, **filters):
     if 'batch_size' in active_filters:
         BATCH_SIZE_ORIG = active_filters['batch_size']
         del active_filters['batch_size']
-        batch_size = BATCH_SIZE_ORIG
 
     # max number of results to be shown
     LIMIT = None
@@ -137,22 +136,26 @@ def _list_all(listing_call, *args, **filters):
     # check if the batch size is greater than the number of results that need to be returned.
     if LIMIT is not None:
         if BATCH_SIZE_ORIG > LIMIT:
-            batch_size = LIMIT
+            BATCH_SIZE_ORIG = min(LIMIT, BATCH_SIZE_ORIG)
     if 'offset' in active_filters:
         offset = active_filters['offset']
         del active_filters['offset']
+    batch_size = BATCH_SIZE_ORIG
     while True:
         try:
+            current_offset = offset + BATCH_SIZE_ORIG * page
             new_batch = listing_call(
                 *args,
                 limit=batch_size,
-                offset=offset + BATCH_SIZE_ORIG * page,
+                offset=current_offset,
                 **active_filters
             )
         except openml.exceptions.OpenMLServerNoResult:
             # we want to return an empty dict in this case
             break
         result.update(new_batch)
+        if len(new_batch) < batch_size:
+            break
         page += 1
         if LIMIT is not None:
             # check if the number of required results has been achieved
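
Taken together, these changes clamp the batch size to the overall limit once, derive batch_size from BATCH_SIZE_ORIG in a single place, and stop paging as soon as the server returns a short batch, instead of issuing one more call that would come back empty. A self-contained sketch of that paging logic, with a toy listing_call standing in for the OpenML server; the names are illustrative, and trimming the final request to the remaining limit is an assumption, since the tail of the real loop is not shown in this diff:

def fetch_all(listing_call, batch_size=25, limit=None, offset=0):
    """Page through listing_call until it is exhausted or `limit` is hit."""
    if limit is not None:
        batch_size = min(limit, batch_size)
    result = {}
    page = 0
    while True:
        # Never request more than what is still missing from `limit`.
        request = batch_size
        if limit is not None:
            request = min(batch_size, limit - len(result))
        new_batch = listing_call(limit=request, offset=offset + batch_size * page)
        result.update(new_batch)
        # A short (or empty) batch means the server has no further results,
        # so stop here rather than issuing one more call that would be empty.
        if len(new_batch) < request:
            break
        page += 1
        if limit is not None and len(result) >= limit:
            break
    return result


def fake_listing_call(limit, offset):
    """Toy stand-in for an OpenML listing endpoint holding 7 records."""
    data = {i: 'record-%d' % i for i in range(7)}
    return {k: data[k] for k in sorted(data) if offset <= k < offset + limit}


assert len(fetch_all(fake_listing_call, batch_size=3)) == 7            # 3 calls: 3 + 3 + 1
assert len(fetch_all(fake_listing_call, batch_size=3, limit=5)) == 5   # final request trimmed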
38 changes: 31 additions & 7 deletions tests/test_utils/test_utils.py

@@ -1,55 +1,79 @@
 from openml.testing import TestBase
 import numpy as np
 import openml
+import sys
+
+if sys.version_info[0] >= 3:
+    from unittest import mock
+else:
+    import mock
 
 
 class OpenMLTaskTest(TestBase):
     _multiprocess_can_split_ = True
     _batch_size = 25
 
+    def mocked_perform_api_call(call):
+        # TODO: JvR: Why is this not a staticmethod?
+        url = openml.config.server + '/' + call
+        return openml._api_calls._read_url(url)
+
     def test_list_all(self):
         openml.utils._list_all(openml.tasks.functions._list_tasks)
 
+    @mock.patch('openml._api_calls._perform_api_call',
+                side_effect=mocked_perform_api_call)
+    def test_list_all_few_results_available(self, _perform_api_call):
+        # we want to make sure that the number of api calls is only 1.
+        # Although we have multiple versions of the iris dataset, there is only
+        # one with this name/version combination
+
+        datasets = openml.datasets.list_datasets(size=1000,
+                                                 data_name='iris',
+                                                 data_version=1)
+        self.assertEqual(len(datasets), 1)
+        self.assertEqual(_perform_api_call.call_count, 1)
+
     def test_list_all_for_datasets(self):
         required_size = 127  # default test server reset value
         datasets = openml.datasets.list_datasets(batch_size=self._batch_size, size=required_size)
 
-        self.assertEquals(len(datasets), required_size)
+        self.assertEqual(len(datasets), required_size)
         for did in datasets:
             self._check_dataset(datasets[did])
 
     def test_list_datasets_with_high_size_parameter(self):
         datasets_a = openml.datasets.list_datasets()
         datasets_b = openml.datasets.list_datasets(size=np.inf)
 
-        self.assertEquals(len(datasets_a), len(datasets_b))
+        self.assertEqual(len(datasets_a), len(datasets_b))
 
     def test_list_all_for_tasks(self):
         required_size = 1068  # default test server reset value
         tasks = openml.tasks.list_tasks(batch_size=self._batch_size, size=required_size)
 
-        self.assertEquals(len(tasks), required_size)
+        self.assertEqual(len(tasks), required_size)
 
     def test_list_all_for_flows(self):
         required_size = 15  # default test server reset value
         flows = openml.flows.list_flows(batch_size=self._batch_size, size=required_size)
 
-        self.assertEquals(len(flows), required_size)
+        self.assertEqual(len(flows), required_size)
 
     def test_list_all_for_setups(self):
         required_size = 50
         # TODO apparently list_setups function does not support kwargs
         setups = openml.setups.list_setups(size=required_size)
 
         # might not be on test server after reset, please rerun test at least once if fails
-        self.assertEquals(len(setups), required_size)
+        self.assertEqual(len(setups), required_size)
 
     def test_list_all_for_runs(self):
         required_size = 48
         runs = openml.runs.list_runs(batch_size=self._batch_size, size=required_size)
 
         # might not be on test server after reset, please rerun test at least once if fails
-        self.assertEquals(len(runs), required_size)
+        self.assertEqual(len(runs), required_size)
 
     def test_list_all_for_evaluations(self):
         required_size = 57
@@ -58,4 +82,4 @@ def test_list_all_for_evaluations(self):
                                                        size=required_size)
 
         # might not be on test server after reset, please rerun test at least once if fails
-        self.assertEquals(len(evaluations), required_size)
+        self.assertEqual(len(evaluations), required_size)
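
Two observations on the test changes. First, assertEquals is a deprecated alias of assertEqual in unittest, which is why every occurrence is swapped. Second, the new test_list_all_few_results_available patches openml._api_calls._perform_api_call with a side_effect that still issues the real request, so listing behaviour is unchanged while the mock records how often it is invoked. A reduced, self-contained sketch of that call-counting pattern; fetch and list_things are made-up stand-ins for the openml internals, not part of the library:

import unittest
from unittest import mock


def fetch(url):
    # Stand-in for openml._api_calls._perform_api_call.
    return 'response for %s' % url


def list_things():
    # Stand-in for the listing code under test; looks up `fetch` at call time,
    # which is what makes module-level patching effective.
    return fetch('https://example.org/api/list')


class CallCountTest(unittest.TestCase):
    # side_effect=fetch keeps the original behaviour; the mock only records calls.
    @mock.patch(__name__ + '.fetch', side_effect=fetch)
    def test_single_call(self, mocked_fetch):
        list_things()
        self.assertEqual(mocked_fetch.call_count, 1)


if __name__ == '__main__':
    unittest.main()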