From 5aabb04c011b68640823e6c96935d0dd1ab6ab22 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Sun, 17 Mar 2019 13:42:20 +0200 Subject: [PATCH 1/6] Now use different did for active, as d/1 is deactivated. Test against production server as test server does not have deactivated datasets. --- tests/test_datasets/test_dataset_functions.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 06ebe4f6e..9912d6c32 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -206,10 +206,11 @@ def test_list_datasets_empty(self): self.assertIsInstance(datasets, dict) - @unittest.skip('See https://github.com/openml/openml-python/issues/149') def test_check_datasets_active(self): - active = openml.datasets.check_datasets_active([1, 17]) - self.assertTrue(active[1]) + # Have to test on live because there is no deactivated dataset on the test server. + openml.config.server = self.production_server + active = openml.datasets.check_datasets_active([2, 17]) + self.assertTrue(active[2]) self.assertFalse(active[17]) self.assertRaisesRegex( ValueError, @@ -217,6 +218,7 @@ def test_check_datasets_active(self): openml.datasets.check_datasets_active, [79], ) + openml.config.server = self.test_server def test_get_datasets(self): dids = [1, 2] From 34d62689fd4c7b245120afd82cf67cdf272b5d28 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Sun, 17 Mar 2019 13:51:01 +0200 Subject: [PATCH 2/6] Fix that reflects dataset_list has integer keys (and can not be indexed). Fix retrieving all datasets instead of only active ones. Add documentation. 
--- openml/datasets/functions.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 8b43625c6..3a317c0a9 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -3,6 +3,7 @@ import os import re import warnings +from typing import Dict, List import numpy as np import arff @@ -268,24 +269,23 @@ def __list_datasets(api_call): return datasets -def check_datasets_active(dataset_ids): - """Check if the dataset ids provided are active. +def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]: + """ Check if the dataset ids provided are active. Parameters ---------- - dataset_ids : iterable - Integers representing dataset ids. + dataset_ids : List[int] + A list of integers representing dataset ids. Returns ------- dict A dictionary with items {did: bool} """ - dataset_list = list_datasets() - dataset_ids = sorted(dataset_ids) + dataset_list = list_datasets(status='all') active = {} - for dataset in dataset_list: + for dataset in dataset_list.values(): active[dataset['did']] = dataset['status'] == 'active' for did in dataset_ids: From b89b3bd7aaa41bf263e00855eef3a3db38b249f7 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Sun, 17 Mar 2019 13:57:18 +0200 Subject: [PATCH 3/6] Refactored to have a single use of 'active' and forgo many excessive checks on datasets that were not asked for. 
--- openml/datasets/functions.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 3a317c0a9..7b4bacbed 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -284,16 +284,13 @@ def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]: """ dataset_list = list_datasets(status='all') active = {} - - for dataset in dataset_list.values(): - active[dataset['did']] = dataset['status'] == 'active' - + for did in dataset_ids: - if did not in active: - raise ValueError('Could not find dataset {} in ' - 'OpenML dataset list.'.format(did)) - - active = {did: active[did] for did in dataset_ids} + dataset = dataset_list.get(did, None) + if dataset is None: + raise ValueError('Could not find dataset {} in OpenML dataset list.'.format(did)) + else: + active[did] = (dataset['status'] == 'active') return active From e73d0b4e5d9edddf67d2e25813c41bd32b47851a Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Sun, 17 Mar 2019 14:37:03 +0200 Subject: [PATCH 4/6] Remove spaces from empty line (flake error). --- openml/datasets/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 7b4bacbed..29624192b 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -284,7 +284,7 @@ def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]: """ dataset_list = list_datasets(status='all') active = {} - + for did in dataset_ids: dataset = dataset_list.get(did, None) if dataset is None: From 4f025cf2181517a71e17ca3f68ffe8130e6928b2 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Sun, 17 Mar 2019 15:02:44 +0200 Subject: [PATCH 5/6] Removed unused import. 
--- tests/test_datasets/test_dataset_functions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 9912d6c32..eaed1aa20 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -1,4 +1,3 @@ -import unittest import os import random from itertools import product From 50ce5e048bd12c40650063e1cc617eb63e872c0b Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Mon, 18 Mar 2019 22:39:26 +0100 Subject: [PATCH 6/6] PEP8 --- openml/datasets/functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 0a9f9e186..22f87b80a 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -278,7 +278,7 @@ def _load_features_from_file(features_file: str) -> Dict: force_list=('oml:feature', 'oml:nominal_value')) return xml_dict["oml:data_features"] - + def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]: """ Check if the dataset ids provided are active.