From 5aabb04c011b68640823e6c96935d0dd1ab6ab22 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sun, 17 Mar 2019 13:42:20 +0200
Subject: [PATCH 1/6] Use a different dataset id for the active check, as d/1 is
deactivated. Test against the production server, as the test server does not
have deactivated datasets.
---
tests/test_datasets/test_dataset_functions.py | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index 06ebe4f6e..9912d6c32 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -206,10 +206,11 @@ def test_list_datasets_empty(self):
self.assertIsInstance(datasets, dict)
- @unittest.skip('See https://github.com/openml/openml-python/issues/149')
def test_check_datasets_active(self):
- active = openml.datasets.check_datasets_active([1, 17])
- self.assertTrue(active[1])
+ # Have to test on live because there is no deactivated dataset on the test server.
+ openml.config.server = self.production_server
+ active = openml.datasets.check_datasets_active([2, 17])
+ self.assertTrue(active[2])
self.assertFalse(active[17])
self.assertRaisesRegex(
ValueError,
@@ -217,6 +218,7 @@ def test_check_datasets_active(self):
openml.datasets.check_datasets_active,
[79],
)
+ openml.config.server = self.test_server
def test_get_datasets(self):
dids = [1, 2]
From 34d62689fd4c7b245120afd82cf67cdf272b5d28 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sun, 17 Mar 2019 13:51:01 +0200
Subject: [PATCH 2/6] Fix iteration to reflect that dataset_list is a dict with
integer keys (which cannot be indexed). Retrieve all datasets instead of only
active ones. Add documentation.
---
openml/datasets/functions.py | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index 8b43625c6..3a317c0a9 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -3,6 +3,7 @@
import os
import re
import warnings
+from typing import Dict, List
import numpy as np
import arff
@@ -268,24 +269,23 @@ def __list_datasets(api_call):
return datasets
-def check_datasets_active(dataset_ids):
- """Check if the dataset ids provided are active.
+def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]:
+ """ Check if the dataset ids provided are active.
Parameters
----------
- dataset_ids : iterable
- Integers representing dataset ids.
+ dataset_ids : List[int]
+ A list of integers representing dataset ids.
Returns
-------
dict
A dictionary with items {did: bool}
"""
- dataset_list = list_datasets()
- dataset_ids = sorted(dataset_ids)
+ dataset_list = list_datasets(status='all')
active = {}
- for dataset in dataset_list:
+ for dataset in dataset_list.values():
active[dataset['did']] = dataset['status'] == 'active'
for did in dataset_ids:
From b89b3bd7aaa41bf263e00855eef3a3db38b249f7 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sun, 17 Mar 2019 13:57:18 +0200
Subject: [PATCH 3/6] Refactored to have a single use of 'active' and forgo
many excessive checks on datasets that were not asked for.
---
openml/datasets/functions.py | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index 3a317c0a9..7b4bacbed 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -284,16 +284,13 @@ def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]:
"""
dataset_list = list_datasets(status='all')
active = {}
-
- for dataset in dataset_list.values():
- active[dataset['did']] = dataset['status'] == 'active'
-
+
for did in dataset_ids:
- if did not in active:
- raise ValueError('Could not find dataset {} in '
- 'OpenML dataset list.'.format(did))
-
- active = {did: active[did] for did in dataset_ids}
+ dataset = dataset_list.get(did, None)
+ if dataset is None:
+ raise ValueError('Could not find dataset {} in OpenML dataset list.'.format(did))
+ else:
+ active[did] = (dataset['status'] == 'active')
return active
From e73d0b4e5d9edddf67d2e25813c41bd32b47851a Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sun, 17 Mar 2019 14:37:03 +0200
Subject: [PATCH 4/6] Remove spaces from empty line (flake8 error).
---
openml/datasets/functions.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index 7b4bacbed..29624192b 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -284,7 +284,7 @@ def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]:
"""
dataset_list = list_datasets(status='all')
active = {}
-
+
for did in dataset_ids:
dataset = dataset_list.get(did, None)
if dataset is None:
From 4f025cf2181517a71e17ca3f68ffe8130e6928b2 Mon Sep 17 00:00:00 2001
From: PGijsbers
Date: Sun, 17 Mar 2019 15:02:44 +0200
Subject: [PATCH 5/6] Removed unused import.
---
tests/test_datasets/test_dataset_functions.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py
index 9912d6c32..eaed1aa20 100644
--- a/tests/test_datasets/test_dataset_functions.py
+++ b/tests/test_datasets/test_dataset_functions.py
@@ -1,4 +1,3 @@
-import unittest
import os
import random
from itertools import product
From 50ce5e048bd12c40650063e1cc617eb63e872c0b Mon Sep 17 00:00:00 2001
From: Matthias Feurer
Date: Mon, 18 Mar 2019 22:39:26 +0100
Subject: [PATCH 6/6] PEP8
---
openml/datasets/functions.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index 0a9f9e186..22f87b80a 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -278,7 +278,7 @@ def _load_features_from_file(features_file: str) -> Dict:
force_list=('oml:feature', 'oml:nominal_value'))
return xml_dict["oml:data_features"]
-
+
def check_datasets_active(dataset_ids: List[int]) -> Dict[int, bool]:
""" Check if the dataset ids provided are active.