diff --git a/doc/api.rst b/doc/api.rst
index 977a1f740..79d59577c 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -29,9 +29,6 @@ Top-level Classes
    :template: function.rst
 
    check_datasets_active
-   get_dataset_description
-   get_dataset_features
-   get_dataset_qualities
    get_dataset
    get_datasets
    list_datasets
diff --git a/openml/datasets/__init__.py b/openml/datasets/__init__.py
index 6f3eb70ad..cc5a1e532 100644
--- a/openml/datasets/__init__.py
+++ b/openml/datasets/__init__.py
@@ -1,11 +1,11 @@
 from .functions import (list_datasets, list_datasets_by_tag,
                         check_datasets_active, get_datasets, get_dataset,
-                        get_dataset_description,
-                        get_dataset_features, get_dataset_qualities)
+                        _get_dataset_description,
+                        _get_dataset_features, _get_dataset_qualities)
 from .dataset import OpenMLDataset
 
 __all__ = ['check_datasets_active', 'get_dataset', 'get_datasets',
-           'get_datasets_arf', 'get_dataset_features',
-           'get_dataset_qualities', 'OpenMLDataset', 'list_datasets',
+           'get_datasets_arf', '_get_dataset_features',
+           '_get_dataset_qualities', 'OpenMLDataset', 'list_datasets',
            'list_datasets_by_tag',
-           'get_dataset_description', 'list_datasets']
+           '_get_dataset_description', 'list_datasets']
diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index 8c86d8af8..bd3b358ea 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -236,14 +236,14 @@ def get_dataset(did):
         raise ValueError("Dataset ID is neither an Integer nor can be "
                          "cast to an Integer.")
 
-    description = get_dataset_description(did)
+    description = _get_dataset_description(did)
     arff_file = _get_dataset_arff(did, description=description)
 
     dataset = _create_dataset_from_description(description, arff_file)
     return dataset
 
 
-def get_dataset_description(did):
+def _get_dataset_description(did):
     # TODO implement a cache for this that invalidates itself after some
     # time
     # This can be saved on disk, but cannot be cached properly, because
@@ -295,7 +295,7 @@ def _get_dataset_arff(did, description=None):
             pass
 
     if description is None:
-        description = get_dataset_description(did)
+        description = _get_dataset_description(did)
     url = description['oml:url']
     return_code, arff_string = _read_url(url)
     # TODO: it is inefficient to load the dataset in memory prior to
@@ -307,7 +307,7 @@ def _get_dataset_arff(did, description=None):
     return output_file
 
 
-def get_dataset_features(did):
+def _get_dataset_features(did):
     did_cache_dir = _create_dataset_cache_directory(did)
     features_file = os.path.join(did_cache_dir, "features.xml")
 
@@ -337,7 +337,7 @@ def get_dataset_features(did):
     return features
 
 
-def get_dataset_qualities(did):
+def _get_dataset_qualities(did):
     # Dataset qualities are subject to change and must be fetched every time
     did_cache_dir = _create_dataset_cache_directory(did)
     qualities_file = os.path.join(did_cache_dir, "qualities.xml")
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index 6c453a1de..47560b323 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -109,20 +109,20 @@ def test_download_rowid(self):
         dataset = openml.datasets.get_dataset(did)
         self.assertEqual(dataset.row_id_attribute, 'instance')
 
-    def test_get_dataset_description(self):
+    def test__get_dataset_description(self):
         # Only a smoke test, I don't know exactly how to test the URL
         # retrieval and "caching"
-        description = openml.datasets.get_dataset_description(2)
+        description = openml.datasets._get_dataset_description(2)
         self.assertIsInstance(description, dict)
 
-    def test_get_dataset_features(self):
+    def test__get_dataset_features(self):
         # Only a smoke check
-        features = openml.datasets.get_dataset_features(2)
+        features = openml.datasets._get_dataset_features(2)
         self.assertIsInstance(features, dict)
 
-    def test_get_dataset_qualities(self):
+    def test__get_dataset_qualities(self):
         # Only a smoke check
-        qualities = openml.datasets.get_dataset_qualities(2)
+        qualities = openml.datasets._get_dataset_qualities(2)
         self.assertIsInstance(qualities, dict)
 
     def test_publish_dataset(self):