Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@ Top-level Classes
:template: function.rst

check_datasets_active
get_dataset_description
get_dataset_features
get_dataset_qualities
get_dataset
get_datasets
list_datasets
Expand Down
10 changes: 5 additions & 5 deletions openml/datasets/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from .functions import (list_datasets, list_datasets_by_tag,
check_datasets_active, get_datasets, get_dataset,
get_dataset_description,
get_dataset_features, get_dataset_qualities)
_get_dataset_description,
_get_dataset_features, _get_dataset_qualities)
from .dataset import OpenMLDataset

__all__ = ['check_datasets_active', 'get_dataset', 'get_datasets',
'get_datasets_arf', 'get_dataset_features',
'get_dataset_qualities', 'OpenMLDataset', 'list_datasets',
'get_datasets_arf', '_get_dataset_features',
'_get_dataset_qualities', 'OpenMLDataset', 'list_datasets',
'list_datasets_by_tag',
'get_dataset_description', 'list_datasets']
'_get_dataset_description', 'list_datasets']
10 changes: 5 additions & 5 deletions openml/datasets/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,14 +236,14 @@ def get_dataset(did):
raise ValueError("Dataset ID is neither an Integer nor can be "
"cast to an Integer.")

description = get_dataset_description(did)
description = _get_dataset_description(did)
arff_file = _get_dataset_arff(did, description=description)

dataset = _create_dataset_from_description(description, arff_file)
return dataset


def get_dataset_description(did):
def _get_dataset_description(did):
# TODO implement a cache for this that invalidates itself after some
# time
# This can be saved on disk, but cannot be cached properly, because
Expand Down Expand Up @@ -295,7 +295,7 @@ def _get_dataset_arff(did, description=None):
pass

if description is None:
description = get_dataset_description(did)
description = _get_dataset_description(did)
url = description['oml:url']
return_code, arff_string = _read_url(url)
# TODO: it is inefficient to load the dataset in memory prior to
Expand All @@ -307,7 +307,7 @@ def _get_dataset_arff(did, description=None):
return output_file


def get_dataset_features(did):
def _get_dataset_features(did):
did_cache_dir = _create_dataset_cache_directory(did)
features_file = os.path.join(did_cache_dir, "features.xml")

Expand Down Expand Up @@ -337,7 +337,7 @@ def get_dataset_features(did):
return features


def get_dataset_qualities(did):
def _get_dataset_qualities(did):
# Dataset qualities are subject to change and must be fetched every time
did_cache_dir = _create_dataset_cache_directory(did)
qualities_file = os.path.join(did_cache_dir, "qualities.xml")
Expand Down
12 changes: 6 additions & 6 deletions tests/test_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,20 +109,20 @@ def test_download_rowid(self):
dataset = openml.datasets.get_dataset(did)
self.assertEqual(dataset.row_id_attribute, 'instance')

def test_get_dataset_description(self):
def test__get_dataset_description(self):
# Only a smoke test, I don't know exactly how to test the URL
# retrieval and "caching"
description = openml.datasets.get_dataset_description(2)
description = openml.datasets._get_dataset_description(2)
self.assertIsInstance(description, dict)

def test_get_dataset_features(self):
def test__get_dataset_features(self):
# Only a smoke check
features = openml.datasets.get_dataset_features(2)
features = openml.datasets._get_dataset_features(2)
self.assertIsInstance(features, dict)

def test_get_dataset_qualities(self):
def test__get_dataset_qualities(self):
# Only a smoke check
qualities = openml.datasets.get_dataset_qualities(2)
qualities = openml.datasets._get_dataset_qualities(2)
self.assertIsInstance(qualities, dict)

def test_publish_dataset(self):
Expand Down