Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion openml/datasets/dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from collections import OrderedDict
import re
import gzip
import io
import logging
Expand Down Expand Up @@ -108,7 +109,17 @@ def __init__(self, name, description, format=None,
paper_url=None, update_comment=None,
md5_checksum=None, data_file=None, features=None,
qualities=None, dataset=None):

if description and not re.match("^[\x00-\x7F]*$", description):
# not basiclatin (XSD complains)
raise ValueError("Invalid symbols in description: {}".format(
description))
if citation and not re.match("^[\x00-\x7F]*$", citation):
# not basiclatin (XSD complains)
raise ValueError("Invalid symbols in citation: {}".format(
citation))
if not re.match("^[a-zA-Z0-9_\\-\\.\\(\\),]+$", name):
# regex given by server in error message
raise ValueError("Invalid symbols in name: {}".format(name))
# TODO add function to check if the name is casual_string128
# Attributes received by querying the RESTful API
self.dataset_id = int(dataset_id) if dataset_id is not None else None
Expand Down
16 changes: 15 additions & 1 deletion tests/test_datasets/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,24 @@ def setUp(self):
def test_repr(self):
    """Smoke-test that ``str()`` works on a minimally constructed dataset."""
    # Create a bare-bones dataset as would be returned by create_dataset.
    # The name deliberately contains no whitespace: the constructor raises
    # ValueError("Invalid symbols in name: ...") for names with characters
    # outside the server-accepted set, and a space is not in that set.
    data = openml.datasets.OpenMLDataset(name="somename",
                                         description="a description")
    # Only checks that string conversion does not raise.
    str(data)

def test_init_string_validation(self):
    """Check that the constructor rejects invalid name/description/citation.

    Each case pairs the expected error-message fragment with constructor
    keyword arguments that should trigger it; cases run in order and the
    test stops at the first one that does not raise ``ValueError``.
    """
    rejected_cases = [
        # A space is not among the characters accepted in a dataset name.
        (
            "Invalid symbols in name",
            {"name": "some name", "description": "a description"},
        ),
        # Non-ASCII character in the description.
        (
            "Invalid symbols in description",
            {"name": "somename", "description": "a descriptïon"},
        ),
        # Non-ASCII character in the citation.
        (
            "Invalid symbols in citation",
            {
                "name": "somename",
                "description": "a description",
                "citation": "Something by Müller",
            },
        ),
    ]

    for message_fragment, init_kwargs in rejected_cases:
        with pytest.raises(ValueError, match=message_fragment):
            openml.datasets.OpenMLDataset(**init_kwargs)

def test_get_data_array(self):
# Basic usage
rval, _, categorical, attribute_names = self.dataset.get_data(dataset_format='array')
Expand Down