From c10dbbbe428819a50594460de17ad11190765610 Mon Sep 17 00:00:00 2001 From: Max Copeland Date: Tue, 28 Aug 2018 12:03:28 -0700 Subject: [PATCH 1/2] found missing nominal values in OpenMLFeature object, need to trace xmlfeature to source of nominal values --- openml/datasets/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index f25557783..0aae69e19 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -81,7 +81,7 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None, feature = OpenMLDataFeature(int(xmlfeature['oml:index']), xmlfeature['oml:name'], xmlfeature['oml:data_type'], - None, # todo add nominal values (currently not in database) + xmlfeature['oml:nominal_values'] ######## This must pass nominal values for feature int(xmlfeature.get('oml:number_of_missing_values', 0))) if idx != feature.index: raise ValueError('Data features not provided in right order') From eef091aa3585ce48c33aae656fae96ef5ace0e4f Mon Sep 17 00:00:00 2001 From: Max Copeland Date: Tue, 28 Aug 2018 16:29:32 -0700 Subject: [PATCH 2/2] added nominal values to feature in OpenMLDataset --- openml/datasets/dataset.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/openml/datasets/dataset.py b/openml/datasets/dataset.py index 0aae69e19..34afc16d6 100644 --- a/openml/datasets/dataset.py +++ b/openml/datasets/dataset.py @@ -78,10 +78,16 @@ def __init__(self, dataset_id=None, name=None, version=None, description=None, if features is not None: self.features = {} for idx, xmlfeature in enumerate(features['oml:feature']): + # split string of nominal values into type list if feature is nominal + # otherwise passing none for nominal values + try: + nom_vals = [str(x[1:-1]) for x in xmlfeature['oml:nominal_values'][1:-1].split(',')] + except KeyError: + nom_vals = None feature = OpenMLDataFeature(int(xmlfeature['oml:index']), xmlfeature['oml:name'], xmlfeature['oml:data_type'], - xmlfeature['oml:nominal_values'] ######## This must pass nominal values for feature + nom_vals, int(xmlfeature.get('oml:number_of_missing_values', 0))) if idx != feature.index: raise ValueError('Data features not provided in right order')