Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Single input task partial fix #541

Merged
merged 7 commits into from Dec 3, 2018
Empty file modified ci_scripts/flake8_diff.sh 100644 → 100755
Empty file.
8 changes: 4 additions & 4 deletions openml/config.py
Expand Up @@ -19,11 +19,11 @@
'apikey': None,
'server': "https://www.openml.org/api/v1/xml",
'verbosity': 0,
'cachedir': os.path.expanduser('~/.openml/cache'),
'cachedir': os.path.expanduser(os.path.join('~', '.openml', 'cache')),
mfeurer marked this conversation as resolved.
Show resolved Hide resolved
'avoid_duplicate_runs': 'True',
}

config_file = os.path.expanduser('~/.openml/config')
# Path of the user's OpenML configuration file (~/.openml/config).
# NOTE: os.path.join requires commas between components; the adjacent
# literals '.openml' 'config' would silently concatenate to '.openmlconfig'.
config_file = os.path.expanduser(os.path.join('~', '.openml', 'config'))

# Default values are actually added here in the _setup() function which is
# called at the end of this module
Expand All @@ -48,7 +48,7 @@ def _setup():
global avoid_duplicate_runs
# read config file, create cache directory
try:
os.mkdir(os.path.expanduser('~/.openml'))
os.mkdir(os.path.expanduser(os.path.join('~', '.openml')))
except (IOError, OSError):
# TODO add debug information
pass
Expand Down Expand Up @@ -96,7 +96,7 @@ def get_cache_directory():

"""
url_suffix = urlparse(server).netloc
reversed_url_suffix = '/'.join(url_suffix.split('.')[::-1])
reversed_url_suffix = os.sep.join(url_suffix.split('.')[::-1])
if not cache_directory:
_cachedir = _defaults(cache_directory)
else:
Expand Down
51 changes: 32 additions & 19 deletions openml/tasks/functions.py
Expand Up @@ -10,9 +10,10 @@
from ..datasets import get_dataset
from .task import (
OpenMLClassificationTask,
OpenMLRegressionTask,
OpenMLClusteringTask,
OpenMLLearningCurveTask,
OpenMLRegressionTask,
OpenMLSupervisedTask
)
import openml.utils
import openml._api_calls
Expand Down Expand Up @@ -292,9 +293,13 @@ def get_task(task_id):
try:
task = _get_task_description(task_id)
dataset = get_dataset(task.dataset_id)
class_labels = dataset.retrieve_class_labels(task.target_name)
task.class_labels = class_labels
task.download_split()
# Clustering tasks do not have class labels
# and do not offer download_split
if isinstance(task, OpenMLSupervisedTask):
task.download_split()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to move this into the task classes? If they have a split and class labels they retrieve them, otherwise, they don't. Also, how does this work for regression tasks (regarding class labels)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice point — it does not work for regression. What do you think of the following?
As for your first point, I do not really like it, because then we have to call get_dataset from the task.

if isinstance(task, OpenMLSupervisedTask):
    task.download_split()
    if isinstance(task, OpenMLClassificationTask): 
        task.class_labels = \
            dataset.retrieve_class_labels(task.target_name)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed. Your proposed solution is fine for this.

if isinstance(task, OpenMLClassificationTask):
task.class_labels = \
dataset.retrieve_class_labels(task.target_name)
except Exception as e:
openml.utils._remove_cache_dir_for_id(
TASKS_CACHE_DIR_NAME,
Expand Down Expand Up @@ -323,6 +328,7 @@ def _get_task_description(task_id):
fh.write(task_xml)
return _create_task_from_xml(task_xml)


def _create_task_from_xml(xml):
"""Create a task given a xml string.

Expand All @@ -336,46 +342,53 @@ def _create_task_from_xml(xml):
OpenMLTask
"""
dic = xmltodict.parse(xml)["oml:task"]

estimation_parameters = dict()
inputs = dict()
# Due to the unordered structure we obtain, we first have to extract
# the possible keys of oml:input; dic["oml:input"] is a list of
# OrderedDicts
for input_ in dic["oml:input"]:
name = input_["@name"]
inputs[name] = input_

# Check if there is a list of inputs
if isinstance(dic["oml:input"], list):
for input_ in dic["oml:input"]:
name = input_["@name"]
inputs[name] = input_
# Single input case
elif isinstance(dic["oml:input"], dict):
name = dic["oml:input"]["@name"]
inputs[name] = dic["oml:input"]

evaluation_measures = None
if 'evaluation_measures' in inputs:
evaluation_measures = inputs["evaluation_measures"][
"oml:evaluation_measures"]["oml:evaluation_measure"]

# Convert some more parameters
for parameter in \
inputs["estimation_procedure"]["oml:estimation_procedure"][
"oml:parameter"]:
name = parameter["@name"]
text = parameter.get("#text", "")
estimation_parameters[name] = text

task_type = dic["oml:task_type"]
common_kwargs = {
'task_id': dic["oml:task_id"],
'task_type': task_type,
'task_type_id': dic["oml:task_type_id"],
'data_set_id': inputs["source_data"][
"oml:data_set"]["oml:data_set_id"],
'estimation_procedure_type': inputs["estimation_procedure"][
"oml:estimation_procedure"]["oml:type"],
'estimation_parameters': estimation_parameters,
'evaluation_measure': evaluation_measures,
}
if task_type in (
"Supervised Classification",
"Supervised Regression",
"Learning Curve"
):
# Convert some more parameters
for parameter in \
inputs["estimation_procedure"]["oml:estimation_procedure"][
"oml:parameter"]:
name = parameter["@name"]
text = parameter.get("#text", "")
estimation_parameters[name] = text

common_kwargs['estimation_procedure_type'] = inputs[
"estimation_procedure"][
"oml:estimation_procedure"]["oml:type"],
common_kwargs['estimation_parameters'] = estimation_parameters,
common_kwargs['target_name'] = inputs[
"source_data"]["oml:data_set"]["oml:target_feature"]
common_kwargs['data_splits_url'] = inputs["estimation_procedure"][
Expand Down
16 changes: 5 additions & 11 deletions openml/tasks/task.py
Expand Up @@ -9,16 +9,11 @@

class OpenMLTask(object):
def __init__(self, task_id, task_type_id, task_type, data_set_id,
estimation_procedure_type, estimation_parameters,
evaluation_measure):
self.task_id = int(task_id)
self.task_type_id = int(task_type_id)
self.task_type = task_type
self.dataset_id = int(data_set_id)
self.estimation_procedure = dict()
self.estimation_procedure["type"] = estimation_procedure_type
self.estimation_procedure["parameters"] = estimation_parameters
self.estimation_parameters = estimation_parameters
self.evaluation_measure = evaluation_measure

def get_dataset(self):
Expand Down Expand Up @@ -57,12 +52,14 @@ def __init__(self, task_id, task_type_id, task_type, data_set_id,
task_type_id=task_type_id,
task_type=task_type,
data_set_id=data_set_id,
estimation_procedure_type=estimation_procedure_type,
estimation_parameters=estimation_parameters,
evaluation_measure=evaluation_measure,
)
self.target_name = target_name
self.estimation_procedure = dict()
self.estimation_procedure["type"] = estimation_procedure_type
self.estimation_procedure["parameters"] = estimation_parameters
self.estimation_parameters = estimation_parameters
self.estimation_procedure["data_splits_url"] = data_splits_url
self.target_name = target_name
self.split = None

def get_X_and_y(self):
Expand Down Expand Up @@ -169,15 +166,12 @@ def __init__(self, task_id, task_type_id, task_type, data_set_id,

class OpenMLClusteringTask(OpenMLTask):
def __init__(self, task_id, task_type_id, task_type, data_set_id,
estimation_procedure_type, estimation_parameters,
evaluation_measure, number_of_clusters=None):
super(OpenMLClusteringTask, self).__init__(
task_id=task_id,
task_type_id=task_type_id,
task_type=task_type,
data_set_id=data_set_id,
estimation_procedure_type=estimation_procedure_type,
estimation_parameters=estimation_parameters,
evaluation_measure=evaluation_measure,
)
self.number_of_clusters = number_of_clusters
Expand Down
9 changes: 9 additions & 0 deletions tests/test_tasks/test_task_functions.py
Expand Up @@ -156,6 +156,15 @@ def test_get_task_with_cache(self):
task = openml.tasks.get_task(1)
self.assertIsInstance(task, OpenMLTask)

def test_get_task_different_types(self):
    """Smoke-test get_task for each non-classification task type.

    Runs against the production server. Covers a regression task, a
    learning-curve task and a clustering task; the clustering id
    reproduces issue #538 (get_task failing on clustering tasks).
    """
    openml.config.server = self.production_server
    # Task ids: 5001 = regression, 64 = learning curve,
    # 126033 = clustering (issue 538).
    for task_id in (5001, 64, 126033):
        openml.tasks.functions.get_task(task_id)

def test_download_split(self):
task = openml.tasks.get_task(1)
split = task.download_split()
Expand Down