From eff74f61265cab4c36b6f6387513edc79caddc50 Mon Sep 17 00:00:00 2001
From: Jan van Rijn
Date: Wed, 5 Apr 2017 18:06:44 +0200
Subject: [PATCH] small changes requested by @mfeurer

---
 openml/exceptions.py                      | 11 +++++++----
 openml/flows/functions.py                 | 10 +++++-----
 openml/runs/functions.py                  |  6 +++---
 openml/setups/functions.py                |  7 +++----
 tests/test_flows/test_flow.py             |  6 +++---
 tests/test_runs/test_run_functions.py     |  2 +-
 tests/test_setups/test_setup_functions.py | 12 ++++++------
 7 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/openml/exceptions.py b/openml/exceptions.py
index 530c1b00d..f6eb75bd6 100644
--- a/openml/exceptions.py
+++ b/openml/exceptions.py
@@ -2,16 +2,19 @@ class PyOpenMLError(Exception):
     def __init__(self, message):
         super(PyOpenMLError, self).__init__(message)
 
-# class for when something is really wrong on the server (result did not parse to dict)
 class OpenMLServerError(PyOpenMLError):
-    """Server didn't respond 200, contains unparsed error."""
+    """class for when something is really wrong on the server
+    (result did not parse to dict), contains unparsed error."""
+
     def __init__(self, message):
         message = "OpenML Server error: " + message
         super(OpenMLServerError, self).__init__(message)
 
-# class for when the result of the server was not 200 (e.g., listing call w/o results)
+#
 class OpenMLServerException(OpenMLServerError):
-    """Server didn't respond 200."""
+    """exception for when the result of the server was
+    not 200 (e.g., listing call w/o results). """
+
     def __init__(self, code, message, additional=None):
         self.code = code
         self.additional = additional
diff --git a/openml/flows/functions.py b/openml/flows/functions.py
index c863740e2..ecded2b4c 100644
--- a/openml/flows/functions.py
+++ b/openml/flows/functions.py
@@ -70,8 +70,8 @@ def list_flows(offset=None, size=None, tag=None):
     return _list_flows(api_call)
 
 
-def flow_exists(name, version):
-    """Retrieves the flow id of the flow uniquely identified by name+version.
+def flow_exists(name, external_version):
+    """Retrieves the flow id of the flow uniquely identified by name + external_version.
 
     Parameter
     ---------
@@ -91,18 +91,18 @@ def flow_exists(name, version):
     """
    if not (isinstance(name, six.string_types) and len(name) > 0):
         raise ValueError('Argument \'name\' should be a non-empty string')
-    if not (isinstance(name, six.string_types) and len(version) > 0):
+    if not (isinstance(name, six.string_types) and len(external_version) > 0):
         raise ValueError('Argument \'version\' should be a non-empty string')
 
     xml_response = _perform_api_call("flow/exists",
-                                     data={'name': name, 'external_version': version})
+                                     data={'name': name, 'external_version': external_version})
 
     result_dict = xmltodict.parse(xml_response)
     flow_id = int(result_dict['oml:flow_exists']['oml:id'])
     if flow_id > 0:
         return flow_id
     else:
-        return False;
+        return False
 
 
 def _list_flows(api_call):
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 403964c46..3e0bc8457 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -23,7 +23,7 @@
 
 
 
-def run_task(task, model):
+def run_task(task, model, avoid_duplicate_runs=True):
     """Performs a CV run on the dataset of the given task, using the split.
     Parameters
     ----------
@@ -46,12 +46,12 @@
     # TODO why doesn't this accept a flow as input? - this would make this more flexible!
     flow = sklearn_to_flow(model)
 
-    # returns flow id if the flow exists on the server, -1 otherwise
+    # returns flow id if the flow exists on the server, False otherwise
     flow_id = flow_exists(flow.name, flow.external_version)
 
     # skips the run if it already exists and the user opts for this in the config file.
     # also, if the flow is not present on the server, the check is not needed.
-    if config.avoid_duplicate_runs and flow_id:
+    if avoid_duplicate_runs and flow_id:
         flow = get_flow(flow_id)
         setup_id = setup_exists(flow, model)
         ids = _run_exists(task.task_id, setup_id)
diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index ef7712440..e9167d4cc 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -11,15 +11,14 @@ def setup_exists(downloaded_flow, sklearn_model):
 
     ---------
     downloaded_flow : flow
-        the openml flow object (should be downloaded from server.
-        Otherwise also give flow id parameter)
+        the openml flow object (should be downloaded from server)
     sklearn_model : BaseEstimator
         The base estimator that was used to create the flow. Will
         be used to extract parameter settings from.
 
     Returns
     -------
-    setup_id : int s
+    setup_id : int
         setup id iff exists, False otherwise
     '''
 
@@ -29,7 +28,7 @@ def setup_exists(downloaded_flow, sklearn_model):
 
     file_elements = {'description': ('description.arff',description)}
     result = openml._api_calls._perform_api_call('/setup/exists/',
-                                                 file_elements = file_elements)
+                                                 file_elements=file_elements)
     result_dict = xmltodict.parse(result)
     setup_id = int(result_dict['oml:setup_exists']['oml:id'])
     if setup_id > 0:
diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py
index 6c8bdc7f6..7a211d9a0 100644
--- a/tests/test_flows/test_flow.py
+++ b/tests/test_flows/test_flow.py
@@ -176,14 +176,14 @@ def test_illegal_flow(self):
                             ('classif', sklearn.tree.DecisionTreeClassifier())])
         self.assertRaises(ValueError, openml.flows.sklearn_to_flow, illegal)
 
-    def test_nonexiting_flow_exists(self):
+    def test_nonexisting_flow_exists(self):
         name = get_sentinel() + get_sentinel()
         version = get_sentinel()
 
         flow_id = openml.flows.flow_exists(name, version)
-        self.assertEquals(flow_id, False)
+        self.assertFalse(flow_id)
 
-    def test_exiting_flow_exists(self):
+    def test_existing_flow_exists(self):
         # create a flow
         sentinel = get_sentinel()
         nb = sklearn.naive_bayes.GaussianNB()
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index de595c408..f92e7a8f0 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -27,7 +27,7 @@ class TestRun(TestBase):
 
     def _perform_run(self, task_id, num_instances, clf):
         task = openml.tasks.get_task(task_id)
-        run = openml.runs.run_task(task, clf)
+        run = openml.runs.run_task(task, clf, openml.config.avoid_duplicate_runs)
         run_ = run.publish()
         self.assertEqual(run_, run)
         self.assertIsInstance(run.dataset_id, int)
diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py
index 9039cf894..013f25168 100644
--- a/tests/test_setups/test_setup_functions.py
+++ b/tests/test_setups/test_setup_functions.py
@@ -6,6 +6,9 @@ import openml.exceptions
 from openml.testing import TestBase
 
 
+from sklearn.ensemble import BaggingClassifier
+from sklearn.tree import DecisionTreeClassifier
+
 if sys.version_info[0] >= 3:
     from unittest import mock
 else:
@@ -27,8 +30,7 @@ def get_sentinel():
 
 class TestRun(TestBase):
     def test_nonexisting_setup_exists(self):
-        from sklearn.tree import DecisionTreeClassifier
-        # first publish a nonexiting flow
+        # first publish a non-existing flow
         sentinel = get_sentinel()
         dectree = DecisionTreeClassifier()
         flow = openml.flows.sklearn_to_flow(dectree)
@@ -38,12 +40,10 @@
         # although the flow exists, we can be sure there are no
         # setups (yet) as it hasn't been ran
         setup_id = openml.setups.setup_exists(flow, dectree)
-        self.assertEquals(setup_id, False)
+        self.assertFalse(setup_id)
 
 
     def test_existing_setup_exists(self):
-        from sklearn.ensemble import BaggingClassifier
-        from sklearn.tree import DecisionTreeClassifier
         # first publish a nonexiting flow
         bagging = BaggingClassifier(DecisionTreeClassifier(max_depth=5,
                                                            min_samples_split=3),
@@ -57,7 +57,7 @@
         # although the flow exists, we can be sure there are no
         # setups (yet) as it hasn't been ran
         setup_id = openml.setups.setup_exists(flow, bagging)
-        self.assertEquals(setup_id, False)
+        self.assertFalse(setup_id)
 
         # now run the flow on an easy task:
         task = openml.tasks.get_task(115) #diabetes
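
For reference, a minimal usage sketch of the API surface touched by this patch, written against the post-patch signatures. This is illustrative only: it assumes a configured OpenML server and API key, reuses the diabetes task (115) from the tests above, and the module-level paths (e.g. openml.flows.get_flow) are assumed from the imports used in the patch rather than taken verbatim from it.

    import openml
    from sklearn.tree import DecisionTreeClassifier

    # Build a flow from a scikit-learn model and check whether it is on the server.
    # flow_exists() now takes the external version string explicitly.
    clf = DecisionTreeClassifier()
    flow = openml.flows.sklearn_to_flow(clf)
    flow_id = openml.flows.flow_exists(flow.name, flow.external_version)

    if flow_id:
        # setup_exists() expects the downloaded (server-side) flow plus the estimator
        # used to create it; it returns False when no matching setup exists yet.
        server_flow = openml.flows.get_flow(flow_id)
        setup_id = openml.setups.setup_exists(server_flow, clf)

    # run_task() now takes avoid_duplicate_runs as an argument instead of reading
    # openml.config directly, mirroring how the updated tests call it.
    task = openml.tasks.get_task(115)  # diabetes, as used in the tests
    run = openml.runs.run_task(task, clf, openml.config.avoid_duplicate_runs)
    run.publish()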