From eff74f61265cab4c36b6f6387513edc79caddc50 Mon Sep 17 00:00:00 2001
From: Jan van Rijn
Date: Wed, 5 Apr 2017 18:06:44 +0200
Subject: [PATCH] small changes requested by @mfeurer

---
 openml/exceptions.py                      | 11 +++++++----
 openml/flows/functions.py                 | 10 +++++-----
 openml/runs/functions.py                  |  6 +++---
 openml/setups/functions.py                |  7 +++----
 tests/test_flows/test_flow.py             |  6 +++---
 tests/test_runs/test_run_functions.py     |  2 +-
 tests/test_setups/test_setup_functions.py | 12 ++++++------
 7 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/openml/exceptions.py b/openml/exceptions.py
index 530c1b00d..f6eb75bd6 100644
--- a/openml/exceptions.py
+++ b/openml/exceptions.py
@@ -2,16 +2,19 @@ class PyOpenMLError(Exception):
     def __init__(self, message):
         super(PyOpenMLError, self).__init__(message)
 
-# class for when something is really wrong on the server (result did not parse to dict)
 class OpenMLServerError(PyOpenMLError):
-    """Server didn't respond 200, contains unparsed error."""
+    """class for when something is really wrong on the server
+    (result did not parse to dict), contains unparsed error."""
+
     def __init__(self, message):
         message = "OpenML Server error: " + message
         super(OpenMLServerError, self).__init__(message)
 
-# class for when the result of the server was not 200 (e.g., listing call w/o results)
+#
 class OpenMLServerException(OpenMLServerError):
-    """Server didn't respond 200."""
+    """exception for when the result of the server was
+    not 200 (e.g., listing call w/o results). """
+
     def __init__(self, code, message, additional=None):
         self.code = code
         self.additional = additional
diff --git a/openml/flows/functions.py b/openml/flows/functions.py
index c863740e2..ecded2b4c 100644
--- a/openml/flows/functions.py
+++ b/openml/flows/functions.py
@@ -70,8 +70,8 @@ def list_flows(offset=None, size=None, tag=None):
     return _list_flows(api_call)
 
 
-def flow_exists(name, version):
-    """Retrieves the flow id of the flow uniquely identified by name+version.
+def flow_exists(name, external_version):
+    """Retrieves the flow id of the flow uniquely identified by name + external_version.
 
     Parameter
     ---------
@@ -91,18 +91,18 @@ def flow_exists(name, version):
     """
    if not (isinstance(name, six.string_types) and len(name) > 0):
         raise ValueError('Argument \'name\' should be a non-empty string')
-    if not (isinstance(name, six.string_types) and len(version) > 0):
+    if not (isinstance(name, six.string_types) and len(external_version) > 0):
         raise ValueError('Argument \'version\' should be a non-empty string')
 
     xml_response = _perform_api_call("flow/exists",
-                                     data={'name': name, 'external_version': version})
+                                     data={'name': name, 'external_version': external_version})
 
     result_dict = xmltodict.parse(xml_response)
     flow_id = int(result_dict['oml:flow_exists']['oml:id'])
     if flow_id > 0:
         return flow_id
     else:
-        return False;
+        return False
 
 
 def _list_flows(api_call):
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 403964c46..3e0bc8457 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -23,7 +23,7 @@
 
 
 
-def run_task(task, model):
+def run_task(task, model, avoid_duplicate_runs=True):
     """Performs a CV run on the dataset of the given task, using the split.
     Parameters
     ----------
@@ -46,12 +46,12 @@
     # TODO why doesn't this accept a flow as input? - this would make this more flexible!
     flow = sklearn_to_flow(model)
 
-    # returns flow id if the flow exists on the server, -1 otherwise
+    # returns flow id if the flow exists on the server, False otherwise
     flow_id = flow_exists(flow.name, flow.external_version)
 
     # skips the run if it already exists and the user opts for this in the config file.
     # also, if the flow is not present on the server, the check is not needed.
-    if config.avoid_duplicate_runs and flow_id:
+    if avoid_duplicate_runs and flow_id:
         flow = get_flow(flow_id)
         setup_id = setup_exists(flow, model)
         ids = _run_exists(task.task_id, setup_id)
diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index ef7712440..e9167d4cc 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -11,15 +11,14 @@ def setup_exists(downloaded_flow, sklearn_model):
 
     ---------
     downloaded_flow : flow
-        the openml flow object (should be downloaded from server.
-        Otherwise also give flow id parameter)
+        the openml flow object (should be downloaded from server)
     sklearn_model : BaseEstimator
         The base estimator that was used to create the flow. Will
         be used to extract parameter settings from.
 
     Returns
     -------
-    setup_id : int s
+    setup_id : int
         setup id iff exists, False otherwise
     '''
 
@@ -29,7 +28,7 @@ def setup_exists(downloaded_flow, sklearn_model):
 
     file_elements = {'description': ('description.arff',description)}
     result = openml._api_calls._perform_api_call('/setup/exists/',
-                                                 file_elements = file_elements)
+                                                 file_elements=file_elements)
     result_dict = xmltodict.parse(result)
     setup_id = int(result_dict['oml:setup_exists']['oml:id'])
     if setup_id > 0:
diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py
index 6c8bdc7f6..7a211d9a0 100644
--- a/tests/test_flows/test_flow.py
+++ b/tests/test_flows/test_flow.py
@@ -176,14 +176,14 @@ def test_illegal_flow(self):
                             ('classif', sklearn.tree.DecisionTreeClassifier())])
         self.assertRaises(ValueError, openml.flows.sklearn_to_flow, illegal)
 
-    def test_nonexiting_flow_exists(self):
+    def test_nonexisting_flow_exists(self):
         name = get_sentinel() + get_sentinel()
         version = get_sentinel()
 
         flow_id = openml.flows.flow_exists(name, version)
-        self.assertEquals(flow_id, False)
+        self.assertFalse(flow_id)
 
-    def test_exiting_flow_exists(self):
+    def test_existing_flow_exists(self):
         # create a flow
         sentinel = get_sentinel()
         nb = sklearn.naive_bayes.GaussianNB()
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index de595c408..f92e7a8f0 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -27,7 +27,7 @@ class TestRun(TestBase):
 
     def _perform_run(self, task_id, num_instances, clf):
         task = openml.tasks.get_task(task_id)
-        run = openml.runs.run_task(task, clf)
+        run = openml.runs.run_task(task, clf, openml.config.avoid_duplicate_runs)
         run_ = run.publish()
         self.assertEqual(run_, run)
         self.assertIsInstance(run.dataset_id, int)
diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py
index 9039cf894..013f25168 100644
--- a/tests/test_setups/test_setup_functions.py
+++ b/tests/test_setups/test_setup_functions.py
@@ -6,6 +6,9 @@ import openml.exceptions
 from openml.testing import TestBase
 
 
+from sklearn.ensemble import BaggingClassifier
+from sklearn.tree import DecisionTreeClassifier
+
 if sys.version_info[0] >= 3:
     from unittest import mock
 else:
@@ -27,8 +30,7 @@ def get_sentinel():
 
 class TestRun(TestBase):
     def test_nonexisting_setup_exists(self):
-        from sklearn.tree import DecisionTreeClassifier
-        # first publish a nonexiting flow
+        # first publish a non-existing flow
         sentinel = get_sentinel()
         dectree = DecisionTreeClassifier()
         flow = openml.flows.sklearn_to_flow(dectree)
@@ -38,12 +40,10 @@
         # although the flow exists, we can be sure there are no
         # setups (yet) as it hasn't been ran
         setup_id = openml.setups.setup_exists(flow, dectree)
-        self.assertEquals(setup_id, False)
+        self.assertFalse(setup_id)
 
 
     def test_existing_setup_exists(self):
-        from sklearn.ensemble import BaggingClassifier
-        from sklearn.tree import DecisionTreeClassifier
         # first publish a nonexiting flow
         bagging = BaggingClassifier(DecisionTreeClassifier(max_depth=5,
                                                            min_samples_split=3),
@@ -57,7 +57,7 @@
         # although the flow exists, we can be sure there are no
         # setups (yet) as it hasn't been ran
         setup_id = openml.setups.setup_exists(flow, bagging)
-        self.assertEquals(setup_id, False)
+        self.assertFalse(setup_id)
 
         # now run the flow on an easy task:
         task = openml.tasks.get_task(115) #diabetes
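
For reference, a minimal usage sketch of the API surface touched by this patch, written against the post-patch signatures. This is illustrative only: it assumes a configured OpenML server and API key, reuses the diabetes task (115) from the tests above, and the module-level paths (e.g. openml.flows.get_flow) are assumed from the imports used in the patch rather than taken verbatim from it.

    import openml
    from sklearn.tree import DecisionTreeClassifier

    # Build a flow from a scikit-learn model and check whether it is on the server.
    # flow_exists() now takes the external version string explicitly.
    clf = DecisionTreeClassifier()
    flow = openml.flows.sklearn_to_flow(clf)
    flow_id = openml.flows.flow_exists(flow.name, flow.external_version)

    if flow_id:
        # setup_exists() expects the downloaded (server-side) flow plus the estimator
        # used to create it; it returns False when no matching setup exists yet.
        server_flow = openml.flows.get_flow(flow_id)
        setup_id = openml.setups.setup_exists(server_flow, clf)

    # run_task() now takes avoid_duplicate_runs as an argument instead of reading
    # openml.config directly, mirroring how the updated tests call it.
    task = openml.tasks.get_task(115)  # diabetes, as used in the tests
    run = openml.runs.run_task(task, clf, openml.config.avoid_duplicate_runs)
    run.publish()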