Skip to content

Commit

Permalink
small changes requested by @mfeurer
Browse files Browse the repository at this point in the history
  • Loading branch information
janvanrijn committed Apr 5, 2017
1 parent 4b46d5a commit eff74f6
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 26 deletions.
11 changes: 7 additions & 4 deletions openml/exceptions.py
Expand Up @@ -2,16 +2,19 @@ class PyOpenMLError(Exception):
def __init__(self, message):
super(PyOpenMLError, self).__init__(message)

# class for when something is really wrong on the server (result did not parse to dict)
class OpenMLServerError(PyOpenMLError):
"""Server didn't respond 200, contains unparsed error."""
"""class for when something is really wrong on the server
(result did not parse to dict), contains unparsed error."""

def __init__(self, message):
message = "OpenML Server error: " + message
super(OpenMLServerError, self).__init__(message)

# class for when the result of the server was not 200 (e.g., listing call w/o results)
#
class OpenMLServerException(OpenMLServerError):
"""Server didn't respond 200."""
"""exception for when the result of the server was
not 200 (e.g., listing call w/o results). """

def __init__(self, code, message, additional=None):
self.code = code
self.additional = additional
Expand Down
10 changes: 5 additions & 5 deletions openml/flows/functions.py
Expand Up @@ -70,8 +70,8 @@ def list_flows(offset=None, size=None, tag=None):
return _list_flows(api_call)


def flow_exists(name, version):
"""Retrieves the flow id of the flow uniquely identified by name+version.
def flow_exists(name, external_version):
"""Retrieves the flow id of the flow uniquely identified by name + external_version.
Parameter
---------
Expand All @@ -91,18 +91,18 @@ def flow_exists(name, version):
"""
if not (isinstance(name, six.string_types) and len(name) > 0):
raise ValueError('Argument \'name\' should be a non-empty string')
if not (isinstance(name, six.string_types) and len(version) > 0):
if not (isinstance(name, six.string_types) and len(external_version) > 0):
raise ValueError('Argument \'version\' should be a non-empty string')

xml_response = _perform_api_call("flow/exists",
data={'name': name, 'external_version': version})
data={'name': name, 'external_version': external_version})

result_dict = xmltodict.parse(xml_response)
flow_id = int(result_dict['oml:flow_exists']['oml:id'])
if flow_id > 0:
return flow_id
else:
return False;
return False


def _list_flows(api_call):
Expand Down
6 changes: 3 additions & 3 deletions openml/runs/functions.py
Expand Up @@ -23,7 +23,7 @@



def run_task(task, model):
def run_task(task, model, avoid_duplicate_runs=True):
"""Performs a CV run on the dataset of the given task, using the split.
Parameters
Expand All @@ -46,12 +46,12 @@ def run_task(task, model):
# TODO why doesn't this accept a flow as input? - this would make this more flexible!
flow = sklearn_to_flow(model)

# returns flow id if the flow exists on the server, -1 otherwise
# returns flow id if the flow exists on the server, False otherwise
flow_id = flow_exists(flow.name, flow.external_version)

# skips the run if it already exists and the user opts for this in the config file.
# also, if the flow is not present on the server, the check is not needed.
if config.avoid_duplicate_runs and flow_id:
if avoid_duplicate_runs and flow_id:
flow = get_flow(flow_id)
setup_id = setup_exists(flow, model)
ids = _run_exists(task.task_id, setup_id)
Expand Down
7 changes: 3 additions & 4 deletions openml/setups/functions.py
Expand Up @@ -11,15 +11,14 @@ def setup_exists(downloaded_flow, sklearn_model):
---------
downloaded_flow : flow
the openml flow object (should be downloaded from server.
Otherwise also give flow id parameter)
the openml flow object (should be downloaded from server)
sklearn_model : BaseEstimator
The base estimator that was used to create the flow. Will
be used to extract parameter settings from.
Returns
-------
setup_id : int s
setup_id : int
setup id iff exists, False otherwise
'''

Expand All @@ -29,7 +28,7 @@ def setup_exists(downloaded_flow, sklearn_model):
file_elements = {'description': ('description.arff',description)}

result = openml._api_calls._perform_api_call('/setup/exists/',
file_elements = file_elements)
file_elements=file_elements)
result_dict = xmltodict.parse(result)
setup_id = int(result_dict['oml:setup_exists']['oml:id'])
if setup_id > 0:
Expand Down
6 changes: 3 additions & 3 deletions tests/test_flows/test_flow.py
Expand Up @@ -176,14 +176,14 @@ def test_illegal_flow(self):
('classif', sklearn.tree.DecisionTreeClassifier())])
self.assertRaises(ValueError, openml.flows.sklearn_to_flow, illegal)

def test_nonexiting_flow_exists(self):
def test_nonexisting_flow_exists(self):
name = get_sentinel() + get_sentinel()
version = get_sentinel()

flow_id = openml.flows.flow_exists(name, version)
self.assertEquals(flow_id, False)
self.assertFalse(flow_id)

def test_exiting_flow_exists(self):
def test_existing_flow_exists(self):
# create a flow
sentinel = get_sentinel()
nb = sklearn.naive_bayes.GaussianNB()
Expand Down
2 changes: 1 addition & 1 deletion tests/test_runs/test_run_functions.py
Expand Up @@ -27,7 +27,7 @@ class TestRun(TestBase):

def _perform_run(self, task_id, num_instances, clf):
task = openml.tasks.get_task(task_id)
run = openml.runs.run_task(task, clf)
run = openml.runs.run_task(task, clf, openml.config.avoid_duplicate_runs)
run_ = run.publish()
self.assertEqual(run_, run)
self.assertIsInstance(run.dataset_id, int)
Expand Down
12 changes: 6 additions & 6 deletions tests/test_setups/test_setup_functions.py
Expand Up @@ -6,6 +6,9 @@
import openml.exceptions
from openml.testing import TestBase

from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

if sys.version_info[0] >= 3:
from unittest import mock
else:
Expand All @@ -27,8 +30,7 @@ def get_sentinel():
class TestRun(TestBase):

def test_nonexisting_setup_exists(self):
from sklearn.tree import DecisionTreeClassifier
# first publish a nonexiting flow
# first publish a non-existing flow
sentinel = get_sentinel()
dectree = DecisionTreeClassifier()
flow = openml.flows.sklearn_to_flow(dectree)
Expand All @@ -38,12 +40,10 @@ def test_nonexisting_setup_exists(self):
# although the flow exists, we can be sure there are no
# setups (yet) as it hasn't been run
setup_id = openml.setups.setup_exists(flow, dectree)
self.assertEquals(setup_id, False)
self.assertFalse(setup_id)


def test_existing_setup_exists(self):
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
# first publish a non-existing flow
bagging = BaggingClassifier(DecisionTreeClassifier(max_depth=5,
min_samples_split=3),
Expand All @@ -57,7 +57,7 @@ def test_existing_setup_exists(self):
# although the flow exists, we can be sure there are no
# setups (yet) as it hasn't been run
setup_id = openml.setups.setup_exists(flow, bagging)
self.assertEquals(setup_id, False)
self.assertFalse(setup_id)

# now run the flow on an easy task:
task = openml.tasks.get_task(115) #diabetes
Expand Down

0 comments on commit eff74f6

Please sign in to comment.