diff --git a/openml/flows/sklearn_converter.py b/openml/flows/sklearn_converter.py index 70f13bfcf..a49999847 100644 --- a/openml/flows/sklearn_converter.py +++ b/openml/flows/sklearn_converter.py @@ -533,7 +533,7 @@ def _serialize_cross_validator(o): for key in args: # We need deprecation warnings to always be on in order to # catch deprecated param values. - # This is set in utils/__init__.py.py.py but it gets overwritten + # This is set in utils/__init__.py but it gets overwritten # when running under python3 somehow. warnings.simplefilter("always", DeprecationWarning) try: diff --git a/openml/setups/__init__.py b/openml/setups/__init__.py index 676d7a6b1..1c07274bb 100644 --- a/openml/setups/__init__.py +++ b/openml/setups/__init__.py @@ -1,4 +1,4 @@ from .setup import OpenMLSetup -from .functions import get_setup, setup_list, setup_exists, initialize_model +from .functions import get_setup, list_setups, setup_exists, initialize_model -__all__ = ['get_setup', 'setup_list', 'setup_exists', 'initialize_model'] \ No newline at end of file +__all__ = ['get_setup', 'list_setups', 'setup_exists', 'initialize_model'] \ No newline at end of file diff --git a/openml/setups/functions.py b/openml/setups/functions.py index 0f3620bda..f17dc2bfb 100644 --- a/openml/setups/functions.py +++ b/openml/setups/functions.py @@ -4,7 +4,7 @@ import xmltodict from .setup import OpenMLSetup, OpenMLParameter -from openml.flows import sklearn_to_flow, flow_exists +from openml.flows import flow_exists def setup_exists(flow, model=None): @@ -74,7 +74,7 @@ def get_setup(setup_id): return _create_setup_from_xml(result_dict) -def setup_list(flow=None, tag=None, offset=None, size=None): +def list_setups(flow=None, tag=None, setup=None, offset=None, size=None): """List all setups matching all of the given filters. 
Perform API call `/setup/list/{filters} @@ -85,6 +85,8 @@ def setup_list(flow=None, tag=None, offset=None, size=None): tag : str, optional + setup : list(int), optional + offset : int, optional size : int, optional @@ -100,6 +102,8 @@ def setup_list(flow=None, tag=None, offset=None, size=None): api_call += "/offset/%d" % int(offset) if size is not None: api_call += "/limit/%d" % int(size) + if setup is not None: + api_call += "/setup/%s" % ','.join([str(int(i)) for i in setup]) if flow is not None: api_call += "/flow/%s" % flow if tag is not None: @@ -120,11 +124,11 @@ def _list_setups(api_call): % str(setups_dict)) elif '@xmlns:oml' not in setups_dict['oml:setups']: raise ValueError('Error in return XML, does not contain ' - '"oml:runs"/@xmlns:oml: %s' + '"oml:setups"/@xmlns:oml: %s' % str(setups_dict)) elif setups_dict['oml:setups']['@xmlns:oml'] != 'http://openml.org/openml': raise ValueError('Error in return XML, value of ' - '"oml:runs"/@xmlns:oml is not ' + '"oml:setups"/@xmlns:oml is not ' '"http://openml.org/openml": %s' % str(setups_dict)) diff --git a/tests/test_flows/test_flow.py b/tests/test_flows/test_flow.py index bef085aa3..b62fd63e0 100644 --- a/tests/test_flows/test_flow.py +++ b/tests/test_flows/test_flow.py @@ -239,17 +239,26 @@ def get_sentinel(): def test_existing_flow_exists(self): # create a flow nb = sklearn.naive_bayes.GaussianNB() - flow = openml.flows.sklearn_to_flow(nb) - flow, _ = self._add_sentinel_to_flow_name(flow, None) - #publish the flow - flow = flow.publish() - #redownload the flow - flow = openml.flows.get_flow(flow.flow_id) - - # check if flow exists can find it - flow = openml.flows.get_flow(flow.flow_id) - downloaded_flow_id = openml.flows.flow_exists(flow.name, flow.external_version) - self.assertEquals(downloaded_flow_id, flow.flow_id) + + steps = [('imputation', sklearn.preprocessing.Imputer(strategy='median')), + ('hotencoding', sklearn.preprocessing.OneHotEncoder(sparse=False, + handle_unknown='ignore')), +
('variencethreshold', sklearn.feature_selection.VarianceThreshold()), + ('classifier', sklearn.tree.DecisionTreeClassifier())] + complicated = sklearn.pipeline.Pipeline(steps=steps) + + for classifier in [nb, complicated]: + flow = openml.flows.sklearn_to_flow(classifier) + flow, _ = self._add_sentinel_to_flow_name(flow, None) + #publish the flow + flow = flow.publish() + #redownload the flow + flow = openml.flows.get_flow(flow.flow_id) + + # check if flow exists can find it + flow = openml.flows.get_flow(flow.flow_id) + downloaded_flow_id = openml.flows.flow_exists(flow.name, flow.external_version) + self.assertEquals(downloaded_flow_id, flow.flow_id) def test_sklearn_to_upload_to_flow(self): iris = sklearn.datasets.load_iris() diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py index 90bde33cf..88e98708f 100644 --- a/tests/test_setups/test_setup_functions.py +++ b/tests/test_setups/test_setup_functions.py @@ -47,7 +47,6 @@ def get_params(self, deep=True): return {} - class TestRun(TestBase): def test_nonexisting_setup_exists(self): @@ -118,15 +117,27 @@ def test_get_setup(self): else: self.assertEquals(len(current.parameters), num_params[idx]) - def test_setup_list_filter_flow(self): - # TODO: please remove for better test - # openml.config.server = self.production_server + openml.config.server = self.production_server - flow_id = 31 # TODO please change + flow_id = 5873 - setups = openml.setups.setup_list(flow=31) + setups = openml.setups.list_setups(flow=flow_id) self.assertGreater(len(setups), 0) # TODO: please adjust 0 for setup_id in setups.keys(): self.assertEquals(setups[setup_id].flow_id, flow_id) + + def test_setuplist_offset(self): + # TODO: remove after pull on live for better testing + # openml.config.server = self.production_server + + size = 100 + setups = openml.setups.list_setups(offset=0, size=size) + self.assertEquals(len(setups), size) + setups2 = openml.setups.list_setups(offset=size, size=size) + 
self.assertEquals(len(setups2), size) + + all_ids = set(setups.keys()).union(setups2.keys()) + + self.assertEqual(len(all_ids), size * 2) \ No newline at end of file