From a2b35a9cb9ee78165cd9bf0bab513eca9ef82aad Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Mon, 28 Nov 2016 10:41:16 +0100 Subject: [PATCH] FIX add sentinel to all subflows when testing Sentinel was so far only added to the parent flow, thus subflows got reused during testing. To prevent reusing subflows during testing, this commit makes sure that each subflow has the same unique sentinel in its external version string --- openml/flows/flow.py | 2 +- openml/flows/sklearn_converter.py | 15 ++++++++++----- tests/flows/test_flow.py | 10 +++++++++- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/openml/flows/flow.py b/openml/flows/flow.py index f53482dbe..f42c03f1d 100644 --- a/openml/flows/flow.py +++ b/openml/flows/flow.py @@ -238,7 +238,7 @@ def _from_xml(cls, xml_dict): OpenMLFlow """ - arguments = {} + arguments = OrderedDict() dic = xml_dict["oml:flow"] # Mandatory parts in the xml file diff --git a/openml/flows/sklearn_converter.py b/openml/flows/sklearn_converter.py index e254b2edb..f60005ff4 100644 --- a/openml/flows/sklearn_converter.py +++ b/openml/flows/sklearn_converter.py @@ -41,7 +41,7 @@ def sklearn_to_flow(o): elif isinstance(o, (bool, int, float)): rval = o elif isinstance(o, dict): - rval = {} + rval = OrderedDict() for key, value in o.items(): if not isinstance(key, six.string_types): raise TypeError('Can only use string as keys, you passed ' @@ -120,8 +120,9 @@ def flow_to_sklearn(o, **kwargs): else: # Regular dictionary - rval = {flow_to_sklearn(key, **kwargs): flow_to_sklearn(value, **kwargs) - for key, value in o.items()} + rval = OrderedDict((flow_to_sklearn(key, **kwargs), + flow_to_sklearn(value, **kwargs)) + for key, value in o.items()) elif isinstance(o, (list, tuple)): rval = [flow_to_sklearn(element, **kwargs) for element in o] if isinstance(o, tuple): @@ -271,14 +272,16 @@ def _deserialize_model(flow, **kwargs): parameters = flow.parameters components = flow.components - component_dict = defaultdict(dict) - 
parameter_dict = {} + component_dict = OrderedDict() + parameter_dict = OrderedDict() for name in components: if '__' in name: parameter_name, step = name.split('__') value = components[name] rval = flow_to_sklearn(value) + if parameter_name not in component_dict: + component_dict[parameter_name] = OrderedDict() component_dict[parameter_name][step] = rval else: value = components[name] @@ -292,6 +295,8 @@ def _deserialize_model(flow, **kwargs): # Replace the component placeholder by the actual flow if isinstance(rval, dict) and 'oml:serialized_object' in rval: parameter_name, step = rval['value'].split('__') + if parameter_name not in component_dict: + component_dict[parameter_name] = OrderedDict() rval = component_dict[parameter_name][step] parameter_dict[name] = rval diff --git a/tests/flows/test_flow.py b/tests/flows/test_flow.py index 1e9025443..4de41c029 100644 --- a/tests/flows/test_flow.py +++ b/tests/flows/test_flow.py @@ -158,7 +158,15 @@ def test_sklearn_to_upload_to_flow(self): estimator=model, param_distributions=parameter_grid, cv=cv) rs.fit(X, y) flow = openml.flows.sklearn_to_flow(rs) - flow.external_version = sentinel + flow.external_version + + # Add the sentinel to all external version strings in all subflows + to_visit = collections.deque() + to_visit.appendleft(flow) + while len(to_visit) > 0: + current_flow = to_visit.pop() + for sub_flow in current_flow.components.values(): + to_visit.appendleft(sub_flow) + current_flow.external_version = sentinel + current_flow.external_version flow.publish() self.assertIsInstance(flow.flow_id, int)