Skip to content

Commit

Permalink
FIX add sentinel to all subflows when testing
Browse files Browse the repository at this point in the history
Sentinel was so far only added to the parent flow, thus subflows got
reused during testing. To prevent reusing subflows during testing,
this commits makes sure that each subflow has the same unique
sentinel in it's external version string
  • Loading branch information
mfeurer committed Nov 28, 2016
1 parent fd8bd4b commit a2b35a9
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 7 deletions.
2 changes: 1 addition & 1 deletion openml/flows/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def _from_xml(cls, xml_dict):
OpenMLFlow
"""
arguments = {}
arguments = OrderedDict()
dic = xml_dict["oml:flow"]

# Mandatory parts in the xml file
Expand Down
15 changes: 10 additions & 5 deletions openml/flows/sklearn_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def sklearn_to_flow(o):
elif isinstance(o, (bool, int, float)):
rval = o
elif isinstance(o, dict):
rval = {}
rval = OrderedDict()
for key, value in o.items():
if not isinstance(key, six.string_types):
raise TypeError('Can only use string as keys, you passed '
Expand Down Expand Up @@ -120,8 +120,9 @@ def flow_to_sklearn(o, **kwargs):

else:
# Regular dictionary
rval = {flow_to_sklearn(key, **kwargs): flow_to_sklearn(value, **kwargs)
for key, value in o.items()}
rval = OrderedDict((flow_to_sklearn(key, **kwargs),
flow_to_sklearn(value, **kwargs))
for key, value in o.items())
elif isinstance(o, (list, tuple)):
rval = [flow_to_sklearn(element, **kwargs) for element in o]
if isinstance(o, tuple):
Expand Down Expand Up @@ -271,14 +272,16 @@ def _deserialize_model(flow, **kwargs):

parameters = flow.parameters
components = flow.components
component_dict = defaultdict(dict)
parameter_dict = {}
component_dict = OrderedDict()
parameter_dict = OrderedDict()

for name in components:
if '__' in name:
parameter_name, step = name.split('__')
value = components[name]
rval = flow_to_sklearn(value)
if parameter_name not in component_dict:
component_dict[parameter_name] = OrderedDict()
component_dict[parameter_name][step] = rval
else:
value = components[name]
Expand All @@ -292,6 +295,8 @@ def _deserialize_model(flow, **kwargs):
# Replace the component placeholder by the actual flow
if isinstance(rval, dict) and 'oml:serialized_object' in rval:
parameter_name, step = rval['value'].split('__')
if parameter_name not in component_dict:
component_dict[parameter_name] = OrderedDict()
rval = component_dict[parameter_name][step]
parameter_dict[name] = rval

Expand Down
10 changes: 9 additions & 1 deletion tests/flows/test_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,15 @@ def test_sklearn_to_upload_to_flow(self):
estimator=model, param_distributions=parameter_grid, cv=cv)
rs.fit(X, y)
flow = openml.flows.sklearn_to_flow(rs)
flow.external_version = sentinel + flow.external_version

# Add the sentinel to all external version strings in all subflows
to_visit = collections.deque()
to_visit.appendleft(flow)
while len(to_visit) > 0:
current_flow = to_visit.pop()
for sub_flow in current_flow.components.values():
to_visit.appendleft(sub_flow)
current_flow.external_version = sentinel + current_flow.external_version

flow.publish()
self.assertIsInstance(flow.flow_id, int)
Expand Down

0 comments on commit a2b35a9

Please sign in to comment.