MAINT minor update to docstrings, better unit test
mfeurer committed Oct 5, 2016
1 parent af83de7 commit 1b0d3a4
Showing 3 changed files with 34 additions and 16 deletions.
31 changes: 18 additions & 13 deletions openml/flows/flow.py
@@ -52,25 +52,25 @@ class OpenMLFlow(object):
A list of dependencies necessary to run the flow. This field should
contain all libraries the flow depends on. To allow reproducibility
it should also specify the exact version numbers.
-binary_url : str
+binary_url : str, optional
Url from which the binary can be downloaded. Added by the server.
Ignored when uploaded manually. Will not be used by the python API
because binaries aren't compatible across machines.
-binary_format : str
+binary_format : str, optional
Format in which the binary code was uploaded. Will not be used by the
python API because binaries aren't compatible across machines.
-binary_md5 : str
+binary_md5 : str, optional
MD5 checksum to check if the binary code was correctly downloaded. Will
not be used by the python API because binaries aren't compatible across
machines.
-uploader : str
+uploader : str, optional
OpenML user ID of the uploader. Filled in by the server.
-upload_date : str
+upload_date : str, optional
Date the flow was uploaded. Filled in by the server.
flow_id : int, optional
-Flow ID. Assigned by the server (fixme shouldn't be here?)
-version : str
-OpenML version of the flow.
+Flow ID. Assigned by the server.
+version : str, optional
+OpenML version of the flow. Assigned by the server.
"""

def __init__(self, name, description, model, components, parameters,
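
The revised docstring separates user-supplied fields from optional, server-assigned ones. A minimal sketch of that distinction (not part of this commit; it assumes a flow object whose attribute names match the documented parameters):

# Fields the docstring now marks as optional because the server assigns them.
SERVER_ASSIGNED = ('uploader', 'upload_date', 'flow_id', 'version',
                   'binary_url', 'binary_format', 'binary_md5')

def report_server_fields(flow):
    """Print which server-assigned fields are still unset on a local flow."""
    for field in SERVER_ASSIGNED:
        value = getattr(flow, field, None)
        print('%s: %s' % (field, 'unset (local flow)' if value is None else value))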
@@ -139,7 +139,9 @@ def _to_dict(self):
""" Helper function used by _to_xml and _to_dict.
Creates a dictionary representation of self which can be serialized
-to xml by the function _to_xml. Uses OrderedDict to
+to xml by the function _to_xml. Since a flow can contain subflows
+(components) this helper function calls itself recursively to also
+serialize these flows to dictionaries.
Uses OrderedDict everywhere to make sure that the order of data stays
at it is added here. The return value (OrderedDict) will be used to
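
The recursion the new docstring describes can be sketched as follows (illustrative only, not the method itself; it assumes components is a mapping from identifier to subflow, and only a few of the oml:* keys are shown):

from collections import OrderedDict

def flow_to_dict(flow):
    # OrderedDict keeps keys in exactly the order they are inserted here.
    inner = OrderedDict()
    inner['oml:name'] = flow.name
    inner['oml:description'] = flow.description
    components = []
    for identifier, subflow in flow.components.items():
        component = OrderedDict()
        component['oml:identifier'] = identifier
        # recurse into the subflow and keep only its inner description
        component['oml:flow'] = flow_to_dict(subflow)['oml:flow']
        components.append(component)
    inner['oml:component'] = components
    inner['oml:tag'] = flow.tags
    return OrderedDict([('oml:flow', inner)])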
@@ -217,7 +219,6 @@ def _to_dict(self):
components.append(component_dict)

flow_dict['oml:flow']['oml:component'] = components
-
flow_dict['oml:flow']['oml:tag'] = self.tags

if self.binary_url is not None:
@@ -233,6 +234,9 @@
def _from_xml(cls, xml_dict):
"""Create a flow from an xml description.
+Calls itself recursively to create :class:`OpenMLFlow` objects of
+subflows (components).
Parameters
----------
xml_dict : dict
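
The recursion mentioned in the added docstring lines can be illustrated with a small stand-alone parser (an assumption-laden sketch, not the classmethod itself; only the 'oml:flow', 'oml:name', 'oml:component' and 'oml:identifier' keys are assumed here):

from collections import OrderedDict

def parse_flow(xml_dict):
    flow_xml = xml_dict['oml:flow']
    components = flow_xml.get('oml:component', [])
    if isinstance(components, dict):  # xmltodict: one child -> dict, several -> list
        components = [components]
    parsed = OrderedDict([('name', flow_xml.get('oml:name')),
                          ('components', OrderedDict())])
    for component in components:
        # each component wraps a nested <oml:flow>, so we can recurse directly
        parsed['components'][component['oml:identifier']] = parse_flow(component)
    return parsed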
Expand Down Expand Up @@ -264,9 +268,10 @@ def _from_xml(cls, xml_dict):
# parameters, components (subflows) and tags. These can't be tackled
# in the loops above because xmltodict returns a dict if such an
# entity occurs once, and a list if it occurs multiple times.
-# Furthermore, for components this method is called recursively and
-# for parameters the actual xml is split into two dictionaries for
-# easier access in python.
+# Furthermore, they must be treated differently, for example
+# for components this method is called recursively and
+# for parameters the actual information is split into two dictionaries
+# for easier access in python.

parameters = OrderedDict()
parameters_meta_info = OrderedDict()
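
A compact sketch of the two special cases the comment describes (the helper and the oml:* parameter key names are assumptions for illustration, not the actual implementation):

from collections import OrderedDict

def _as_list(entry):
    """xmltodict gives a dict for a single child element and a list otherwise."""
    if entry is None:
        return []
    return entry if isinstance(entry, list) else [entry]

def split_parameters(flow_xml):
    """Split each parameter entry into a value dict and a meta-info dict."""
    parameters = OrderedDict()
    parameters_meta_info = OrderedDict()
    for param in _as_list(flow_xml.get('oml:parameter')):
        name = param['oml:name']
        parameters[name] = param.get('oml:default_value')
        parameters_meta_info[name] = OrderedDict([
            ('data_type', param.get('oml:data_type')),
            ('description', param.get('oml:description'))])
    return parameters, parameters_meta_info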
2 changes: 1 addition & 1 deletion openml/flows/sklearn_converter.py
@@ -24,7 +24,7 @@


class SklearnToFlowConverter(object):
"""Convert scikit-learn estimator into an OpenMLFlow."""
"""Convert scikit-learn estimator into an OpenMLFlow and vice versa."""

def serialize(self, o):

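Based only on the names visible in this diff, the converter is used roughly as sketched below; the exact return value of serialize() and the name of the reverse method are not shown here, so treat this as illustrative:

import sklearn.tree
from openml.flows.sklearn_converter import SklearnToFlowConverter

converter = SklearnToFlowConverter()
model = sklearn.tree.DecisionTreeClassifier(max_depth=3)
flow = converter.serialize(model)  # scikit-learn estimator -> OpenMLFlow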
17 changes: 15 additions & 2 deletions tests/flows/test_flow.py
@@ -9,8 +9,10 @@

import scipy.stats
import sklearn.datasets
+import sklearn.decomposition
import sklearn.dummy
import sklearn.ensemble
+import sklearn.feature_selection
import sklearn.model_selection
import sklearn.pipeline
import sklearn.preprocessing
@@ -151,11 +153,17 @@ def side_effect(self):
name_mock.side_effect = side_effect

# Test a more complicated flow
+ohe = sklearn.preprocessing.OneHotEncoder(categorical_features=[1])
scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
+pca = sklearn.decomposition.TruncatedSVD()
+fs = sklearn.feature_selection.SelectPercentile(
+    score_func=sklearn.feature_selection.f_classif, percentile=30)
+fu = sklearn.pipeline.FeatureUnion(transformer_list=[
+    ('pca', pca), ('fs', fs)])
boosting = sklearn.ensemble.AdaBoostClassifier(
base_estimator=sklearn.tree.DecisionTreeClassifier())
-model = sklearn.pipeline.Pipeline(steps=(
-    ('scaler', scaler), ('boosting', boosting)))
+model = sklearn.pipeline.Pipeline(steps=[('ohe', ohe), ('scaler', scaler),
+    ('fu', fu), ('boosting', boosting)])
parameter_grid = {'boosting__n_estimators': [1, 5, 10, 100],
'boosting__learning_rate': scipy.stats.uniform(0.01, 0.99),
'boosting__base_estimator__max_depth': scipy.stats.randint(1, 10)}
@@ -199,6 +207,7 @@ def side_effect(self):
server_xml = server_xml.replace(' ', '').replace('\t', '').strip().replace('\n\n', '\n').replace('&quot;', '"')
server_xml = re.sub(r'^$', '', server_xml)

+
self.assertEqual(server_xml, local_xml)

self.assertEqual(new_flow, flow)
@@ -208,7 +217,11 @@ def side_effect(self):
fixture_name = 'sklearn.model_selection._search.RandomizedSearchCV(' \
'sklearn.model_selection._split.StratifiedKFold,' \
'sklearn.pipeline.Pipeline(' \
+'sklearn.preprocessing.data.OneHotEncoder,' \
'sklearn.preprocessing.data.StandardScaler,' \
+'sklearn.pipeline.FeatureUnion(' \
+'sklearn.decomposition.truncated_svd.TruncatedSVD,' \
+'sklearn.feature_selection.univariate_selection.SelectPercentile),' \
'sklearn.ensemble.weight_boosting.AdaBoostClassifier(' \
'sklearn.tree.tree.DecisionTreeClassifier)))'

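The fixture encodes the expected flow name: each estimator contributes its full module path plus class name, and sub-estimators are listed in parentheses. A small sketch of that composition (not the converter's actual naming code; the exact module paths depend on the scikit-learn version):

import sklearn.ensemble
import sklearn.tree

def flow_name(estimator, children=()):
    """Compose 'module.ClassName(child,child,...)' from already-built child names."""
    name = '%s.%s' % (type(estimator).__module__, type(estimator).__name__)
    if children:
        name += '(' + ','.join(children) + ')'
    return name

inner = flow_name(sklearn.tree.DecisionTreeClassifier())
print(flow_name(sklearn.ensemble.AdaBoostClassifier(), [inner]))
# e.g. 'sklearn.ensemble.weight_boosting.AdaBoostClassifier(
#       sklearn.tree.tree.DecisionTreeClassifier)' with the scikit-learn
#       version used in this test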
