Skip to content

Commit

Permalink
Merge 929fec1 into 5c4bf4a
Browse files Browse the repository at this point in the history
  • Loading branch information
janvanrijn committed Apr 6, 2017
2 parents 5c4bf4a + 929fec1 commit f62beb5
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 8 deletions.
30 changes: 23 additions & 7 deletions openml/flows/sklearn_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
# Necessary to have signature available in python 2.7
from sklearn.utils.fixes import signature

from .flow import OpenMLFlow
from openml.flows import OpenMLFlow
from openml.exceptions import PyOpenMLError


if sys.version_info >= (3, 5):
Expand All @@ -32,37 +33,52 @@
'^(?P<name>[\w\-]+)((?P<operation>==|>=|>)(?P<version>(\d+\.)?(\d+\.)?(\d+)))?$')


def sklearn_to_flow(o):
def sklearn_to_flow(o, parent_model=None):
# TODO: assert that only on first recursion lvl `parent_model` can be None

if _is_estimator(o):
# is the main model or a submodel
rval = _serialize_model(o)
elif isinstance(o, (list, tuple)):
rval = [sklearn_to_flow(element) for element in o]
# TODO: explain what type of parameter is here
rval = [sklearn_to_flow(element, parent_model) for element in o]
if isinstance(o, tuple):
assert(parent_model is not None)
reserved_keywords = set(parent_model.get_params(deep=False).keys())
if o[0] in reserved_keywords:
parent_model_name = parent_model.__module__ + "." + \
parent_model.__class__.__name__
raise PyOpenMLError('Found element shadowing official ' + \
'parameter for %s: %s' %(parent_model_name, o[0]))
rval = tuple(rval)
elif isinstance(o, (bool, int, float, six.string_types)) or o is None:
# base parameter values
rval = o
elif isinstance(o, dict):
# TODO: explain what type of parameter is here
rval = OrderedDict()
for key, value in o.items():
if not isinstance(key, six.string_types):
raise TypeError('Can only use string as keys, you passed '
'type %s for value %s.' %
(type(key), str(key)))
key = sklearn_to_flow(key)
value = sklearn_to_flow(value)
key = sklearn_to_flow(key, parent_model)
value = sklearn_to_flow(value, parent_model)
rval[key] = value
rval = rval
elif isinstance(o, type):
# TODO: explain what type of parameter is here
rval = serialize_type(o)
elif isinstance(o, scipy.stats.distributions.rv_frozen):
rval = serialize_rv_frozen(o)
# This only works for user-defined functions (and not even partial).
# I think this is exactly what we want here as there shouldn't be any
# built-in or functool.partials in a pipeline
elif inspect.isfunction(o):
# TODO: explain what type of parameter is here
rval = serialize_function(o)
elif _is_cross_validator(o):
# TODO: explain what type of parameter is here
rval = _serialize_cross_validator(o)
else:
raise TypeError(o, type(o))
Expand Down Expand Up @@ -256,7 +272,7 @@ def _extract_information_from_model(model):

model_parameters = model.get_params(deep=False)
for k, v in sorted(model_parameters.items(), key=lambda t: t[0]):
rval = sklearn_to_flow(v)
rval = sklearn_to_flow(v, model)

if (isinstance(rval, (list, tuple)) and len(rval) > 0 and
isinstance(rval[0], (list, tuple)) and
Expand Down Expand Up @@ -310,7 +326,7 @@ def _extract_information_from_model(model):
component_reference[
'oml-python:serialized_object'] = 'component_reference'
component_reference['value'] = OrderedDict(key=k, step_name=None)
component_reference = sklearn_to_flow(component_reference)
component_reference = sklearn_to_flow(component_reference, model)
parameters[k] = json.dumps(component_reference)

else:
Expand Down
3 changes: 3 additions & 0 deletions openml/runs/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,9 @@ def get_flow_dict(_flow):
elif flow.name.startswith("sklearn.pipeline.FeatureUnion"):
# tolerate
pass
elif flow.name.startswith("sklearn.ensemble.voting_classifier.VotingClassifier"):
# tolerate
pass
else:
raise ValueError("parameter %s not in flow description of flow %s" %(param,flow.name))

Expand Down
23 changes: 22 additions & 1 deletion tests/test_flows/test_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
import sklearn.preprocessing
import sklearn.tree

import openml
from openml.flows import OpenMLFlow, sklearn_to_flow, flow_to_sklearn
from openml.flows.sklearn_converter import _format_external_version, _check_dependencies
from openml.exceptions import PyOpenMLError

this_directory = os.path.dirname(os.path.abspath(__file__))
sys.path.append(this_directory)
Expand Down Expand Up @@ -524,3 +524,24 @@ def test_check_dependencies(self):
dependencies = ['sklearn==0.1', 'sklearn>=99.99.99', 'sklearn>99.99.99']
for dependency in dependencies:
self.assertRaises(ValueError, _check_dependencies, dependency)

def test_illegal_parameter_names(self):
# illegal name: estimators
clf1 = sklearn.ensemble.VotingClassifier(
estimators=[('estimators', sklearn.ensemble.RandomForestClassifier()),
('whatevs', sklearn.ensemble.ExtraTreesClassifier())])


cases = [clf1]

for case in cases:
self.assertRaises(PyOpenMLError, sklearn_to_flow, case)

def test_illegal_parameter_names_sklearn(self):
# illegal name: steps
steps = [
('Imputer', sklearn.preprocessing.Imputer(strategy='median')),
('OneHotEncoder', sklearn.preprocessing.OneHotEncoder(sparse=False, handle_unknown='ignore')),
('steps', sklearn.ensemble.BaggingClassifier(base_estimator=sklearn.tree.DecisionTreeClassifier))
]
self.assertRaises(ValueError, sklearn.pipeline.Pipeline, {'steps': steps })

0 comments on commit f62beb5

Please sign in to comment.