From 10940a2a2e69ca8ffef690ed1f94a3f2cd50a218 Mon Sep 17 00:00:00 2001
From: Jan van Rijn <janvanrijn@gmail.com>
Date: Tue, 25 Apr 2017 15:52:50 +0200
Subject: [PATCH 01/16] seed functionality

---
 openml/runs/functions.py | 46 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 9cb08d7e5..970108342 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -24,7 +24,7 @@
 
 
 
-def run_task(task, model, avoid_duplicate_runs=True, flow_tags=None):
+def run_task(task, model, avoid_duplicate_runs=True, flow_tags=None, seed=None):
     """Performs a CV run on the dataset of the given task, using the split.
 
     Parameters
@@ -35,8 +35,13 @@ def run_task(task, model, avoid_duplicate_runs=True, flow_tags=None):
         a model which has a function fit(X,Y) and predict(X),
         all supervised estimators of scikit learn follow this definition of a model [1]
         [1](http://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html)
+    avoid_duplicate_runs : bool
+        if this flag is set to True, the run will throw an error if the
+        setup/task combination is already present on the server.
     flow_tags : list(str)
         a list of tags that the flow should have at creation
+    seed: int
+        the models that are not seeded will get this seed
 
     Returns
     -------
@@ -48,6 +53,7 @@ def run_task(task, model, avoid_duplicate_runs=True, flow_tags=None):
     # TODO move this into its onwn module. While it somehow belongs here, it
     # adds quite a lot of functionality which is better suited in other places!
     # TODO why doesn't this accept a flow as input? - this would make this more flexible!
+    model = _get_seeded_model(model, seed)
     flow = sklearn_to_flow(model)
 
     # returns flow id if the flow exists on the server, False otherwise
@@ -111,6 +117,44 @@ def _run_exists(task_id, setup_id):
         assert(exception.code == 512)
         return False
 
+def _get_seeded_model(model, seed=None):
+    '''Sets all the non-seeded components of a model with a seed.
+
+        Parameters
+        ----------
+        model : sklearn model
+            The model to be seeded
+        seed : int
+            The seed to initialize the RandomState with. Unseeded subcomponents
+            will be seeded with a random number from the RandomState.
+
+        Returns
+        -------
+        model : sklearn model
+            a version of the model where all (sub)components have
+            a seed
+    '''
+
+    rs = np.random.RandomState(seed)
+    model_params = model.get_params()
+    random_states = {}
+    for param_name in sorted(model_params):
+        if 'random_state' in param_name:
+            currentValue = model_params[param_name]
+            # important to draw the value at this point (and not in the if statement)
+            newValue = rs.randint(0, 2**16)
+            if currentValue is None:
+                random_states[param_name] = newValue
+            elif isinstance(currentValue, int):
+                # acceptable behaviour
+                pass
+            elif isinstance(currentValue, np.random.RandomState):
+                raise ValueError('Models initialized with a RandomState object are not supported. Please seed with an integer. ')
+            else:
+                raise ValueError('Models should be seeded with int or None (this should never happen). ')
+            model.set_params(**random_states)
+    return model
+
 
 
 def _prediction_to_row(rep_no, fold_no, row_id, correct_label, predicted_label,

From 982898e0fd6c5ce11eb39e50195cf343fa3abcbb Mon Sep 17 00:00:00 2001
From: Jan van Rijn <janvanrijn@gmail.com>
Date: Tue, 25 Apr 2017 21:57:26 +0200
Subject: [PATCH 02/16] implemented setup data structure

---
 openml/runs/functions.py                  | 22 +++++-
 openml/setups/__init__.py                 |  4 +-
 openml/setups/functions.py                | 82 ++++++++++++++++++++++-
 tests/test_setups/test_setup_functions.py | 16 +++++
 4 files changed, 117 insertions(+), 7 deletions(-)

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 970108342..0959e980f 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -6,15 +6,13 @@
 import warnings
 import sklearn
 import time
-from sklearn.model_selection._search import BaseSearchCV
 
 from ..exceptions import PyOpenMLError
 from .. import config
 from ..flows import sklearn_to_flow, get_flow, flow_exists
-from ..setups import setup_exists
+from ..setups import setup_exists, initialize_model
 from ..exceptions import OpenMLCacheException, OpenMLServerException
 from ..util import URLError, version_complies
-from ..tasks.functions import _create_task_from_xml
 from .._api_calls import _perform_api_call
 from .run import OpenMLRun, _get_version_information
 
@@ -94,6 +92,24 @@ def run_task(task, model, avoid_duplicate_runs=True, flow_tags=None, seed=None):
 
     return run
 
+def initialize_model_from_run(run_id):
+    '''
+    Initialized a model based on a run_id (i.e., using the exact
+    same parameter settings)
+
+    Parameters
+        ----------
+        run_id : int
+            The Openml run_id
+
+        Returns
+        -------
+        model : sklearn model
+            the scikitlearn model with all parameters initailized
+    '''
+    run = get_run(run_id)
+    return initialize_model(run.setup_id)
+
 def _run_exists(task_id, setup_id):
     '''
     Checks whether a task/setup combination is already present on the server.
diff --git a/openml/setups/__init__.py b/openml/setups/__init__.py
index 27c884446..c29271ec1 100644
--- a/openml/setups/__init__.py
+++ b/openml/setups/__init__.py
@@ -1,3 +1,3 @@
-from .functions import setup_exists
+from .functions import get_setup, setup_exists, initialize_model
 
-__all__ = ['setup_exists']
\ No newline at end of file
+__all__ = ['get_setup', 'setup_exists', 'initialize_model']
\ No newline at end of file
diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index e9167d4cc..c26b83dbb 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -2,6 +2,7 @@
 import xmltodict
 
 from collections import OrderedDict
+from .setup import OpenMLSetup, OpenMLParameter
 
 def setup_exists(downloaded_flow, sklearn_model):
     '''
@@ -34,14 +35,91 @@ def setup_exists(downloaded_flow, sklearn_model):
     if setup_id > 0:
         return setup_id
     else:
-        return False;
+        return False
+
+
+def get_setup(setup_id):
+    '''
+     Downloads the setup (configuration) description from OpenML
+     and returns a structured object
+
+    Parameters
+        ----------
+        setup_id : int
+            The Openml setup_id
+
+        Returns
+        -------
+        OpenMLSetup
+            an initialized openml setup object
+    '''
+    result = openml._api_calls._perform_api_call('/setup/%d' %setup_id)
+    result_dict = xmltodict.parse(result)
+    return _create_setup_from_xml(result_dict)
+
+
+def initialize_model(setup_id):
+    '''
+    Initialized a model based on a setup_id (i.e., using the exact
+    same parameter settings)
+
+    Parameters
+        ----------
+        setup_id : int
+            The Openml setup_id
+
+        Returns
+        -------
+        model : sklearn model
+            the scikitlearn model with all parameters initailized
+    '''
+
+    setup = get_setup(setup_id)
+    flow = openml.flows.get_flow(setup.flow_id)
+    sklearn_model = openml.flows.flow_to_sklearn(flow)
+    # print(sklearn_model.get_params())
+
+    raise ValueError('not implemented yet')
 
 
 def _to_dict(flow_id, openml_parameter_settings):
+    # for convenience, this function (ab)uses the run object.
     xml = OrderedDict()
     xml['oml:run'] = OrderedDict()
     xml['oml:run']['@xmlns:oml'] = 'http://openml.org/openml'
     xml['oml:run']['oml:flow_id'] = flow_id
     xml['oml:run']['oml:parameter_setting'] = openml_parameter_settings
 
-    return xml
\ No newline at end of file
+    return xml
+
+def _create_setup_from_xml(result_dict):
+    '''
+     Turns an API xml result into a OpenMLSetup object
+    '''
+    flow_id = int(result_dict['oml:setup_parameters']['oml:flow_id'])
+    parameters = {}
+    if 'oml:parameter' not in result_dict['oml:setup_parameters']:
+        parameters = None
+    else:
+        # basically all others
+        xml_parameters = result_dict['oml:setup_parameters']['oml:parameter']
+        if isinstance(xml_parameters, dict):
+            id = int(xml_parameters['oml:id'])
+            parameters[id] = _create_setup_parameter_from_xml(xml_parameters)
+        elif isinstance(xml_parameters, list):
+            for xml_parameter in xml_parameters:
+                id = int(xml_parameter['oml:id'])
+                parameters[id] = _create_setup_parameter_from_xml(xml_parameter)
+        else:
+            raise ValueError('Expected None, list or dict, received someting else: %s' %str(type(xml_parameters)))
+
+    return OpenMLSetup(flow_id, parameters)
+
+def _create_setup_parameter_from_xml(result_dict):
+    return OpenMLParameter(int(result_dict['oml:id']),
+                           int(result_dict['oml:flow_id']),
+                           result_dict['oml:full_name'],
+                           result_dict['oml:parameter_name'],
+                           result_dict['oml:data_type'],
+                           result_dict['oml:default_value'],
+                           result_dict['oml:value'])
\ No newline at end of file
diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py
index 013f25168..be5a23c8f 100644
--- a/tests/test_setups/test_setup_functions.py
+++ b/tests/test_setups/test_setup_functions.py
@@ -72,3 +72,19 @@ def test_existing_setup_exists(self):
         setup_id = openml.setups.setup_exists(flow, bagging)
         self.assertEquals(setup_id, run.setup_id)
 
+    def test_setup_get(self):
+        # no setups in default test server
+        openml.config.server = 'https://www.openml.org/api/v1/xml/'
+
+        # contains all special cases, 0 params, 1 param, n params.
+        # Non scikitlearn flows.
+        setups = [18, 19, 20, 118]
+        num_params = [8, 0, 3, 1]
+
+        for idx in range(len(setups)):
+            current = openml.setups.get_setup(setups[idx])
+            assert current.flow_id > 0
+            if num_params[idx] == 0:
+                assert current.parameters is None
+            else:
+                assert len(current.parameters) == num_params[idx]

From 8ceb2905ef6237b4fcca991ed6955c3669426610 Mon Sep 17 00:00:00 2001
From: Jan van Rijn <janvanrijn@gmail.com>
Date: Tue, 25 Apr 2017 23:00:42 +0200
Subject: [PATCH 03/16] parsing parameter values into setup

---
 openml/setups/functions.py | 27 ++++++++++++++++++++++++---
 openml/setups/setup.py     | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 3 deletions(-)
 create mode 100644 openml/setups/setup.py

diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index c26b83dbb..adf8556a8 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -1,5 +1,6 @@
 import openml
 import xmltodict
+import copy
 
 from collections import OrderedDict
 from .setup import OpenMLSetup, OpenMLParameter
@@ -73,13 +74,33 @@ def initialize_model(setup_id):
         model : sklearn model
             the scikitlearn model with all parameters initailized
     '''
+    def get_flow_dict(_flow, identifier_trace):
+        flow_map = {_flow.flow_id: identifier_trace}
+        for identifier in _flow.components:
+            duplicate_trace = copy.deepcopy(identifier_trace)
+            duplicate_trace.append(identifier)
+            flow_map.update(get_flow_dict(_flow.components[identifier], duplicate_trace))
+        return flow_map
 
     setup = get_setup(setup_id)
     flow = openml.flows.get_flow(setup.flow_id)
     sklearn_model = openml.flows.flow_to_sklearn(flow)
-    # print(sklearn_model.get_params())
-
-    raise ValueError('not implemented yet')
+    identifier_trace = get_flow_dict(flow, [])
+    print(sklearn_model.get_params())
+    print(identifier_trace)
+    parameter_dict = {}
+    for param_id in setup.parameters:
+        parameter = setup.parameters[param_id]
+        if parameter.flow_id == flow.flow_id:
+            # TODO: parse value. If serialized object (e.g., steps, estimator), skip it (?)
+            parameter_dict[parameter.parameter_name] = parameter.value
+        else:
+            # TODO: parse value. If serialized object (e.g., steps, estimator), skip it (?)
+            # find my estimator path
+            parameter_name = '__'.join(identifier_trace[parameter.flow_id]) + "__" + parameter.parameter_name
+            parameter_dict[parameter_name] = parameter.value
+    print(parameter_dict)
+    sklearn_model.set_params(**parameter_dict)
 
 
 def _to_dict(flow_id, openml_parameter_settings):
diff --git a/openml/setups/setup.py b/openml/setups/setup.py
new file mode 100644
index 000000000..e3187ad8c
--- /dev/null
+++ b/openml/setups/setup.py
@@ -0,0 +1,35 @@
+
+class OpenMLSetup(object):
+    """Setup object (a.k.a. Configuration).
+
+       Parameters
+       ----------
+       flow_id : int
+            The flow that it is build upon
+        parameters : dict
+            The setting of the parameters
+           """
+
+    def __init__(self, flow_id, parameters):
+        self.flow_id = flow_id
+        self.parameters = parameters
+
+
+class OpenMLParameter(object):
+    """Parameter object (used in setup).
+
+       Parameters
+       ----------
+       flow_id : int
+            The flow that it is build upon
+        parameters : dict
+            The setting of the parameters
+    """
+    def __init__(self, id, flow_id, full_name, parameter_name, data_type, default_value, value):
+        self.id = id
+        self.flow_id = flow_id
+        self.full_name = full_name
+        self.parameter_name = parameter_name
+        self.data_type = data_type
+        self.default_value = default_value
+        self.value = value

From ae85e5bf6733c96540747d2e50f06beecb9fb8f7 Mon Sep 17 00:00:00 2001
From: Jan van Rijn <janvanrijn@gmail.com>
Date: Wed, 26 Apr 2017 13:30:10 +0200
Subject: [PATCH 04/16] update run with new parameter extraction procedure

---
 openml/runs/run.py | 86 ++++++++++++++++++++++++++--------------------
 1 file changed, 49 insertions(+), 37 deletions(-)

diff --git a/openml/runs/run.py b/openml/runs/run.py
index 9a0ed855e..0c845ef16 100644
--- a/openml/runs/run.py
+++ b/openml/runs/run.py
@@ -165,7 +165,7 @@ def _create_description_xml(self):
         return description_xml
 
     @staticmethod
-    def _parse_parameters(model, flow):
+    def _parse_parameters(model, server_flow):
         """Extracts all parameter settings from a model in OpenML format.
 
         Parameters
@@ -176,50 +176,62 @@ def _parse_parameters(model, flow):
             openml flow object (containing flow ids, i.e., it has to be downloaded from the server)
 
         """
-        if flow.flow_id is None:
+        if server_flow.flow_id is None:
             raise ValueError("The flow parameter needs to be downloaded from server")
 
-        python_param_settings = model.get_params()
-        openml_param_settings = []
-
         def get_flow_dict(_flow):
             flow_map = {_flow.name: _flow.flow_id}
             for subflow in _flow.components:
                 flow_map.update(get_flow_dict(_flow.components[subflow]))
             return flow_map
 
-        flow_dict = get_flow_dict(flow)
-
-        for param in python_param_settings:
-            if "__" in param:
-                # parameter of subflow. will be handled later
-                continue
-            if isinstance(python_param_settings[param], BaseEstimator):
-                # extract parameters of the subflow individually
-                subflow = flow.components[param]
-                openml_param_settings += OpenMLRun._parse_parameters(python_param_settings[param], subflow)
-
-            # add parameter setting (in some cases also the subflow. Just because we can)
-            if param in flow.parameters.keys():
-                param_dict = OrderedDict()
-                param_dict['oml:name'] = param
-                param_dict['oml:value'] = str(python_param_settings[param])
-                param_dict['oml:component'] = flow_dict[flow.name]
-                openml_param_settings.append(param_dict)
-            else:
-                if flow.name.startswith("sklearn.pipeline.Pipeline"):
-                    # tolerate
-                    pass
-                elif flow.name.startswith("sklearn.pipeline.FeatureUnion"):
-                    # tolerate
-                    pass
-                elif flow.name.startswith("sklearn.ensemble.voting_classifier.VotingClassifier"):
-                    # tolerate
-                    pass
-                else:
-                    raise ValueError("parameter %s not in flow description of flow %s" %(param,flow.name))
-
-        return openml_param_settings
+        def extract_parameters(_flow, _param_dict):
+            _params = {}
+            for _param_name in _flow.parameters:
+                _current = OrderedDict()
+                _current['oml:name'] = _param_name
+                _current['oml:value'] = _flow.parameters[_param_name]
+                _current['oml:component'] = _param_dict[_flow.flow_id]
+                _params.append(_current)
+            for _identifier in _flow.components:
+                _params.update(extract_parameters(_flow.components[_identifier], _param_dict))
+
+        flow_dict = get_flow_dict(server_flow)
+        print(flow_dict)
+        local_flow = openml.flows.sklearn_to_flow(model)
+
+        parameters = extract_parameters(local_flow, flow_dict)
+        #
+        # for param in python_param_settings:
+        #     if "__" in param:
+        #         # parameter of subflow. will be handled later
+        #         continue
+        #     if isinstance(python_param_settings[param], BaseEstimator):
+        #         # extract parameters of the subflow individually
+        #         subflow = flow.components[param]
+        #         openml_param_settings += OpenMLRun._parse_parameters(python_param_settings[param], subflow)
+        #
+        #     # add parameter setting (in some cases also the subflow. Just because we can)
+        #     if param in flow.parameters.keys():
+        #         param_dict = OrderedDict()
+        #         param_dict['oml:name'] = param
+        #         param_dict['oml:value'] = str(python_param_settings[param])
+        #         param_dict['oml:component'] = flow_dict[flow.name]
+        #         openml_param_settings.append(param_dict)
+        #     else:
+        #         if flow.name.startswith("sklearn.pipeline.Pipeline"):
+        #             # tolerate
+        #             pass
+        #         elif flow.name.startswith("sklearn.pipeline.FeatureUnion"):
+        #             # tolerate
+        #             pass
+        #         elif flow.name.startswith("sklearn.ensemble.voting_classifier.VotingClassifier"):
+        #             # tolerate
+        #             pass
+        #         else:
+        #             raise ValueError("parameter %s not in flow description of flow %s" %(param,flow.name))
+
+        return parameters
 
 ################################################################################
 # Functions which cannot be in runs/functions due to circular imports

From 83662d78c075e17530c98f99d765ade1fbb89ef9 Mon Sep 17 00:00:00 2001
From: Jan van Rijn <janvanrijn@gmail.com>
Date: Wed, 26 Apr 2017 13:53:04 +0200
Subject: [PATCH 05/16] fixed merge conflict bug, reimplemented extract
 parameters from run (based on sklearn converter)

---
 openml/runs/functions.py |  2 +-
 openml/runs/run.py       | 38 ++++----------------------------------
 2 files changed, 5 insertions(+), 35 deletions(-)

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 021283818..f73af2cf4 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -10,7 +10,7 @@
 from ..exceptions import PyOpenMLError
 from .. import config
 
-from ..flows import sklearn_to_flow, get_flow, flow_exists
+from ..flows import sklearn_to_flow, get_flow, flow_exists, _check_n_jobs
 from ..setups import setup_exists, initialize_model
 
 from ..exceptions import OpenMLCacheException, OpenMLServerException
diff --git a/openml/runs/run.py b/openml/runs/run.py
index 0c845ef16..e69717487 100644
--- a/openml/runs/run.py
+++ b/openml/runs/run.py
@@ -186,51 +186,21 @@ def get_flow_dict(_flow):
             return flow_map
 
         def extract_parameters(_flow, _param_dict):
-            _params = {}
+            _params = []
             for _param_name in _flow.parameters:
                 _current = OrderedDict()
                 _current['oml:name'] = _param_name
                 _current['oml:value'] = _flow.parameters[_param_name]
-                _current['oml:component'] = _param_dict[_flow.flow_id]
+                _current['oml:component'] = _param_dict[_flow.name]
                 _params.append(_current)
             for _identifier in _flow.components:
-                _params.update(extract_parameters(_flow.components[_identifier], _param_dict))
+                _params.extend(extract_parameters(_flow.components[_identifier], _param_dict))
+            return _params
 
         flow_dict = get_flow_dict(server_flow)
-        print(flow_dict)
         local_flow = openml.flows.sklearn_to_flow(model)
 
         parameters = extract_parameters(local_flow, flow_dict)
-        #
-        # for param in python_param_settings:
-        #     if "__" in param:
-        #         # parameter of subflow. will be handled later
-        #         continue
-        #     if isinstance(python_param_settings[param], BaseEstimator):
-        #         # extract parameters of the subflow individually
-        #         subflow = flow.components[param]
-        #         openml_param_settings += OpenMLRun._parse_parameters(python_param_settings[param], subflow)
-        #
-        #     # add parameter setting (in some cases also the subflow. Just because we can)
-        #     if param in flow.parameters.keys():
-        #         param_dict = OrderedDict()
-        #         param_dict['oml:name'] = param
-        #         param_dict['oml:value'] = str(python_param_settings[param])
-        #         param_dict['oml:component'] = flow_dict[flow.name]
-        #         openml_param_settings.append(param_dict)
-        #     else:
-        #         if flow.name.startswith("sklearn.pipeline.Pipeline"):
-        #             # tolerate
-        #             pass
-        #         elif flow.name.startswith("sklearn.pipeline.FeatureUnion"):
-        #             # tolerate
-        #             pass
-        #         elif flow.name.startswith("sklearn.ensemble.voting_classifier.VotingClassifier"):
-        #             # tolerate
-        #             pass
-        #         else:
-        #             raise ValueError("parameter %s not in flow description of flow %s" %(param,flow.name))
-
         return parameters
 
 ################################################################################

From 71b5efdcc1188029299fc1ed890442ee9b60c0d2 Mon Sep 17 00:00:00 2001
From: Jan van Rijn <janvanrijn@gmail.com>
Date: Wed, 26 Apr 2017 14:13:44 +0200
Subject: [PATCH 06/16] functionality to reconstruct a flow using a given set
 of parameters

---
 openml/setups/functions.py | 52 ++++++++++++++++++++------------------
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index adf8556a8..c28650cfe 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -74,33 +74,37 @@ def initialize_model(setup_id):
         model : sklearn model
             the scikitlearn model with all parameters initailized
     '''
-    def get_flow_dict(_flow, identifier_trace):
-        flow_map = {_flow.flow_id: identifier_trace}
-        for identifier in _flow.components:
-            duplicate_trace = copy.deepcopy(identifier_trace)
-            duplicate_trace.append(identifier)
-            flow_map.update(get_flow_dict(_flow.components[identifier], duplicate_trace))
-        return flow_map
+    def _to_dict_of_dicts(_params):
+        # this subfunction transforms an openml setup object into
+        # a dict of dicts, structured: flow_id maps to dict of
+        # parameter_names mapping to parameter_value
+        _res = {}
+        for _param in _params:
+            if _param.flow_id not in _res:
+                _res[_param.flow_id] = {}
+            _res[_param.flow_id][_param.parameter_name] = _param.value
+        return _res
+
+    def _reconstruct_flow(_flow, _params):
+        # sets the values of flow parameters (and subflows) to
+        # the specific values from a setup. _params is a dict of
+        # dicts, mapping from flow id to param name to param value
+        # (obtained by using the subfunction _to_dict_of_dicts)
+        for _param in _flow.parameters:
+            _flow.parameters[_param] = _params[_flow.flow_id][_param]
+        for _identifier in flow.components:
+            _flow.components[_identifier] = _reconstruct_flow(_flow.components[_identifier], _params)
+        return _flow
 
     setup = get_setup(setup_id)
+    parameters = _to_dict_of_dicts(setup.parameters)
     flow = openml.flows.get_flow(setup.flow_id)
-    sklearn_model = openml.flows.flow_to_sklearn(flow)
-    identifier_trace = get_flow_dict(flow, [])
-    print(sklearn_model.get_params())
-    print(identifier_trace)
-    parameter_dict = {}
-    for param_id in setup.parameters:
-        parameter = setup.parameters[param_id]
-        if parameter.flow_id == flow.flow_id:
-            # TODO: parse value. If serialized object (e.g., steps, estimator), skip it (?)
-            parameter_dict[parameter.parameter_name] = parameter.value
-        else:
-            # TODO: parse value. If serialized object (e.g., steps, estimator), skip it (?)
-            # find my estimator path
-            parameter_name = '__'.join(identifier_trace[parameter.flow_id]) + "__" + parameter.parameter_name
-            parameter_dict[parameter_name] = parameter.value
-    print(parameter_dict)
-    sklearn_model.set_params(**parameter_dict)
+
+    # now we 'abuse' the parameter object by passing in the
+    # parameters obtained from the setup
+    flow = _reconstruct_flow(flow, parameters)
+
+    return openml.flows.flow_to_sklearn(flow)
 
 
 def _to_dict(flow_id, openml_parameter_settings):

From af784362bb5ce0f251bb946fad30cef510d6507a Mon Sep 17 00:00:00 2001
From: Jan van Rijn <janvanrijn@gmail.com>
Date: Wed, 26 Apr 2017 14:16:53 +0200
Subject: [PATCH 07/16] clarifications to _to_dict_of_dicts function

---
 openml/setups/functions.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index c28650cfe..58e09490a 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -80,9 +80,12 @@ def _to_dict_of_dicts(_params):
         # parameter_names mapping to parameter_value
         _res = {}
         for _param in _params:
-            if _param.flow_id not in _res:
-                _res[_param.flow_id] = {}
-            _res[_param.flow_id][_param.parameter_name] = _param.value
+            _flow_id = _params[_param].flow_id
+            _param_name = _params[_param].parameter_name
+            _param_value = _params[_param].value
+            if _flow_id not in _res:
+                _res[_flow_id] = {}
+            _res[_flow_id][_param_name] = _param_value
         return _res
 
     def _reconstruct_flow(_flow, _params):

From e37bb93b766bd2f18cac4a6baca5f034182a0dc4 Mon Sep 17 00:00:00 2001
From: Jan van Rijn <janvanrijn@gmail.com>
Date: Wed, 26 Apr 2017 14:33:19 +0200
Subject: [PATCH 08/16] almost finished reinstantiating setups

---
 openml/setups/functions.py            |  2 +-
 openml/testing.py                     |  1 +
 tests/test_runs/test_run_functions.py | 10 +++++++++-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/openml/setups/functions.py b/openml/setups/functions.py
index 58e09490a..83486840b 100644
--- a/openml/setups/functions.py
+++ b/openml/setups/functions.py
@@ -95,7 +95,7 @@ def _reconstruct_flow(_flow, _params):
         # (obtained by using the subfunction _to_dict_of_dicts)
         for _param in _flow.parameters:
             _flow.parameters[_param] = _params[_flow.flow_id][_param]
-        for _identifier in flow.components:
+        for _identifier in _flow.components:
             _flow.components[_identifier] = _reconstruct_flow(_flow.components[_identifier], _params)
         return _flow
 
diff --git a/openml/testing.py b/openml/testing.py
index c5c11091a..e24adc92a 100644
--- a/openml/testing.py
+++ b/openml/testing.py
@@ -17,6 +17,7 @@ class TestBase(unittest.TestCase):
     def setUp(self):
         # This cache directory is checked in to git to simulate a populated
         # cache
+        self.maxDiff = None
         self.static_cache_dir = None
         static_cache_dir = os.path.dirname(os.path.abspath(inspect.getfile(self.__class__)))
 
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index e4926491a..680f26539 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -25,7 +25,7 @@
 
 class TestRun(TestBase):
 
-    def _perform_run(self, task_id, num_instances, clf):
+    def _perform_run(self, task_id, num_instances, clf, check_setup=True):
         task = openml.tasks.get_task(task_id)
         run = openml.runs.run_task(task, clf, openml.config.avoid_duplicate_runs)
         run_ = run.publish()
@@ -34,6 +34,14 @@ def _perform_run(self, task_id, num_instances, clf):
 
         # check arff output
         self.assertEqual(len(run.data_content), num_instances)
+
+        if check_setup:
+            run_id = run_.run_id
+            run_prime = openml.runs.get_run(run_id)
+            clf_prime = openml.setups.initialize_model(run_prime.setup_id)
+            self.assertEquals(clf.get_params(), clf_prime.get_params())
+            # self.assertEquals(clf, clf_prime)
+
         return run
 
     def test_run_regression_on_classif_task(self):

From 08e2ae8bdc42977b0d8b441d7d891d2ea7f7200f Mon Sep 17 00:00:00 2001
From: Jan van Rijn <janvanrijn@gmail.com>
Date: Wed, 26 Apr 2017 15:15:46 +0200
Subject: [PATCH 09/16] implemented some sort of unit test. should be improved

---
 tests/test_runs/test_run_functions.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 680f26539..3087e10ac 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -2,6 +2,8 @@
 
 import openml
 import openml.exceptions
+import json
+
 from openml.testing import TestBase
 from openml.runs.functions import _run_task_get_arffcontent
 
@@ -39,7 +41,22 @@ def _perform_run(self, task_id, num_instances, clf, check_setup=True):
             run_id = run_.run_id
             run_prime = openml.runs.get_run(run_id)
             clf_prime = openml.setups.initialize_model(run_prime.setup_id)
-            self.assertEquals(clf.get_params(), clf_prime.get_params())
+
+            params_orig = clf.get_params()
+            params_serv = clf_prime.get_params()
+            self.assertEqual(params_orig.keys(), params_serv.keys())
+
+            for param in params_orig:
+                print("%s : %s" %(param, str(params_serv[param])))
+                try:
+                    value_orig = json.dumps(params_orig[param])
+                    value_serv = json.dumps(params_serv[param])
+                    self.assertEqual(value_orig, value_serv)
+                except TypeError:
+                    # TODO: think of a different check
+                    print('Object not json serializable')
+
+            #self.assertEquals(clf.get_params(), clf_prime.get_params())
             # self.assertEquals(clf, clf_prime)
 
         return run

From b8ced46da474daaa3eec9550f2677dd3c12ef42a Mon Sep 17 00:00:00 2001
From: Jan van Rijn <janvanrijn@gmail.com>
Date: Thu, 27 Apr 2017 16:49:47 +0200
Subject: [PATCH 10/16] finalized instantiate setup check

---
 tests/test_runs/test_run_functions.py | 24 +++++++-----------------
 1 file changed, 7 insertions(+), 17 deletions(-)

diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index 3087e10ac..d8b1bfee3 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -2,7 +2,6 @@
 
 import openml
 import openml.exceptions
-import json
 
 from openml.testing import TestBase
 from openml.runs.functions import _run_task_get_arffcontent
@@ -39,22 +38,13 @@ def _perform_run(self, task_id, num_instances, clf, check_setup=True):
 
         if check_setup:
             run_id = run_.run_id
-            run_prime = openml.runs.get_run(run_id)
-            clf_prime = openml.setups.initialize_model(run_prime.setup_id)
-
-            params_orig = clf.get_params()
-            params_serv = clf_prime.get_params()
-            self.assertEqual(params_orig.keys(), params_serv.keys())
-
-            for param in params_orig:
-                print("%s : %s" %(param, str(params_serv[param])))
-                try:
-                    value_orig = json.dumps(params_orig[param])
-                    value_serv = json.dumps(params_serv[param])
-                    self.assertEqual(value_orig, value_serv)
-                except TypeError:
-                    # TODO: think of a different check
-                    print('Object not json serializable')
+            run_server = openml.runs.get_run(run_id)
+            clf_server = openml.setups.initialize_model(run_server.setup_id)
+
+            flow_local = openml.flows.sklearn_to_flow(clf)
+            flow_server = openml.flows.sklearn_to_flow(clf_server)
+
+            openml.flows.assert_flows_equal(flow_local, flow_server)
 
             #self.assertEquals(clf.get_params(), clf_prime.get_params())
             # self.assertEquals(clf, clf_prime)

From 363b381f726aa89daf2cefd76ac4e2a99f384375 Mon Sep 17 00:00:00 2001
From: "janvanrijn@gmail.com" <janvanrijn@gmail.com>
Date: Tue, 2 May 2017 15:25:30 +0200
Subject: [PATCH 11/16] fix unit tests for setup

---
 openml/runs/run.py                        | 12 +++++++++---
 tests/test_setups/test_setup_functions.py | 19 +++++++++----------
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/openml/runs/run.py b/openml/runs/run.py
index e69717487..727119ff1 100644
--- a/openml/runs/run.py
+++ b/openml/runs/run.py
@@ -185,13 +185,19 @@ def get_flow_dict(_flow):
                 flow_map.update(get_flow_dict(_flow.components[subflow]))
             return flow_map
 
-        def extract_parameters(_flow, _param_dict):
+        def extract_parameters(_flow, _param_dict, _main_call=False, main_id=None):
+            # _flow is an openml flow object, _param_dict maps from flow name to flow id
+            # for the main call, the _param_dict lookup is overridden by main_id (useful for unit tests / sentinels)
+            # this way, for flows without subflows we do not have to rely on _param_dict
             _params = []
             for _param_name in _flow.parameters:
                 _current = OrderedDict()
                 _current['oml:name'] = _param_name
                 _current['oml:value'] = _flow.parameters[_param_name]
-                _current['oml:component'] = _param_dict[_flow.name]
+                if _main_call:
+                    _current['oml:component'] = main_id
+                else:
+                    _current['oml:component'] = _param_dict[_flow.name]
                 _params.append(_current)
             for _identifier in _flow.components:
                 _params.extend(extract_parameters(_flow.components[_identifier], _param_dict))
@@ -200,7 +206,7 @@ def extract_parameters(_flow, _param_dict):
         flow_dict = get_flow_dict(server_flow)
         local_flow = openml.flows.sklearn_to_flow(model)
 
-        parameters = extract_parameters(local_flow, flow_dict)
+        parameters = extract_parameters(local_flow, flow_dict, True, server_flow.flow_id)
         return parameters
 
 ################################################################################
diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py
index 14144bd9a..17bea0de1 100644
--- a/tests/test_setups/test_setup_functions.py
+++ b/tests/test_setups/test_setup_functions.py
@@ -32,6 +32,7 @@ class TestRun(TestBase):
     def test_nonexisting_setup_exists(self):
         # first publish a non-existing flow
         sentinel = get_sentinel()
+        # because of the sentinel, we can not use flows that contain subflows
         dectree = DecisionTreeClassifier()
         flow = openml.flows.sklearn_to_flow(dectree)
         flow.name = 'TEST%s%s' % (sentinel, flow.name)
@@ -45,26 +46,24 @@ def test_nonexisting_setup_exists(self):
 
     def test_existing_setup_exists(self):
         # first publish a nonexiting flow
-        bagging = BaggingClassifier(DecisionTreeClassifier(max_depth=5,
-                                                           min_samples_split=3),
-                                    n_estimators=3,
-                                    max_samples=0.5)
-        flow = openml.flows.sklearn_to_flow(bagging)
+
+        # because of the sentinel, we can not use flows that contain subflows
+        classif = DecisionTreeClassifier(max_depth=5,
+                                         min_samples_split=3)
+        flow = openml.flows.sklearn_to_flow(classif)
         flow.name = 'TEST%s%s' % (get_sentinel(), flow.name)
-        flow.components['base_estimator'].name = 'TEST%s%s' % (
-            get_sentinel(), flow.components['base_estimator'].name)
 
         flow = flow.publish()
         flow = openml.flows.get_flow(flow.flow_id)
 
         # although the flow exists, we can be sure there are no
         # setups (yet) as it hasn't been ran
-        setup_id = openml.setups.setup_exists(flow, bagging)
+        setup_id = openml.setups.setup_exists(flow, classif)
         self.assertFalse(setup_id)
 
         # now run the flow on an easy task:
         task = openml.tasks.get_task(115) #diabetes
-        run = openml.runs.run_task(task, bagging)
+        run = openml.runs.run_task(task, classif)
         # spoof flow id, otherwise the sentinel is ignored
         run.flow_id = flow.flow_id
         run = run.publish()
@@ -72,7 +71,7 @@ def test_existing_setup_exists(self):
         run = openml.runs.get_run(run.run_id)
 
         # execute the function we are interested in
-        setup_id = openml.setups.setup_exists(flow, bagging)
+        setup_id = openml.setups.setup_exists(flow, classif)
         self.assertEquals(setup_id, run.setup_id)
 
     def test_setup_get(self):

From 3ae5f8e0b520bc8684536b9df883db99e6441db9 Mon Sep 17 00:00:00 2001
From: "janvanrijn@gmail.com" <janvanrijn@gmail.com>
Date: Tue, 2 May 2017 18:26:40 +0200
Subject: [PATCH 12/16] requests from @mfeurer

---
 openml/runs/__init__.py               | 2 +-
 openml/runs/functions.py              | 2 ++
 tests/test_runs/test_run_functions.py | 6 ++++++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/openml/runs/__init__.py b/openml/runs/__init__.py
index 562327d34..a0735b4f1 100644
--- a/openml/runs/__init__.py
+++ b/openml/runs/__init__.py
@@ -1,4 +1,4 @@
 from .run import OpenMLRun
-from .functions import (run_task, get_run, list_runs, get_runs)
+from .functions import (run_task, get_run, list_runs, get_runs, initialize_model_from_run)
 
 __all__ = ['OpenMLRun', 'run_task', 'get_run', 'list_runs', 'get_runs']
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index f73af2cf4..8338519b5 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -160,6 +160,8 @@ def _get_seeded_model(model, seed=None):
         if 'random_state' in param_name:
             currentValue = model_params[param_name]
             # important to draw the value at this point (and not in the if statement)
+            # this way each random_state parameter is paired with the same draw from the
+            # random generator, regardless of which other subflows were already seeded
             newValue = rs.randint(0, 2**16)
             if currentValue is None:
                 random_states[param_name] = newValue
diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py
index d8b1bfee3..6c711f70a 100644
--- a/tests/test_runs/test_run_functions.py
+++ b/tests/test_runs/test_run_functions.py
@@ -37,6 +37,7 @@ def _perform_run(self, task_id, num_instances, clf, check_setup=True):
         self.assertEqual(len(run.data_content), num_instances)
 
         if check_setup:
+            # test the initialize setup function
             run_id = run_.run_id
             run_server = openml.runs.get_run(run_id)
             clf_server = openml.setups.initialize_model(run_server.setup_id)
@@ -46,6 +47,11 @@ def _perform_run(self, task_id, num_instances, clf, check_setup=True):
 
             openml.flows.assert_flows_equal(flow_local, flow_server)
 
+            # and test the initialize setup from run function
+            clf_server2 = openml.runs.initialize_model_from_run(run_server.run_id)
+            flow_server2 = openml.flows.sklearn_to_flow(clf_server2)
+            openml.flows.assert_flows_equal(flow_local, flow_server2)
+
             #self.assertEquals(clf.get_params(), clf_prime.get_params())
             # self.assertEquals(clf, clf_prime)
 

From 70475feea5a2225e479dcd01a1176cedab852a29 Mon Sep 17 00:00:00 2001
From: "janvanrijn@gmail.com" <janvanrijn@gmail.com>
Date: Wed, 3 May 2017 13:38:56 +0200
Subject: [PATCH 13/16] added comment

---
 openml/runs/functions.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 8338519b5..0a211d7fc 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -137,6 +137,9 @@ def _run_exists(task_id, setup_id):
 
 def _get_seeded_model(model, seed=None):
     '''Sets all the non-seeded components of a model with a seed.
+       Models that are already seeded will maintain the seed. In
+       this case, only integer seeds are allowed (An exception
+       is thrown when a RandomState was used as seed)
 
         Parameters
         ----------

From 23aaf813e3dffd2389d24240f5e53dbc8094d1f1 Mon Sep 17 00:00:00 2001
From: "janvanrijn@gmail.com" <janvanrijn@gmail.com>
Date: Wed, 3 May 2017 16:34:19 +0200
Subject: [PATCH 14/16] changed comments of setup, changed assertions

---
 openml/setups/setup.py                    | 19 +++++++++++++++----
 tests/test_setups/test_setup_functions.py |  4 ++--
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/openml/setups/setup.py b/openml/setups/setup.py
index e3187ad8c..d23893828 100644
--- a/openml/setups/setup.py
+++ b/openml/setups/setup.py
@@ -20,10 +20,21 @@ class OpenMLParameter(object):
 
        Parameters
        ----------
-       flow_id : int
-            The flow that it is build upon
-        parameters : dict
-            The setting of the parameters
+       id : int
+            The input id from the openml database
+        flow_id : int
+            The flow to which this parameter is associated
+        full_name : str
+            The name of the flow and parameter combined
+        parameter_name : str
+            The name of the parameter 
+        data_type : str
+            The data type of the parameter. Generally unused for sklearn flows
+        default_value : str
+            The default value. For sklearn parameters, this is unknown and a
+            default value is selected arbitrarily
+        value : str
+            If the parameter was set, the value that it was set to. 
     """
     def __init__(self, id, flow_id, full_name, parameter_name, data_type, default_value, value):
         self.id = id
diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py
index 17bea0de1..10f18e321 100644
--- a/tests/test_setups/test_setup_functions.py
+++ b/tests/test_setups/test_setup_functions.py
@@ -87,6 +87,6 @@ def test_setup_get(self):
             current = openml.setups.get_setup(setups[idx])
             assert current.flow_id > 0
             if num_params[idx] == 0:
-                assert current.parameters is None
+                self.asserts(current.parameters is None)
             else:
-                assert len(current.parameters) == num_params[idx]
+                self.asserts(len(current.parameters) == num_params[idx])

From bd8ee242fab4a5cadb005eb5adb3cdac141a1481 Mon Sep 17 00:00:00 2001
From: "janvanrijn@gmail.com" <janvanrijn@gmail.com>
Date: Wed, 3 May 2017 16:46:26 +0200
Subject: [PATCH 15/16] typo

---
 tests/test_setups/test_setup_functions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py
index 10f18e321..f033a19b7 100644
--- a/tests/test_setups/test_setup_functions.py
+++ b/tests/test_setups/test_setup_functions.py
@@ -87,6 +87,6 @@ def test_setup_get(self):
             current = openml.setups.get_setup(setups[idx])
             assert current.flow_id > 0
             if num_params[idx] == 0:
-                self.asserts(current.parameters is None)
+                self.assertTrue(current.parameters is None)
             else:
-                self.asserts(len(current.parameters) == num_params[idx])
+                self.assertTrue(len(current.parameters) == num_params[idx])

From 85472cc644a60bd4db9c40e23bba7f4fc709b486 Mon Sep 17 00:00:00 2001
From: Jan van Rijn <janvanrijn@gmail.com>
Date: Thu, 4 May 2017 16:22:20 +0200
Subject: [PATCH 16/16] changes requested

---
 tests/test_setups/test_setup_functions.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py
index f033a19b7..99cff2ef7 100644
--- a/tests/test_setups/test_setup_functions.py
+++ b/tests/test_setups/test_setup_functions.py
@@ -38,8 +38,9 @@ def test_nonexisting_setup_exists(self):
         flow.name = 'TEST%s%s' % (sentinel, flow.name)
         flow.publish()
 
-        # although the flow exists, we can be sure there are no
-        # setups (yet) as it hasn't been ran
+        # although the flow exists (it was published by the previous statement),
+        # we can be sure there are no setups (yet), as it was just created
+        # and hasn't been run
         setup_id = openml.setups.setup_exists(flow, dectree)
         self.assertFalse(setup_id)
 
@@ -87,6 +88,6 @@ def test_setup_get(self):
             current = openml.setups.get_setup(setups[idx])
             assert current.flow_id > 0
             if num_params[idx] == 0:
-                self.assertTrue(current.parameters is None)
+                self.assertIsNone(current.parameters)
             else:
-                self.assertTrue(len(current.parameters) == num_params[idx])
+                self.assertEquals(len(current.parameters), num_params[idx])