Allow estimators to be cloned #374

Merged 4 commits on Apr 25, 2018
2 changes: 1 addition & 1 deletion docs/sources/CHANGELOG.md
@@ -26,7 +26,7 @@ The CHANGELOG for the current development version is available at

 ##### Bug Fixes

-- -
+- Allow mlxtend estimators to be cloned via scikit-learn's `clone` function. ([#374](https://github.com/rasbt/mlxtend/pull/374))



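The new behavior in one self-contained sketch (the estimator choice is arbitrary; any mlxtend estimator with an introspectable constructor works the same way):

```python
from sklearn.base import clone
from mlxtend.classifier import Perceptron

ppn = Perceptron(epochs=100, eta=0.01, random_seed=1)
ppn_copy = clone(ppn)  # constructs a new, unfitted estimator from get_params()

assert ppn_copy is not ppn
assert ppn_copy.get_params() == ppn.get_params()
```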
108 changes: 108 additions & 0 deletions mlxtend/_base/_base_model.py
@@ -7,6 +7,12 @@
# License: BSD 3 clause

from time import time
from collections import defaultdict

try:
    from inspect import signature
except ImportError:
    from ..externals.signature_py27 import signature


class _BaseModel(object):
@@ -28,3 +34,105 @@ def _check_arrays(self, X, y=None):

        if not len(y) == X.shape[0]:
            raise ValueError('X and y must contain the same number of samples')

    @classmethod
    def _get_param_names(cls):
        """Get parameter names for the estimator

        adapted from
        https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/base.py
        # Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
        # License: BSD 3 clause
        """
        # fetch the constructor or the original constructor before
        # deprecation wrapping if any
        init = getattr(cls.__init__, 'deprecated_original', cls.__init__)
        if init is object.__init__:
            # No explicit constructor to introspect
            return []

        # introspect the constructor arguments to find the model parameters
        # to represent
        init_signature = signature(init)
        # Consider the constructor parameters excluding 'self'
        parameters = [p for p in init_signature.parameters.values()
                      if p.name != 'self' and p.kind != p.VAR_KEYWORD]
        for p in parameters:
            if p.kind == p.VAR_POSITIONAL:
                raise RuntimeError("scikit-learn estimators should always "
                                   "specify their parameters in the signature"
                                   " of their __init__ (no varargs)."
                                   " %s with constructor %s doesn't "
                                   " follow this convention."
                                   % (cls, init_signature))
        # Extract and sort argument names excluding 'self'
        return sorted([p.name for p in parameters])
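To illustrate what the signature introspection above yields, here is a minimal sketch with a hypothetical subclass (not part of this diff):

```python
class ToyEstimator(_BaseModel):
    def __init__(self, eta=0.01, epochs=50, random_seed=None):
        self.eta = eta
        self.epochs = epochs
        self.random_seed = random_seed

# Constructor arguments, sorted alphabetically, 'self' excluded:
print(ToyEstimator._get_param_names())  # ['epochs', 'eta', 'random_seed']
```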

    def get_params(self, deep=True):
        """Get parameters for this estimator.

        Parameters
        ----------
        deep : boolean, optional
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : mapping of string to any
            Parameter names mapped to their values.

        adapted from
        https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/base.py
        # Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
        # License: BSD 3 clause
        """
        out = dict()
        for key in self._get_param_names():
            value = getattr(self, key, None)
            if deep and hasattr(value, 'get_params'):
                deep_items = value.get_params().items()
                out.update((key + '__' + k, val) for k, val in deep_items)
            out[key] = value
        return out
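Continuing the hypothetical `ToyEstimator` sketch: `get_params` maps each constructor argument to its current attribute value, and `deep=True` additionally flattens any parameter that itself has a `get_params` method, using the `__` separator:

```python
class Wrapper(_BaseModel):
    def __init__(self, estimator=None):
        self.estimator = estimator

w = Wrapper(estimator=ToyEstimator(eta=0.1))
print(sorted(w.get_params(deep=True)))
# ['estimator', 'estimator__epochs', 'estimator__eta', 'estimator__random_seed']
```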

    def set_params(self, **params):
        """Set the parameters of this estimator.

        The method works on simple estimators as well as on nested objects
        (such as pipelines). The latter have parameters of the form
        ``<component>__<parameter>`` so that it's possible to update each
        component of a nested object.

        Returns
        -------
        self

        adapted from
        https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/base.py
        # Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
        # License: BSD 3 clause
        """
        if not params:
            # Simple optimization to gain speed (inspect is slow)
            return self
        valid_params = self.get_params(deep=True)

        nested_params = defaultdict(dict)  # grouped by prefix
        for key, value in params.items():
            key, delim, sub_key = key.partition('__')
            if key not in valid_params:
                raise ValueError('Invalid parameter %s for estimator %s. '
                                 'Check the list of available parameters '
                                 'with `estimator.get_params().keys()`.' %
                                 (key, self))

            if delim:
                nested_params[key][sub_key] = value
            else:
                setattr(self, key, value)
                valid_params[key] = value

        for key, sub_params in nested_params.items():
            valid_params[key].set_params(**sub_params)

        return self
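And, completing the hypothetical `Wrapper` sketch, `set_params` groups `<component>__<parameter>` keys by prefix and forwards each group to the sub-estimator's own `set_params`:

```python
w.set_params(estimator__eta=0.5, estimator__epochs=10)
assert w.estimator.eta == 0.5
assert w.estimator.epochs == 10

# Unknown keys are rejected up front:
# w.set_params(gamma=0.1)  # raises ValueError: Invalid parameter gamma ...
```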
3 changes: 2 additions & 1 deletion mlxtend/classifier/softmax_regression.py
@@ -16,7 +16,8 @@
 from .._base import _Classifier


-class SoftmaxRegression(_BaseModel, _IterativeModel, _Classifier, _MultiClass):
+class SoftmaxRegression(_BaseModel, _IterativeModel,
+                        _Classifier, _MultiClass):

     """Softmax regression classifier.
Expand Down
6 changes: 6 additions & 0 deletions mlxtend/classifier/tests/test_adaline.py
@@ -9,6 +9,7 @@
from mlxtend.classifier import Adaline
from mlxtend.data import iris_data
from mlxtend.utils import assert_raises
from sklearn.base import clone


# Iris Data
@@ -147,3 +148,8 @@ def test_ary_persistency_in_shuffling():
                  random_seed=1)
    ada.fit(X_std, y1)
    np.testing.assert_almost_equal(orig, X_std, 6)


def test_clone():
    ada = Adaline()
    clone(ada)
12 changes: 12 additions & 0 deletions mlxtend/classifier/tests/test_ensemble_vote_classifier.py
@@ -13,6 +13,7 @@
from sklearn import datasets
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.base import clone


iris = datasets.load_iris()
@@ -195,3 +196,14 @@ def test_string_labels_refit_false():

    eclf.fit(X, y_str)
    assert round(eclf.score(X, y_str), 2) == 0.97


def test_clone():

    clf1 = LogisticRegression()
    clf2 = RandomForestClassifier()
    clf3 = GaussianNB()
    eclf = EnsembleVoteClassifier(clfs=[clf1, clf2, clf3],
                                  voting='hard',
                                  refit=False)
    clone(eclf)
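Clone support is what lets scikit-learn meta-estimators fit fresh, independent copies of the ensemble; a sketch assuming `X` and `y` are the iris arrays loaded at the top of this module:

```python
# cross_val_score clones eclf internally before fitting each fold
scores = cross_val_score(eclf, X, y, cv=5)
```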
6 changes: 6 additions & 0 deletions mlxtend/classifier/tests/test_logistic_regression.py
@@ -9,6 +9,7 @@
from mlxtend.classifier import LogisticRegression
from mlxtend.data import iris_data
from mlxtend.utils import assert_raises
from sklearn.base import clone


X, y = iris_data()
@@ -192,3 +193,8 @@ def test_ary_persistency_in_shuffling():
                            random_seed=1)
    lr.fit(X, y)
    np.testing.assert_almost_equal(orig, X, 6)


def test_clone():
    log = LogisticRegression()
    clone(log)
12 changes: 12 additions & 0 deletions mlxtend/classifier/tests/test_multilayerperceptron.py
@@ -8,6 +8,7 @@
from mlxtend.data import iris_data
import numpy as np
from mlxtend.utils import assert_raises
from sklearn.base import clone


X, y = iris_data()
@@ -170,3 +171,14 @@ def test_retrain():

    assert cost_2 == cost_1
    assert cost_3 < (cost_2 / 2.0)


def test_clone():

    mlp = MLP(epochs=5,
              eta=0.05,
              hidden_layers=[10],
              minibatches=len(y),
              random_seed=1)

    clone(mlp)
6 changes: 6 additions & 0 deletions mlxtend/classifier/tests/test_perceptron.py
@@ -9,6 +9,7 @@
from mlxtend.classifier import Perceptron
from mlxtend.data import iris_data
from mlxtend.utils import assert_raises
from sklearn.base import clone

# Iris Data
X, y = iris_data()
@@ -85,3 +86,8 @@ def test_nonstandardized_iris_data():
    ppn = Perceptron(epochs=100, eta=0.01, random_seed=1)
    ppn = ppn.fit(X, y0)
    assert (y0 == ppn.predict(X)).all()


def test_clone():
    ppn = Perceptron()
    clone(ppn)
6 changes: 6 additions & 0 deletions mlxtend/classifier/tests/test_softmax_regression.py
@@ -8,6 +8,7 @@
from mlxtend.classifier import SoftmaxRegression
from mlxtend.data import iris_data
from mlxtend.utils import assert_raises
from sklearn.base import clone


X, y = iris_data()
@@ -166,3 +167,8 @@ def test_score_function():
    lr.fit(X, y)
    acc = lr.score(X, y)
    assert acc == 1.0, acc


def test_clone():
    lr = SoftmaxRegression()
    clone(lr)
12 changes: 12 additions & 0 deletions mlxtend/classifier/tests/test_stacking_classifier.py
@@ -17,6 +17,7 @@
from mlxtend.utils import assert_raises
from nose.tools import assert_almost_equal
from sklearn.model_selection import train_test_split
from sklearn.base import clone


iris = datasets.load_iris()
@@ -320,3 +321,14 @@ def test_predict_meta_features():
    stclf.fit(X_train, y_train)
    test_meta_features = stclf.predict(X_test)
    assert test_meta_features.shape == (X_test.shape[0],)


def test_clone():

    knn = KNeighborsClassifier()
    lr = LogisticRegression()
    gnb = GaussianNB()
    stclf = StackingClassifier(classifiers=[knn, gnb],
                               meta_classifier=lr,
                               store_train_meta_features=True)
    clone(stclf)
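What `clone` guarantees here, sketched as assertions (the sub-estimator behavior follows from scikit-learn's `clone`, which recursively clones estimator-valued parameters):

```python
stclf_copy = clone(stclf)

assert stclf_copy is not stclf
assert sorted(stclf_copy.get_params().keys()) == sorted(stclf.get_params().keys())
assert stclf_copy.meta_classifier is not stclf.meta_classifier
```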
12 changes: 12 additions & 0 deletions mlxtend/classifier/tests/test_stacking_cv_classifier.py
@@ -20,6 +20,8 @@
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.base import clone


iris = datasets.load_iris()
X_iris, y_iris = iris.data[:, 1:3], iris.target
@@ -348,3 +350,13 @@ def test_meta_feat_reordering():

    assert round(roc_auc_score(y_train,
                               stclf.train_meta_features_[:, 1]), 2) == 0.88


def test_clone():
    knn = KNeighborsClassifier()
    lr = LogisticRegression()
    gnb = GaussianNB()
    stclf = StackingCVClassifier(classifiers=[knn, gnb],
                                 meta_classifier=lr,
                                 store_train_meta_features=True)
    clone(stclf)
6 changes: 6 additions & 0 deletions mlxtend/cluster/tests/test_kmeans.py
@@ -8,6 +8,7 @@
from mlxtend.cluster import Kmeans
from mlxtend.utils import assert_raises
import numpy as np
from sklearn.base import clone


X, y = three_blobs_data()
@@ -106,3 +107,8 @@ def test_continue_training():
    km.fit(X, init_params=False)
    np.testing.assert_almost_equal(second_iter, km.centroids_, decimal=2)
    assert km.iterations_ == 2, km.iterations_


def test_clone():
    km = Kmeans(k=2)
    clone(km)