Allow estimators to be cloned #374

Merged 4 commits on Apr 25, 2018
2 changes: 1 addition & 1 deletion docs/sources/CHANGELOG.md
@@ -26,7 +26,7 @@ The CHANGELOG for the current development version is available at

 ##### Bug Fixes

-- -
+- Allow mlxtend estimators to be cloned via scikit-learn's `clone` function. ([#374](https://github.com/rasbt/mlxtend/pull/374))



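The new behavior in one self-contained sketch (the estimator choice is arbitrary; any mlxtend estimator with an introspectable constructor works the same way):

```python
from sklearn.base import clone
from mlxtend.classifier import Perceptron

ppn = Perceptron(epochs=100, eta=0.01, random_seed=1)
ppn_copy = clone(ppn)  # constructs a new, unfitted estimator from get_params()

assert ppn_copy is not ppn
assert ppn_copy.get_params() == ppn.get_params()
```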
108 changes: 108 additions & 0 deletions mlxtend/_base/_base_model.py
@@ -7,6 +7,12 @@
# License: BSD 3 clause

from time import time
from collections import defaultdict

try:
    from inspect import signature
except ImportError:
    from ..externals.signature_py27 import signature


class _BaseModel(object):
@@ -28,3 +34,105 @@ def _check_arrays(self, X, y=None):

        if not len(y) == X.shape[0]:
            raise ValueError('X and y must contain the same number of samples')

    @classmethod
    def _get_param_names(cls):
        """Get parameter names for the estimator

        adapted from
        https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/base.py
        # Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
        # License: BSD 3 clause
        """
        # fetch the constructor or the original constructor before
        # deprecation wrapping if any
        init = getattr(cls.__init__, 'deprecated_original', cls.__init__)
        if init is object.__init__:
            # No explicit constructor to introspect
            return []

        # introspect the constructor arguments to find the model parameters
        # to represent
        init_signature = signature(init)
        # Consider the constructor parameters excluding 'self'
        parameters = [p for p in init_signature.parameters.values()
                      if p.name != 'self' and p.kind != p.VAR_KEYWORD]
        for p in parameters:
            if p.kind == p.VAR_POSITIONAL:
                raise RuntimeError("scikit-learn estimators should always "
                                   "specify their parameters in the signature"
                                   " of their __init__ (no varargs)."
                                   " %s with constructor %s doesn't "
                                   " follow this convention."
                                   % (cls, init_signature))
        # Extract and sort argument names excluding 'self'
        return sorted([p.name for p in parameters])
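To illustrate what the signature introspection above yields, here is a minimal sketch with a hypothetical subclass (not part of this diff):

```python
class ToyEstimator(_BaseModel):
    def __init__(self, eta=0.01, epochs=50, random_seed=None):
        self.eta = eta
        self.epochs = epochs
        self.random_seed = random_seed

# Constructor arguments, sorted alphabetically, 'self' excluded:
print(ToyEstimator._get_param_names())  # ['epochs', 'eta', 'random_seed']
```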

    def get_params(self, deep=True):
        """Get parameters for this estimator.

        Parameters
        ----------
        deep : boolean, optional
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : mapping of string to any
            Parameter names mapped to their values.

        adapted from
        https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/base.py
        # Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
        # License: BSD 3 clause
        """
        out = dict()
        for key in self._get_param_names():
            value = getattr(self, key, None)
            if deep and hasattr(value, 'get_params'):
                deep_items = value.get_params().items()
                out.update((key + '__' + k, val) for k, val in deep_items)
            out[key] = value
        return out
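Continuing the hypothetical `ToyEstimator` sketch: `get_params` maps each constructor argument to its current attribute value, and `deep=True` additionally flattens any parameter that itself has a `get_params` method, using the `__` separator:

```python
class Wrapper(_BaseModel):
    def __init__(self, estimator=None):
        self.estimator = estimator

w = Wrapper(estimator=ToyEstimator(eta=0.1))
print(sorted(w.get_params(deep=True)))
# ['estimator', 'estimator__epochs', 'estimator__eta', 'estimator__random_seed']
```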

    def set_params(self, **params):
        """Set the parameters of this estimator.

        The method works on simple estimators as well as on nested objects
        (such as pipelines). The latter have parameters of the form
        ``<component>__<parameter>`` so that it's possible to update each
        component of a nested object.

        Returns
        -------
        self

        adapted from
        https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/base.py
        # Author: Gael Varoquaux <gael.varoquaux@normalesup.org>
        # License: BSD 3 clause
        """
        if not params:
            # Simple optimization to gain speed (inspect is slow)
            return self
        valid_params = self.get_params(deep=True)

        nested_params = defaultdict(dict)  # grouped by prefix
        for key, value in params.items():
            key, delim, sub_key = key.partition('__')
            if key not in valid_params:
                raise ValueError('Invalid parameter %s for estimator %s. '
                                 'Check the list of available parameters '
                                 'with `estimator.get_params().keys()`.' %
                                 (key, self))

            if delim:
                nested_params[key][sub_key] = value
            else:
                setattr(self, key, value)
                valid_params[key] = value

        for key, sub_params in nested_params.items():
            valid_params[key].set_params(**sub_params)

        return self
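And, completing the hypothetical `Wrapper` sketch, `set_params` groups `<component>__<parameter>` keys by prefix and forwards each group to the sub-estimator's own `set_params`:

```python
w.set_params(estimator__eta=0.5, estimator__epochs=10)
assert w.estimator.eta == 0.5
assert w.estimator.epochs == 10

# Unknown keys are rejected up front:
# w.set_params(gamma=0.1)  # raises ValueError: Invalid parameter gamma ...
```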
3 changes: 2 additions & 1 deletion mlxtend/classifier/softmax_regression.py
@@ -16,7 +16,8 @@
 from .._base import _Classifier


-class SoftmaxRegression(_BaseModel, _IterativeModel, _Classifier, _MultiClass):
+class SoftmaxRegression(_BaseModel, _IterativeModel,
+                        _Classifier, _MultiClass):

     """Softmax regression classifier.
Expand Down
6 changes: 6 additions & 0 deletions mlxtend/classifier/tests/test_adaline.py
@@ -9,6 +9,7 @@
from mlxtend.classifier import Adaline
from mlxtend.data import iris_data
from mlxtend.utils import assert_raises
from sklearn.base import clone


# Iris Data
@@ -147,3 +148,8 @@ def test_ary_persistency_in_shuffling():
                  random_seed=1)
    ada.fit(X_std, y1)
    np.testing.assert_almost_equal(orig, X_std, 6)


def test_clone():
    ada = Adaline()
    clone(ada)
12 changes: 12 additions & 0 deletions mlxtend/classifier/tests/test_ensemble_vote_classifier.py
@@ -13,6 +13,7 @@
from sklearn import datasets
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.base import clone


iris = datasets.load_iris()
@@ -195,3 +196,14 @@ def test_string_labels_refit_false():

    eclf.fit(X, y_str)
    assert round(eclf.score(X, y_str), 2) == 0.97


def test_clone():

    clf1 = LogisticRegression()
    clf2 = RandomForestClassifier()
    clf3 = GaussianNB()
    eclf = EnsembleVoteClassifier(clfs=[clf1, clf2, clf3],
                                  voting='hard',
                                  refit=False)
    clone(eclf)
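Clone support is what lets scikit-learn meta-estimators fit fresh, independent copies of the ensemble; a sketch assuming `X` and `y` are the iris arrays loaded at the top of this module:

```python
# cross_val_score clones eclf internally before fitting each fold
scores = cross_val_score(eclf, X, y, cv=5)
```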
6 changes: 6 additions & 0 deletions mlxtend/classifier/tests/test_logistic_regression.py
@@ -9,6 +9,7 @@
from mlxtend.classifier import LogisticRegression
from mlxtend.data import iris_data
from mlxtend.utils import assert_raises
from sklearn.base import clone


X, y = iris_data()
@@ -192,3 +193,8 @@ def test_ary_persistency_in_shuffling():
                            random_seed=1)
    lr.fit(X, y)
    np.testing.assert_almost_equal(orig, X, 6)


def test_clone():
    log = LogisticRegression()
    clone(log)
12 changes: 12 additions & 0 deletions mlxtend/classifier/tests/test_multilayerperceptron.py
@@ -8,6 +8,7 @@
from mlxtend.data import iris_data
import numpy as np
from mlxtend.utils import assert_raises
from sklearn.base import clone


X, y = iris_data()
@@ -170,3 +171,14 @@ def test_retrain():

    assert cost_2 == cost_1
    assert cost_3 < (cost_2 / 2.0)


def test_clone():

    mlp = MLP(epochs=5,
              eta=0.05,
              hidden_layers=[10],
              minibatches=len(y),
              random_seed=1)

    clone(mlp)
6 changes: 6 additions & 0 deletions mlxtend/classifier/tests/test_perceptron.py
@@ -9,6 +9,7 @@
from mlxtend.classifier import Perceptron
from mlxtend.data import iris_data
from mlxtend.utils import assert_raises
from sklearn.base import clone

# Iris Data
X, y = iris_data()
@@ -85,3 +86,8 @@ def test_nonstandardized_iris_data():
    ppn = Perceptron(epochs=100, eta=0.01, random_seed=1)
    ppn = ppn.fit(X, y0)
    assert (y0 == ppn.predict(X)).all()


def test_clone():
    ppn = Perceptron()
    clone(ppn)
6 changes: 6 additions & 0 deletions mlxtend/classifier/tests/test_softmax_regression.py
@@ -8,6 +8,7 @@
from mlxtend.classifier import SoftmaxRegression
from mlxtend.data import iris_data
from mlxtend.utils import assert_raises
from sklearn.base import clone


X, y = iris_data()
@@ -166,3 +167,8 @@ def test_score_function():
    lr.fit(X, y)
    acc = lr.score(X, y)
    assert acc == 1.0, acc


def test_clone():
    lr = SoftmaxRegression()
    clone(lr)
12 changes: 12 additions & 0 deletions mlxtend/classifier/tests/test_stacking_classifier.py
@@ -17,6 +17,7 @@
from mlxtend.utils import assert_raises
from nose.tools import assert_almost_equal
from sklearn.model_selection import train_test_split
from sklearn.base import clone


iris = datasets.load_iris()
@@ -320,3 +321,14 @@ def test_predict_meta_features():
    stclf.fit(X_train, y_train)
    test_meta_features = stclf.predict(X_test)
    assert test_meta_features.shape == (X_test.shape[0],)


def test_clone():

    knn = KNeighborsClassifier()
    lr = LogisticRegression()
    gnb = GaussianNB()
    stclf = StackingClassifier(classifiers=[knn, gnb],
                               meta_classifier=lr,
                               store_train_meta_features=True)
    clone(stclf)
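What `clone` guarantees here, sketched as assertions (the sub-estimator behavior follows from scikit-learn's `clone`, which recursively clones estimator-valued parameters):

```python
stclf_copy = clone(stclf)

assert stclf_copy is not stclf
assert sorted(stclf_copy.get_params().keys()) == sorted(stclf.get_params().keys())
assert stclf_copy.meta_classifier is not stclf.meta_classifier
```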
12 changes: 12 additions & 0 deletions mlxtend/classifier/tests/test_stacking_cv_classifier.py
@@ -20,6 +20,8 @@
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.base import clone


iris = datasets.load_iris()
X_iris, y_iris = iris.data[:, 1:3], iris.target
@@ -348,3 +350,13 @@ def test_meta_feat_reordering():

    assert round(roc_auc_score(y_train,
                               stclf.train_meta_features_[:, 1]), 2) == 0.88


def test_clone():
    knn = KNeighborsClassifier()
    lr = LogisticRegression()
    gnb = GaussianNB()
    stclf = StackingCVClassifier(classifiers=[knn, gnb],
                                 meta_classifier=lr,
                                 store_train_meta_features=True)
    clone(stclf)
6 changes: 6 additions & 0 deletions mlxtend/cluster/tests/test_kmeans.py
@@ -8,6 +8,7 @@
from mlxtend.cluster import Kmeans
from mlxtend.utils import assert_raises
import numpy as np
from sklearn.base import clone


X, y = three_blobs_data()
@@ -106,3 +107,8 @@ def test_continue_training():
    km.fit(X, init_params=False)
    np.testing.assert_almost_equal(second_iter, km.centroids_, decimal=2)
    assert km.iterations_ == 2, km.iterations_


def test_clone():
    km = Kmeans(k=2)
    clone(km)