Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG] Add function score_samples to Pipeline (fix issue #12542) #13806

Merged
merged 23 commits into from Jun 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions doc/whats_new/v0.22.rst
Expand Up @@ -47,6 +47,12 @@ Changelog
of the maximization procedure in :term:`fit`.
:pr:`13618` by :user:`Yoshihiro Uchida <c56pony>`.

:mod:`sklearn.pipeline`
.......................

- |Enhancement| :class:`pipeline.Pipeline` now supports :term:`score_samples` if
the final estimator does.
:pr:`13806` by :user:`Anaël Beaugnon <ab-anssi>`.

:mod:`sklearn.svm`
..................
Expand Down
19 changes: 19 additions & 0 deletions sklearn/pipeline.py
Expand Up @@ -491,6 +491,25 @@ def decision_function(self, X):
Xt = transform.transform(Xt)
return self.steps[-1][-1].decision_function(Xt)

@if_delegate_has_method(delegate='_final_estimator')
def score_samples(self, X):
    """Transform the data, then call score_samples on the final estimator.

    X is passed through ``transform`` of every step yielded by
    ``self._iter(with_final=False)``, in order, and the result is handed
    to ``score_samples`` of the last step. The method is exposed through
    ``if_delegate_has_method`` with ``_final_estimator`` as the delegate.

    Parameters
    ----------
    X : iterable
        Data to score. Must fulfill input requirements of first step
        of the pipeline.

    Returns
    -------
    y_score : ndarray, shape (n_samples,)
    """
    transformed = X
    # Each item yielded by _iter is a 3-tuple; only the estimator in the
    # last position is needed here.
    for _idx, _name, step in self._iter(with_final=False):
        transformed = step.transform(transformed)
    final_estimator = self.steps[-1][-1]
    return final_estimator.score_samples(transformed)

@if_delegate_has_method(delegate='_final_estimator')
def predict_log_proba(self, X):
"""Apply transforms, and predict_log_proba of the final estimator
Expand Down
32 changes: 32 additions & 0 deletions sklearn/tests/test_pipeline.py
Expand Up @@ -16,6 +16,7 @@
from sklearn.utils.testing import assert_raises_regex
from sklearn.utils.testing import assert_raise_message
from sklearn.utils.testing import assert_equal
from sklearn.utils.testing import assert_allclose
from sklearn.utils.testing import assert_array_equal
from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_dict_equal
Expand All @@ -24,6 +25,7 @@
from sklearn.base import clone, BaseEstimator
from sklearn.pipeline import Pipeline, FeatureUnion, make_pipeline, make_union
from sklearn.svm import SVC
from sklearn.neighbors import LocalOutlierFactor
from sklearn.linear_model import LogisticRegression, Lasso
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans
Expand Down Expand Up @@ -330,6 +332,36 @@ def test_pipeline_methods_pca_svm():
pipe.score(X, y)


def test_pipeline_score_samples_pca_lof():
    """Check score_samples on a PCA + LocalOutlierFactor pipeline.

    The pipeline must expose score_samples, return one score per sample,
    and yield the same values as running the transform and score_samples
    steps by hand.
    """
    X = load_iris().data
    pca = PCA(svd_solver='full', n_components='mle', whiten=True)
    lof = LocalOutlierFactor(novelty=True)
    pipe = Pipeline([('pca', pca), ('lof', lof)])
    pipe.fit(X)

    # Shape: one score per input row.
    n_samples = X.shape[0]
    assert pipe.score_samples(X).shape == (n_samples,)

    # Values: refit the two steps manually and compare against the
    # pipeline output.
    X_projected = pca.fit_transform(X)
    lof.fit(X_projected)
    pipeline_scores = pipe.score_samples(X)
    manual_scores = lof.score_samples(pca.transform(X))
    assert_allclose(pipeline_scores, manual_scores)


def test_score_samples_on_pipeline_without_score_samples():
    """Check that score_samples is unavailable without a capable final step.

    When the last step of the pipeline does not define score_samples,
    calling it on the pipeline must raise an AttributeError naming the
    final estimator's class.
    """
    features = np.array([[1], [2]])
    targets = np.array([1, 2])
    expected_message = ("'LogisticRegression' object has no attribute "
                        "'score_samples'")

    pipe = make_pipeline(LogisticRegression())
    pipe.fit(features, targets)

    with pytest.raises(AttributeError, match=expected_message):
        pipe.score_samples(features)


def test_pipeline_methods_preprocessing_svm():
# Test the various methods of the pipeline (preprocessing + svm).
iris = load_iris()
Expand Down