Skip to content

Commit

Permalink
ENH Add function score_samples to Pipeline (#13806)
Browse files Browse the repository at this point in the history
  • Loading branch information
Anaël Beaugnon authored and glemaitre committed Jun 5, 2019
1 parent 6675c9e commit ec35ed2
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 0 deletions.
6 changes: 6 additions & 0 deletions doc/whats_new/v0.22.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ Changelog
of the maximization procedure in :term:`fit`.
:pr:`13618` by :user:`Yoshihiro Uchida <c56pony>`.

:mod:`sklearn.pipeline`
.......................

- |Enhancement| :class:`pipeline.Pipeline` now supports :term:`score_samples` if
  the final estimator does.
  :pr:`13806` by :user:`Anaël Beaugnon <ab-anssi>`.

:mod:`sklearn.svm`
..................
Expand Down
19 changes: 19 additions & 0 deletions sklearn/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,25 @@ def decision_function(self, X):
Xt = transform.transform(Xt)
return self.steps[-1][-1].decision_function(Xt)

@if_delegate_has_method(delegate='_final_estimator')
def score_samples(self, X):
    """Run every transform step, then ``score_samples`` of the last step.

    Parameters
    ----------
    X : iterable
        Data to predict on. Must fulfill input requirements of first step
        of the pipeline.

    Returns
    -------
    y_score : ndarray, shape (n_samples,)
        Whatever ``score_samples`` of the final estimator returns for the
        transformed data.
    """
    transformed = X
    # Push the data through every step except the final estimator.
    for _, _, step in self._iter(with_final=False):
        transformed = step.transform(transformed)
    final_estimator = self.steps[-1][-1]
    return final_estimator.score_samples(transformed)

@if_delegate_has_method(delegate='_final_estimator')
def predict_log_proba(self, X):
"""Apply transforms, and predict_log_proba of the final estimator
Expand Down
32 changes: 32 additions & 0 deletions sklearn/tests/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from sklearn.utils.testing import assert_raises_regex
from sklearn.utils.testing import assert_raise_message
from sklearn.utils.testing import assert_equal
from sklearn.utils.testing import assert_allclose
from sklearn.utils.testing import assert_array_equal
from sklearn.utils.testing import assert_array_almost_equal
from sklearn.utils.testing import assert_dict_equal
Expand All @@ -24,6 +25,7 @@
from sklearn.base import clone, BaseEstimator
from sklearn.pipeline import Pipeline, FeatureUnion, make_pipeline, make_union
from sklearn.svm import SVC
from sklearn.neighbors import LocalOutlierFactor
from sklearn.linear_model import LogisticRegression, Lasso
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans
Expand Down Expand Up @@ -330,6 +332,36 @@ def test_pipeline_methods_pca_svm():
pipe.score(X, y)


def test_pipeline_score_samples_pca_lof():
    # The pipeline must expose score_samples, and its output must match
    # what is obtained by chaining the PCA transform and the LOF
    # score_samples call by hand.
    X = load_iris().data
    n_samples = X.shape[0]

    pca = PCA(svd_solver='full', n_components='mle', whiten=True)
    lof = LocalOutlierFactor(novelty=True)
    pipe = Pipeline([('pca', pca), ('lof', lof)])
    pipe.fit(X)

    # One score per input sample.
    assert pipe.score_samples(X).shape == (n_samples,)

    # Refit the two steps manually and compare the scores.
    X_reduced = pca.fit_transform(X)
    lof.fit(X_reduced)
    pipeline_scores = pipe.score_samples(X)
    manual_scores = lof.score_samples(pca.transform(X))
    assert_allclose(pipeline_scores, manual_scores)


def test_score_samples_on_pipeline_without_score_samples():
    # A pipeline whose final step does not define score_samples must not
    # offer score_samples itself: calling it raises AttributeError.
    features = np.array([[1], [2]])
    targets = np.array([1, 2])

    pipe = make_pipeline(LogisticRegression())
    pipe.fit(features, targets)

    expected_message = ("'LogisticRegression' object has no attribute "
                        "'score_samples'")
    with pytest.raises(AttributeError, match=expected_message):
        pipe.score_samples(features)


def test_pipeline_methods_preprocessing_svm():
# Test the various methods of the pipeline (preprocessing + svm).
iris = load_iris()
Expand Down

0 comments on commit ec35ed2

Please sign in to comment.