Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH add support for 'passthrough' in FeatureUnion #20860

Merged
merged 22 commits into from Sep 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions doc/whats_new/v1.1.rst
Expand Up @@ -53,6 +53,12 @@ Changelog
:pr:`20880` by :user:`Guillaume Lemaitre <glemaitre>`
and :user:`András Simon <simonandras>`.

:mod:`sklearn.pipeline`
.......................

- |Enhancement| Added support for "passthrough" in :class:`FeatureUnion`.
Setting a transformer to "passthrough" will pass the features unchanged.
:pr:`20860` by :user:`Shubhraneel Pal <shubhraneel>`.

Code and Documentation Contributors
-----------------------------------
Expand Down
35 changes: 21 additions & 14 deletions sklearn/pipeline.py
Expand Up @@ -17,6 +17,7 @@
from joblib import Parallel

from .base import clone, TransformerMixin
from .preprocessing import FunctionTransformer
from .utils._estimator_html_repr import _VisualBlock
from .utils.metaestimators import available_if
from .utils import (
Expand Down Expand Up @@ -853,21 +854,24 @@ class FeatureUnion(TransformerMixin, _BaseComposition):

Parameters of the transformers may be set using its name and the parameter
name separated by a '__'. A transformer may be replaced entirely by
setting the parameter with its name to another transformer,
or removed by setting to 'drop'.
setting the parameter with its name to another transformer, removed by
setting to 'drop' or disabled by setting to 'passthrough' (features are
passed without transformation).

Read more in the :ref:`User Guide <feature_union>`.

.. versionadded:: 0.13

Parameters
----------
transformer_list : list of tuple
List of tuple containing `(str, transformer)`. The first element
of the tuple is name affected to the transformer while the
second element is a scikit-learn transformer instance.
The transformer instance can also be `"drop"` for it to be
ignored.
transformer_list : list of (str, transformer) tuples
List of transformer objects to be applied to the data. The first
element of each tuple is the name of the transformer. The transformer
can be 'drop' for it to be ignored or can be 'passthrough' for features
to be passed unchanged.

.. versionadded:: 1.1
Added the option `"passthrough"`.

.. versionchanged:: 0.22
Deprecated `None` as a transformer in favor of 'drop'.
Expand Down Expand Up @@ -977,7 +981,7 @@ def _validate_transformers(self):

# validate estimators
for t in transformers:
if t == "drop":
if t in ("drop", "passthrough"):
continue
if not (hasattr(t, "fit") or hasattr(t, "fit_transform")) or not hasattr(
t, "transform"
Expand All @@ -1004,12 +1008,15 @@ def _iter(self):
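Generate (name, trans, weight) tuples excluding None and
'drop' transformers. A 'passthrough' entry is yielded as a default
`FunctionTransformer()`, which passes the features through unchanged.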
"""

get_weight = (self.transformer_weights or {}).get
return (
(name, trans, get_weight(name))
for name, trans in self.transformer_list
if trans != "drop"
)

for name, trans in self.transformer_list:
if trans == "drop":
continue
if trans == "passthrough":
trans = FunctionTransformer()
yield (name, trans, get_weight(name))

@deprecated(
"get_feature_names is deprecated in 1.0 and will be removed "
Expand Down
54 changes: 54 additions & 0 deletions sklearn/tests/test_pipeline.py
Expand Up @@ -1004,6 +1004,60 @@ def test_set_feature_union_step_drop(get_names):
assert not record


def test_set_feature_union_passthrough():
    """Check the behaviour of setting a transformer to `"passthrough"`."""
    mult2 = Mult(2)
    mult3 = Mult(3)
    X = np.asarray([[1]])

    # Baseline: both transformers active.
    ft = FeatureUnion([("m2", mult2), ("m3", mult3)])
    assert_array_equal([[2, 3]], ft.fit(X).transform(X))
    assert_array_equal([[2, 3]], ft.fit_transform(X))

    # Replacing a single transformer with "passthrough" leaves its input
    # column untouched while the other transformer keeps working.
    ft.set_params(m2="passthrough")
    assert_array_equal([[1, 3]], ft.fit(X).transform(X))
    assert_array_equal([[1, 3]], ft.fit_transform(X))

    # Every transformer may be "passthrough" at the same time.
    ft.set_params(m3="passthrough")
    assert_array_equal([[1, 1]], ft.fit(X).transform(X))
    assert_array_equal([[1, 1]], ft.fit_transform(X))

    # check we can change back
    ft.set_params(m3=mult3)
    assert_array_equal([[1, 3]], ft.fit(X).transform(X))
    assert_array_equal([[1, 3]], ft.fit_transform(X))

    # Check 'passthrough' step at construction time
    ft = FeatureUnion([("m2", "passthrough"), ("m3", mult3)])
    assert_array_equal([[1, 3]], ft.fit(X).transform(X))
    assert_array_equal([[1, 3]], ft.fit_transform(X))

    # Multi-column input: the passthrough block must reproduce X exactly,
    # in the position given by the order of `transformer_list`.
    X = iris.data
    columns = X.shape[1]
    pca = PCA(n_components=2, svd_solver="randomized", random_state=0)

    # A step may itself be named "passthrough".
    ft = FeatureUnion([("passthrough", "passthrough"), ("pca", pca)])
    assert_array_equal(X, ft.fit(X).transform(X)[:, :columns])
    assert_array_equal(X, ft.fit_transform(X)[:, :columns])

    # Two passthrough steps stack two copies of X side by side.
    ft.set_params(pca="passthrough")
    X_ft = ft.fit(X).transform(X)
    assert_array_equal(X_ft, np.hstack([X, X]))
    X_ft = ft.fit_transform(X)
    assert_array_equal(X_ft, np.hstack([X, X]))

    # Swap roles: the step named "passthrough" now holds the PCA, so the
    # untouched copy of X is found in the trailing columns.
    ft.set_params(passthrough=pca)
    assert_array_equal(X, ft.fit(X).transform(X)[:, -columns:])
    assert_array_equal(X, ft.fit_transform(X)[:, -columns:])

    # `transformer_weights` also apply to a passthrough step.
    ft = FeatureUnion(
        [("passthrough", "passthrough"), ("pca", pca)],
        transformer_weights={"passthrough": 2},
    )
    assert_array_equal(X * 2, ft.fit(X).transform(X)[:, :columns])
    assert_array_equal(X * 2, ft.fit_transform(X)[:, :columns])


def test_step_name_validation():
error_message_1 = r"Estimator names must not contain __: got \['a__q'\]"
error_message_2 = r"Names provided are not unique: \['a', 'a'\]"
Expand Down