Skip to content

Commit

Permalink
Change default of ColumnTransformer remainder from passthrough to drop
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche authored and GaelVaroquaux committed Jul 18, 2018
1 parent 9111064 commit 277e125
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 28 deletions.
4 changes: 2 additions & 2 deletions doc/modules/compose.rst
Expand Up @@ -413,7 +413,7 @@ variable, but apply a :class:`feature_extraction.text.CountVectorizer
<sklearn.feature_extraction.text.CountVectorizer>` to the ``'title'`` column.
As we might use multiple feature extraction methods on the same column, we give
each transformer a unique name, say ``'city_category'`` and ``'title_bow'``.
We can ignore the remaining rating columns by setting ``remainder='drop'``::
By default, the remaining rating columns are ignored (``remainder='drop'``)::

>>> from sklearn.compose import ColumnTransformer
>>> from sklearn.feature_extraction.text import CountVectorizer
Expand Down Expand Up @@ -495,7 +495,7 @@ above example would be::
... ('city', CountVectorizer(analyzer=lambda x: [x])),
... ('title', CountVectorizer()))
>>> column_trans # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
ColumnTransformer(n_jobs=1, remainder='passthrough', transformer_weights=None,
ColumnTransformer(n_jobs=1, remainder='drop', transformer_weights=None,
transformers=[('countvectorizer-1', ...)

.. topic:: Examples:
Expand Down
23 changes: 11 additions & 12 deletions sklearn/compose/_column_transformer.py
Expand Up @@ -71,14 +71,14 @@ class ColumnTransformer(_BaseComposition, TransformerMixin):
A callable is passed the input data `X` and can return any of the
above.
remainder : {'passthrough', 'drop'} or estimator, default 'passthrough'
By default, all remaining columns that were not specified in
`transformers` will be automatically passed through (default of
``'passthrough'``). This subset of columns is concatenated with the
output of the transformers.
By using ``remainder='drop'``, only the specified columns in
`transformers` are transformed and combined in the output, and the
non-specified columns are dropped.
remainder : {'passthrough', 'drop'} or estimator, default 'drop'
By default, only the specified columns in `transformers` are
transformed and combined in the output, and the non-specified
columns are dropped (default of ``'drop'``).
By specifying ``remainder='passthrough'``, all remaining columns that
were not specified in `transformers` will be automatically passed
through. This subset of columns is concatenated with the output of
the transformers.
By setting ``remainder`` to be an estimator, the remaining
non-specified columns will use the ``remainder`` estimator. The
estimator must support `fit` and `transform`.
Expand Down Expand Up @@ -141,7 +141,7 @@ class ColumnTransformer(_BaseComposition, TransformerMixin):
"""

def __init__(self, transformers, remainder='passthrough', n_jobs=1,
def __init__(self, transformers, remainder='drop', n_jobs=1,
transformer_weights=None):
self.transformers = transformers
self.remainder = remainder
Expand Down Expand Up @@ -658,8 +658,7 @@ def make_column_transformer(*transformers, **kwargs):
... (['numerical_column'], StandardScaler()),
... (['categorical_column'], OneHotEncoder()))
... # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
ColumnTransformer(n_jobs=1, remainder='passthrough',
transformer_weights=None,
ColumnTransformer(n_jobs=1, remainder='drop', transformer_weights=None,
transformers=[('standardscaler',
StandardScaler(...),
['numerical_column']),
Expand All @@ -669,7 +668,7 @@ def make_column_transformer(*transformers, **kwargs):
"""
n_jobs = kwargs.pop('n_jobs', 1)
remainder = kwargs.pop('remainder', 'passthrough')
remainder = kwargs.pop('remainder', 'drop')
if kwargs:
raise TypeError('Unknown keyword arguments: "{}"'
.format(list(kwargs.keys())[0]))
Expand Down
31 changes: 17 additions & 14 deletions sklearn/compose/tests/test_column_transformer.py
Expand Up @@ -405,7 +405,7 @@ def test_column_transformer_get_set_params():
('trans2', StandardScaler(), [1])])

exp = {'n_jobs': 1,
'remainder': 'passthrough',
'remainder': 'drop',
'trans1': ct.transformers[0][1],
'trans1__copy': True,
'trans1__with_mean': True,
Expand All @@ -424,7 +424,7 @@ def test_column_transformer_get_set_params():

ct.set_params(trans1='passthrough')
exp = {'n_jobs': 1,
'remainder': 'passthrough',
'remainder': 'drop',
'trans1': 'passthrough',
'trans2': ct.transformers[1][1],
'trans2__copy': True,
Expand Down Expand Up @@ -492,7 +492,8 @@ def test_column_transformer_get_feature_names():
NotImplementedError, 'get_feature_names is not yet supported',
ct.get_feature_names)

ct = ColumnTransformer([('trans', DictVectorizer(), 0)])
ct = ColumnTransformer([('trans', DictVectorizer(), 0)],
remainder='passthrough')
ct.fit(X)
assert_raise_message(
NotImplementedError, 'get_feature_names is not yet supported',
Expand Down Expand Up @@ -552,23 +553,22 @@ def test_column_transformer_remainder():
X_res_second = np.array([2, 4, 6]).reshape(-1, 1)
X_res_both = X_array

# default passthrough
ct = ColumnTransformer([('trans', Trans(), [0])])
assert_array_equal(ct.fit_transform(X_array), X_res_both)
assert_array_equal(ct.fit(X_array).transform(X_array), X_res_both)
# default drop
ct = ColumnTransformer([('trans1', Trans(), [0])])
assert_array_equal(ct.fit_transform(X_array), X_res_first)
assert_array_equal(ct.fit(X_array).transform(X_array), X_res_first)
assert len(ct.transformers_) == 2
assert ct.transformers_[-1][0] == 'remainder'
assert ct.transformers_[-1][1] == 'passthrough'
assert ct.transformers_[-1][1] == 'drop'
assert_array_equal(ct.transformers_[-1][2], [1])

# specify to drop remaining columns
ct = ColumnTransformer([('trans1', Trans(), [0])],
remainder='drop')
assert_array_equal(ct.fit_transform(X_array), X_res_first)
assert_array_equal(ct.fit(X_array).transform(X_array), X_res_first)
# specify passthrough
ct = ColumnTransformer([('trans', Trans(), [0])], remainder='passthrough')
assert_array_equal(ct.fit_transform(X_array), X_res_both)
assert_array_equal(ct.fit(X_array).transform(X_array), X_res_both)
assert len(ct.transformers_) == 2
assert ct.transformers_[-1][0] == 'remainder'
assert ct.transformers_[-1][1] == 'drop'
assert ct.transformers_[-1][1] == 'passthrough'
assert_array_equal(ct.transformers_[-1][2], [1])

# column order is not preserved (passed-through columns are added to the end)
Expand Down Expand Up @@ -602,6 +602,9 @@ def test_column_transformer_remainder():
"remainder keyword needs to be one of \'drop\', \'passthrough\', "
"or estimator.", ct.fit_transform, X_array)

# check default for make_column_transformer
ct = make_column_transformer(([0], Trans()))
assert ct.remainder == 'drop'

@pytest.mark.parametrize("key", [[0], np.array([0]), slice(0, 1),
np.array([True, False])])
Expand Down

0 comments on commit 277e125

Please sign in to comment.