Skip to content

Commit

Permalink
DEPR: object-dtype bool_only (pandas-dev#49371)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and noatamir committed Nov 9, 2022
1 parent 9fda232 commit bd9b7e5
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 70 deletions.
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,8 @@ Removal of prior version deprecations/changes
- Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`)
- Changed the behavior of :class:`Series` constructor; it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`)
- Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`)
- Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values will no longer be included; manually cast to ``bool`` dtype first (:issue:`46188`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_200.performance:
Expand Down
42 changes: 0 additions & 42 deletions pandas/core/dtypes/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,10 @@
from numbers import Number
import re
from typing import Pattern
import warnings

import numpy as np

from pandas._libs import lib
from pandas._typing import ArrayLike
from pandas.util._exceptions import find_stack_level

is_bool = lib.is_bool

Expand Down Expand Up @@ -425,42 +422,3 @@ def is_dataclass(item):
return is_dataclass(item) and not isinstance(item, type)
except ImportError:
return False


def is_inferred_bool_dtype(arr: ArrayLike) -> bool:
    """
    Report whether ``arr`` is an ndarray[bool] or an ndarray[object] that
    holds only bool objects.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray

    Returns
    -------
    bool

    Notes
    -----
    Anything that is not an ``np.ndarray`` yields False, so the special
    treatment is_bool_dtype uses for Categorical does not apply here.

    An object-dtype array made up entirely of bools is still reported as
    bool, but a FutureWarning is emitted because that inference is
    deprecated (GH#46188).
    """
    if isinstance(arr, np.ndarray):
        arr_dtype = arr.dtype

        if arr_dtype == np.dtype(bool):
            return True

        if arr_dtype == np.dtype("object"):
            all_bools = lib.is_bool_array(arr)
            if all_bools:
                # GH#46188
                warnings.warn(
                    "In a future version, object-dtype columns with all-bool values "
                    "will not be included in reductions with bool_only=True. "
                    "Explicitly cast to bool dtype instead.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            return all_bools

    # Non-ndarray inputs and every other numpy dtype are never bool here.
    return False
3 changes: 1 addition & 2 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@
ABCDataFrame,
ABCSeries,
)
from pandas.core.dtypes.inference import is_inferred_bool_dtype
from pandas.core.dtypes.missing import (
array_equals,
isna,
Expand Down Expand Up @@ -488,7 +487,7 @@ def get_bool_data(self: T, copy: bool = False) -> T:
copy : bool, default False
Whether to copy the blocks
"""
return self._get_data_subset(is_inferred_bool_dtype)
return self._get_data_subset(lambda x: x.dtype == np.dtype(bool))

def get_numeric_data(self: T, copy: bool = False) -> T:
"""
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@
ABCPandasArray,
ABCSeries,
)
from pandas.core.dtypes.inference import is_inferred_bool_dtype
from pandas.core.dtypes.missing import (
is_valid_na_for_dtype,
isna,
Expand Down Expand Up @@ -194,7 +193,7 @@ def is_bool(self) -> bool:
"""
We can be bool if a) we are bool dtype or b) object dtype with bool objects.
"""
return is_inferred_bool_dtype(self.values)
return self.values.dtype == np.dtype(bool)

@final
def external_values(self):
Expand Down
28 changes: 10 additions & 18 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1280,7 +1280,6 @@ def test_any_all_object(self):
assert result is False

def test_any_all_object_bool_only(self):
msg = "object-dtype columns with all-bool values"

df = DataFrame({"A": ["foo", 2], "B": [True, False]}).astype(object)
df._consolidate_inplace()
Expand All @@ -1291,36 +1290,29 @@ def test_any_all_object_bool_only(self):

# The underlying bug is in DataFrame._get_bool_data, so we check
# that while we're here
with tm.assert_produces_warning(FutureWarning, match=msg):
res = df._get_bool_data()
expected = df[["B", "C"]]
res = df._get_bool_data()
expected = df[["C"]]
tm.assert_frame_equal(res, expected)

with tm.assert_produces_warning(FutureWarning, match=msg):
res = df.all(bool_only=True, axis=0)
expected = Series([False, True], index=["B", "C"])
res = df.all(bool_only=True, axis=0)
expected = Series([True], index=["C"])
tm.assert_series_equal(res, expected)

# operating on a subset of columns should not produce a _larger_ Series
with tm.assert_produces_warning(FutureWarning, match=msg):
res = df[["B", "C"]].all(bool_only=True, axis=0)
res = df[["B", "C"]].all(bool_only=True, axis=0)
tm.assert_series_equal(res, expected)

with tm.assert_produces_warning(FutureWarning, match=msg):
assert not df.all(bool_only=True, axis=None)
assert df.all(bool_only=True, axis=None)

with tm.assert_produces_warning(FutureWarning, match=msg):
res = df.any(bool_only=True, axis=0)
expected = Series([True, True], index=["B", "C"])
res = df.any(bool_only=True, axis=0)
expected = Series([True], index=["C"])
tm.assert_series_equal(res, expected)

# operating on a subset of columns should not produce a _larger_ Series
with tm.assert_produces_warning(FutureWarning, match=msg):
res = df[["B", "C"]].any(bool_only=True, axis=0)
res = df[["C"]].any(bool_only=True, axis=0)
tm.assert_series_equal(res, expected)

with tm.assert_produces_warning(FutureWarning, match=msg):
assert df.any(bool_only=True, axis=None)
assert df.any(bool_only=True, axis=None)

@pytest.mark.parametrize("method", ["any", "all"])
def test_any_all_level_axis_none_raises(self, method):
Expand Down
9 changes: 3 additions & 6 deletions pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -795,17 +795,15 @@ def test_get_numeric_data(self, using_copy_on_write):
)

def test_get_bool_data(self, using_copy_on_write):
msg = "object-dtype columns with all-bool values"
mgr = create_mgr(
"int: int; float: float; complex: complex;"
"str: object; bool: bool; obj: object; dt: datetime",
item_shape=(3,),
)
mgr.iset(6, np.array([True, False, True], dtype=np.object_))

with tm.assert_produces_warning(FutureWarning, match=msg):
bools = mgr.get_bool_data()
tm.assert_index_equal(bools.items, Index(["bool", "dt"]))
bools = mgr.get_bool_data()
tm.assert_index_equal(bools.items, Index(["bool"]))
tm.assert_almost_equal(
mgr.iget(mgr.items.get_loc("bool")).internal_values(),
bools.iget(bools.items.get_loc("bool")).internal_values(),
Expand All @@ -824,8 +822,7 @@ def test_get_bool_data(self, using_copy_on_write):
)

# Check sharing
with tm.assert_produces_warning(FutureWarning, match=msg):
bools2 = mgr.get_bool_data(copy=True)
bools2 = mgr.get_bool_data(copy=True)
bools2.iset(0, np.array([False, True, False]))
if using_copy_on_write:
tm.assert_numpy_array_equal(
Expand Down

0 comments on commit bd9b7e5

Please sign in to comment.