From 9cebdf8df1e0cda6f2e87b9d49cdaec23d3a7cb0 Mon Sep 17 00:00:00 2001 From: Mafalda Matias Date: Wed, 3 Apr 2024 11:40:21 +0000 Subject: [PATCH 1/5] Fix #56844: function mask handling None values as condition This commit addresses an issue where using pandas.NA in conjunction with the mask() method resulted in unexpected behavior. The problem arose when comparing a Series containing pandas.NA with a condition within mask(), causing inconsistencies in the output. This fix ensures that pandas.NA behaves consistently with False in logical operations within mask(). --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/generic.py | 3 +++ pandas/tests/frame/indexing/test_mask.py | 8 ++++++++ 3 files changed, 12 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4debd41de213f..abcf26817960b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -329,6 +329,7 @@ Performance improvements Bug fixes ~~~~~~~~~ - Fixed bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`) +- Fixed bug in :meth:`DataFrame.mask` and :meth:`Series.mask` where missing values (``pd.NA``) in ``cond`` were not treated as ``False``. (:issue:`56844`) - Fixed bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. 
(:issue:`57926`) - Fixed bug in :meth:`DataFrame.cumsum` which was raising ``IndexError`` if dtype is ``timedelta64[ns]`` (:issue:`57956`) - Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 99462917599e1..a82ef4775ec7f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9960,6 +9960,9 @@ def mask( cond = common.apply_if_callable(cond, self) other = common.apply_if_callable(other, self) + if isinstance(cond, ABCDataFrame | ABCSeries): + cond = cond.fillna(False) + # see gh-21891 if not hasattr(cond, "__invert__"): cond = np.array(cond) diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py index 264e27c9c122e..6424fcf52ea67 100644 --- a/pandas/tests/frame/indexing/test_mask.py +++ b/pandas/tests/frame/indexing/test_mask.py @@ -150,3 +150,11 @@ def test_mask_inplace_no_other(): df.mask(cond, inplace=True) expected = DataFrame({"a": [np.nan, 2], "b": ["x", np.nan]}) tm.assert_frame_equal(df, expected) + + +def test_mask_with_NA(): + df = DataFrame({"A": [0, 1, 2]}) + cond = Series([-1, 1, None]).convert_dtypes() < 0 + result = df.mask(cond, other=100) + expected = DataFrame({"A": [100, 1, 2]}) + tm.assert_frame_equal(result, expected) From a0369c79560b20bf9c1c92529f232a222b1360f9 Mon Sep 17 00:00:00 2001 From: mafaldam <106393070+mafaldam@users.noreply.github.com> Date: Wed, 17 Apr 2024 16:49:51 +0100 Subject: [PATCH 2/5] Update pandas/core/generic.py Co-authored-by: Xiao Yuan --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a82ef4775ec7f..d32180322bc5d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9960,7 +9960,7 @@ def mask( cond = common.apply_if_callable(cond, self) other = common.apply_if_callable(other, self) - if isinstance(cond, ABCDataFrame | ABCSeries): + if 
isinstance(cond, (ABCDataFrame, ABCSeries)): cond = cond.fillna(False) # see gh-21891 From 4f75357e97bb92f22eb6284331662d9af2f307bc Mon Sep 17 00:00:00 2001 From: Mafalda Matias Date: Wed, 3 Apr 2024 11:40:21 +0000 Subject: [PATCH 3/5] Fix #56844: function mask handling None values as condition This commit addresses an issue where using pandas.NA in conjunction with the mask() method resulted in unexpected behavior. The problem arose when comparing a Series containing pandas.NA with a condition within mask(), causing inconsistencies in the output. This fix ensures that pandas.NA behaves consistently with False in logical operations within mask(). --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/generic.py | 3 +++ pandas/tests/frame/indexing/test_mask.py | 8 ++++++++ 3 files changed, 12 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 7fddb36185789..5d1668514c0a6 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -482,6 +482,7 @@ Other - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`) - Bug in :func:`eval` where the names of the :class:`Series` were not preserved when using ``engine="numexpr"``. (:issue:`10239`) - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`) +- Bug in :meth:`DataFrame.mask` and :meth:`Series.mask` where missing values (``pd.NA``) in ``cond`` were not treated as ``False``. (:issue:`56844`) - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`) - Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which did not allow to use ``tan`` function. 
(:issue:`55091`) - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3da047eaa2d4e..57610ec1016de 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9999,6 +9999,9 @@ def mask( cond = common.apply_if_callable(cond, self) other = common.apply_if_callable(other, self) + if isinstance(cond, ABCDataFrame | ABCSeries): + cond = cond.fillna(False) + # see gh-21891 if not hasattr(cond, "__invert__"): cond = np.array(cond) diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py index 264e27c9c122e..6424fcf52ea67 100644 --- a/pandas/tests/frame/indexing/test_mask.py +++ b/pandas/tests/frame/indexing/test_mask.py @@ -150,3 +150,11 @@ def test_mask_inplace_no_other(): df.mask(cond, inplace=True) expected = DataFrame({"a": [np.nan, 2], "b": ["x", np.nan]}) tm.assert_frame_equal(df, expected) + + +def test_mask_with_NA(): + df = DataFrame({"A": [0, 1, 2]}) + cond = Series([-1, 1, None]).convert_dtypes() < 0 + result = df.mask(cond, other=100) + expected = DataFrame({"A": [100, 1, 2]}) + tm.assert_frame_equal(result, expected) From 3b20b8460945788b71ef7e918a85f4c3574c0b04 Mon Sep 17 00:00:00 2001 From: mafaldam <106393070+mafaldam@users.noreply.github.com> Date: Wed, 17 Apr 2024 16:49:51 +0100 Subject: [PATCH 4/5] Update pandas/core/generic.py Co-authored-by: Xiao Yuan --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 57610ec1016de..a61b42712e999 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9999,7 +9999,7 @@ def mask( cond = common.apply_if_callable(cond, self) other = common.apply_if_callable(other, self) - if isinstance(cond, ABCDataFrame | ABCSeries): + if isinstance(cond, (ABCDataFrame, ABCSeries)): cond = 
cond.fillna(False) # see gh-21891 From d005ecc7de0696bf52e0846c912ac5e2c782e851 Mon Sep 17 00:00:00 2001 From: mafaldam <106393070+mafaldam@users.noreply.github.com> Date: Wed, 22 May 2024 17:22:11 +0100 Subject: [PATCH 5/5] Update pandas/tests/frame/indexing/test_mask.py Co-authored-by: William Ayd --- pandas/tests/frame/indexing/test_mask.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py index 6424fcf52ea67..014f038158033 100644 --- a/pandas/tests/frame/indexing/test_mask.py +++ b/pandas/tests/frame/indexing/test_mask.py @@ -154,7 +154,8 @@ def test_mask_inplace_no_other(): def test_mask_with_NA(): df = DataFrame({"A": [0, 1, 2]}) - cond = Series([-1, 1, None]).convert_dtypes() < 0 + cond = Series([True, False, pd.NA], dtype=pd.BooleanDtype()) + result = df.mask(cond, other=100) expected = DataFrame({"A": [100, 1, 2]}) tm.assert_frame_equal(result, expected)