Skip to content

Commit 009e548

Browse files
authored
BUG: Series.replace NA->NaN (#62487)
1 parent dfe6dc8 commit 009e548

File tree

6 files changed

+129
-4
lines changed

6 files changed

+129
-4
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1219,10 +1219,11 @@ Other
12191219
- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)
12201220
- Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`)
12211221
- Deprecated the keyword ``check_datetimelike_compat`` in :meth:`testing.assert_frame_equal` and :meth:`testing.assert_series_equal` (:issue:`55638`)
1222+
- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when trying to replace :class:`NA` values in a :class:`Float64Dtype` object with ``np.nan``; this now works with ``pd.set_option("mode.nan_is_na", False)`` and is irrelevant otherwise (:issue:`55127`)
1223+
- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` when trying to replace ``np.nan`` values in an :class:`Int64Dtype` object with :class:`NA`; this is now a no-op with ``pd.set_option("mode.nan_is_na", False)`` and is irrelevant otherwise (:issue:`51237`)
12221224
- Fixed bug in the :meth:`Series.rank` with object dtype and extremely small float values (:issue:`62036`)
12231225
- Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`)
12241226
- Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`)
1225-
-
12261227

12271228
.. ***DO NOT USE THIS SECTION***
12281229

pandas/core/arrays/masked.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,9 @@ def __setitem__(self, key, value) -> None:
312312
key = check_array_indexer(self, key)
313313

314314
if is_scalar(value):
315-
if is_valid_na_for_dtype(value, self.dtype):
315+
if is_valid_na_for_dtype(value, self.dtype) and not (
316+
lib.is_float(value) and not is_nan_na()
317+
):
316318
self._mask[key] = True
317319
else:
318320
value = self._validate_setitem_value(value)

pandas/core/missing.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515

1616
import numpy as np
1717

18+
from pandas._config import is_nan_na
19+
1820
from pandas._libs import (
1921
NaT,
2022
algos,
@@ -37,7 +39,11 @@
3739
is_object_dtype,
3840
needs_i8_conversion,
3941
)
40-
from pandas.core.dtypes.dtypes import DatetimeTZDtype
42+
from pandas.core.dtypes.dtypes import (
43+
ArrowDtype,
44+
BaseMaskedDtype,
45+
DatetimeTZDtype,
46+
)
4147
from pandas.core.dtypes.missing import (
4248
is_valid_na_for_dtype,
4349
isna,
@@ -86,6 +92,31 @@ def mask_missing(arr: ArrayLike, value) -> npt.NDArray[np.bool_]:
8692
"""
8793
dtype, value = infer_dtype_from(value)
8894

95+
if (
96+
isinstance(arr.dtype, (BaseMaskedDtype, ArrowDtype))
97+
and lib.is_float(value)
98+
and np.isnan(value)
99+
and not is_nan_na()
100+
):
101+
# TODO: this should be done in an EA method?
102+
if arr.dtype.kind == "f":
103+
# GH#55127
104+
if isinstance(arr.dtype, BaseMaskedDtype):
105+
# error: "ExtensionArray" has no attribute "_data" [attr-defined]
106+
mask = np.isnan(arr._data) & ~arr.isna() # type: ignore[attr-defined,operator]
107+
return mask
108+
else:
109+
# error: "ExtensionArray" has no attribute "_pa_array" [attr-defined]
110+
import pyarrow.compute as pc
111+
112+
mask = pc.is_nan(arr._pa_array).fill_null(False).to_numpy() # type: ignore[attr-defined]
113+
return mask
114+
115+
elif arr.dtype.kind in "iu":
116+
# GH#51237
117+
mask = np.zeros(arr.shape, dtype=bool)
118+
return mask
119+
89120
if isna(value):
90121
return isna(arr)
91122

pandas/tests/arrays/masked/test_indexing.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
import pandas.util._test_decorators as td
7+
68
import pandas as pd
79

810

@@ -58,3 +60,47 @@ def test_setitem_validation_scalar_int(self, invalid, any_int_ea_dtype):
5860
def test_setitem_validation_scalar_float(self, invalid, float_ea_dtype):
5961
arr = pd.array([1, 2, None], dtype=float_ea_dtype)
6062
self._check_setitem_invalid(arr, invalid)
63+
64+
65+
@pytest.mark.parametrize(
    "dtype",
    [
        "Float64",
        pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow")),
    ],
)
@pytest.mark.parametrize("indexer", [1, [1], [False, True, False]])
def test_setitem_nan_in_float64_array(dtype, indexer, using_nan_is_na):
    # Assign np.nan to position 1 of a nullable float array through a scalar,
    # list, or boolean-mask indexer, then check what ends up stored there.
    arr = pd.array([0, pd.NA, 1], dtype=dtype)
    arr[indexer] = np.nan

    stored = arr[1]
    if using_nan_is_na:
        # NaN is treated as missing, so the slot holds NA.
        assert stored is pd.NA
    else:
        # NaN is distinct from NA, so a real NaN must be stored.
        assert np.isnan(stored)
81+
82+
83+
@pytest.mark.parametrize(
    "dtype",
    [
        "Int64",
        pytest.param("int64[pyarrow]", marks=td.skip_if_no("pyarrow")),
    ],
)
@pytest.mark.parametrize("indexer", [1, [1], [False, True, False]])
def test_setitem_nan_in_int64_array(dtype, indexer, using_nan_is_na):
    # Assign np.nan to position 1 of a nullable integer array through a scalar,
    # list, or boolean-mask indexer.
    arr = pd.array([0, 1, 2], dtype=dtype)

    if using_nan_is_na:
        # NaN is treated as missing, so the assignment stores NA.
        arr[indexer] = np.nan
        assert arr[1] is pd.NA
        return

    # NaN is distinct from NA here: the assignment is expected to raise, with
    # an exception type and message that depend on the backing array.
    if dtype == "int64[pyarrow]":
        import pyarrow as pa

        err = pa.lib.ArrowInvalid
        msg = "Could not convert nan with type float"
    else:
        err = TypeError
        msg = "Invalid value 'nan' for dtype 'Int64'"

    with pytest.raises(err, match=msg):
        arr[indexer] = np.nan
    # The failed assignment must leave the original value in place.
    assert arr[1] == 1

pandas/tests/frame/methods/test_replace.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
import numpy as np
77
import pytest
88

9+
import pandas.util._test_decorators as td
10+
911
import pandas as pd
1012
from pandas import (
1113
DataFrame,
@@ -1430,6 +1432,49 @@ def test_replace_with_nil_na(self):
14301432
result = ser.replace("nil", "anything else")
14311433
tm.assert_frame_equal(expected, result)
14321434

1435+
@pytest.mark.parametrize(
    "dtype",
    [
        "Float64",
        pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow")),
    ],
)
def test_replace_na_to_nan_nullable_floats(self, dtype, using_nan_is_na):
    # GH#55127
    # Column 0 is built from plain floats containing NaN; column 1 is a
    # nullable float Series containing NA.
    df = DataFrame({0: [1, np.nan, 1], 1: Series([0, pd.NA, 1], dtype=dtype)})

    result = df.replace(pd.NA, np.nan)

    if not using_nan_is_na:
        # NaN is distinct from NA: the NA in column 1 must become a real NaN.
        expected = DataFrame(
            {0: [1, np.nan, 1], 1: Series([0, np.nan, 1], dtype=dtype)}
        )
        assert np.isnan(expected.loc[1, 1])
    else:
        # NOTE(review): ``expected`` is ``result`` itself, so the comparison
        # below cannot fail in this branch; it only checks that ``replace``
        # ran without raising. Presumably the result should equal ``df`` --
        # TODO confirm and tighten.
        expected = result

    tm.assert_frame_equal(result, expected)
1457+
1458+
@pytest.mark.parametrize(
    "dtype",
    [
        "Int64",
        pytest.param("int64[pyarrow]", marks=td.skip_if_no("pyarrow")),
    ],
)
def test_replace_nan_nullable_ints(self, dtype, using_nan_is_na):
    # GH#51237 with nan_is_na=False, replacing NaN should be a no-op here
    ser = Series([1, 2, None], dtype=dtype)

    result = ser.replace(np.nan, -1)

    # When np.nan is equivalent to pd.NA the missing entry is replaced;
    # otherwise the Series is expected to come back unchanged.
    expected = Series([1, 2, -1], dtype=dtype) if using_nan_is_na else ser
    tm.assert_series_equal(result, expected)
1477+
14331478

14341479
class TestDataFrameReplaceRegex:
14351480
@pytest.mark.parametrize(

pandas/tests/series/methods/test_convert_dtypes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ def test_convert_dtypes(
246246
with pytest.raises(TypeError, match="Invalid value"):
247247
result[result.notna()] = np.nan
248248
else:
249-
result[result.notna()] = np.nan
249+
result[result.notna()] = pd.NA
250250

251251
# Make sure original not changed
252252
tm.assert_series_equal(series, copy)

0 commit comments

Comments
 (0)