Skip to content

Commit

Permalink
Backport PR #52174 on branch 2.0.x (BUG: to_numeric converting StringArray to object or float64) (#52193)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl committed Mar 25, 2023
1 parent 2e74ace commit fb554ce
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 12 deletions.
8 changes: 6 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2321,10 +2321,14 @@ def maybe_convert_numeric(
if not seen.coerce_numeric:
raise type(err)(f"{err} at position {i}")

seen.saw_null()
floats[i] = NaN
mask[i] = 1

if allow_null_in_int:
seen.null_ = True
else:
seen.saw_null()
floats[i] = NaN

if seen.check_uint64_conflict():
return (values, None)

Expand Down
17 changes: 13 additions & 4 deletions pandas/core/tools/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
is_integer_dtype,
is_number,
is_numeric_dtype,
is_object_dtype,
is_scalar,
is_string_dtype,
needs_i8_conversion,
)
from pandas.core.dtypes.generic import (
Expand All @@ -32,6 +32,7 @@

import pandas as pd
from pandas.core.arrays import BaseMaskedArray
from pandas.core.arrays.string_ import StringDtype


def to_numeric(
Expand Down Expand Up @@ -191,6 +192,8 @@ def to_numeric(
else:
values = arg

orig_values = values

# GH33013: for IntegerArray & FloatingArray extract non-null values for casting
# save mask to reconstruct the full array after casting
mask: npt.NDArray[np.bool_] | None = None
Expand All @@ -215,17 +218,23 @@ def to_numeric(
values,
set(),
coerce_numeric=coerce_numeric,
convert_to_masked_nullable=dtype_backend is not lib.no_default,
convert_to_masked_nullable=dtype_backend is not lib.no_default
or isinstance(values_dtype, StringDtype),
)
except (ValueError, TypeError):
if errors == "raise":
raise
values = orig_values

if new_mask is not None:
# Remove unnecessary values, is expected later anyway and enables
# downcasting
values = values[~new_mask]
elif dtype_backend is not lib.no_default and new_mask is None:
elif (
dtype_backend is not lib.no_default
and new_mask is None
or isinstance(values_dtype, StringDtype)
):
new_mask = np.zeros(values.shape, dtype=np.bool_)

# attempt downcast only if the data has been successfully converted
Expand Down Expand Up @@ -260,7 +269,7 @@ def to_numeric(

# GH33013: for IntegerArray, BooleanArray & FloatingArray need to reconstruct
# masked array
if (mask is not None or new_mask is not None) and not is_object_dtype(values.dtype):
if (mask is not None or new_mask is not None) and not is_string_dtype(values.dtype):
if mask is None:
mask = new_mask
else:
Expand Down
30 changes: 24 additions & 6 deletions pandas/tests/tools/test_to_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,12 +723,12 @@ def test_precision_float_conversion(strrep):
@pytest.mark.parametrize(
"values, expected",
[
(["1", "2", None], Series([1, 2, np.nan])),
(["1", "2", "3"], Series([1, 2, 3])),
(["1", "2", 3], Series([1, 2, 3])),
(["1", "2", 3.5], Series([1, 2, 3.5])),
(["1", None, 3.5], Series([1, np.nan, 3.5])),
(["1", "2", "3.5"], Series([1, 2, 3.5])),
(["1", "2", None], Series([1, 2, np.nan], dtype="Int64")),
(["1", "2", "3"], Series([1, 2, 3], dtype="Int64")),
(["1", "2", 3], Series([1, 2, 3], dtype="Int64")),
(["1", "2", 3.5], Series([1, 2, 3.5], dtype="Float64")),
(["1", None, 3.5], Series([1, np.nan, 3.5], dtype="Float64")),
(["1", "2", "3.5"], Series([1, 2, 3.5], dtype="Float64")),
],
)
def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected):
Expand All @@ -738,6 +738,24 @@ def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected
tm.assert_series_equal(result, expected)


def test_to_numeric_from_nullable_string_coerce(nullable_string_dtype):
# GH#52146
values = ["a", "1"]
ser = Series(values, dtype=nullable_string_dtype)
result = to_numeric(ser, errors="coerce")
expected = Series([pd.NA, 1], dtype="Int64")
tm.assert_series_equal(result, expected)


def test_to_numeric_from_nullable_string_ignore(nullable_string_dtype):
# GH#52146
values = ["a", "1"]
ser = Series(values, dtype=nullable_string_dtype)
expected = ser.copy()
result = to_numeric(ser, errors="ignore")
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
"data, input_dtype, downcast, expected_dtype",
(
Expand Down

0 comments on commit fb554ce

Please sign in to comment.