Skip to content

Commit

Permalink
Backport PR #52872 on branch 2.0.x (BUG: convert_dtypes ignoring conv…
Browse files Browse the repository at this point in the history
…ert keywords for pyarrow backend) (#52959)

Backport PR #52872: BUG: convert_dtypes ignoring convert keywords for pyarrow backend

Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
  • Loading branch information
meeseeksmachine and phofl committed Apr 27, 2023
1 parent 91757c5 commit 79abb44
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 14 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Bug fixes
~~~~~~~~~
- Bug in :func:`api.interchange.from_dataframe` was returning :class:`DataFrame`'s of incorrect sizes when called on slices (:issue:`52824`)
- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`)
- Bug in :meth:`DataFrame.convert_dtypes` ignoring ``convert_*`` keywords when set to ``False`` with ``dtype_backend="pyarrow"`` (:issue:`52872`)
- Bug in :meth:`pd.array` raising for ``NumPy`` array and ``pa.large_string`` or ``pa.large_binary`` (:issue:`52590`)
-

Expand Down
37 changes: 23 additions & 14 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1122,20 +1122,29 @@ def convert_dtypes(
from pandas.core.arrays.arrow.dtype import ArrowDtype
from pandas.core.arrays.string_ import StringDtype

if isinstance(inferred_dtype, PandasExtensionDtype):
base_dtype = inferred_dtype.base
elif isinstance(inferred_dtype, (BaseMaskedDtype, ArrowDtype)):
base_dtype = inferred_dtype.numpy_dtype
elif isinstance(inferred_dtype, StringDtype):
base_dtype = np.dtype(str)
else:
# error: Incompatible types in assignment (expression has type
# "Union[str, Any, dtype[Any], ExtensionDtype]",
# variable has type "Union[dtype[Any], ExtensionDtype, None]")
base_dtype = inferred_dtype # type: ignore[assignment]
pa_type = to_pyarrow_type(base_dtype)
if pa_type is not None:
inferred_dtype = ArrowDtype(pa_type)
assert not isinstance(inferred_dtype, str)

if (
(convert_integer and inferred_dtype.kind in "iu")
or (convert_floating and inferred_dtype.kind in "fc")
or (convert_boolean and inferred_dtype.kind == "b")
or (convert_string and isinstance(inferred_dtype, StringDtype))
or (
inferred_dtype.kind not in "iufcb"
and not isinstance(inferred_dtype, StringDtype)
)
):
if isinstance(inferred_dtype, PandasExtensionDtype):
base_dtype = inferred_dtype.base
elif isinstance(inferred_dtype, (BaseMaskedDtype, ArrowDtype)):
base_dtype = inferred_dtype.numpy_dtype
elif isinstance(inferred_dtype, StringDtype):
base_dtype = np.dtype(str)
else:
base_dtype = inferred_dtype
pa_type = to_pyarrow_type(base_dtype)
if pa_type is not None:
inferred_dtype = ArrowDtype(pa_type)

# error: Incompatible return value type (got "Union[str, Union[dtype[Any],
# ExtensionDtype]]", expected "Union[dtype[Any], ExtensionDtype]")
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/frame/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,17 @@ def test_pyarrow_engine_lines_false(self):
)
with pytest.raises(ValueError, match=msg):
df.convert_dtypes(dtype_backend="numpy")

def test_pyarrow_backend_no_convesion(self):
    """Check that disabling every ``convert_*`` flag leaves the frame as-is.

    The frame is run through ``convert_dtypes`` with the pyarrow backend
    and all four conversion keywords set to ``False``; the result is
    compared against an untouched copy of the input.
    """
    # GH#52872
    pytest.importorskip("pyarrow")
    frame = pd.DataFrame({"a": [1, 2], "b": 1.5, "c": True, "d": "x"})
    untouched = frame.copy()
    # Every conversion switch is turned off for this call.
    disabled = {
        "convert_floating": False,
        "convert_integer": False,
        "convert_boolean": False,
        "convert_string": False,
    }
    converted = frame.convert_dtypes(dtype_backend="pyarrow", **disabled)
    tm.assert_frame_equal(converted, untouched)

0 comments on commit 79abb44

Please sign in to comment.