Skip to content

Commit

Permalink
BUG: translate losing object dtype with new string dtype (#56152)
Browse files Browse the repository at this point in the history
* BUG: translate losing object dtype with new string dtype

* Fix

* Update accessor.py
  • Loading branch information
phofl committed Nov 26, 2023
1 parent 45b937d commit 99bcf6b
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 12 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.1.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Bug fixes
- Bug in :meth:`Index.__getitem__` returning wrong result for Arrow dtypes and negative stepsize (:issue:`55832`)
- Fixed bug in :meth:`DataFrame.__setitem__` casting :class:`Index` with object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
- Fixed bug in :meth:`Index.insert` casting object-dtype to PyArrow backed strings when ``infer_string`` option is set (:issue:`55638`)
-
- Fixed bug in :meth:`Series.str.translate` losing object dtype when ``infer_string`` option is set (:issue:`56152`)

.. ---------------------------------------------------------------------------
.. _whatsnew_214.other:
Expand Down
22 changes: 12 additions & 10 deletions pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ def _wrap_result(
fill_value=np.nan,
returns_string: bool = True,
returns_bool: bool = False,
dtype=None,
):
from pandas import (
Index,
Expand Down Expand Up @@ -379,29 +380,29 @@ def cons_row(x):
out = out.get_level_values(0)
return out
else:
return Index(result, name=name)
return Index(result, name=name, dtype=dtype)
else:
index = self._orig.index
# This is a mess.
dtype: DtypeObj | str | None
_dtype: DtypeObj | str | None = dtype
vdtype = getattr(result, "dtype", None)
if self._is_string:
if is_bool_dtype(vdtype):
dtype = result.dtype
_dtype = result.dtype
elif returns_string:
dtype = self._orig.dtype
_dtype = self._orig.dtype
else:
dtype = vdtype
else:
dtype = vdtype
_dtype = vdtype
elif vdtype is not None:
_dtype = vdtype

if expand:
cons = self._orig._constructor_expanddim
result = cons(result, columns=name, index=index, dtype=dtype)
result = cons(result, columns=name, index=index, dtype=_dtype)
else:
# Must be a Series
cons = self._orig._constructor
result = cons(result, name=name, index=index, dtype=dtype)
result = cons(result, name=name, index=index, dtype=_dtype)
result = result.__finalize__(self._orig, method="str")
if name is not None and result.ndim == 1:
# __finalize__ might copy over the original name, but we may
Expand Down Expand Up @@ -2317,7 +2318,8 @@ def translate(self, table):
dtype: object
"""
result = self._data.array._str_translate(table)
return self._wrap_result(result)
dtype = object if self._data.dtype == "object" else None
return self._wrap_result(result, dtype=dtype)

@forbid_nonstring_types(["bytes"])
def count(self, pat, flags: int = 0):
Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/strings/test_find_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest

from pandas.errors import PerformanceWarning
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
Expand Down Expand Up @@ -893,7 +894,10 @@ def test_find_nan(any_string_dtype):
# --------------------------------------------------------------------------------------


def test_translate(index_or_series, any_string_dtype):
@pytest.mark.parametrize(
"infer_string", [False, pytest.param(True, marks=td.skip_if_no("pyarrow"))]
)
def test_translate(index_or_series, any_string_dtype, infer_string):
obj = index_or_series(
["abcdefg", "abcc", "cdddfg", "cdefggg"], dtype=any_string_dtype
)
Expand Down

0 comments on commit 99bcf6b

Please sign in to comment.