Skip to content

Commit

Permalink
Backport PR #52076 on branch 2.0.x (BUG: Fix regression when using Series with arrow string array) (#52121)
Browse files
Browse the repository at this point in the history

BUG: Fix regression when using Series with arrow string array (#52076)

* BUG: Fix regression when using Series with arrow string array

* Move
  • Loading branch information
phofl committed Mar 22, 2023
1 parent d211998 commit 22e7c08
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 1 deletion.
1 change: 0 additions & 1 deletion pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,6 @@ cpdef ndarray[object] ensure_string_array(
out = arr.astype(str).astype(object)
out[arr.isna()] = na_value
return out

arr = arr.to_numpy()
elif not util.is_array(arr):
arr = np.array(arr, dtype="object")
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,9 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
result[na_values] = libmissing.NA

else:
if hasattr(scalars, "type"):
# pyarrow array
scalars = np.array(scalars)
# convert non-na-likes to str, and nan-likes to StringDtype().na_value
result = lib.ensure_string_array(scalars, na_value=libmissing.NA, copy=copy)

Expand Down
2 changes: 2 additions & 0 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,8 @@ def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False)
result = scalars._data
result = lib.ensure_string_array(result, copy=copy, convert_na_value=False)
return cls(pa.array(result, mask=na_values, type=pa.string()))
elif isinstance(scalars, (pa.Array, pa.ChunkedArray)):
return cls(pc.cast(scalars, pa.string()))

# convert non-na-likes to str
result = lib.ensure_string_array(scalars, copy=copy)
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2324,3 +2324,11 @@ def test_boolean_reduce_series_all_null(all_boolean_reductions, skipna):
else:
expected = pd.NA
assert result is expected


@pytest.mark.parametrize("dtype", ["string", "string[pyarrow]"])
def test_series_from_string_array(dtype):
    """Check Series construction from a pyarrow array with a string dtype.

    A Series built directly from a ``pa.array`` of strings must equal one
    built from the same data wrapped in ``ArrowExtensionArray``, for each
    parametrized string dtype.
    """
    words = "the quick brown fox".split()
    pa_strings = pa.array(words)

    # Same pyarrow data, passed once raw and once wrapped in the extension array.
    result = pd.Series(pa_strings, dtype=dtype)
    expected = pd.Series(ArrowExtensionArray(pa_strings), dtype=dtype)

    tm.assert_series_equal(result, expected)

0 comments on commit 22e7c08

Please sign in to comment.