Skip to content

Commit

Permalink
Backport PR #54768 on branch 2.1.x (Fix roundtripping with pyarrow schema) (#54773)
Browse files Browse the repository at this point in the history

Backport PR #54768: Fix roundtripping with pyarrow schema

Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
  • Loading branch information
meeseeksmachine and phofl committed Aug 27, 2023
1 parent 605aea2 commit d42fbed
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
3 changes: 2 additions & 1 deletion pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
missing as libmissing,
)
from pandas._libs.arrays import NDArrayBacked
from pandas._libs.lib import ensure_string_array
from pandas.compat import pa_version_under7p0
from pandas.compat.numpy import function as nv
from pandas.util._decorators import doc
Expand Down Expand Up @@ -221,7 +222,7 @@ def __from_arrow__(
arr = np.array([], dtype=object)
else:
arr = pyarrow.concat_arrays(chunks).to_numpy(zero_copy_only=False)
arr = lib.convert_nans_to_NA(arr)
arr = ensure_string_array(arr, na_value=libmissing.NA)
# Bypass validation inside StringArray constructor, see GH#47781
new_string_array = StringArray.__new__(StringArray)
NDArrayBacked.__init__(
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" test parquet compat """
import datetime
from decimal import Decimal
from io import BytesIO
import os
import pathlib
Expand All @@ -16,6 +17,7 @@
from pandas.compat.pyarrow import (
pa_version_under7p0,
pa_version_under8p0,
pa_version_under11p0,
pa_version_under13p0,
)

Expand Down Expand Up @@ -1111,6 +1113,18 @@ def test_string_inference(self, tmp_path, pa):
)
tm.assert_frame_equal(result, expected)

# Skipped when the pa_version_under11p0 compat flag is set.
@pytest.mark.skipif(pa_version_under11p0, reason="not supported before 11.0")
def test_roundtrip_decimal(self, tmp_path, pa):
# GH#54768
# Round-trip check: a string[pyarrow] column is written to parquet under an
# explicit decimal128 schema and then read back with read_parquet.
# NOTE(review): the import below rebinds ``pa``, shadowing the ``pa``
# argument of this test; from here on ``pa`` is the pyarrow module.
import pyarrow as pa

path = tmp_path / "decimal.p"
df = pd.DataFrame({"a": [Decimal("123.00")]}, dtype="string[pyarrow]")
# decimal128(5) passes a precision only -- presumably the scale defaults to 0,
# which would explain the "123" expected below; confirm against pyarrow docs.
df.to_parquet(path, schema=pa.schema([("a", pa.decimal128(5))]))
# Read back without passing an engine or dtype arguments.
result = read_parquet(path)
# The value is expected as the string "123" (no fractional digits) with
# dtype string[python], not the string[pyarrow] dtype that was written.
expected = pd.DataFrame({"a": ["123"]}, dtype="string[python]")
tm.assert_frame_equal(result, expected)


class TestParquetFastParquet(Base):
def test_basic(self, fp, df_full):
Expand Down

0 comments on commit d42fbed

Please sign in to comment.