Skip to content

Commit

Permalink
Backport PR #51766 on branch 2.0.x (CLN: Use type_mapper instead of manual conversion) (#51840)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl committed Mar 8, 2023
1 parent 13cd542 commit 463c5a9
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 55 deletions.
15 changes: 3 additions & 12 deletions pandas/io/feather_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,8 @@
from pandas.compat._optional import import_optional_dependency
from pandas.util._decorators import doc

from pandas import (
arrays,
get_option,
)
import pandas as pd
from pandas import get_option
from pandas.core.api import (
DataFrame,
RangeIndex,
Expand Down Expand Up @@ -173,11 +171,4 @@ def read_feather(
return pa_table.to_pandas(types_mapper=_arrow_dtype_mapping().get)

elif dtype_backend == "pyarrow":
return DataFrame(
{
col_name: arrays.ArrowExtensionArray(pa_col)
for col_name, pa_col in zip(
pa_table.column_names, pa_table.itercolumns()
)
}
)
return pa_table.to_pandas(types_mapper=pd.ArrowDtype)
13 changes: 3 additions & 10 deletions pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
from pandas.core.dtypes.generic import ABCIndex

from pandas import (
ArrowDtype,
DataFrame,
MultiIndex,
Series,
Expand Down Expand Up @@ -960,16 +961,8 @@ def read(self) -> DataFrame | Series:
pa_table = pyarrow_json.read_json(self.data)
if self.use_nullable_dtypes:
if get_option("mode.dtype_backend") == "pyarrow":
from pandas.arrays import ArrowExtensionArray

return DataFrame(
{
col_name: ArrowExtensionArray(pa_col)
for col_name, pa_col in zip(
pa_table.column_names, pa_table.itercolumns()
)
}
)
return pa_table.to_pandas(types_mapper=ArrowDtype)

elif get_option("mode.dtype_backend") == "pandas":
from pandas.io._util import _arrow_dtype_mapping

Expand Down
11 changes: 2 additions & 9 deletions pandas/io/orc.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
is_unsigned_integer_dtype,
)

from pandas.core.arrays import ArrowExtensionArray
import pandas as pd
from pandas.core.frame import DataFrame

from pandas.io.common import get_handle
Expand Down Expand Up @@ -99,14 +99,7 @@ def read_orc(
if use_nullable_dtypes:
dtype_backend = get_option("mode.dtype_backend")
if dtype_backend == "pyarrow":
df = DataFrame(
{
col_name: ArrowExtensionArray(pa_col)
for col_name, pa_col in zip(
pa_table.column_names, pa_table.itercolumns()
)
}
)
df = pa_table.to_pandas(types_mapper=pd.ArrowDtype)
else:
from pandas.io._util import _arrow_dtype_mapping

Expand Down
15 changes: 6 additions & 9 deletions pandas/io/parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
from pandas.errors import AbstractMethodError
from pandas.util._decorators import doc

import pandas as pd
from pandas import (
DataFrame,
MultiIndex,
arrays,
get_option,
)
from pandas.core.shared_docs import _shared_docs
Expand Down Expand Up @@ -250,14 +250,11 @@ def read(
if dtype_backend == "pandas":
result = pa_table.to_pandas(**to_pandas_kwargs)
elif dtype_backend == "pyarrow":
result = DataFrame(
{
col_name: arrays.ArrowExtensionArray(pa_col)
for col_name, pa_col in zip(
pa_table.column_names, pa_table.itercolumns()
)
}
)
# Incompatible types in assignment (expression has type
# "Type[ArrowDtype]", target has type overloaded function)
to_pandas_kwargs["types_mapper"] = pd.ArrowDtype # type: ignore[assignment] # noqa
result = pa_table.to_pandas(**to_pandas_kwargs)

if manager == "array":
result = result._as_manager("array", copy=False)
return result
Expand Down
9 changes: 2 additions & 7 deletions pandas/io/parsers/arrow_parser_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

from pandas.core.dtypes.inference import is_integer

import pandas as pd
from pandas import (
DataFrame,
arrays,
get_option,
)

Expand Down Expand Up @@ -153,12 +153,7 @@ def read(self) -> DataFrame:
self.kwds["use_nullable_dtypes"]
and get_option("mode.dtype_backend") == "pyarrow"
):
frame = DataFrame(
{
col_name: arrays.ArrowExtensionArray(pa_col)
for col_name, pa_col in zip(table.column_names, table.itercolumns())
}
)
frame = table.to_pandas(types_mapper=pd.ArrowDtype)
else:
frame = table.to_pandas()
return self._finalize_pandas_output(frame)
23 changes: 15 additions & 8 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -1034,14 +1034,7 @@ def test_read_use_nullable_types_pyarrow_config(self, pa, df_full):
df["bool_with_none"] = [True, None, True]

pa_table = pyarrow.Table.from_pandas(df)
expected = pd.DataFrame(
{
col_name: pd.arrays.ArrowExtensionArray(pa_column)
for col_name, pa_column in zip(
pa_table.column_names, pa_table.itercolumns()
)
}
)
expected = pa_table.to_pandas(types_mapper=pd.ArrowDtype)
# pyarrow infers datetimes as us instead of ns
expected["datetime"] = expected["datetime"].astype("timestamp[us][pyarrow]")
expected["datetime_with_nat"] = expected["datetime_with_nat"].astype(
Expand All @@ -1059,6 +1052,20 @@ def test_read_use_nullable_types_pyarrow_config(self, pa, df_full):
expected=expected,
)

def test_read_use_nullable_types_pyarrow_config_index(self, pa):
    """Round-trip a pyarrow-backed frame that carries a named, non-default index.

    The frame has one ``int64[pyarrow]`` column and an index ``[3, 4]`` named
    ``"test"``. With ``mode.dtype_backend`` set to ``"pyarrow"`` and
    ``use_nullable_dtypes=True`` passed as a read kwarg, ``check_round_trip``
    is given an untouched copy of the input as the expected result.
    """
    named_index = pd.Index([3, 4], name="test")
    frame = pd.DataFrame({"a": [1, 2]}, index=named_index, dtype="int64[pyarrow]")
    # Snapshot taken before the round trip so the comparison target is
    # independent of the object handed to the helper.
    want = frame.copy()
    reader_options = {"use_nullable_dtypes": True}

    with pd.option_context("mode.dtype_backend", "pyarrow"):
        check_round_trip(
            frame,
            engine=pa,
            read_kwargs=reader_options,
            expected=want,
        )


class TestParquetFastParquet(Base):
def test_basic(self, fp, df_full):
Expand Down

0 comments on commit 463c5a9

Please sign in to comment.