Skip to content

Commit

Permalink
python create from empty arrow tbl (#2557)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Feb 6, 2022
1 parent b77c963 commit 11ff23e
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 2 deletions.
1 change: 1 addition & 0 deletions py-polars/polars/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ def from_dicts(dicts: Sequence[Dict[str, Any]]) -> DataFrame:
return DataFrame._from_dicts(dicts)


# Note that we cannot overload because pyarrow has no stubs :(
def from_arrow(
a: Union["pa.Table", "pa.Array", "pa.ChunkedArray"], rechunk: bool = True
) -> Union[DataFrame, Series]:
Expand Down
9 changes: 7 additions & 2 deletions py-polars/polars/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,8 +437,13 @@ def arrow_to_pydf(
data_dict[name] = column

if len(data_dict) > 0:
batches = pa.table(data_dict).to_batches()
pydf = PyDataFrame.from_arrow_record_batches(batches)
tbl = pa.table(data_dict)

# path for table without rows that keeps datatype
if tbl.shape[0] == 0:
pydf = pli.DataFrame._from_pandas(tbl.to_pandas())._df
else:
pydf = PyDataFrame.from_arrow_record_batches(tbl.to_batches())
else:
pydf = pli.DataFrame([])._df
if rechunk:
Expand Down
1 change: 1 addition & 0 deletions py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ def _from_pandas(
-------
DataFrame
"""
# path for table without rows that keeps datatype
if data.shape[0] == 0:
return DataFrame([pli.Series(name, data[name]) for name in data.columns])

Expand Down
6 changes: 6 additions & 0 deletions py-polars/tests/test_interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,9 @@ def test_from_empty_pandas() -> None:
polars_df = pl.from_pandas(pandas_df)
assert polars_df.columns == ["A", "fruits"]
assert polars_df.dtypes == [pl.Float64, pl.Float64]


def test_from_empty_arrow() -> None:
    """Check that a zero-row Arrow table converts with its columns and dtypes intact."""
    # Build the empty Arrow table step by step: pandas frame -> pyarrow table.
    empty_pandas = pd.DataFrame({"a": [], "b": []})
    empty_arrow = pa.table(empty_pandas)

    converted = pl.from_arrow(empty_arrow)

    # `from_arrow` is annotated as returning a DataFrame or a Series, so the
    # type checker needs the ignores on the DataFrame-only attributes below.
    assert converted.columns == ["a", "b"]  # type: ignore
    assert converted.dtypes == [pl.Float64, pl.Float64]  # type: ignore

0 comments on commit 11ff23e

Please sign in to comment.