Skip to content

Commit

Permalink
fix[python]: init from empty arrow series (no data/not chunked) (#4792)
Browse files (browse the repository at this point in the history)
  • Loading branch information
alexander-beedie committed Sep 9, 2022
1 parent fa419a5 commit 04e3526
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 3 deletions.
2 changes: 2 additions & 0 deletions py-polars/polars/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ def arrow_to_pyseries(name: str, values: pa.Array, rechunk: bool = True) -> PySe
pys = PySeries.from_arrow(name, next(it))
for a in it:
pys.append(PySeries.from_arrow(name, a))
elif array.num_chunks == 0:
pys = PySeries.from_arrow(name, pa.array([], array.type))
else:
pys = PySeries.from_arrow(name, array.combine_chunks())

Expand Down
42 changes: 39 additions & 3 deletions py-polars/tests/unit/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,38 @@ def test_from_arrow() -> None:
"decimal1": pa.array([1, 2], pa.decimal128(2, 1)),
}
)
assert pl.from_arrow(tbl).shape == (2, 5)
expected_schema = {
"a": pl.Datetime("ms"),
"b": pl.Datetime("ms"),
"c": pl.Datetime("us"),
"d": pl.Datetime("ns"),
"decimal1": pl.Float64,
}
expected_data = [
(
datetime(1970, 1, 1, 0, 0, 1),
datetime(1970, 1, 1, 0, 0, 0, 1000),
datetime(1970, 1, 1, 0, 0, 0, 1),
datetime(1970, 1, 1, 0, 0),
1.0,
),
(
datetime(1970, 1, 1, 0, 0, 2),
datetime(1970, 1, 1, 0, 0, 0, 2000),
datetime(1970, 1, 1, 0, 0, 0, 2),
datetime(1970, 1, 1, 0, 0),
2.0,
),
]

df = pl.from_arrow(tbl)
assert df.schema == expected_schema
assert df.rows() == expected_data

empty_tbl = tbl[:0] # no rows
df = pl.from_arrow(empty_tbl)
assert df.schema == expected_schema
assert df.rows() == []


def test_dataframe_membership_operator() -> None:
Expand Down Expand Up @@ -813,8 +844,9 @@ def test_column_names() -> None:
"b": pa.array([1, 2, 3, 4, 5], pa.int64()),
}
)
df = pl.from_arrow(tbl)
assert df.columns == ["a", "b"]
for a in (tbl, tbl[:0]):
df = pl.from_arrow(a)
assert df.columns == ["a", "b"]


def test_lazy_functions() -> None:
Expand Down Expand Up @@ -1912,10 +1944,14 @@ def test_partition_by() -> None:
def test_list_of_list_of_struct() -> None:
    """Check that a list-of-list-of-struct Arrow column converts to polars.

    Builds a one-row Arrow table from Python records, converts it with
    ``pl.from_arrow`` and verifies both the row view and the dict view.
    Also verifies that a zero-row slice of the converted frame yields
    no records.
    """
    records = [{"list_of_list_of_struct": [[{"a": 1}, {"a": 2}]]}]
    arrow_table = pa.Table.from_pylist(records)

    frame = pl.from_arrow(arrow_table)
    # A single row whose only column holds the nested value.
    assert frame.rows() == [([[{"a": 1}, {"a": 2}]],)]
    # Converting back to dicts must reproduce the input records.
    assert frame.to_dicts() == records

    # NOTE(review): the `[:0]` slice is applied to the polars frame after
    # conversion, not to the Arrow table before it -- confirm whether
    # `pl.from_arrow(arrow_table[:0])` was the intended empty-input check.
    empty_frame = pl.from_arrow(arrow_table)[:0]
    assert empty_frame.to_dicts() == []


def test_concat_to_empty() -> None:
assert pl.concat([pl.DataFrame([]), pl.DataFrame({"a": [1]})]).to_dict(False) == {
Expand Down

0 comments on commit 04e3526

Please sign in to comment.