Skip to content

Commit

Permalink
fix from empty pandas (#2757)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Feb 24, 2022
1 parent 207efcd commit 223c9b9
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 5 deletions.
11 changes: 6 additions & 5 deletions py-polars/polars/internals/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
_DOCUMENTING = True

from polars._html import NotebookFormatter
from polars.datatypes import Boolean, DataType, UInt32, py_type_to_dtype
from polars.datatypes import Boolean, DataType, UInt32, Utf8, py_type_to_dtype
from polars.utils import (
_prepare_row_count_args,
_process_null_values,
Expand Down Expand Up @@ -360,12 +360,13 @@ def _from_pandas(
"""
# path for table without rows that keeps datatype
if data.shape[0] == 0:
# We do a loop and materialize the series.
# There can be series of len != null due to pandas indexes.
series = []
for name in data.columns:
col = pli.Series(name, data[name])
if len(col) == 0:
pd_series = data[name]
if pd_series.dtype == np.dtype("O"):
series.append(pli.Series(name, [], dtype=Utf8))
else:
col = pli.Series(name, pd_series)
series.append(pli.Series(name, col))
return DataFrame(series)

Expand Down
12 changes: 12 additions & 0 deletions py-polars/tests/test_interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,14 @@ def test_from_empty_pandas() -> None:
assert polars_df.dtypes == [pl.Float64, pl.Float64]


def test_from_empty_pandas_strings() -> None:
    """Check that a row-less pandas frame keeps per-column dtypes in polars.

    Column "a" is cast to ``str`` and column "b" to ``float`` before the
    conversion; the resulting polars frame must report Utf8 and Float64.
    """
    pandas_frame = pd.DataFrame(columns=["a", "b"])
    # Cast each (empty) column to its target dtype, in column order.
    for column, target in (("a", str), ("b", float)):
        pandas_frame[column] = pandas_frame[column].astype(target)

    polars_frame = pl.from_pandas(pandas_frame)
    expected_dtypes = [pl.Utf8, pl.Float64]
    assert polars_frame.dtypes == expected_dtypes


def test_from_empty_arrow() -> None:
df = pl.from_arrow(pa.table(pd.DataFrame({"a": [], "b": []})))
assert df.columns == ["a", "b"] # type: ignore
Expand All @@ -240,5 +248,9 @@ def test_from_empty_arrow() -> None:
df1 = pd.DataFrame(columns=["b"], dtype=float)
tbl = pa.Table.from_pandas(df1)
out = pl.from_arrow(tbl)
assert out.columns == ["b", "__index_level_0__"] # type: ignore
assert out.dtypes == [pl.Float64, pl.Utf8] # type: ignore
tbl = pa.Table.from_pandas(df1, preserve_index=False)
out = pl.from_arrow(tbl)
assert out.columns == ["b"] # type: ignore
assert out.dtypes == [pl.Float64] # type: ignore

0 comments on commit 223c9b9

Please sign in to comment.