Skip to content

Commit

Permalink
fix from_pandas object null array (#3733)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 18, 2022
1 parent 2b15038 commit dec2a88
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 8 deletions.
12 changes: 6 additions & 6 deletions py-polars/polars/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,14 +297,14 @@ def _pandas_series_to_arrow(
"""
dtype = values.dtype
if dtype == "object" and len(values) > 0:
if isinstance(_get_first_non_none(values.values), str): # type: ignore
first_non_none = _get_first_non_none(values.values) # type: ignore

if isinstance(first_non_none, str):
return pa.array(values, pa.large_utf8(), from_pandas=nan_to_none)
if first_non_none is None:
return pa.nulls(min_len, pa.large_utf8())

# array is null array, we set to a float64 array
if values.values[0] is None and min_len is not None:
return pa.nulls(min_len, pa.float64())
else:
return pa.array(values, from_pandas=nan_to_none)
return pa.array(values, from_pandas=nan_to_none)
else:
return pa.array(values, from_pandas=nan_to_none)

Expand Down
5 changes: 3 additions & 2 deletions py-polars/tests/test_interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,14 +181,15 @@ def test_arrow_list_chunked_array() -> None:


def test_from_pandas_null() -> None:
# null column is an object dtype, so pl.Utf8 is most close
df = pd.DataFrame([{"a": None}, {"a": None}])
out = pl.DataFrame(df)
assert out.dtypes == [pl.Float64]
assert out.dtypes == [pl.Utf8]
assert out["a"][0] is None

df = pd.DataFrame([{"a": None, "b": 1}, {"a": None, "b": 2}])
out = pl.DataFrame(df)
assert out.dtypes == [pl.Float64, pl.Int64]
assert out.dtypes == [pl.Utf8, pl.Int64]


def test_from_pandas_nested_list() -> None:
Expand Down

0 comments on commit dec2a88

Please sign in to comment.