Skip to content

Commit

Permalink
Fix read_parquet with list having nested struct (#2991)
Browse files Browse the repository at this point in the history
  • Loading branch information
cjermain committed Mar 31, 2022
1 parent 6b2e7b5 commit 12044c0
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 0 deletions.
5 changes: 5 additions & 0 deletions polars/polars-core/src/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -953,6 +953,11 @@ impl From<&ArrowDataType> for DataType {
ArrowDataType::Time64(_) | ArrowDataType::Time32(_) => DataType::Time,
#[cfg(feature = "dtype-categorical")]
ArrowDataType::Dictionary(_, _, _) => DataType::Categorical(None),
#[cfg(feature = "dtype-struct")]
ArrowDataType::Struct(fields) => {
let fields: Vec<Field> = fields.iter().map(|fld| fld.into()).collect();
DataType::Struct(fields)
}
ArrowDataType::Extension(name, _, _) if name == "POLARS_EXTENSION_TYPE" => {
#[cfg(feature = "object")]
{
Expand Down
15 changes: 15 additions & 0 deletions py-polars/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,3 +130,18 @@ def test_parquet_datetime() -> None:
f.seek(0)
read = pl.read_parquet(f)
assert read.frame_equal(df)


def test_nested_parquet() -> None:
    """Read back a parquet file whose column is a list of structs.

    A pandas frame with a single column ``a`` holding lists of ``{"b": int}``
    records is written to an in-memory parquet buffer and read back through
    ``pl.read_parquet(..., use_pyarrow=True)``. The resulting column must be
    a ``List`` whose inner dtype is a ``Struct``.
    """
    f = io.BytesIO()
    data = [
        {"a": [{"b": 0}]},
        {"a": [{"b": 1}, {"b": 2}]},
    ]
    df = pd.DataFrame(data)
    df.to_parquet(f)
    # Rewind before reading, as test_parquet_datetime does, so the read does
    # not depend on the reader repositioning the stream on its own.
    f.seek(0)

    read = pl.read_parquet(f, use_pyarrow=True)
    assert read.columns == ["a"]
    # Outer dtype is a list; its element dtype must be a struct.
    assert isinstance(read.dtypes[0], pl.datatypes.List)
    assert isinstance(read.dtypes[0].inner, pl.datatypes.Struct)

0 comments on commit 12044c0

Please sign in to comment.