Skip to content

Commit

Permalink
fix reading empty parquet (#2615)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Feb 11, 2022
1 parent c65a6d5 commit e4d6f6d
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 3 deletions.
14 changes: 11 additions & 3 deletions polars/polars-io/src/parquet/read_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::parquet::predicates::collect_statistics;
use crate::predicates::{apply_predicate, arrow_schema_to_empty_df, PhysicalIoExpr};
use crate::utils::apply_projection;
use crate::RowCount;
use arrow::array::new_empty_array;
use arrow::io::parquet::read;
use arrow::io::parquet::read::{to_deserializer, FileMetaData};
use polars_core::prelude::*;
Expand Down Expand Up @@ -85,8 +86,11 @@ pub fn read_parquet<R: MmapBytesReader>(
remaining_rows,
Some(chunk_size),
)?;

Series::try_from((field.name.as_str(), iter.next().unwrap()?))
let arr = match iter.next() {
Some(arr) => arr?,
None => Arc::from(new_empty_array(field.data_type.clone())),
};
Series::try_from((field.name.as_str(), arr))
})
.collect::<Result<Vec<_>>>()
})?
Expand All @@ -99,7 +103,11 @@ pub fn read_parquet<R: MmapBytesReader>(
let mut iter =
to_deserializer(columns, field.clone(), remaining_rows, Some(chunk_size))?;

Series::try_from((field.name.as_str(), iter.next().unwrap()?))
let arr = match iter.next() {
Some(arr) => arr?,
None => Arc::from(new_empty_array(field.data_type.clone())),
};
Series::try_from((field.name.as_str(), arr))
})
.collect::<Result<Vec<_>>>()?
};
Expand Down
Binary file added py-polars/null.parquet
Binary file not shown.
7 changes: 7 additions & 0 deletions py-polars/tests/lazy_io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,10 @@ def test_categorical_parquet_statistics() -> None:
.collect()
)
assert df.shape == (4, 3)


def test_null_parquet() -> None:
    """Round-trip a zero-row DataFrame through a parquet file on disk.

    Regression test for reading an empty parquet file (#2615): a frame with a
    single empty Int8 column is written to parquet, read back, and must compare
    equal to the original frame.
    """
    import os
    import tempfile

    df = pl.DataFrame([pl.Series("foo", [], dtype=pl.Int8)])
    # Write into a throwaway directory instead of the current working
    # directory so the test leaves no "null.parquet" artifact behind.
    with tempfile.TemporaryDirectory() as tmp_dir:
        path = os.path.join(tmp_dir, "null.parquet")
        df.to_parquet(path)
        out = pl.read_parquet(path)
    assert out.frame_equal(df)

0 comments on commit e4d6f6d

Please sign in to comment.