Skip to content

Commit

Permalink
fix[rust]: parquet statistics, take null_count of all row groups (#4510)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Aug 20, 2022
1 parent f66be9b commit 2a40366
Showing 1 changed file with 7 additions and 13 deletions.
20 changes: 7 additions & 13 deletions polars/polars-io/src/parquet/predicates.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use arrow::array::{Array, UInt64Array};
use arrow::compute::concatenate::concatenate;
use arrow::io::parquet::read::statistics::{deserialize, Statistics};
use arrow::io::parquet::read::RowGroupMetaData;
Expand All @@ -24,18 +23,13 @@ impl ColumnStats {
match self.1.data_type() {
#[cfg(feature = "dtype-struct")]
DataType::Struct(_) => None,
_ => self
.0
.null_count
.as_any()
.downcast_ref::<UInt64Array>()
.and_then(|arr| {
if arr.is_valid(0) {
Some(arr.value(0) as usize)
} else {
None
}
}),
_ => {
// the array holds the null count for every row group,
// so we sum them to get the null count of the whole file.
Series::try_from(("", self.0.null_count.clone()))
.unwrap()
.sum()
}
}
}

Expand Down

0 comments on commit 2a40366

Please sign in to comment.