From c9fe387d30b76b33016500ed1fabfe0528e4c0a9 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Mon, 25 Apr 2022 06:31:40 +0000 Subject: [PATCH] Fixed struct stats --- examples/parquet_read.rs | 10 +- src/io/parquet/read/statistics/dictionary.rs | 11 +- src/io/parquet/read/statistics/mod.rs | 153 ++++++++++++++++--- src/io/parquet/read/statistics/struct_.rs | 61 ++++++++ tests/it/io/parquet/mod.rs | 153 ++++++++++++------- 5 files changed, 300 insertions(+), 88 deletions(-) create mode 100644 src/io/parquet/read/statistics/struct_.rs diff --git a/examples/parquet_read.rs b/examples/parquet_read.rs index 4cf14fe82ef..07bd9fcacdd 100644 --- a/examples/parquet_read.rs +++ b/examples/parquet_read.rs @@ -13,7 +13,15 @@ fn main() -> Result<()> { let reader = File::open(file_path)?; let reader = read::FileReader::try_new(reader, Some(&[8]), None, None, None)?; - println!("{:#?}", reader.metadata()); + println!("{:#?}", reader.schema()); + + // say we want to evaluate if we can skip some row groups based on a field's value + let field = &reader.schema().fields[0]; + + // we can deserialize the parquet statistics from this field + let statistics = read::statistics::deserialize(field, &reader.metadata().row_groups)?; + + println!("{:#?}", statistics); let start = SystemTime::now(); for maybe_chunk in reader { diff --git a/src/io/parquet/read/statistics/dictionary.rs b/src/io/parquet/read/statistics/dictionary.rs index 29a60fd1862..03c7e7ce966 100644 --- a/src/io/parquet/read/statistics/dictionary.rs +++ b/src/io/parquet/read/statistics/dictionary.rs @@ -1,5 +1,5 @@ use crate::array::*; -use crate::datatypes::DataType; +use crate::datatypes::{DataType, PhysicalType}; use crate::error::Result; use super::make_mutable; @@ -38,8 +38,13 @@ impl MutableArray for DynMutableDictionary { fn as_box(&mut self) -> Box { let inner = self.inner.as_arc(); - let keys = PrimitiveArray::::from_iter((0..inner.len() as i32).map(Some)); - Box::new(DictionaryArray::::from_data(keys, 
inner)) + match self.data_type.to_physical_type() { + PhysicalType::Dictionary(key) => match_integer_type!(key, |$T| { + let keys = PrimitiveArray::<$T>::from_iter((0..inner.len() as $T).map(Some)); + Box::new(DictionaryArray::<$T>::from_data(keys, inner)) + }), + _ => todo!(), + } } fn as_any(&self) -> &dyn std::any::Any { diff --git a/src/io/parquet/read/statistics/mod.rs b/src/io/parquet/read/statistics/mod.rs index 4e6e41fbdf4..6df1b16ebed 100644 --- a/src/io/parquet/read/statistics/mod.rs +++ b/src/io/parquet/read/statistics/mod.rs @@ -23,19 +23,29 @@ mod dictionary; mod fixlen; mod list; mod primitive; +mod struct_; mod utf8; use self::list::DynMutableListArray; use super::get_field_columns; +/// Enum of a count statistics +#[derive(Debug, PartialEq)] +pub enum Count { + /// simple arrays (every type not a Struct) have a count of UInt64 + Single(UInt64Array), + /// struct arrays have a count as a struct of UInt64 + Struct(StructArray), +} + /// Arrow-deserialized parquet Statistics of a file #[derive(Debug, PartialEq)] pub struct Statistics { - /// number of nulls - pub null_count: UInt64Array, + /// number of nulls. 
+ pub null_count: Count, /// number of dictinct values - pub distinct_count: UInt64Array, + pub distinct_count: Count, /// Minimum pub min_value: Box, /// Maximum @@ -46,9 +56,9 @@ pub struct Statistics { #[derive(Debug)] struct MutableStatistics { /// number of nulls - pub null_count: UInt64Vec, + pub null_count: Box, /// number of dictinct values - pub distinct_count: UInt64Vec, + pub distinct_count: Box, /// Minimum pub min_value: Box, /// Maximum @@ -57,9 +67,48 @@ struct MutableStatistics { impl From for Statistics { fn from(mut s: MutableStatistics) -> Self { + let null_count = if let PhysicalType::Struct = s.null_count.data_type().to_physical_type() { + let a = s + .null_count + .as_box() + .as_any() + .downcast_ref::() + .unwrap() + .clone(); + Count::Struct(a) + } else { + let a = s + .null_count + .as_box() + .as_any() + .downcast_ref::() + .unwrap() + .clone(); + Count::Single(a) + }; + let distinct_count = + if let PhysicalType::Struct = s.distinct_count.data_type().to_physical_type() { + let a = s + .distinct_count + .as_box() + .as_any() + .downcast_ref::() + .unwrap() + .clone(); + Count::Struct(a) + } else { + let a = s + .distinct_count + .as_box() + .as_any() + .downcast_ref::() + .unwrap() + .clone(); + Count::Single(a) + }; Self { - null_count: s.null_count.into(), - distinct_count: s.distinct_count.into(), + null_count, + distinct_count, min_value: s.min_value.as_box(), max_value: s.max_value.as_box(), } @@ -98,6 +147,10 @@ fn make_mutable(data_type: &DataType, capacity: usize) -> Result Box::new( dictionary::DynMutableDictionary::try_with_capacity(data_type.clone(), capacity)?, ), + PhysicalType::Struct => Box::new(struct_::DynMutableStructArray::try_with_capacity( + data_type.clone(), + capacity, + )?), other => { return Err(ArrowError::NotYetImplemented(format!( "Deserializing parquet stats from {:?} is still not implemented", @@ -107,14 +160,28 @@ fn make_mutable(data_type: &DataType, capacity: usize) -> Result DataType { + if let 
DataType::Struct(fields) = data_type.to_logical_type() { + DataType::Struct( + fields + .iter() + .map(|f| Field::new(&f.name, create_dt(&f.data_type), f.is_nullable)) + .collect(), + ) + } else { + DataType::UInt64 + } +} + impl MutableStatistics { fn try_new(field: &Field) -> Result { let min_value = make_mutable(&field.data_type, 0)?; let max_value = make_mutable(&field.data_type, 0)?; + let dt = create_dt(&field.data_type); Ok(Self { - null_count: UInt64Vec::new(), - distinct_count: UInt64Vec::new(), + null_count: make_mutable(&dt, 0)?, + distinct_count: make_mutable(&dt, 0)?, min_value, max_value, }) @@ -188,11 +255,11 @@ fn push_others( } fn push( - mut stats: VecDeque<(Option>, ParquetPrimitiveType)>, + stats: &mut VecDeque<(Option>, ParquetPrimitiveType)>, min: &mut dyn MutableArray, max: &mut dyn MutableArray, - distinct_count: &mut UInt64Vec, - null_count: &mut UInt64Vec, + distinct_count: &mut dyn MutableArray, + null_count: &mut dyn MutableArray, ) -> Result<()> { match min.data_type().to_logical_type() { List(_) | LargeList(_) => { @@ -229,12 +296,51 @@ fn push( null_count, ); } + Struct(_) => { + let min = min + .as_mut_any() + .downcast_mut::() + .unwrap(); + let max = max + .as_mut_any() + .downcast_mut::() + .unwrap(); + let distinct_count = distinct_count + .as_mut_any() + .downcast_mut::() + .unwrap(); + let null_count = null_count + .as_mut_any() + .downcast_mut::() + .unwrap(); + return min + .inner + .iter_mut() + .zip(max.inner.iter_mut()) + .zip(distinct_count.inner.iter_mut()) + .zip(null_count.inner.iter_mut()) + .try_for_each(|(((min, max), distinct_count), null_count)| { + push( + stats, + min.as_mut(), + max.as_mut(), + distinct_count.as_mut(), + null_count.as_mut(), + ) + }); + } _ => {} } let (from, type_) = stats.pop_front().unwrap(); let from = from.as_deref(); + let distinct_count = distinct_count + .as_mut_any() + .downcast_mut::() + .unwrap(); + let null_count = null_count.as_mut_any().downcast_mut::().unwrap(); + 
push_others(from, distinct_count, null_count); let physical_type = &type_.physical_type; @@ -288,21 +394,18 @@ fn push( } } -/// Deserializes [`ParquetStatistics`] into [`Statistics`] associated to `field` +/// Deserializes the statistics in the column chunks from all `row_groups` +/// into [`Statistics`] associated with `field`'s name. /// -/// For non-nested types, it returns a single column. -/// For nested types, it returns one column per parquet primitive column. -pub fn deserialize_statistics(field: &Field, groups: &[RowGroupMetaData]) -> Result { - if groups.is_empty() { - todo!("Return an empty statistics") - } - +/// # Errors +/// This function errors if the deserialization of the statistics fails (e.g. invalid utf8) +pub fn deserialize(field: &Field, row_groups: &[RowGroupMetaData]) -> Result { let mut statistics = MutableStatistics::try_new(field)?; // transpose - groups.iter().try_for_each(|group| { + row_groups.iter().try_for_each(|group| { let columns = get_field_columns(group.columns(), field.name.as_ref()); - let stats = columns + let mut stats = columns .into_iter() .map(|column| { Ok(( @@ -312,11 +415,11 @@ pub fn deserialize_statistics(field: &Field, groups: &[RowGroupMetaData]) -> Res }) .collect::, ParquetPrimitiveType)>>>()?; push( - stats, + &mut stats, statistics.min_value.as_mut(), statistics.max_value.as_mut(), - &mut statistics.distinct_count, - &mut statistics.null_count, + statistics.distinct_count.as_mut(), + statistics.null_count.as_mut(), ) })?; diff --git a/src/io/parquet/read/statistics/struct_.rs b/src/io/parquet/read/statistics/struct_.rs new file mode 100644 index 00000000000..085737d6241 --- /dev/null +++ b/src/io/parquet/read/statistics/struct_.rs @@ -0,0 +1,61 @@ +use crate::array::{Array, StructArray}; +use crate::error::Result; +use crate::{array::MutableArray, datatypes::DataType}; + +use super::make_mutable; + +#[derive(Debug)] +pub struct DynMutableStructArray { + data_type: DataType, + pub inner: Vec>, +} + +impl 
DynMutableStructArray { + pub fn try_with_capacity(data_type: DataType, capacity: usize) -> Result { + let inners = match data_type.to_logical_type() { + DataType::Struct(inner) => inner, + _ => unreachable!(), + }; + let inner = inners + .iter() + .map(|f| make_mutable(f.data_type(), capacity)) + .collect::>>()?; + + Ok(Self { data_type, inner }) + } +} +impl MutableArray for DynMutableStructArray { + fn data_type(&self) -> &DataType { + &self.data_type + } + + fn len(&self) -> usize { + self.inner.len() + } + + fn validity(&self) -> Option<&crate::bitmap::MutableBitmap> { + None + } + + fn as_box(&mut self) -> Box { + let inner = self.inner.iter_mut().map(|x| x.as_arc()).collect(); + + Box::new(StructArray::new(self.data_type.clone(), inner, None)) + } + + fn as_any(&self) -> &dyn std::any::Any { + self + } + + fn as_mut_any(&mut self) -> &mut dyn std::any::Any { + self + } + + fn push_null(&mut self) { + todo!() + } + + fn shrink_to_fit(&mut self) { + todo!() + } +} diff --git a/tests/it/io/parquet/mod.rs b/tests/it/io/parquet/mod.rs index 995ad2bf40c..4bb52895192 100644 --- a/tests/it/io/parquet/mod.rs +++ b/tests/it/io/parquet/mod.rs @@ -37,7 +37,7 @@ pub fn read_column(mut reader: R, column: &str) -> Result Box { pub fn pyarrow_nullable_statistics(column: &str) -> Statistics { match column { "int64" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(3)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(3)])), min_value: Box::new(Int64Array::from_slice([-256])), max_value: Box::new(Int64Array::from_slice([9])), }, "float64" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(3)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(3)])), min_value: Box::new(Float64Array::from_slice([0.0])), max_value: Box::new(Float64Array::from_slice([9.0])), 
}, "string" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(4)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(4)])), min_value: Box::new(Utf8Array::::from_slice([""])), max_value: Box::new(Utf8Array::::from_slice(["def"])), }, "bool" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(4)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(4)])), min_value: Box::new(BooleanArray::from_slice([false])), max_value: Box::new(BooleanArray::from_slice([true])), }, "timestamp_ms" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(3)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(3)])), min_value: Box::new( Int64Array::from_slice([0]).to(DataType::Timestamp(TimeUnit::Millisecond, None)), ), @@ -415,8 +415,8 @@ pub fn pyarrow_nullable_statistics(column: &str) -> Statistics { ), }, "uint32" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(3)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(3)])), min_value: Box::new(UInt32Array::from_slice([0])), max_value: Box::new(UInt32Array::from_slice([9])), }, @@ -429,33 +429,33 @@ pub fn pyarrow_nullable_statistics(column: &str) -> Statistics { }; Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(0)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(0)])), min_value: new_dict(Arc::new(Int32Array::from_slice([10]))), max_value: new_dict(Arc::new(Int32Array::from_slice([200]))), } } "decimal_9" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: 
UInt64Array::from([Some(3)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(3)])), min_value: Box::new(Int128Array::from_slice([-256]).to(DataType::Decimal(9, 0))), max_value: Box::new(Int128Array::from_slice([9]).to(DataType::Decimal(9, 0))), }, "decimal_18" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(3)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(3)])), min_value: Box::new(Int128Array::from_slice([-256]).to(DataType::Decimal(18, 0))), max_value: Box::new(Int128Array::from_slice([9]).to(DataType::Decimal(18, 0))), }, "decimal_26" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(3)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(3)])), min_value: Box::new(Int128Array::from_slice([-256]).to(DataType::Decimal(26, 0))), max_value: Box::new(Int128Array::from_slice([9]).to(DataType::Decimal(26, 0))), }, "timestamp_us" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(3)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(3)])), min_value: Box::new( Int64Array::from_slice([-256]).to(DataType::Timestamp(TimeUnit::Microsecond, None)), ), @@ -464,8 +464,8 @@ pub fn pyarrow_nullable_statistics(column: &str) -> Statistics { ), }, "timestamp_s" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(3)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(3)])), min_value: Box::new( Int64Array::from_slice([-256]).to(DataType::Timestamp(TimeUnit::Second, None)), ), @@ -474,8 +474,8 @@ pub fn pyarrow_nullable_statistics(column: &str) -> Statistics { ), }, 
"timestamp_s_utc" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(3)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(3)])), min_value: Box::new(Int64Array::from_slice([-256]).to(DataType::Timestamp( TimeUnit::Second, Some("UTC".to_string()), @@ -539,7 +539,7 @@ pub fn pyarrow_required(column: &str) -> Box { pub fn pyarrow_required_statistics(column: &str) -> Statistics { let mut s = pyarrow_nullable_statistics(column); - s.null_count = UInt64Array::from([Some(0)]); + s.null_count = Count::Single(UInt64Array::from([Some(0)])); s } @@ -559,50 +559,50 @@ pub fn pyarrow_nested_nullable_statistics(column: &str) -> Statistics { match column { "list_int16" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(1)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(1)])), min_value: new_list(Arc::new(Int16Array::from_slice([0])), true), max_value: new_list(Arc::new(Int16Array::from_slice([10])), true), }, "list_bool" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(1)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(1)])), min_value: new_list(Arc::new(BooleanArray::from_slice([false])), true), max_value: new_list(Arc::new(BooleanArray::from_slice([true])), true), }, "list_utf8" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: [Some(1)].into(), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single([Some(1)].into()), min_value: new_list(Arc::new(Utf8Array::::from_slice([""])), true), max_value: new_list(Arc::new(Utf8Array::::from_slice(["ccc"])), true), }, "list_large_binary" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: [Some(1)].into(), + 
distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single([Some(1)].into()), min_value: new_list(Arc::new(BinaryArray::::from_slice([b""])), true), max_value: new_list(Arc::new(BinaryArray::::from_slice([b"ccc"])), true), }, "list_int64" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: [Some(1)].into(), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single([Some(1)].into()), min_value: new_list(Arc::new(Int64Array::from_slice([0])), true), max_value: new_list(Arc::new(Int64Array::from_slice([10])), true), }, "list_int64_required" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: [Some(1)].into(), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single([Some(1)].into()), min_value: new_list(Arc::new(Int64Array::from_slice([0])), false), max_value: new_list(Arc::new(Int64Array::from_slice([10])), false), }, "list_int64_required_required" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: [Some(0)].into(), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single([Some(0)].into()), min_value: new_list(Arc::new(Int64Array::from_slice([0])), false), max_value: new_list(Arc::new(Int64Array::from_slice([10])), false), }, "list_nested_i64" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: [Some(2)].into(), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single([Some(2)].into()), min_value: new_list( new_list(Arc::new(Int64Array::from_slice([0])), true).into(), true, @@ -613,8 +613,8 @@ pub fn pyarrow_nested_nullable_statistics(column: &str) -> Statistics { ), }, "list_nested_inner_required_required_i64" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: [Some(0)].into(), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single([Some(0)].into()), min_value: new_list( 
new_list(Arc::new(Int64Array::from_slice([0])), true).into(), true, @@ -625,8 +625,8 @@ pub fn pyarrow_nested_nullable_statistics(column: &str) -> Statistics { ), }, "list_nested_inner_required_i64" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: [Some(0)].into(), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single([Some(0)].into()), min_value: new_list( new_list(Arc::new(Int64Array::from_slice([0])), true).into(), true, @@ -656,14 +656,14 @@ pub fn pyarrow_nested_edge_statistics(column: &str) -> Statistics { match column { "simple" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(0)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(0)])), min_value: new_list(Arc::new(Int64Array::from([Some(0)]))), max_value: new_list(Arc::new(Int64Array::from([Some(1)]))), }, "null" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(1)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(1)])), min_value: new_list(Arc::new(Int64Array::from([None]))), max_value: new_list(Arc::new(Int64Array::from([None]))), }, @@ -730,16 +730,51 @@ pub fn pyarrow_struct(column: &str) -> Box { } pub fn pyarrow_struct_statistics(column: &str) -> Statistics { + let new_struct = |arrays: Vec>, names: Vec| { + let fields = names + .into_iter() + .zip(arrays.iter()) + .map(|(n, a)| Field::new(n, a.data_type().clone(), true)) + .collect(); + StructArray::new(DataType::Struct(fields), arrays, None) + }; + + let names = vec!["f1".to_string(), "f2".to_string()]; + match column { "struct" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(4)]), - min_value: Box::new(BooleanArray::from_slice([false])), - max_value: Box::new(BooleanArray::from_slice([true])), + distinct_count: 
Count::Struct(new_struct( + vec![ + Arc::new(UInt64Array::from([None])), + Arc::new(UInt64Array::from([None])), + ], + names.clone(), + )), + null_count: Count::Struct(new_struct( + vec![ + Arc::new(UInt64Array::from([Some(4)])), + Arc::new(UInt64Array::from([Some(4)])), + ], + names.clone(), + )), + min_value: Box::new(new_struct( + vec![ + Arc::new(Utf8Array::::from_slice([""])), + Arc::new(BooleanArray::from_slice([false])), + ], + names.clone(), + )), + max_value: Box::new(new_struct( + vec![ + Arc::new(Utf8Array::::from_slice(["def"])), + Arc::new(BooleanArray::from_slice([true])), + ], + names, + )), }, "struct_struct" => Statistics { - distinct_count: UInt64Array::from([None]), - null_count: UInt64Array::from([Some(1)]), + distinct_count: Count::Single(UInt64Array::from([None])), + null_count: Count::Single(UInt64Array::from([Some(1)])), min_value: Box::new(BooleanArray::from_slice([false])), max_value: Box::new(BooleanArray::from_slice([true])), }, @@ -791,7 +826,7 @@ fn integration_read(data: &[u8]) -> Result { let schema = reader.schema().clone(); for field in &schema.fields { - let mut _statistics = deserialize_statistics(field, &reader.metadata().row_groups)?; + let mut _statistics = deserialize(field, &reader.metadata().row_groups)?; } let batches = reader.collect::>>()?;