Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Removed Count (parquet statistics) #1217

Merged
merged 1 commit into from
Aug 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 34 additions & 59 deletions src/io/parquet/read/statistics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,28 +32,13 @@ use self::list::DynMutableListArray;

use super::get_field_columns;

/// Count statistics (such as null or distinct counts), represented by a
/// different array type depending on the kind of array being described.
#[derive(Debug, PartialEq)]
pub enum Count {
/// simple arrays have a count of UInt64
Single(UInt64Array),
/// list arrays (`ListArray<i32>`) have a count as a list of UInt64
List(ListArray<i32>),
/// large list arrays (`ListArray<i64>`) have a count as a list of UInt64
LargeList(ListArray<i64>),
/// struct arrays have a count as a struct of UInt64
Struct(StructArray),
/// map arrays have a count as a map of UInt64
Map(MapArray),
}

/// Arrow-deserialized parquet Statistics of a file
#[derive(Debug, PartialEq)]
pub struct Statistics {
/// number of nulls.
pub null_count: Count,
/// number of distinct values
pub distinct_count: Count,
/// number of nulls. This is a [`UInt64Array`] for non-nested types
pub null_count: Box<dyn Array>,
/// number of distinct values. This is a [`UInt64Array`] for non-nested types
pub distinct_count: Box<dyn Array>,
/// Minimum
pub min_value: Box<dyn Array>,
/// Maximum
Expand All @@ -76,98 +61,88 @@ struct MutableStatistics {
impl From<MutableStatistics> for Statistics {
fn from(mut s: MutableStatistics) -> Self {
let null_count = if let PhysicalType::Struct = s.null_count.data_type().to_physical_type() {
let a = s
.null_count
s.null_count
.as_box()
.as_any()
.downcast_ref::<StructArray>()
.unwrap()
.clone();
Count::Struct(a)
.clone()
.boxed()
} else if let PhysicalType::Map = s.null_count.data_type().to_physical_type() {
let a = s
.null_count
s.null_count
.as_box()
.as_any()
.downcast_ref::<MapArray>()
.unwrap()
.clone();
Count::Map(a)
.clone()
.boxed()
} else if let PhysicalType::List = s.null_count.data_type().to_physical_type() {
let a = s
.null_count
s.null_count
.as_box()
.as_any()
.downcast_ref::<ListArray<i32>>()
.unwrap()
.clone();
Count::List(a)
.clone()
.boxed()
} else if let PhysicalType::LargeList = s.null_count.data_type().to_physical_type() {
let a = s
.null_count
s.null_count
.as_box()
.as_any()
.downcast_ref::<ListArray<i64>>()
.unwrap()
.clone();
Count::LargeList(a)
.clone()
.boxed()
} else {
let a = s
.null_count
s.null_count
.as_box()
.as_any()
.downcast_ref::<UInt64Array>()
.unwrap()
.clone();
Count::Single(a)
.clone()
.boxed()
};
let distinct_count = if let PhysicalType::Struct =
s.distinct_count.data_type().to_physical_type()
{
let a = s
.distinct_count
s.distinct_count
.as_box()
.as_any()
.downcast_ref::<StructArray>()
.unwrap()
.clone();
Count::Struct(a)
.clone()
.boxed()
} else if let PhysicalType::Map = s.distinct_count.data_type().to_physical_type() {
let a = s
.distinct_count
s.distinct_count
.as_box()
.as_any()
.downcast_ref::<MapArray>()
.unwrap()
.clone();
Count::Map(a)
.clone()
.boxed()
} else if let PhysicalType::List = s.distinct_count.data_type().to_physical_type() {
let a = s
.distinct_count
s.distinct_count
.as_box()
.as_any()
.downcast_ref::<ListArray<i32>>()
.unwrap()
.clone();
Count::List(a)
.clone()
.boxed()
} else if let PhysicalType::LargeList = s.distinct_count.data_type().to_physical_type() {
let a = s
.distinct_count
s.distinct_count
.as_box()
.as_any()
.downcast_ref::<ListArray<i64>>()
.unwrap()
.clone();
Count::LargeList(a)
.clone()
.boxed()
} else {
let a = s
.distinct_count
s.distinct_count
.as_box()
.as_any()
.downcast_ref::<UInt64Array>()
.unwrap()
.clone();
Count::Single(a)
.clone()
.boxed()
};
Self {
null_count,
Expand Down
Loading