Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Simpler stats
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Aug 9, 2022
1 parent 838deca commit 12617c9
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 134 deletions.
93 changes: 34 additions & 59 deletions src/io/parquet/read/statistics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,28 +32,13 @@ use self::list::DynMutableListArray;

use super::get_field_columns;

/// A count statistic, stored in the array type that matches the nesting of the data it describes.
#[derive(Debug, PartialEq)]
pub enum Count {
/// simple (non-nested) arrays have a count of UInt64
Single(UInt64Array),
/// list arrays have a count as a list of UInt64
List(ListArray<i32>),
/// large list arrays have a count as a large list of UInt64
LargeList(ListArray<i64>),
/// struct arrays have a count as a struct of UInt64
Struct(StructArray),
/// map arrays have a count as a map of UInt64
Map(MapArray),
}

/// Arrow-deserialized parquet Statistics of a file
#[derive(Debug, PartialEq)]
pub struct Statistics {
/// number of nulls.
pub null_count: Count,
/// number of distinct values
pub distinct_count: Count,
/// number of nulls. This is a [`UInt64Array`] for non-nested types
pub null_count: Box<dyn Array>,
/// number of distinct values. This is a [`UInt64Array`] for non-nested types
pub distinct_count: Box<dyn Array>,
/// Minimum
pub min_value: Box<dyn Array>,
/// Maximum
Expand All @@ -76,98 +61,88 @@ struct MutableStatistics {
impl From<MutableStatistics> for Statistics {
fn from(mut s: MutableStatistics) -> Self {
let null_count = if let PhysicalType::Struct = s.null_count.data_type().to_physical_type() {
let a = s
.null_count
s.null_count
.as_box()
.as_any()
.downcast_ref::<StructArray>()
.unwrap()
.clone();
Count::Struct(a)
.clone()
.boxed()
} else if let PhysicalType::Map = s.null_count.data_type().to_physical_type() {
let a = s
.null_count
s.null_count
.as_box()
.as_any()
.downcast_ref::<MapArray>()
.unwrap()
.clone();
Count::Map(a)
.clone()
.boxed()
} else if let PhysicalType::List = s.null_count.data_type().to_physical_type() {
let a = s
.null_count
s.null_count
.as_box()
.as_any()
.downcast_ref::<ListArray<i32>>()
.unwrap()
.clone();
Count::List(a)
.clone()
.boxed()
} else if let PhysicalType::LargeList = s.null_count.data_type().to_physical_type() {
let a = s
.null_count
s.null_count
.as_box()
.as_any()
.downcast_ref::<ListArray<i64>>()
.unwrap()
.clone();
Count::LargeList(a)
.clone()
.boxed()
} else {
let a = s
.null_count
s.null_count
.as_box()
.as_any()
.downcast_ref::<UInt64Array>()
.unwrap()
.clone();
Count::Single(a)
.clone()
.boxed()
};
let distinct_count = if let PhysicalType::Struct =
s.distinct_count.data_type().to_physical_type()
{
let a = s
.distinct_count
s.distinct_count
.as_box()
.as_any()
.downcast_ref::<StructArray>()
.unwrap()
.clone();
Count::Struct(a)
.clone()
.boxed()
} else if let PhysicalType::Map = s.distinct_count.data_type().to_physical_type() {
let a = s
.distinct_count
s.distinct_count
.as_box()
.as_any()
.downcast_ref::<MapArray>()
.unwrap()
.clone();
Count::Map(a)
.clone()
.boxed()
} else if let PhysicalType::List = s.distinct_count.data_type().to_physical_type() {
let a = s
.distinct_count
s.distinct_count
.as_box()
.as_any()
.downcast_ref::<ListArray<i32>>()
.unwrap()
.clone();
Count::List(a)
.clone()
.boxed()
} else if let PhysicalType::LargeList = s.distinct_count.data_type().to_physical_type() {
let a = s
.distinct_count
s.distinct_count
.as_box()
.as_any()
.downcast_ref::<ListArray<i64>>()
.unwrap()
.clone();
Count::LargeList(a)
.clone()
.boxed()
} else {
let a = s
.distinct_count
s.distinct_count
.as_box()
.as_any()
.downcast_ref::<UInt64Array>()
.unwrap()
.clone();
Count::Single(a)
.clone()
.boxed()
};
Self {
null_count,
Expand Down
Loading

0 comments on commit 12617c9

Please sign in to comment.