Skip to content

Commit

Permalink
Made some structs and functions public (jorgecarleitao#1375)
Browse files Browse the repository at this point in the history
  • Loading branch information
b41sh authored and ritchie46 committed Apr 5, 2023
1 parent c05edd4 commit af9a0a0
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 12 deletions.
7 changes: 4 additions & 3 deletions src/io/parquet/read/deserialize/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::{
offset::Offsets,
};

use self::nested_utils::{InitNested, NestedArrayIter, NestedState};
pub use self::nested_utils::{init_nested, InitNested, NestedArrayIter, NestedState};
use simple::page_iter_to_arrays;

use super::*;
Expand All @@ -43,7 +43,8 @@ pub fn get_page_iterator<R: Read + Seek>(
)?)
}

fn create_list(
/// Creates a new [`ListArray`] or [`FixedSizeListArray`].
pub fn create_list(
data_type: DataType,
nested: &mut NestedState,
values: Box<dyn Array>,
Expand Down Expand Up @@ -128,7 +129,7 @@ where
}

/// Returns the number of (parquet) columns that a [`DataType`] contains.
fn n_columns(data_type: &DataType) -> usize {
pub fn n_columns(data_type: &DataType) -> usize {
use crate::datatypes::PhysicalType::*;
match data_type.to_physical_type() {
Null | Boolean | Primitive(_) | Binary | FixedSizeBinary | LargeBinary | Utf8
Expand Down
10 changes: 9 additions & 1 deletion src/io/parquet/read/deserialize/nested_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,14 +264,19 @@ pub(super) trait NestedDecoder<'a> {
fn deserialize_dict(&self, page: &DictPage) -> Self::Dictionary;
}

/// Describes one level of nesting used to initialize a [`NestedState`].
///
/// A slice of these values is passed to [`init_nested`], which builds one
/// nested container per entry.
///
/// NOTE(review): the `bool` payload of each variant presumably indicates
/// whether that level is nullable — confirm against `init_nested`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InitNested {
/// A primitive (leaf) data type.
Primitive(bool),
/// A list data type.
List(bool),
/// A struct data type.
Struct(bool),
}

fn init_nested(init: &[InitNested], capacity: usize) -> NestedState {
/// Initialize [`NestedState`] from `&[InitNested]`.
pub fn init_nested(init: &[InitNested], capacity: usize) -> NestedState {
let container = init
.iter()
.map(|init| match init {
Expand Down Expand Up @@ -324,12 +329,15 @@ impl<'a> NestedPage<'a> {
}
}

/// The state of nested data types, held as a collection of [`Nested`] trait objects.
#[derive(Debug)]
pub struct NestedState {
/// The [`Nested`] values that make up this state.
///
/// NOTE(review): presumably one entry per nesting level — confirm the
/// expected ordering against `init_nested`.
pub nested: Vec<Box<dyn Nested>>,
}

impl NestedState {
/// Creates a new [`NestedState`].
pub fn new(nested: Vec<Box<dyn Nested>>) -> Self {
Self { nested }
}
Expand Down
5 changes: 4 additions & 1 deletion src/io/parquet/read/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ pub use parquet2::{

use crate::{array::Array, error::Result};

pub use deserialize::{column_iter_to_arrays, get_page_iterator};
pub use deserialize::{
column_iter_to_arrays, create_list, get_page_iterator, init_nested, n_columns, InitNested,
NestedState,
};
pub use file::{FileReader, RowGroupReader};
pub use row_group::*;
pub use schema::{infer_schema, FileMetaData};
Expand Down
14 changes: 9 additions & 5 deletions src/io/parquet/write/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ use crate::error::{Error, Result};
use crate::types::days_ms;
use crate::types::NativeType;

pub use nested::write_rep_and_def;
pub use pages::{to_leaves, to_nested, to_parquet_leaves};
use parquet2::schema::types::PrimitiveType as ParquetPrimitiveType;
pub use parquet2::{
compression::{BrotliLevel, CompressionOptions, GzipLevel, ZstdLevel},
Expand All @@ -46,6 +48,7 @@ pub use parquet2::{
},
FallibleStreamingIterator,
};
pub use utils::write_def_levels;

/// Currently supported options to write to parquet
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
Expand All @@ -70,7 +73,7 @@ pub use pages::array_to_columns;
pub use pages::Nested;

/// Returns the offset and length used to slice the leaf values.
pub(self) fn slice_nested_leaf(nested: &[Nested]) -> (usize, usize) {
pub fn slice_nested_leaf(nested: &[Nested]) -> (usize, usize) {
// find the deepest recursive dremel structure as that one determines how many values we must
// take
let mut out = (0, 0);
Expand Down Expand Up @@ -154,7 +157,8 @@ pub fn can_encode(data_type: &DataType, encoding: Encoding) -> bool {
)
}

fn slice_parquet_array<'a>(
/// Slices the [`Array`] to `Box<dyn Array>` and `Vec<Nested>`.
pub fn slice_parquet_array<'a>(
array: &'a dyn Array,
nested: &'a [Nested<'a>],
offset: usize,
Expand Down Expand Up @@ -186,9 +190,9 @@ fn slice_parquet_array<'a>(
}
}

fn get_max_length(array: &dyn Array, nested: &[Nested]) -> usize {
// get the length that should be sliced.
// that is the inner nested structure that
/// Get the length of [`Array`] that should be sliced.
pub fn get_max_length(array: &dyn Array, nested: &[Nested]) -> usize {
// The length is determined by the innermost nested structure, which
// dictates how often the primitive should be repeated.
for nested in nested.iter().rev() {
match nested {
Expand Down
1 change: 1 addition & 0 deletions src/io/parquet/write/nested/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ fn to_length<O: Offset>(
.map(|w| w[1].to_usize() - w[0].to_usize())
}

/// Write `repetition_levels` and `definition_levels` to buffer.
pub fn write_rep_and_def(
page_version: Version,
nested: &[Nested],
Expand Down
6 changes: 4 additions & 2 deletions src/io/parquet/write/pages.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ fn to_nested_recursive<'a>(
Ok(())
}

fn to_leaves(array: &dyn Array) -> Vec<&dyn Array> {
/// Convert [`Array`] to `Vec<&dyn Array>` leaves in DFS order.
pub fn to_leaves(array: &dyn Array) -> Vec<&dyn Array> {
let mut leaves = vec![];
to_leaves_recursive(array, &mut leaves);
leaves
Expand Down Expand Up @@ -179,7 +180,8 @@ fn to_leaves_recursive<'a>(array: &'a dyn Array, leaves: &mut Vec<&'a dyn Array>
}
}

fn to_parquet_leaves(type_: ParquetType) -> Vec<ParquetPrimitiveType> {
/// Convert `ParquetType` to `Vec<ParquetPrimitiveType>` leaves in DFS order.
pub fn to_parquet_leaves(type_: ParquetType) -> Vec<ParquetPrimitiveType> {
let mut leaves = vec![];
to_parquet_leaves_recursive(type_, &mut leaves);
leaves
Expand Down

0 comments on commit af9a0a0

Please sign in to comment.