Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Made some structs and functions public #1375

Merged
merged 3 commits into from
Feb 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions src/io/parquet/read/deserialize/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::{
offset::Offsets,
};

use self::nested_utils::{InitNested, NestedArrayIter, NestedState};
pub use self::nested_utils::{init_nested, InitNested, NestedArrayIter, NestedState};
use simple::page_iter_to_arrays;

use super::*;
Expand All @@ -43,7 +43,8 @@ pub fn get_page_iterator<R: Read + Seek>(
)?)
}

fn create_list(
/// Creates a new [`ListArray`] or [`FixedSizeListArray`].
pub fn create_list(
data_type: DataType,
nested: &mut NestedState,
values: Box<dyn Array>,
Expand Down Expand Up @@ -128,7 +129,7 @@ where
}

/// Returns the number of (parquet) columns that a [`DataType`] contains.
fn n_columns(data_type: &DataType) -> usize {
pub fn n_columns(data_type: &DataType) -> usize {
use crate::datatypes::PhysicalType::*;
match data_type.to_physical_type() {
Null | Boolean | Primitive(_) | Binary | FixedSizeBinary | LargeBinary | Utf8
Expand Down
10 changes: 9 additions & 1 deletion src/io/parquet/read/deserialize/nested_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,14 +264,19 @@ pub(super) trait NestedDecoder<'a> {
fn deserialize_dict(&self, page: &DictPage) -> Self::Dictionary;
}

/// The initial info of nested data types.
///
/// A slice of these values is the input that [`init_nested`] turns into a
/// [`NestedState`].
// NOTE(review): the `bool` carried by every variant is presumably the
// nullability of that nesting level — confirm against the body of `init_nested`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InitNested {
/// Primitive data types
Primitive(bool),
/// List data types
List(bool),
/// Struct data types
Struct(bool),
}

fn init_nested(init: &[InitNested], capacity: usize) -> NestedState {
/// Initialize [`NestedState`] from `&[InitNested]`.
pub fn init_nested(init: &[InitNested], capacity: usize) -> NestedState {
let container = init
.iter()
.map(|init| match init {
Expand Down Expand Up @@ -324,12 +329,15 @@ impl<'a> NestedPage<'a> {
}
}

/// The state of nested data types.
#[derive(Debug)]
pub struct NestedState {
/// The boxed [`Nested`] trait objects that make up this state.
// NOTE(review): presumably one entry per nesting level, in the same order as
// the `&[InitNested]` the state was initialized from — confirm.
pub nested: Vec<Box<dyn Nested>>,
}

impl NestedState {
/// Creates a new [`NestedState`].
pub fn new(nested: Vec<Box<dyn Nested>>) -> Self {
Self { nested }
}
Expand Down
5 changes: 4 additions & 1 deletion src/io/parquet/read/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ pub use parquet2::{

use crate::{array::Array, error::Result};

pub use deserialize::{column_iter_to_arrays, get_page_iterator};
pub use deserialize::{
column_iter_to_arrays, create_list, get_page_iterator, init_nested, n_columns, InitNested,
NestedState,
};
pub use file::{FileReader, RowGroupReader};
pub use row_group::*;
pub use schema::{infer_schema, FileMetaData};
Expand Down
14 changes: 9 additions & 5 deletions src/io/parquet/write/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ use crate::error::{Error, Result};
use crate::types::days_ms;
use crate::types::NativeType;

pub use nested::write_rep_and_def;
pub use pages::{to_leaves, to_nested, to_parquet_leaves};
use parquet2::schema::types::PrimitiveType as ParquetPrimitiveType;
pub use parquet2::{
compression::{BrotliLevel, CompressionOptions, GzipLevel, ZstdLevel},
Expand All @@ -46,6 +48,7 @@ pub use parquet2::{
},
FallibleStreamingIterator,
};
pub use utils::write_def_levels;

/// Currently supported options to write to parquet
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
Expand All @@ -70,7 +73,7 @@ pub use pages::array_to_columns;
pub use pages::Nested;

/// returns offset and length to slice the leaf values
pub(self) fn slice_nested_leaf(nested: &[Nested]) -> (usize, usize) {
pub fn slice_nested_leaf(nested: &[Nested]) -> (usize, usize) {
// find the deepest recursive dremel structure as that one determines how many values we must
// take
let mut out = (0, 0);
Expand Down Expand Up @@ -154,7 +157,8 @@ pub fn can_encode(data_type: &DataType, encoding: Encoding) -> bool {
)
}

fn slice_parquet_array<'a>(
/// Slices the [`Array`] to `Box<dyn Array>` and `Vec<Nested>`.
pub fn slice_parquet_array<'a>(
array: &'a dyn Array,
nested: &'a [Nested<'a>],
offset: usize,
Expand Down Expand Up @@ -186,9 +190,9 @@ fn slice_parquet_array<'a>(
}
}

fn get_max_length(array: &dyn Array, nested: &[Nested]) -> usize {
// get the length that should be sliced.
// that is the inner nested structure that
/// Get the length of [`Array`] that should be sliced.
pub fn get_max_length(array: &dyn Array, nested: &[Nested]) -> usize {
// the inner nested structure that
// dictates how often the primitive should be repeated
for nested in nested.iter().rev() {
match nested {
Expand Down
1 change: 1 addition & 0 deletions src/io/parquet/write/nested/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ fn to_length<O: Offset>(
.map(|w| w[1].to_usize() - w[0].to_usize())
}

/// Write `repetition_levels` and `definition_levels` to buffer.
pub fn write_rep_and_def(
page_version: Version,
nested: &[Nested],
Expand Down
6 changes: 4 additions & 2 deletions src/io/parquet/write/pages.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ fn to_nested_recursive<'a>(
Ok(())
}

fn to_leaves(array: &dyn Array) -> Vec<&dyn Array> {
/// Convert [`Array`] to `Vec<&dyn Array>` leaves in DFS order.
pub fn to_leaves(array: &dyn Array) -> Vec<&dyn Array> {
let mut leaves = vec![];
to_leaves_recursive(array, &mut leaves);
leaves
Expand Down Expand Up @@ -179,7 +180,8 @@ fn to_leaves_recursive<'a>(array: &'a dyn Array, leaves: &mut Vec<&'a dyn Array>
}
}

fn to_parquet_leaves(type_: ParquetType) -> Vec<ParquetPrimitiveType> {
/// Convert `ParquetType` to `Vec<ParquetPrimitiveType>` leaves in DFS order.
pub fn to_parquet_leaves(type_: ParquetType) -> Vec<ParquetPrimitiveType> {
let mut leaves = vec![];
to_parquet_leaves_recursive(type_, &mut leaves);
leaves
Expand Down