-
-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
restructure builder module and add 'shrink_to_fit'
- Loading branch information
Showing
5 changed files
with
506 additions
and
479 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
use super::*; | ||
|
||
/// Builder that collects optional `bool` values and turns them into a
/// `BooleanChunked` holding a single chunk.
pub struct BooleanChunkedBuilder { | ||
// Growable buffer receiving every pushed `Some(bool)` / `None`.
array_builder: MutableBooleanArray, | ||
// Field (name + `DataType::Boolean`) attached to the finished array.
field: Field, | ||
} | ||
|
||
impl ChunkedBuilder<bool, BooleanType> for BooleanChunkedBuilder { | ||
/// Appends a value of type `T` into the builder | ||
#[inline] | ||
fn append_value(&mut self, v: bool) { | ||
self.array_builder.push(Some(v)); | ||
} | ||
|
||
/// Appends a null slot into the builder | ||
#[inline] | ||
fn append_null(&mut self) { | ||
self.array_builder.push(None); | ||
} | ||
|
||
fn finish(self) -> BooleanChunked { | ||
let arr: BooleanArray = self.array_builder.into(); | ||
let arr = Arc::new(arr) as ArrayRef; | ||
|
||
ChunkedArray { | ||
field: Arc::new(self.field), | ||
chunks: vec![arr], | ||
phantom: PhantomData, | ||
categorical_map: None, | ||
..Default::default() | ||
} | ||
} | ||
|
||
fn shrink_to_fit(&mut self) { | ||
self.array_builder.shrink_to_fit() | ||
} | ||
} | ||
|
||
impl BooleanChunkedBuilder { | ||
pub fn new(name: &str, capacity: usize) -> Self { | ||
BooleanChunkedBuilder { | ||
array_builder: MutableBooleanArray::with_capacity(capacity), | ||
field: Field::new(name, DataType::Boolean), | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,312 @@ | ||
use super::*; | ||
|
||
/// Common interface of the list builders in this module; `get_list_builder`
/// returns it as a `Box<dyn ListBuilderTrait>`.
pub trait ListBuilderTrait { | ||
/// Appends `opt_s` as one list entry. The implementations in this file
/// forward `Some` to `append_series` and `None` to `append_null`.
fn append_opt_series(&mut self, opt_s: Option<&Series>); | ||
/// Appends the values of `s` as one (non-null) list entry.
fn append_series(&mut self, s: &Series); | ||
/// Appends a null list entry.
fn append_null(&mut self); | ||
/// Builds a `ListChunked` from what has been appended; takes `&mut self`,
/// so the builder itself is not consumed.
fn finish(&mut self) -> ListChunked; | ||
} | ||
|
||
/// List builder whose inner values are primitives of type `T::Native`.
pub struct ListPrimitiveChunkedBuilder<T> | ||
where | ||
T: PolarsNumericType, | ||
{ | ||
// Underlying list-array builder; public, so callers can reach it directly.
pub builder: LargePrimitiveBuilder<T::Native>, | ||
// Field (name + `List(inner dtype)`) attached to the finished array.
field: Field, | ||
// Starts `true`; cleared by `append_null` and by the append methods when they
// receive an empty sublist. If still set in `finish`, `set_fast_explode()` is
// called on the resulting `ListChunked`.
fast_explode: bool, | ||
} | ||
|
||
// Shared body of `ListBuilderTrait::finish` for the list builders in this
// module. Expects `$self` to expose `builder`, `field` and `fast_explode`.
macro_rules! finish_list_builder {
    ($self:ident) => {{
        // The builder's contents become the single chunk of the result.
        let chunk = $self.builder.as_arc();
        let field = Arc::new($self.field.clone());
        let mut list_ca = ListChunked {
            field,
            chunks: vec![chunk],
            phantom: PhantomData,
            categorical_map: None,
            ..Default::default()
        };
        // Only advertise the fast-explode property if no append cleared it.
        if $self.fast_explode {
            list_ca.set_fast_explode();
        }
        list_ca
    }};
}
|
||
impl<T> ListPrimitiveChunkedBuilder<T> | ||
where | ||
T: PolarsNumericType, | ||
{ | ||
pub fn new(name: &str, capacity: usize, values_capacity: usize) -> Self { | ||
let values = MutablePrimitiveArray::<T::Native>::with_capacity(values_capacity); | ||
let builder = LargePrimitiveBuilder::<T::Native>::new_with_capacity(values, capacity); | ||
let field = Field::new(name, DataType::List(Box::new(T::get_dtype()))); | ||
|
||
Self { | ||
builder, | ||
field, | ||
fast_explode: true, | ||
} | ||
} | ||
|
||
pub fn append_slice(&mut self, opt_v: Option<&[T::Native]>) { | ||
match opt_v { | ||
Some(items) => { | ||
let values = self.builder.mut_values(); | ||
values.extend_from_slice(items); | ||
self.builder.try_push_valid().unwrap(); | ||
|
||
if items.is_empty() { | ||
self.fast_explode = false; | ||
} | ||
} | ||
None => { | ||
self.builder.push_null(); | ||
} | ||
} | ||
} | ||
/// Appends from an iterator over values | ||
#[inline] | ||
pub fn append_iter_values<I: Iterator<Item = T::Native> + TrustedLen>(&mut self, iter: I) { | ||
let values = self.builder.mut_values(); | ||
|
||
if iter.size_hint().0 == 0 { | ||
self.fast_explode = false; | ||
} | ||
// Safety | ||
// trusted len, trust the type system | ||
unsafe { values.extend_trusted_len_values_unchecked(iter) }; | ||
self.builder.try_push_valid().unwrap(); | ||
} | ||
|
||
/// Appends from an iterator over values | ||
#[inline] | ||
pub fn append_iter<I: Iterator<Item = Option<T::Native>> + TrustedLen>(&mut self, iter: I) { | ||
let values = self.builder.mut_values(); | ||
|
||
if iter.size_hint().0 == 0 { | ||
self.fast_explode = false; | ||
} | ||
// Safety | ||
// trusted len, trust the type system | ||
unsafe { values.extend_trusted_len_unchecked(iter) }; | ||
self.builder.try_push_valid().unwrap(); | ||
} | ||
} | ||
|
||
impl<T> ListBuilderTrait for ListPrimitiveChunkedBuilder<T> | ||
where | ||
T: PolarsNumericType, | ||
{ | ||
#[inline] | ||
fn append_opt_series(&mut self, opt_s: Option<&Series>) { | ||
match opt_s { | ||
Some(s) => { | ||
self.append_series(s); | ||
} | ||
None => self.append_null(), | ||
} | ||
} | ||
|
||
#[inline] | ||
fn append_null(&mut self) { | ||
self.fast_explode = false; | ||
self.builder.push_null(); | ||
} | ||
|
||
#[inline] | ||
fn append_series(&mut self, s: &Series) { | ||
if s.is_empty() { | ||
self.fast_explode = false; | ||
} | ||
let arrays = s.chunks(); | ||
let values = self.builder.mut_values(); | ||
|
||
arrays.iter().for_each(|x| { | ||
let arr = x | ||
.as_any() | ||
.downcast_ref::<PrimitiveArray<T::Native>>() | ||
.unwrap(); | ||
|
||
if arr.null_count() == 0 { | ||
values.extend_from_slice(arr.values().as_slice()) | ||
} else { | ||
// Safety: | ||
// Arrow arrays are trusted length iterators. | ||
unsafe { values.extend_trusted_len_unchecked(arr.into_iter()) } | ||
} | ||
}); | ||
self.builder.try_push_valid().unwrap(); | ||
} | ||
|
||
fn finish(&mut self) -> ListChunked { | ||
finish_list_builder!(self) | ||
} | ||
} | ||
|
||
// Mutable list-array builders parameterised with `i64` offsets, one alias per
// inner value builder used by the list builders in this module.
type LargePrimitiveBuilder<T> = MutableListArray<i64, MutablePrimitiveArray<T>>; | ||
type LargeListUtf8Builder = MutableListArray<i64, MutableUtf8Array<i64>>; | ||
type LargeListBooleanBuilder = MutableListArray<i64, MutableBooleanArray>; | ||
|
||
/// List builder whose inner values are UTF-8 strings.
pub struct ListUtf8ChunkedBuilder { | ||
// Underlying list-array builder over a mutable UTF-8 value array.
builder: LargeListUtf8Builder, | ||
// Field (name + `List(Utf8)`) attached to the finished array.
field: Field, | ||
// Starts `true`; cleared by `append_null` and when an empty sublist is
// appended. If still set in `finish`, `set_fast_explode()` is called.
fast_explode: bool, | ||
} | ||
|
||
impl ListUtf8ChunkedBuilder { | ||
pub fn new(name: &str, capacity: usize, values_capacity: usize) -> Self { | ||
let values = MutableUtf8Array::<i64>::with_capacity(values_capacity); | ||
let builder = LargeListUtf8Builder::new_with_capacity(values, capacity); | ||
let field = Field::new(name, DataType::List(Box::new(DataType::Utf8))); | ||
|
||
ListUtf8ChunkedBuilder { | ||
builder, | ||
field, | ||
fast_explode: true, | ||
} | ||
} | ||
|
||
#[inline] | ||
pub fn append_iter<'a, I: Iterator<Item = Option<&'a str>> + TrustedLen>(&mut self, iter: I) { | ||
let values = self.builder.mut_values(); | ||
|
||
if iter.size_hint().0 == 0 { | ||
self.fast_explode = false; | ||
} | ||
// Safety | ||
// trusted len, trust the type system | ||
unsafe { values.extend_trusted_len_unchecked(iter) }; | ||
self.builder.try_push_valid().unwrap(); | ||
} | ||
} | ||
|
||
impl ListBuilderTrait for ListUtf8ChunkedBuilder { | ||
fn append_opt_series(&mut self, opt_s: Option<&Series>) { | ||
match opt_s { | ||
Some(s) => self.append_series(s), | ||
None => { | ||
self.append_null(); | ||
} | ||
} | ||
} | ||
|
||
#[inline] | ||
fn append_null(&mut self) { | ||
self.fast_explode = false; | ||
self.builder.push_null(); | ||
} | ||
|
||
#[inline] | ||
fn append_series(&mut self, s: &Series) { | ||
if s.is_empty() { | ||
self.fast_explode = false; | ||
} | ||
let ca = s.utf8().unwrap(); | ||
let value_builder = self.builder.mut_values(); | ||
value_builder.try_extend(ca).unwrap(); | ||
self.builder.try_push_valid().unwrap(); | ||
} | ||
|
||
fn finish(&mut self) -> ListChunked { | ||
finish_list_builder!(self) | ||
} | ||
} | ||
|
||
/// List builder whose inner values are booleans.
pub struct ListBooleanChunkedBuilder { | ||
// Underlying list-array builder over a mutable boolean value array.
builder: LargeListBooleanBuilder, | ||
// Field (name + `List(Boolean)`) attached to the finished array.
field: Field, | ||
// Starts `true`; cleared by `append_null` and when an empty sublist is
// appended. If still set in `finish`, `set_fast_explode()` is called.
fast_explode: bool, | ||
} | ||
|
||
impl ListBooleanChunkedBuilder { | ||
pub fn new(name: &str, capacity: usize, values_capacity: usize) -> Self { | ||
let values = MutableBooleanArray::with_capacity(values_capacity); | ||
let builder = LargeListBooleanBuilder::new_with_capacity(values, capacity); | ||
let field = Field::new(name, DataType::List(Box::new(DataType::Boolean))); | ||
|
||
Self { | ||
builder, | ||
field, | ||
fast_explode: true, | ||
} | ||
} | ||
|
||
#[inline] | ||
pub fn append_iter<I: Iterator<Item = Option<bool>> + TrustedLen>(&mut self, iter: I) { | ||
let values = self.builder.mut_values(); | ||
|
||
if iter.size_hint().0 == 0 { | ||
self.fast_explode = false; | ||
} | ||
// Safety | ||
// trusted len, trust the type system | ||
unsafe { values.extend_trusted_len_unchecked(iter) }; | ||
self.builder.try_push_valid().unwrap(); | ||
} | ||
} | ||
|
||
impl ListBuilderTrait for ListBooleanChunkedBuilder { | ||
fn append_opt_series(&mut self, opt_s: Option<&Series>) { | ||
match opt_s { | ||
Some(s) => self.append_series(s), | ||
None => { | ||
self.append_null(); | ||
} | ||
} | ||
} | ||
|
||
#[inline] | ||
fn append_null(&mut self) { | ||
self.fast_explode = false; | ||
self.builder.push_null(); | ||
} | ||
|
||
#[inline] | ||
fn append_series(&mut self, s: &Series) { | ||
let ca = s.bool().unwrap(); | ||
if ca.is_empty() { | ||
self.fast_explode = false; | ||
} | ||
let value_builder = self.builder.mut_values(); | ||
value_builder.extend(ca); | ||
self.builder.try_push_valid().unwrap(); | ||
} | ||
|
||
fn finish(&mut self) -> ListChunked { | ||
finish_list_builder!(self) | ||
} | ||
} | ||
|
||
pub fn get_list_builder( | ||
dt: &DataType, | ||
value_capacity: usize, | ||
list_capacity: usize, | ||
name: &str, | ||
) -> Box<dyn ListBuilderTrait> { | ||
macro_rules! get_primitive_builder { | ||
($type:ty) => {{ | ||
let builder = | ||
ListPrimitiveChunkedBuilder::<$type>::new(&name, list_capacity, value_capacity); | ||
Box::new(builder) | ||
}}; | ||
} | ||
macro_rules! get_bool_builder { | ||
() => {{ | ||
let builder = ListBooleanChunkedBuilder::new(&name, list_capacity, value_capacity); | ||
Box::new(builder) | ||
}}; | ||
} | ||
macro_rules! get_utf8_builder { | ||
() => {{ | ||
let builder = ListUtf8ChunkedBuilder::new(&name, list_capacity, 5 * value_capacity); | ||
Box::new(builder) | ||
}}; | ||
} | ||
match_arrow_data_type_apply_macro!( | ||
dt, | ||
get_primitive_builder, | ||
get_utf8_builder, | ||
get_bool_builder | ||
) | ||
} |
Oops, something went wrong.