Skip to content

Commit

Permalink
fix flaw in ziP_with kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 10, 2020
1 parent d2f2bec commit b829df2
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 34 deletions.
46 changes: 33 additions & 13 deletions polars/src/chunked_array/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,18 +160,28 @@ where
fn set_null_bits(
mut builder: ArrayDataBuilder,
null_bit_buffer: Option<Buffer>,
null_count: usize,
null_count: Option<usize>,
len: usize,
) -> ArrayDataBuilder {
if null_count > 0 {
let null_bit_buffer =
null_bit_buffer.expect("implementation error. Should not be None if null_count > 0");
debug_assert!(null_count == len - bit_util::count_set_bits(null_bit_buffer.data()));
builder = builder
.null_count(null_count)
.null_bit_buffer(null_bit_buffer);
match null_count {
Some(null_count) => {
if null_count > 0 {
let null_bit_buffer = null_bit_buffer
.expect("implementation error. Should not be None if null_count > 0");
debug_assert!(null_count == len - bit_util::count_set_bits(null_bit_buffer.data()));
builder = builder
.null_count(null_count)
.null_bit_buffer(null_bit_buffer);
}
builder
}
None => match null_bit_buffer {
None => builder.null_count(0),
Some(buffer) => builder
.null_count(bit_util::count_set_bits(buffer.data()))
.null_bit_buffer(buffer),
},
}
builder
}

/// Take an existing slice and a null bitmap and construct an arrow array.
Expand All @@ -191,7 +201,7 @@ where
.len(len)
.add_buffer(Buffer::from(values.to_byte_slice()));

let builder = set_null_bits(builder, null_bit_buffer, null_count, len);
let builder = set_null_bits(builder, null_bit_buffer, Some(null_count), len);
let data = builder.build();
PrimitiveArray::<T>::from(data)
}
Expand Down Expand Up @@ -219,7 +229,7 @@ where
let mut chunks = vec![];

for (values, (null_count, opt_buffer)) in iter {
let arr = aligned_vec_to_primitive_array::<T>(values, opt_buffer, null_count);
let arr = aligned_vec_to_primitive_array::<T>(values, opt_buffer, Some(null_count));
chunks.push(Arc::new(arr) as ArrayRef)
}
ChunkedArray::new_from_chunks("from_iter", chunks)
Expand All @@ -244,7 +254,7 @@ fn round_upto_power_of_2(num: usize, factor: usize) -> usize {
pub fn aligned_vec_to_primitive_array<T: ArrowPrimitiveType>(
values: AlignedVec<T::Native>,
null_bit_buffer: Option<Buffer>,
null_count: usize,
null_count: Option<usize>,
) -> PrimitiveArray<T> {
let values = unsafe { values.into_inner() };
let vec_len = values.len();
Expand Down Expand Up @@ -323,6 +333,16 @@ impl<T> AlignedVec<T> {
}
}

pub fn reserve(&mut self, additional: usize) {
let _me = ManuallyDrop::new(mem::take(&mut self.inner));
let ptr = self.as_mut_ptr() as *mut u8;
let capacity = self.capacity + std::mem::size_of::<T>() * additional;
let ptr = unsafe { memory::reallocate(ptr, self.capacity, capacity) as *mut T };
let v = unsafe { Vec::from_raw_parts(ptr, 0, capacity) };
self.inner = v;
self.capacity = capacity;
}

pub fn len(&self) -> usize {
self.inner.len()
}
Expand Down Expand Up @@ -674,7 +694,7 @@ mod test {

let ptr = v.as_ptr();
assert_eq!((ptr as usize) % 64, 0);
let a = aligned_vec_to_primitive_array::<Int32Type>(v, None, 0);
let a = aligned_vec_to_primitive_array::<Int32Type>(v, None, Some(0));
assert_eq!(a.value_slice(0, 2), &[1, 2])
}

Expand Down
25 changes: 8 additions & 17 deletions polars/src/chunked_array/kernels.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::chunked_array::builder::aligned_vec_to_primitive_array;
use crate::prelude::*;
use arrow::array::{Array, ArrayData, ArrayRef, BooleanArray, PrimitiveArray};
use arrow::array::{Array, ArrayRef, BooleanArray, PrimitiveArray};
use arrow::bitmap::Bitmap;
use arrow::buffer::Buffer;
use arrow::datatypes::ArrowNumericType;
use arrow::error::Result as ArrowResult;
use std::ops::BitOr;
Expand Down Expand Up @@ -59,27 +59,18 @@ where
let take_a = mask.value(i);
if take_a {
unsafe {
values.push(vals_a.get_unchecked(i));
values.push(*vals_a.get_unchecked(i));
}
} else {
unsafe {
values.push(vals_b.get_unchecked(i));
values.push(*vals_b.get_unchecked(i));
}
}
}

// give ownership to apache arrow without copying
let (ptr, len, cap) = values.into_raw_parts();

let buf = unsafe { Buffer::from_raw_parts(ptr as *const u8, len, cap) };
let data = ArrayData::new(
T::get_data_type(),
a.len(),
None,
Ok(Arc::new(aligned_vec_to_primitive_array::<T>(
values,
null_bit_buffer,
a.offset(),
vec![buf],
vec![],
);
Ok(Arc::new(PrimitiveArray::<T>::from(Arc::new(data))))
None,
)))
}
6 changes: 4 additions & 2 deletions polars/src/chunked_array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,7 @@ where
{
/// Create a new ChunkedArray by taking ownershipt of the AlignedVec. This operation is zero copy.
pub fn new_from_aligned_vec(name: &str, v: AlignedVec<T::Native>) -> Self {
let arr = aligned_vec_to_primitive_array::<T>(v, None, 0);
let arr = aligned_vec_to_primitive_array::<T>(v, None, Some(0));
Self::new_from_chunks(name, vec![Arc::new(arr)])
}

Expand Down Expand Up @@ -616,7 +616,9 @@ where
) -> Self {
let len = values.len();
let arr = Arc::new(aligned_vec_to_primitive_array::<T>(
values, buffer, null_count,
values,
buffer,
Some(null_count),
));
ChunkedArray {
field: Arc::new(Field::new(name, T::get_data_type(), true)),
Expand Down
5 changes: 3 additions & 2 deletions polars/src/series/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::prelude::*;
use crate::utils::Xob;
use std::iter::FromIterator;

macro_rules! from_iterator {
Expand All @@ -12,8 +13,8 @@ macro_rules! from_iterator {

impl FromIterator<$native> for Series {
fn from_iter<I: IntoIterator<Item = $native>>(iter: I) -> Self {
let ca = iter.into_iter().map(|v| Some(v)).collect();
Series::$variant(ca)
let ca: Xob<ChunkedArray<_>> = iter.into_iter().collect();
Series::$variant(ca.into_inner())
}
}

Expand Down

0 comments on commit b829df2

Please sign in to comment.