Skip to content

Commit

Permalink
update arrow (#3054)
Browse files Browse the repository at this point in the history
* update arrow

* cleanup trusted len trait
  • Loading branch information
ritchie46 committed Apr 3, 2022
1 parent 1645f4a commit 5d9d9bd
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 92 deletions.
2 changes: 1 addition & 1 deletion polars/polars-arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ description = "Arrow interfaces for Polars DataFrame library"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "5658512460d64f8eca0d0ce0ae53aea41c35ab9d", default-features = false }
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "36b08249dd03c8b8f88f454158fcf3401c647a49", default-features = false }
# arrow = { package = "arrow2", git = "https://github.com/ritchie46/arrow2", branch = "cherry_pick", default-features = false }
# arrow = { package = "arrow2", version = "0.10", default-features = false, features = ["compute_concatenate"] }
hashbrown = "0.12"
Expand Down
41 changes: 29 additions & 12 deletions polars/polars-arrow/src/trusted_len/push_unchecked.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,28 @@ pub trait PushUnchecked<T> {
unsafe fn push_unchecked_no_len_set(&mut self, value: T);

/// Extend the array with an iterator whose length can be trusted
fn extend_trusted_len<I: IntoIterator<Item = T> + TrustedLen>(&mut self, iter: I);
fn extend_trusted_len<I: IntoIterator<Item = T, IntoIter = J>, J: TrustedLen>(
&mut self,
iter: I,
) {
unsafe { self.extend_trusted_len_unchecked(iter) }
}

/// # Safety
/// Caller must ensure the iterator's reported length is correct
unsafe fn extend_trusted_len_unchecked<I: IntoIterator<Item = T>>(&mut self, iter: I);

fn from_trusted_len_iter<I: IntoIterator<Item = T> + TrustedLen>(iter: I) -> Self;
fn from_trusted_len_iter<I: IntoIterator<Item = T, IntoIter = J>, J: TrustedLen>(
iter: I,
) -> Self
where
Self: Sized,
{
unsafe { Self::from_trusted_len_iter_unchecked(iter) }
}
/// # Safety
/// Caller must ensure the iterator's reported length is correct
unsafe fn from_trusted_len_iter_unchecked<I: IntoIterator<Item = T>>(iter: I) -> Self;
}

impl<T> PushUnchecked<T> for Vec<T> {
Expand All @@ -34,24 +53,22 @@ impl<T> PushUnchecked<T> for Vec<T> {
}

#[inline]
fn extend_trusted_len<I: IntoIterator<Item = T> + TrustedLen>(&mut self, iter: I) {
unsafe fn extend_trusted_len_unchecked<I: IntoIterator<Item = T>>(&mut self, iter: I) {
let iter = iter.into_iter();
let upper = iter.size_hint().1.expect("must have an upper bound");
self.reserve(upper);

unsafe {
let mut dst = self.as_mut_ptr().add(self.len());
for value in iter {
std::ptr::write(dst, value);
dst = dst.add(1)
}
self.set_len(self.len() + upper)
let mut dst = self.as_mut_ptr().add(self.len());
for value in iter {
std::ptr::write(dst, value);
dst = dst.add(1)
}
self.set_len(self.len() + upper)
}

fn from_trusted_len_iter<I: IntoIterator<Item = T> + TrustedLen>(iter: I) -> Self {
unsafe fn from_trusted_len_iter_unchecked<I: IntoIterator<Item = T>>(iter: I) -> Self {
let mut v = vec![];
v.extend_trusted_len(iter);
v.extend_trusted_len_unchecked(iter);
v
}
}
2 changes: 1 addition & 1 deletion polars/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ thiserror = "^1.0"
package = "arrow2"
git = "https://github.com/jorgecarleitao/arrow2"
# git = "https://github.com/ritchie46/arrow2"
rev = "5658512460d64f8eca0d0ce0ae53aea41c35ab9d"
rev = "36b08249dd03c8b8f88f454158fcf3401c647a49"
# branch = "cherry_pick"
version = "0.10"
default-features = false
Expand Down
42 changes: 9 additions & 33 deletions polars/polars-core/src/chunked_array/bitwise.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use super::*;
use crate::utils::{align_chunks_binary, combine_validities, CustomIterTools};
use arrow::bitmap::MutableBitmap;
use arrow::compute;
use std::ops::{BitAnd, BitOr, BitXor, Not};

Expand All @@ -19,16 +18,15 @@ where
.map(|(l_arr, r_arr)| {
let l_vals = l_arr.values().as_slice();
let r_vals = r_arr.values().as_slice();
let valididity = combine_validities(l_arr.validity(), r_arr.validity());
let validity = combine_validities(l_arr.validity(), r_arr.validity());

let av = l_vals
.iter()
.zip(r_vals)
.map(|(l, r)| *l & *r)
.collect_trusted::<Vec<_>>();

let arr =
PrimitiveArray::from_data(T::get_dtype().to_arrow(), av.into(), valididity);
let arr = PrimitiveArray::from_data(T::get_dtype().to_arrow(), av.into(), validity);
Arc::new(arr) as ArrayRef
})
.collect::<Vec<_>>();
Expand All @@ -52,16 +50,15 @@ where
.map(|(l_arr, r_arr)| {
let l_vals = l_arr.values().as_slice();
let r_vals = r_arr.values().as_slice();
let valididity = combine_validities(l_arr.validity(), r_arr.validity());
let validity = combine_validities(l_arr.validity(), r_arr.validity());

let av = l_vals
.iter()
.zip(r_vals)
.map(|(l, r)| *l | *r)
.collect_trusted::<Vec<_>>();

let arr =
PrimitiveArray::from_data(T::get_dtype().to_arrow(), av.into(), valididity);
let arr = PrimitiveArray::from_data(T::get_dtype().to_arrow(), av.into(), validity);
Arc::new(arr) as ArrayRef
})
.collect::<Vec<_>>();
Expand All @@ -85,16 +82,15 @@ where
.map(|(l_arr, r_arr)| {
let l_vals = l_arr.values().as_slice();
let r_vals = r_arr.values().as_slice();
let valididity = combine_validities(l_arr.validity(), r_arr.validity());
let validity = combine_validities(l_arr.validity(), r_arr.validity());

let av = l_vals
.iter()
.zip(r_vals)
.map(|(l, r)| l.bitxor(*r))
.collect_trusted::<Vec<_>>();

let arr =
PrimitiveArray::from_data(T::get_dtype().to_arrow(), av.into(), valididity);
let arr = PrimitiveArray::from_data(T::get_dtype().to_arrow(), av.into(), validity);
Arc::new(arr) as ArrayRef
})
.collect::<Vec<_>>();
Expand Down Expand Up @@ -191,20 +187,10 @@ impl BitXor for &BooleanChunked {
.downcast_iter()
.zip(r.downcast_iter())
.map(|(l_arr, r_arr)| {
let valididity = combine_validities(l_arr.validity(), r_arr.validity());
let validity = combine_validities(l_arr.validity(), r_arr.validity());
let values = l_arr.values() ^ r_arr.values();

let mut vals = MutableBitmap::with_capacity(l_arr.len());

let iter = l_arr
.values_iter()
.zip(r_arr.values_iter())
.map(|(l, r)| l.bitxor(r));

// Safety:
// length can be trusted
unsafe { vals.extend_from_trusted_len_iter_unchecked(iter) };

let arr = BooleanArray::from_data_default(vals.into(), valididity);
let arr = BooleanArray::from_data_default(values, validity);
Arc::new(arr) as ArrayRef
})
.collect::<Vec<_>>();
Expand Down Expand Up @@ -251,16 +237,6 @@ impl BitAnd for &BooleanChunked {
.downcast_iter()
.zip(rhs.downcast_iter())
.map(|(lhs, rhs)| {
// early return from all `false` paths
if lhs.null_count() == 0 && rhs.null_count() == 0 {
if lhs.values().null_count() == lhs.len() {
return Arc::new(lhs.clone()) as ArrayRef;
}
if rhs.values().null_count() == rhs.len() {
return Arc::new(rhs.clone()) as ArrayRef;
}
}

Arc::new(compute::boolean_kleene::and(lhs, rhs).expect("should be same size"))
as ArrayRef
})
Expand Down
5 changes: 2 additions & 3 deletions polars/polars-core/src/chunked_array/trusted_len.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@ use crate::chunked_array::upstream_traits::PolarsAsRef;
use crate::prelude::*;
use crate::utils::{CustomIterTools, FromTrustedLenIterator, NoNull};
use arrow::bitmap::MutableBitmap;
use arrow::buffer::Buffer;
use polars_arrow::bit_util::unset_bit_raw;
use polars_arrow::trusted_len::FromIteratorReversed;
use polars_arrow::trusted_len::{FromIteratorReversed, PushUnchecked};
use std::borrow::Borrow;

impl<T> FromTrustedLenIterator<Option<T::Native>> for ChunkedArray<T>
Expand All @@ -30,7 +29,7 @@ where
// know we don't have null values.
fn from_iter_trusted_length<I: IntoIterator<Item = T::Native>>(iter: I) -> Self {
let iter = iter.into_iter();
let values = unsafe { Buffer::from_trusted_len_iter_unchecked(iter) };
let values = unsafe { Vec::from_trusted_len_iter_unchecked(iter) }.into();
let arr = PrimitiveArray::from_data(T::get_dtype().to_arrow(), values, None);

NoNull::new(ChunkedArray::from_chunks("", vec![Arc::new(arr)]))
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-io/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ private = ["polars-time/private"]
[dependencies]
ahash = "0.7"
anyhow = "1.0"
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "5658512460d64f8eca0d0ce0ae53aea41c35ab9d", default-features = false }
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "36b08249dd03c8b8f88f454158fcf3401c647a49", default-features = false }
# arrow = { package = "arrow2", git = "https://github.com/ritchie46/arrow2", branch = "cherry_pick", default-features = false }
# arrow = { package = "arrow2", version = "0.10", default-features = false }
csv-core = { version = "0.1.10", optional = true }
Expand Down
44 changes: 3 additions & 41 deletions py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 5d9d9bd

Please sign in to comment.