Skip to content

Commit

Permalink
Update arrow and prepare for mutable arithmetics (#3695)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jun 15, 2022
1 parent 0307a57 commit 19bbcd5
Show file tree
Hide file tree
Showing 117 changed files with 512 additions and 861 deletions.
5 changes: 2 additions & 3 deletions examples/python_rust_compiled_function/src/ffi.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use arrow::{array::ArrayRef, ffi};
use arrow::ffi;
use polars::prelude::*;
use polars_arrow::export::arrow;
use pyo3::exceptions::PyValueError;
Expand All @@ -24,8 +24,7 @@ fn array_to_rust(arrow_array: &PyAny) -> PyResult<ArrayRef> {

unsafe {
let field = ffi::import_field_from_c(schema.as_ref()).unwrap();
let array = ffi::import_array_from_c(array, field.data_type).unwrap();
Ok(array.into())
Ok(ffi::import_array_from_c(array, field.data_type).unwrap())
}
}

Expand Down
4 changes: 2 additions & 2 deletions polars/polars-arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ description = "Arrow interfaces for Polars DataFrame library"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "6919cc3281c3ee7dd3e973a7b303c67277458f20", features = ["compute_concatenate"], default-features = false }
arrow = { package = "arrow2", git = "https://github.com/jorgecarleitao/arrow2", rev = "39db6fb7514364bfea08d594793b23e1ed5a7def", features = ["compute_concatenate"], default-features = false }
# arrow = { package = "arrow2", path = "../../../arrow2", features = ["compute_concatenate"], default-features = false }
# arrow = { package = "arrow2", git = "https://github.com/ritchie46/arrow2", branch = "improve_mutable", features = ["compute_concatenate"], default-features = false }
# arrow = { package = "arrow2", git = "https://github.com/ritchie46/arrow2", branch = "count_shared", features = ["compute_concatenate"], default-features = false }
# arrow = { package = "arrow2", version = "0.12", default-features = false, features = ["compute_concatenate"] }
hashbrown = "0.12"
num = "^0.4"
Expand Down
19 changes: 12 additions & 7 deletions polars/polars-arrow/src/array/list.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use arrow::array::{Array, ArrayRef, ListArray};
use crate::prelude::*;
use arrow::array::{Array, ListArray};
use arrow::bitmap::MutableBitmap;
use arrow::compute::concatenate;
use arrow::datatypes::DataType;
Expand Down Expand Up @@ -87,11 +88,15 @@ impl<'a> AnonymousBuilder<'a> {
let values = concatenate::concatenate(&self.arrays)?;

let dtype = ListArray::<i64>::default_datatype(inner_dtype.clone());
Ok(ListArray::<i64>::from_data(
dtype,
self.offsets.into(),
values.into(),
self.validity.map(|validity| validity.into()),
))
// Safety:
// offsets are monotonically increasing
unsafe {
Ok(ListArray::<i64>::new_unchecked(
dtype,
self.offsets.into(),
values,
self.validity.map(|validity| validity.into()),
))
}
}
}
22 changes: 14 additions & 8 deletions polars/polars-arrow/src/array/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use arrow::array::{Array, ArrayRef, BooleanArray, ListArray, PrimitiveArray, Utf8Array};
use crate::prelude::*;
use arrow::array::{Array, BooleanArray, ListArray, PrimitiveArray, Utf8Array};
use arrow::bitmap::MutableBitmap;
use arrow::datatypes::DataType;
use arrow::types::NativeType;
use std::sync::Arc;

use crate::utils::CustomIterTools;

Expand Down Expand Up @@ -94,10 +94,12 @@ pub trait ListFromIter {

let values: PrimitiveArray<T> = iter_to_values!(iterator, validity, offsets, length_so_far);

ListArray::from_data(
// Safety:
// offsets are monotonically increasing
ListArray::new_unchecked(
ListArray::<i64>::default_datatype(data_type.clone()),
offsets.into(),
Arc::new(values.to(data_type)),
Box::new(values.to(data_type)),
Some(validity.into()),
)
}
Expand All @@ -122,10 +124,12 @@ pub trait ListFromIter {

let values: BooleanArray = iter_to_values!(iterator, validity, offsets, length_so_far);

ListArray::from_data(
// Safety:
// Offsets are monotonically increasing.
ListArray::new_unchecked(
ListArray::<i64>::default_datatype(DataType::Boolean),
offsets.into(),
Arc::new(values),
Box::new(values),
Some(validity.into()),
)
}
Expand Down Expand Up @@ -166,10 +170,12 @@ pub trait ListFromIter {
.trust_my_length(n_elements)
.collect();

ListArray::from_data(
// Safety:
// offsets are monotonically increasing
ListArray::new_unchecked(
ListArray::<i64>::default_datatype(DataType::LargeUtf8),
offsets.into(),
Arc::new(values),
Box::new(values),
Some(validity.into()),
)
}
Expand Down
3 changes: 2 additions & 1 deletion polars/polars-arrow/src/conversion.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use arrow::array::{ArrayRef, StructArray};
use crate::prelude::*;
use arrow::array::StructArray;
use arrow::chunk::Chunk;
use arrow::datatypes::{DataType, Field};

Expand Down
4 changes: 4 additions & 0 deletions polars/polars-arrow/src/data_types.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use arrow::array::Array;

/// # Safety
/// unsafe code downstream relies on the correct is_float call
pub unsafe trait IsFloat: private::Sealed {
Expand Down Expand Up @@ -57,3 +59,5 @@ macro_rules! impl_is_float {

impl_is_float!(f32);
impl_is_float!(f64);

pub type ArrayRef = Box<dyn Array>;
7 changes: 3 additions & 4 deletions polars/polars-arrow/src/kernels/concatenate.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
use crate::prelude::*;
use arrow::array::growable::make_growable;
use arrow::array::{Array, ArrayRef};
use arrow::error::{Error as ArrowError, Result};
use std::sync::Arc;

/// Concatenate multiple [Array] of the same type into a single [`Array`].
/// This does not check the arrays types.
pub fn concatenate_owned_unchecked(arrays: &[ArrayRef]) -> Result<Arc<dyn Array>> {
pub fn concatenate_owned_unchecked(arrays: &[ArrayRef]) -> Result<ArrayRef> {
if arrays.is_empty() {
return Err(ArrowError::InvalidArgumentError(
"concat requires input of at least one array".to_string(),
Expand All @@ -26,5 +25,5 @@ pub fn concatenate_owned_unchecked(arrays: &[ArrayRef]) -> Result<Arc<dyn Array>
mutable.extend(i, 0, *len)
}

Ok(mutable.as_arc())
Ok(mutable.as_box())
}
12 changes: 6 additions & 6 deletions polars/polars-arrow/src/kernels/float.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
use crate::array::default_arrays::FromData;
use arrow::array::{ArrayRef, BooleanArray, PrimitiveArray};
use crate::prelude::*;
use arrow::array::{BooleanArray, PrimitiveArray};
use arrow::bitmap::Bitmap;
use arrow::types::NativeType;
use num::Float;
use std::sync::Arc;

pub fn is_nan<T>(arr: &PrimitiveArray<T>) -> ArrayRef
where
T: NativeType + Float,
{
let values = Bitmap::from_trusted_len_iter(arr.values().iter().map(|v| v.is_nan()));

Arc::new(BooleanArray::from_data_default(
Box::new(BooleanArray::from_data_default(
values,
arr.validity().cloned(),
))
Expand All @@ -23,7 +23,7 @@ where
{
let values = Bitmap::from_trusted_len_iter(arr.values().iter().map(|v| !v.is_nan()));

Arc::new(BooleanArray::from_data_default(
Box::new(BooleanArray::from_data_default(
values,
arr.validity().cloned(),
))
Expand All @@ -35,7 +35,7 @@ where
{
let values = Bitmap::from_trusted_len_iter(arr.values().iter().map(|v| v.is_finite()));

Arc::new(BooleanArray::from_data_default(
Box::new(BooleanArray::from_data_default(
values,
arr.validity().cloned(),
))
Expand All @@ -47,7 +47,7 @@ where
{
let values = Bitmap::from_trusted_len_iter(arr.values().iter().map(|v| v.is_infinite()));

Arc::new(BooleanArray::from_data_default(
Box::new(BooleanArray::from_data_default(
values,
arr.validity().cloned(),
))
Expand Down
10 changes: 6 additions & 4 deletions polars/polars-arrow/src/kernels/list.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use crate::index::*;
use crate::kernels::take::take_unchecked;
use crate::prelude::*;
use crate::trusted_len::PushUnchecked;
use crate::utils::CustomIterTools;
use arrow::array::{ArrayRef, ListArray};
use arrow::array::ListArray;
use arrow::buffer::Buffer;

/// Get the indices that would result in a get operation on the lists values.
Expand Down Expand Up @@ -81,7 +81,9 @@ pub fn array_to_unit_list(array: ArrayRef) -> ListArray<i64> {

let offsets: Buffer<i64> = offsets.into();
let dtype = ListArray::<i64>::default_datatype(array.data_type().clone());
ListArray::<i64>::from_data(dtype, offsets, array, None)
// Safety:
// offsets are monotonically increasing
unsafe { ListArray::<i64>::new_unchecked(dtype, offsets, array, None) }
}

#[cfg(test)]
Expand All @@ -97,7 +99,7 @@ mod test {
let offsets = Buffer::from(vec![0i64, 3, 5, 6]);

let dtype = ListArray::<i64>::default_datatype(DataType::Int32);
ListArray::<i64>::from_data(dtype, offsets, Arc::new(values), None)
ListArray::<i64>::from_data(dtype, offsets, Box::new(values), None)
}

#[test]
Expand Down
5 changes: 2 additions & 3 deletions polars/polars-arrow/src/kernels/rolling/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,15 @@ pub mod nulls;
mod window;

use crate::data_types::IsFloat;
use crate::prelude::QuantileInterpolOptions;
use crate::prelude::*;
use crate::utils::CustomIterTools;
use arrow::array::{ArrayRef, PrimitiveArray};
use arrow::array::PrimitiveArray;
use arrow::bitmap::{Bitmap, MutableBitmap};
use arrow::types::NativeType;
use num::ToPrimitive;
use num::{Bounded, Float, NumCast, One, Zero};
use std::cmp::Ordering;
use std::ops::{Add, AddAssign, Div, Mul, Sub, SubAssign};
use std::sync::Arc;
use window::*;

type Start = usize;
Expand Down
7 changes: 3 additions & 4 deletions polars/polars-arrow/src/kernels/rolling/no_nulls/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@ mod variance;

use super::*;
use crate::utils::CustomIterTools;
use arrow::array::{ArrayRef, PrimitiveArray};
use arrow::array::PrimitiveArray;
use arrow::datatypes::DataType;
use arrow::types::NativeType;
use num::{Float, NumCast};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::fmt::Debug;
use std::sync::Arc;

pub use mean::*;
pub use min_max::*;
Expand Down Expand Up @@ -56,7 +55,7 @@ where
.collect_trusted::<Vec<_>>();

let validity = create_validity(min_periods, len as usize, window_size, det_offsets_fn);
Arc::new(PrimitiveArray::from_data(
Box::new(PrimitiveArray::from_data(
T::PRIMITIVE.into(),
out.into(),
validity.map(|b| b.into()),
Expand Down Expand Up @@ -104,7 +103,7 @@ where
.collect_trusted::<Vec<T>>();

let validity = create_validity(min_periods, len as usize, window_size, det_offsets_fn);
Arc::new(PrimitiveArray::from_data(
Box::new(PrimitiveArray::from_data(
DataType::from(T::PRIMITIVE),
out.into(),
validity.map(|b| b.into()),
Expand Down
8 changes: 4 additions & 4 deletions polars/polars-arrow/src/kernels/rolling/no_nulls/quantile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ where
{
if values.is_empty() {
let out: Vec<T> = vec![];
return Arc::new(PrimitiveArray::from_data(
return Box::new(PrimitiveArray::from_data(
T::PRIMITIVE.into(),
out.into(),
None,
Expand All @@ -50,7 +50,7 @@ where
})
.collect::<PrimitiveArray<T>>();

Arc::new(out)
Box::new(out)
}

pub(crate) fn compute_quantile2<T>(
Expand Down Expand Up @@ -244,7 +244,7 @@ where
.collect_trusted::<Vec<T>>();

let validity = create_validity(min_periods, len as usize, window_size, det_offsets_fn);
Arc::new(PrimitiveArray::from_data(
Box::new(PrimitiveArray::from_data(
T::PRIMITIVE.into(),
out.into(),
validity.map(|b| b.into()),
Expand Down Expand Up @@ -284,7 +284,7 @@ where
.collect_trusted::<Vec<T>>();

let validity = create_validity(min_periods, len as usize, window_size, det_offsets_fn);
Arc::new(PrimitiveArray::from_data(
Box::new(PrimitiveArray::from_data(
T::PRIMITIVE.into(),
out.into(),
validity.map(|b| b.into()),
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-arrow/src/kernels/rolling/nulls/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ where
})
.collect_trusted::<Vec<_>>();

Arc::new(PrimitiveArray::from_data(
Box::new(PrimitiveArray::from_data(
T::PRIMITIVE.into(),
out.into(),
Some(validity.into()),
Expand Down
6 changes: 3 additions & 3 deletions polars/polars-arrow/src/kernels/rolling/nulls/quantile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ where
{
if values.is_empty() {
let out: Vec<T> = vec![];
return Arc::new(PrimitiveArray::from_data(
return Box::new(PrimitiveArray::from_data(
T::PRIMITIVE.into(),
out.into(),
None,
Expand Down Expand Up @@ -70,7 +70,7 @@ where
})
.collect_trusted::<Vec<T>>();

Arc::new(PrimitiveArray::from_data(
Box::new(PrimitiveArray::from_data(
T::PRIMITIVE.into(),
out.into(),
Some(validity.into()),
Expand Down Expand Up @@ -134,7 +134,7 @@ where
})
.collect_trusted::<Vec<T>>();

Arc::new(PrimitiveArray::from_data(
Box::new(PrimitiveArray::from_data(
T::PRIMITIVE.into(),
out.into(),
Some(validity.into()),
Expand Down
6 changes: 3 additions & 3 deletions polars/polars-arrow/src/kernels/string.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
use crate::prelude::*;
use crate::trusted_len::PushUnchecked;
use arrow::array::{ArrayRef, UInt32Array, Utf8Array};
use arrow::array::{UInt32Array, Utf8Array};
use arrow::buffer::Buffer;
use arrow::datatypes::DataType;
use std::sync::Arc;

pub fn string_lengths(array: &Utf8Array<i64>) -> ArrayRef {
let values = array.offsets().windows(2).map(|x| (x[1] - x[0]) as u32);

let values: Buffer<_> = Vec::from_trusted_len_iter(values).into();

let array = UInt32Array::from_data(DataType::UInt32, values, array.validity().cloned());
Arc::new(array)
Box::new(array)
}

0 comments on commit 19bbcd5

Please sign in to comment.