Skip to content

Commit

Permalink
compile
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed May 21, 2023
1 parent cf0d711 commit 165d66c
Show file tree
Hide file tree
Showing 16 changed files with 208 additions and 228 deletions.
6 changes: 3 additions & 3 deletions Cargo.toml
Expand Up @@ -43,10 +43,10 @@ either = "1.8"
[workspace.dependencies.arrow]
package = "arrow2"
# git = "https://github.com/jorgecarleitao/arrow2"
#git = "https://github.com/ritchie46/arrow2"
git = "https://github.com/ritchie46/arrow2"
# rev = "1491c6e8f4fd100f53c358e4f3ef1536d9e75090"
path = "../arrow2"
#branch = "polars_2023-05-19"
# path = "../arrow2"
branch = "polars_2023-05-21"
version = "0.17"
default-features = false
features = [
Expand Down
17 changes: 6 additions & 11 deletions polars/polars-arrow/src/array/fixed_size_list.rs
@@ -1,23 +1,24 @@
use arrow::array::{FixedSizeListArray};
use arrow::array::FixedSizeListArray;
use arrow::bitmap::MutableBitmap;
use arrow::datatypes::DataType;
use crate::prelude::ArrayRef;
use polars_error::PolarsResult;

use crate::kernels::concatenate::concatenate_owned_unchecked;
use crate::prelude::ArrayRef;

#[derive(Default)]
pub struct AnonymousBuilder {
arrays: Vec<ArrayRef>,
validity: Option<MutableBitmap>,
pub width: usize
pub width: usize,
}

impl<'a> AnonymousBuilder {
impl AnonymousBuilder {
pub fn new(capacity: usize, width: usize) -> Self {
Self {
arrays: Vec::with_capacity(capacity),
validity: None,
width
width,
}
}
pub fn is_empty(&self) -> bool {
Expand Down Expand Up @@ -47,12 +48,6 @@ impl<'a> AnonymousBuilder {
self.validity = Some(validity)
}

fn update_validity(&mut self) {
if let Some(validity) = &mut self.validity {
validity.push(true)
}
}

pub fn finish(self, inner_dtype: Option<&DataType>) -> PolarsResult<FixedSizeListArray> {
let values = concatenate_owned_unchecked(&self.arrays)?;
let inner_dtype = inner_dtype.unwrap_or_else(|| self.arrays[0].data_type());
Expand Down
4 changes: 2 additions & 2 deletions polars/polars-arrow/src/array/mod.rs
Expand Up @@ -10,13 +10,13 @@ use crate::prelude::*;
use crate::utils::CustomIterTools;

pub mod default_arrays;
#[cfg(feature = "dtype-fixed_size_list")]
pub mod fixed_size_list;
mod get;
pub mod list;
pub mod null;
pub mod slice;
pub mod utf8;
#[cfg(feature = "dtype-fixed_size_list")]
pub mod fixed_size_list;

pub use get::ArrowGetItem;
pub use slice::*;
Expand Down
2 changes: 2 additions & 0 deletions polars/polars-arrow/src/compute/take/bitmap.rs
Expand Up @@ -2,6 +2,8 @@ use arrow::bitmap::Bitmap;

use crate::index::IdxSize;

/// # Safety
/// No bounds checks are performed; every index in `indices` must be smaller than `values.len()`.
pub unsafe fn take_bitmap_unchecked(values: &Bitmap, indices: &[IdxSize]) -> Bitmap {
let values = indices.iter().map(|&index| {
debug_assert!((index as usize) < values.len());
Expand Down
27 changes: 27 additions & 0 deletions polars/polars-arrow/src/kernels/comparison.rs
@@ -0,0 +1,27 @@
use arrow::array::{BooleanArray, FixedSizeListArray};
use arrow::bitmap::utils::count_zeros;

use crate::utils::combine_validities_and;

/// Compares two fixed-size list arrays row by row and builds a boolean array from the result.
///
/// The flattened child values of `a` and `b` are compared element-wise for equality. For
/// every row, the number of unset bits in that row's window of the resulting mask is passed
/// to `func`; whatever `func` returns becomes the output value for the row.
///
/// The returned array is built from values only (no validity bitmap is attached here).
///
/// # Panics
/// Panics if `a.size()` differs from `b.size()`, or if combining the mask's values with its
/// validity yields `None`.
fn fixed_size_list_cmp<F>(a: &FixedSizeListArray, b: &FixedSizeListArray, func: F) -> BooleanArray
where
    F: Fn(usize) -> bool,
{
    assert_eq!(a.size(), b.size());
    let mask = arrow::compute::comparison::eq(a.values().as_ref(), b.values().as_ref());
    // NOTE(review): presumably this ANDs the comparison result with its validity so that
    // null child comparisons show up as unset bits — confirm against `combine_validities_and`.
    let mask = combine_validities_and(Some(mask.values()), mask.validity()).unwrap();
    // `offset` is the bit offset of the first mask bit inside `slice`; honour it instead of
    // requiring it to be zero.
    let (slice, offset, _len) = mask.as_slice();

    let width = a.size();
    // Row `i` owns the child slots `[i * width, (i + 1) * width)`, so its window in the
    // mask starts at bit `i * width` (not at bit `i`).
    let iter = (0..a.len()).map(|i| func(count_zeros(slice, offset + i * width, width)));
    // SAFETY: the iterator is a `Range<usize>` mapped through a closure, so it reports an
    // exact length.
    unsafe { BooleanArray::from_trusted_len_values_iter_unchecked(iter) }
}

/// Row-wise equality of two fixed-size list arrays.
///
/// Delegates to `fixed_size_list_cmp` with a predicate that yields `true` for a row
/// exactly when the count handed to it (the number of unset bits found for that row in
/// the child equality mask) is zero.
pub fn fixed_size_list_eq(a: &FixedSizeListArray, b: &FixedSizeListArray) -> BooleanArray {
    let no_mismatch = |n_unset: usize| n_unset == 0;
    fixed_size_list_cmp(a, b, no_mismatch)
}
/// Row-wise inequality of two fixed-size list arrays.
///
/// Delegates to `fixed_size_list_cmp` with a predicate that yields `true` for a row
/// exactly when the count handed to it (the number of unset bits found for that row in
/// the child equality mask) is non-zero.
pub fn fixed_size_list_neq(a: &FixedSizeListArray, b: &FixedSizeListArray) -> BooleanArray {
    let any_mismatch = |n_unset: usize| n_unset != 0;
    fixed_size_list_cmp(a, b, any_mismatch)
}
2 changes: 2 additions & 0 deletions polars/polars-arrow/src/kernels/mod.rs
Expand Up @@ -4,6 +4,8 @@ use arrow::array::BooleanArray;
use arrow::bitmap::utils::BitChunks;
#[cfg(feature = "simd")]
pub mod agg_mean;
#[cfg(feature = "dtype-fixed_size_list")]
pub mod comparison;
pub mod concatenate;
pub mod ewm;
pub mod float;
Expand Down
26 changes: 19 additions & 7 deletions polars/polars-core/src/chunked_array/builder/fixed_size_list.rs
Expand Up @@ -75,16 +75,21 @@ impl<T: NativeType> FixedSizeListBuilder for FixedSizeListNumericBuilder<T> {
pub(crate) struct AnonymousOwnedFixedSizeListBuilder {
inner: fixed_size_list::AnonymousBuilder,
name: SmartString,
inner_dtype: Option<DataType>
inner_dtype: Option<DataType>,
}

impl AnonymousOwnedFixedSizeListBuilder {
pub(crate) fn new(name: &str, width: usize, capacity: usize, inner_dtype: Option<DataType>) -> Self {
pub(crate) fn new(
name: &str,
width: usize,
capacity: usize,
inner_dtype: Option<DataType>,
) -> Self {
let inner = fixed_size_list::AnonymousBuilder::new(capacity, width);
Self {
inner,
name: name.into(),
inner_dtype
inner_dtype,
}
}
}
Expand All @@ -102,7 +107,9 @@ impl FixedSizeListBuilder for AnonymousOwnedFixedSizeListBuilder {
}

fn finish(&mut self) -> FixedSizeListChunked {
let arr = std::mem::take(&mut self.inner).finish(self.inner_dtype.as_ref().map(|dt| dt.to_arrow()).as_ref()).unwrap();
let arr = std::mem::take(&mut self.inner)
.finish(self.inner_dtype.as_ref().map(|dt| dt.to_arrow()).as_ref())
.unwrap();
unsafe { ChunkedArray::from_chunks(self.name.as_str(), vec![Box::new(arr) as ArrayRef]) }
}
}
Expand All @@ -117,10 +124,15 @@ pub(crate) fn get_fixed_size_list_builder(

let builder = if phys_dtype.is_numeric() {
with_match_physical_numeric_type!(phys_dtype, |$T| {
Box::new(FixedSizeListNumericBuilder::<$T>::new(name, width, capacity)) as Box<dyn FixedSizeListBuilder>
})
Box::new(FixedSizeListNumericBuilder::<$T>::new(name, width, capacity)) as Box<dyn FixedSizeListBuilder>
})
} else {
Box::new(AnonymousOwnedFixedSizeListBuilder::new(name, width, capacity, Some(inner_type_logical.clone())))
Box::new(AnonymousOwnedFixedSizeListBuilder::new(
name,
width,
capacity,
Some(inner_type_logical.clone()),
))
};
Ok(builder)
}
15 changes: 8 additions & 7 deletions polars/polars-core/src/chunked_array/cast.rs
Expand Up @@ -368,7 +368,11 @@ fn cast_list(ca: &ListChunked, child_type: &DataType) -> PolarsResult<(ArrayRef,

// Returns inner data type. This is needed because a cast can instantiate the dtype inner
// values for instance with categoricals
fn cast_fixed_size_list(ca: &FixedSizeListChunked, child_type: &DataType) -> PolarsResult<(ArrayRef, DataType)> {
#[cfg(feature = "dtype-fixed-size-list")]
fn cast_fixed_size_list(
ca: &FixedSizeListChunked,
child_type: &DataType,
) -> PolarsResult<(ArrayRef, DataType)> {
let ca = ca.rechunk();
let arr = ca.downcast_iter().next().unwrap();
// safety: inner dtype is passed correctly
Expand All @@ -382,12 +386,9 @@ fn cast_fixed_size_list(ca: &FixedSizeListChunked, child_type: &DataType) -> Pol

let new_values = new_inner.array_ref(0).clone();

let data_type = FixedSizeListArray::default_datatype(new_values.data_type().clone(), ca.width());
let new_arr = FixedSizeListArray::new(
data_type,
new_values,
arr.validity().cloned(),
);
let data_type =
FixedSizeListArray::default_datatype(new_values.data_type().clone(), ca.width());
let new_arr = FixedSizeListArray::new(data_type, new_values, arr.validity().cloned());
Ok((Box::new(new_arr), inner_dtype))
}

Expand Down
39 changes: 18 additions & 21 deletions polars/polars-core/src/chunked_array/comparison/mod.rs
Expand Up @@ -841,30 +841,27 @@ impl ChunkCompare<&ListChunked> for ListChunked {
impl ChunkCompare<&FixedSizeListChunked> for FixedSizeListChunked {
type Item = BooleanChunked;
fn equal(&self, rhs: &FixedSizeListChunked) -> BooleanChunked {
todo!();
// self.amortized_iter()
// .zip(rhs.amortized_iter())
// .map(|(left, right)| match (left, right) {
// (None, None) => true,
// (Some(l), Some(r)) => l.as_ref().series_equal_missing(r.as_ref()),
// _ => false,
// })
// .collect_trusted()
let (a, b) = align_chunks_binary(self, rhs);
let chunks = a
.downcast_iter()
.zip(b.downcast_iter())
.map(|(a, b)| {
Box::new(polars_arrow::kernels::comparison::fixed_size_list_eq(a, b)) as ArrayRef
})
.collect::<Vec<_>>();
unsafe { BooleanChunked::from_chunks(self.name(), chunks) }
}

fn not_equal(&self, rhs: &FixedSizeListChunked) -> BooleanChunked {
todo!();
// self.amortized_iter()
// .zip(rhs.amortized_iter())
// .map(|(left, right)| {
// let out = match (left, right) {
// (None, None) => true,
// (Some(l), Some(r)) => l.as_ref().series_equal_missing(r.as_ref()),
// _ => false,
// };
// !out
// })
// .collect_trusted()
let (a, b) = align_chunks_binary(self, rhs);
let chunks = a
.downcast_iter()
.zip(b.downcast_iter())
.map(|(a, b)| {
Box::new(polars_arrow::kernels::comparison::fixed_size_list_neq(a, b)) as ArrayRef
})
.collect::<Vec<_>>();
unsafe { BooleanChunked::from_chunks(self.name(), chunks) }
}

// following are not implemented because gt, lt comparison of series don't make sense
Expand Down
8 changes: 6 additions & 2 deletions polars/polars-core/src/chunked_array/ops/explode.rs
Expand Up @@ -5,10 +5,13 @@ use arrow::bitmap::{Bitmap, MutableBitmap};
use arrow::offset::OffsetsBuffer;
use polars_arrow::array::PolarsArray;
use polars_arrow::bit_util::unset_bit_raw;
#[cfg(feature = "dtype-fixed-size-list")]
use polars_arrow::is_valid::IsValid;
use polars_arrow::prelude::*;

use crate::chunked_array::builder::{AnonymousOwnedListBuilder, get_fixed_size_list_builder};
#[cfg(feature = "dtype-fixed-size-list")]
use crate::chunked_array::builder::get_fixed_size_list_builder;
use crate::chunked_array::builder::AnonymousOwnedListBuilder;
use crate::prelude::*;
use crate::series::implementations::null::NullChunked;

Expand Down Expand Up @@ -261,7 +264,8 @@ impl ExplodeByOffsets for FixedSizeListChunked {

let cap = get_capacity(offsets);
let inner_type = self.inner_dtype();
let mut builder = get_fixed_size_list_builder(&inner_type, cap, self.width(), self.name()).unwrap();
let mut builder =
get_fixed_size_list_builder(&inner_type, cap, self.width(), self.name()).unwrap();

let mut start = offsets[0] as usize;
let mut last = start;
Expand Down
1 change: 0 additions & 1 deletion polars/polars-core/src/chunked_array/ops/full.rs
@@ -1,7 +1,6 @@
use arrow::bitmap::MutableBitmap;
use polars_arrow::array::default_arrays::FromData;

#[cfg(feature = "dtype-fixed-size-list")]
use crate::chunked_array::builder::get_list_builder;
use crate::prelude::*;
use crate::series::IsSorted;
Expand Down
2 changes: 2 additions & 0 deletions polars/polars-core/src/chunked_array/ops/reverse.rs
@@ -1,3 +1,4 @@
#[cfg(feature = "dtype-fixed-size-list")]
use crate::chunked_array::builder::get_fixed_size_list_builder;
use crate::prelude::*;
use crate::series::IsSorted;
Expand Down Expand Up @@ -43,6 +44,7 @@ impl_reverse!(Utf8Type, Utf8Chunked);
impl_reverse!(BinaryType, BinaryChunked);
impl_reverse!(ListType, ListChunked);

#[cfg(feature = "dtype-fixed-size-list")]
impl ChunkReverse for FixedSizeListChunked {
fn reverse(&self) -> Self {
if !self.inner_dtype().is_numeric() {
Expand Down
43 changes: 24 additions & 19 deletions polars/polars-core/src/chunked_array/ops/take/take_chunked.rs
Expand Up @@ -172,32 +172,37 @@ impl TakeChunked for ListChunked {
impl TakeChunked for FixedSizeListChunked {
unsafe fn take_chunked_unchecked(&self, by: &[ChunkId], sorted: IsSorted) -> Self {
let arrs = self.downcast_iter().collect::<Vec<_>>();
let mut ca: Self = by
.iter()
.map(|[chunk_idx, array_idx]| {
let arr = arrs.get_unchecked(*chunk_idx as usize);
arr.get_unchecked(*array_idx as usize)
})
.collect();
ca.rename(self.name());
let iter = by.iter().map(|[chunk_idx, array_idx]| {
let arr = arrs.get_unchecked(*chunk_idx as usize);
arr.get_unchecked(*array_idx as usize)
});
let mut ca = Self::from_iter_and_args(
iter,
self.width(),
by.len(),
Some(self.inner_dtype()),
self.name(),
);
ca.set_sorted_flag(sorted);
ca
}

unsafe fn take_opt_chunked_unchecked(&self, by: &[Option<ChunkId>]) -> Self {
let arrs = self.downcast_iter().collect::<Vec<_>>();
let mut ca: Self = by
.iter()
.map(|opt_idx| {
opt_idx.and_then(|[chunk_idx, array_idx]| {
let arr = arrs.get_unchecked(chunk_idx as usize);
arr.get_unchecked(array_idx as usize)
})
let iter = by.iter().map(|opt_idx| {
opt_idx.and_then(|[chunk_idx, array_idx]| {
let arr = arrs.get_unchecked(chunk_idx as usize);
arr.get_unchecked(array_idx as usize)
})
.collect();

ca.rename(self.name());
ca
});

Self::from_iter_and_args(
iter,
self.width(),
by.len(),
Some(self.inner_dtype()),
self.name(),
)
}
}
#[cfg(feature = "object")]
Expand Down
13 changes: 6 additions & 7 deletions polars/polars-core/src/chunked_array/ops/take/take_every.rs
Expand Up @@ -62,13 +62,12 @@ impl ChunkTakeEvery<ListType> for ListChunked {
#[cfg(feature = "dtype-fixed-size-list")]
impl ChunkTakeEvery<FixedSizeListType> for FixedSizeListChunked {
fn take_every(&self, n: usize) -> FixedSizeListChunked {
let mut ca: Self = if !self.has_validity() {
self.into_no_null_iter().step_by(n).collect()
} else {
self.into_iter().step_by(n).collect()
};
ca.rename(self.name());
ca
let idx = (0 as IdxSize..(self.len() as IdxSize))
.step_by(n)
.collect::<Vec<_>>();

// safety: we are in bounds
unsafe { self.take_unchecked((&idx).into()) }
}
}

Expand Down

0 comments on commit 165d66c

Please sign in to comment.