Skip to content

Commit

Permalink
more has-validity instead of null_count
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Nov 4, 2021
1 parent fa13883 commit 6808c75
Show file tree
Hide file tree
Showing 19 changed files with 104 additions and 117 deletions.
2 changes: 1 addition & 1 deletion polars/polars-arrow/src/kernels/set.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use crate::array::default_arrays::FromData;
use crate::error::{PolarsError, Result};
use crate::kernels::BinaryMaskedSliceIterator;
use crate::prelude::PolarsArray;
use arrow::array::*;
use arrow::buffer::MutableBuffer;
use arrow::{datatypes::DataType, types::NativeType};
use std::ops::BitOr;
use crate::prelude::PolarsArray;

/// Set values in a primitive array where the primitive array has null values.
/// This is faster because we don't have to invert and combine bitmaps.
Expand Down
6 changes: 3 additions & 3 deletions polars/polars-arrow/src/kernels/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ pub unsafe fn take_no_null_primitive<T: NativeType>(
arr: &PrimitiveArray<T>,
indices: &UInt32Array,
) -> Arc<PrimitiveArray<T>> {
assert_eq!(arr.null_count(), 0);
debug_assert_eq!(arr.has_validity(), false);

let array_values = arr.values().as_slice();
let index_values = indices.values().as_slice();
Expand All @@ -87,7 +87,7 @@ pub unsafe fn take_no_null_primitive_iter_unchecked<
arr: &PrimitiveArray<T>,
indices: I,
) -> Arc<PrimitiveArray<T>> {
assert_eq!(arr.null_count(), 0);
debug_assert_eq!(arr.has_validity(), false);

let array_values = arr.values().as_slice();

Expand Down Expand Up @@ -189,7 +189,7 @@ pub unsafe fn take_no_null_bool_iter_unchecked<I: IntoIterator<Item = usize>>(
arr: &BooleanArray,
indices: I,
) -> Arc<BooleanArray> {
debug_assert_eq!(arr.null_count(), 0);
debug_assert_eq!(arr.has_validity(), false);
let iter = indices
.into_iter()
.map(|idx| Some(arr.values().get_bit_unchecked(idx)));
Expand Down
11 changes: 2 additions & 9 deletions polars/polars-arrow/src/kernels/take_agg.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
//! kernels that combine take and aggregations.
use crate::array::PolarsArray;
use arrow::array::{Array, PrimitiveArray};
use arrow::types::NativeType;

Expand All @@ -16,7 +17,7 @@ pub unsafe fn take_agg_no_null_primitive_iter_unchecked<
f: F,
init: T,
) -> T {
debug_assert_eq!(arr.null_count(), 0);
debug_assert_eq!(arr.has_validity(), false);

let array_values = arr.values().as_slice();

Expand All @@ -39,10 +40,6 @@ pub unsafe fn take_agg_primitive_iter_unchecked<
f: F,
init: T,
) -> Option<T> {
if arr.null_count() == arr.len() {
return None;
}

let array_values = arr.values().as_slice();
let validity = arr.validity().expect("null buffer should be there");

Expand Down Expand Up @@ -74,10 +71,6 @@ pub unsafe fn take_agg_primitive_iter_unchecked_count_nulls<
f: F,
init: T,
) -> Option<(T, u32)> {
if arr.null_count() == arr.len() {
return None;
}

let array_values = arr.values().as_slice();
let validity = arr.validity().expect("null buffer should be there");

Expand Down
16 changes: 8 additions & 8 deletions polars/polars-core/src/chunked_array/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,23 +12,23 @@ macro_rules! apply_operand_on_chunkedarray_by_iter {

($self:ident, $rhs:ident, $operand:tt) => {
{
match ($self.null_count(), $rhs.null_count()) {
(0, 0) => {
match ($self.has_validity(), $rhs.has_validity()) {
(false, false) => {
let a: NoNull<ChunkedArray<_>> = $self
.into_no_null_iter()
.zip($rhs.into_no_null_iter())
.map(|(left, right)| left $operand right)
.collect();
a.into_inner()
},
(0, _) => {
(false, _) => {
$self
.into_no_null_iter()
.zip($rhs.into_iter())
.map(|(left, opt_right)| opt_right.map(|right| left $operand right))
.collect()
.collect_trusted()
},
(_, 0) => {
(_, false) => {
$self
.into_iter()
.zip($rhs.into_no_null_iter())
Expand All @@ -44,7 +44,7 @@ macro_rules! apply_operand_on_chunkedarray_by_iter {
(Some(_), None) => None,
(Some(left), Some(right)) => Some(left $operand right),
})
.collect()
.collect_trusted()

}
}
Expand Down Expand Up @@ -388,8 +388,8 @@ impl Add<&str> for &Utf8Chunked {
type Output = Utf8Chunked;

fn add(self, rhs: &str) -> Self::Output {
match self.null_count() {
0 => self
match self.has_validity() {
false => self
.into_no_null_iter()
.map(|l| concat_strings(l, rhs))
.collect(),
Expand Down
18 changes: 9 additions & 9 deletions polars/polars-core/src/chunked_array/comparison.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ where

macro_rules! impl_eq_missing {
($self:ident, $rhs:ident) => {{
match ($self.null_count(), $rhs.null_count()) {
(0, 0) => $self
match ($self.has_validity(), $rhs.has_validity()) {
(false, false) => $self
.into_no_null_iter()
.zip($rhs.into_no_null_iter())
.map(|(opt_a, opt_b)| opt_a == opt_b)
Expand Down Expand Up @@ -535,22 +535,22 @@ impl ChunkCompare<&str> for Utf8Chunked {

macro_rules! impl_cmp_list {
($self:ident, $rhs:ident, $cmp_method:ident) => {{
match ($self.null_count(), $rhs.null_count()) {
(0, 0) => $self
match ($self.has_validity(), $rhs.has_validity()) {
(false, false) => $self
.into_no_null_iter()
.zip($rhs.into_no_null_iter())
.map(|(left, right)| left.$cmp_method(&right))
.collect(),
(0, _) => $self
.collect_trusted(),
(false, _) => $self
.into_no_null_iter()
.zip($rhs.into_iter())
.map(|(left, opt_right)| opt_right.map(|right| left.$cmp_method(&right)))
.collect(),
(_, 0) => $self
.collect_trusted(),
(_, false) => $self
.into_iter()
.zip($rhs.into_no_null_iter())
.map(|(opt_left, right)| opt_left.map(|left| left.$cmp_method(&right)))
.collect(),
.collect_trusted(),
(_, _) => $self
.into_iter()
.zip($rhs.into_iter())
Expand Down
4 changes: 3 additions & 1 deletion polars/polars-core/src/chunked_array/kernels/take.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::prelude::*;
use arrow::bitmap::MutableBitmap;
use polars_arrow::array::PolarsArray;
use polars_arrow::bit_util::unset_bit_raw;
use polars_arrow::kernels::take::take_value_indices_from_list;
use std::convert::TryFrom;
Expand Down Expand Up @@ -52,7 +53,8 @@ pub(crate) unsafe fn take_list_unchecked(
let taken = taken.chunks()[0].clone();

let validity =
if values.null_count() > 0 || indices.null_count() > 0 {
// if null count > 0
if values.has_validity() || indices.has_validity() {
// determine null buffer, which are a function of `values` and `indices`
let mut validity = MutableBitmap::with_capacity(indices.len());
let validity_ptr = validity.as_slice().as_ptr() as *mut u8;
Expand Down
10 changes: 3 additions & 7 deletions polars/polars-core/src/chunked_array/ops/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,12 +261,8 @@ where
ca.into_series()
}
fn mean_as_series(&self) -> Series {
if self.null_count() == self.len() {
Self::full_null(self.name(), 1).into_series()
} else {
let val = [self.mean()];
Series::new(self.name(), val)
}
let val = [self.mean()];
Series::new(self.name(), val)
}
fn median_as_series(&self) -> Series {
let val = [self.median()];
Expand Down Expand Up @@ -605,6 +601,6 @@ mod test {
// all null values case
let ca = Float32Chunked::full_null("", 3);
assert_eq!(ca.mean(), None);
assert_eq!(ca.mean_as_series().f32().unwrap().get(0), None);
assert_eq!(ca.mean_as_series().f64().unwrap().get(0), None);
}
}
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/ops/apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
use crate::prelude::*;
use crate::utils::{CustomIterTools, NoNull};
use arrow::array::{Array, ArrayRef, BooleanArray, PrimitiveArray};
use polars_arrow::array::PolarsArray;
use std::borrow::Cow;
use std::convert::TryFrom;
use polars_arrow::array::PolarsArray;

macro_rules! try_apply {
($self:expr, $f:expr) => {{
Expand Down
3 changes: 2 additions & 1 deletion polars/polars-core/src/chunked_array/ops/explode.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use crate::prelude::*;
use arrow::{array::*, bitmap::MutableBitmap, buffer::Buffer};
use polars_arrow::array::PolarsArray;
use polars_arrow::bit_util::unset_bit_raw;
use polars_arrow::prelude::{FromDataUtf8, ValueSize};
use std::convert::TryFrom;
Expand Down Expand Up @@ -37,7 +38,7 @@ where
// value and collect the indices.
// Because the length of the array is not known, we first collect the null indexes, offset
// by the insertion of empty rows (as None), and later create a validity bitmap.
if arr.null_count() > 0 {
if arr.has_validity() {
let validity_values = arr.validity().unwrap();

for &o in &offsets[1..] {
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/ops/interpolate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ where
fn interpolate(&self) -> Self {
// This implementation differs from pandas in that boundary `None`s are not removed;
// this prevents a lot of errors due to expressions leading to different lengths.
if !self.has_validity() || self.null_count() == self.len() {
if !self.has_validity() {
return self.clone();
}

Expand Down
20 changes: 10 additions & 10 deletions polars/polars-core/src/chunked_array/ops/rolling_window.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ mod inner_mod {
check_input(options.window_size, options.min_periods)?;
let ca = self.rechunk();
let arr = ca.downcast_iter().next().unwrap();
let arr = match self.null_count() {
0 => rolling::no_nulls::rolling_mean(
let arr = match self.has_validity() {
false => rolling::no_nulls::rolling_mean(
arr.values(),
options.window_size,
options.min_periods,
Expand Down Expand Up @@ -94,8 +94,8 @@ mod inner_mod {
}

let arr = ca.downcast_iter().next().unwrap();
let arr = match self.null_count() {
0 => rolling::no_nulls::rolling_sum(
let arr = match self.has_validity() {
false => rolling::no_nulls::rolling_sum(
arr.values(),
options.window_size,
options.min_periods,
Expand Down Expand Up @@ -128,8 +128,8 @@ mod inner_mod {
}

let arr = ca.downcast_iter().next().unwrap();
let arr = match self.null_count() {
0 => rolling::no_nulls::rolling_min(
let arr = match self.has_validity() {
false => rolling::no_nulls::rolling_min(
arr.values(),
options.window_size,
options.min_periods,
Expand Down Expand Up @@ -162,8 +162,8 @@ mod inner_mod {
}

let arr = ca.downcast_iter().next().unwrap();
let arr = match self.null_count() {
0 => rolling::no_nulls::rolling_max(
let arr = match self.has_validity() {
false => rolling::no_nulls::rolling_max(
arr.values(),
options.window_size,
options.min_periods,
Expand Down Expand Up @@ -304,8 +304,8 @@ mod inner_mod {
}

let arr = ca.downcast_iter().next().unwrap();
let arr = match self.null_count() {
0 => rolling::no_nulls::rolling_var(
let arr = match self.has_validity() {
false => rolling::no_nulls::rolling_var(
arr.values(),
options.window_size,
options.min_periods,
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-core/src/chunked_array/ops/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ where
check_bounds!(self, mask);

// Fast path uses the kernel in polars-arrow
if let (Some(value), 0) = (value, mask.null_count()) {
if let (Some(value), false) = (value, mask.has_validity()) {
let (left, mask) = align_chunks_binary(self, mask);

// apply binary kernel.
Expand Down

0 comments on commit 6808c75

Please sign in to comment.