Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Simplified min_max_string and min_max_binary #1004

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions benches/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,18 @@ fn add_benchmark(c: &mut Criterion) {
c.bench_function(&format!("min null 2^{} f32", log2_size), |b| {
b.iter(|| bench_min(&arr_a))
});

let arr_a = create_string_array::<i32>(1, size, 0.0, 0);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I set the length of each string to be 1 to make the time of comparing each element to be determined.


c.bench_function(&format!("min 2^{} utf8", log2_size), |b| {
b.iter(|| bench_min(&arr_a))
});

let arr_a = create_string_array::<i32>(1, size, 0.1, 0);

c.bench_function(&format!("min null 2^{} utf8", log2_size), |b| {
b.iter(|| bench_min(&arr_a))
});
});
}

Expand Down
116 changes: 32 additions & 84 deletions src/compute/aggregate/min_max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,86 +30,6 @@ pub trait SimdOrd<T> {
fn new_max() -> Self;
}

/// Helper to compute min/max of [`BinaryArray`]
fn min_max_binary<O: Offset, F: Fn(&[u8], &[u8]) -> bool>(
array: &BinaryArray<O>,
cmp: F,
) -> Option<&[u8]> {
let null_count = array.null_count();

if null_count == array.len() || array.len() == 0 {
return None;
}
let value = if array.validity().is_some() {
array.iter().fold(None, |mut acc: Option<&[u8]>, v| {
if let Some(item) = v {
if let Some(acc) = acc.as_mut() {
if cmp(acc, item) {
*acc = item
}
} else {
acc = Some(item)
}
}
acc
})
} else {
array
.values_iter()
.fold(None, |mut acc: Option<&[u8]>, item| {
if let Some(acc) = acc.as_mut() {
if cmp(acc, item) {
*acc = item
}
} else {
acc = Some(item)
}
acc
})
};
value
}

/// Helper to compute min/max of [`Utf8Array`]
fn min_max_string<O: Offset, F: Fn(&str, &str) -> bool>(
array: &Utf8Array<O>,
cmp: F,
) -> Option<&str> {
let null_count = array.null_count();

if null_count == array.len() || array.len() == 0 {
return None;
}
let value = if array.validity().is_some() {
array.iter().fold(None, |mut acc: Option<&str>, v| {
if let Some(item) = v {
if let Some(acc) = acc.as_mut() {
if cmp(acc, item) {
*acc = item
}
} else {
acc = Some(item)
}
}
acc
})
} else {
array
.values_iter()
.fold(None, |mut acc: Option<&str>, item| {
if let Some(acc) = acc.as_mut() {
if cmp(acc, item) {
*acc = item
}
} else {
acc = Some(item)
}
acc
})
};
value
}

fn nonnull_min_primitive<T>(values: &[T]) -> T
where
T: NativeType + Simd,
Expand Down Expand Up @@ -278,24 +198,52 @@ where
})
}

/// Helper to compute min/max of [`BinaryArray`] and [`Utf8Array`]
macro_rules! min_max_binary_utf8 {
($array: expr, $cmp: expr) => {
if $array.null_count() == $array.len() {
None
} else if $array.validity().is_some() {
$array
.iter()
.reduce(|v1, v2| match (v1, v2) {
(None, v2) => v2,
(v1, None) => v1,
(Some(v1), Some(v2)) => {
if $cmp(v1, v2) {
Some(v2)
} else {
Some(v1)
}
}
})
.unwrap_or(None)
} else {
$array
.values_iter()
.reduce(|v1, v2| if $cmp(v1, v2) { v2 } else { v1 })
}
};
}

/// Returns the maximum value in the binary array, according to the natural order.
pub fn max_binary<O: Offset>(array: &BinaryArray<O>) -> Option<&[u8]> {
min_max_binary(array, |a, b| a < b)
min_max_binary_utf8!(array, |a, b| a < b)
}

/// Returns the minimum value in the binary array, according to the natural order.
pub fn min_binary<O: Offset>(array: &BinaryArray<O>) -> Option<&[u8]> {
min_max_binary(array, |a, b| a > b)
min_max_binary_utf8!(array, |a, b| a > b)
}

/// Returns the maximum value in the string array, according to the natural order.
pub fn max_string<O: Offset>(array: &Utf8Array<O>) -> Option<&str> {
min_max_string(array, |a, b| a < b)
min_max_binary_utf8!(array, |a, b| a < b)
}

/// Returns the minimum value in the string array, according to the natural order.
pub fn min_string<O: Offset>(array: &Utf8Array<O>) -> Option<&str> {
min_max_string(array, |a, b| a > b)
min_max_binary_utf8!(array, |a, b| a > b)
}

/// Returns the minimum value in the boolean array.
Expand Down
28 changes: 23 additions & 5 deletions tests/it/compute/aggregate/min_max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,11 @@ fn min_max_f64_edge_cases() {
assert_eq!(Some(f64::INFINITY), max_primitive(&a));
}

// todo: convert me
#[test]
fn test_string_min_max_with_nulls() {
let a = Utf8Array::<i32>::from(&[Some("b"), None, None, Some("a"), Some("c")]);
assert_eq!("a", min_string(&a).unwrap());
assert_eq!("c", max_string(&a).unwrap());
assert_eq!(Some("a"), min_string(&a));
assert_eq!(Some("c"), max_string(&a));
}

#[test]
Expand All @@ -127,6 +126,13 @@ fn test_string_min_max_all_nulls() {
assert_eq!(None, max_string(&a));
}

#[test]
fn test_string_min_max_no_null() {
let a = Utf8Array::<i32>::from(&[Some("abc"), Some("abd"), Some("bac"), Some("bbb")]);
assert_eq!(Some("abc"), min_string(&a));
assert_eq!(Some("bbb"), max_string(&a));
}

#[test]
fn test_string_min_max_1() {
let a = Utf8Array::<i32>::from(&[None, None, Some("b"), Some("a")]);
Expand Down Expand Up @@ -192,8 +198,20 @@ fn test_boolean_min_max_smaller() {
#[test]
fn test_binary_min_max_with_nulls() {
let a = BinaryArray::<i32>::from(&[Some(b"b"), None, None, Some(b"a"), Some(b"c")]);
assert_eq!("a".as_bytes(), min_binary(&a).unwrap());
assert_eq!("c".as_bytes(), max_binary(&a).unwrap());
assert_eq!(Some("a".as_bytes()), min_binary(&a));
assert_eq!(Some("c".as_bytes()), max_binary(&a));
}

#[test]
fn test_binary_min_max_no_null() {
let a = BinaryArray::<i32>::from(&[
Some("abc".as_bytes()),
Some(b"acd"),
Some(b"aabd"),
Some(b""),
]);
assert_eq!(Some("".as_bytes()), min_binary(&a));
assert_eq!(Some("acd".as_bytes()), max_binary(&a));
}

#[test]
Expand Down