Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Simplified min_max_string and min_max_binary (#1004)
Browse files Browse the repository at this point in the history
  • Loading branch information
HaoYang670 committed May 23, 2022
1 parent a10db3a commit a810b03
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 89 deletions.
12 changes: 12 additions & 0 deletions benches/aggregate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,18 @@ fn add_benchmark(c: &mut Criterion) {
c.bench_function(&format!("min null 2^{} f32", log2_size), |b| {
b.iter(|| bench_min(&arr_a))
});

let arr_a = create_string_array::<i32>(1, size, 0.0, 0);

c.bench_function(&format!("min 2^{} utf8", log2_size), |b| {
b.iter(|| bench_min(&arr_a))
});

let arr_a = create_string_array::<i32>(1, size, 0.1, 0);

c.bench_function(&format!("min null 2^{} utf8", log2_size), |b| {
b.iter(|| bench_min(&arr_a))
});
});
}

Expand Down
116 changes: 32 additions & 84 deletions src/compute/aggregate/min_max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,86 +30,6 @@ pub trait SimdOrd<T> {
fn new_max() -> Self;
}

/// Helper to compute min/max of [`BinaryArray`]
fn min_max_binary<O: Offset, F: Fn(&[u8], &[u8]) -> bool>(
array: &BinaryArray<O>,
cmp: F,
) -> Option<&[u8]> {
let null_count = array.null_count();

if null_count == array.len() || array.len() == 0 {
return None;
}
let value = if array.validity().is_some() {
array.iter().fold(None, |mut acc: Option<&[u8]>, v| {
if let Some(item) = v {
if let Some(acc) = acc.as_mut() {
if cmp(acc, item) {
*acc = item
}
} else {
acc = Some(item)
}
}
acc
})
} else {
array
.values_iter()
.fold(None, |mut acc: Option<&[u8]>, item| {
if let Some(acc) = acc.as_mut() {
if cmp(acc, item) {
*acc = item
}
} else {
acc = Some(item)
}
acc
})
};
value
}

/// Helper to compute min/max of [`Utf8Array`]
fn min_max_string<O: Offset, F: Fn(&str, &str) -> bool>(
array: &Utf8Array<O>,
cmp: F,
) -> Option<&str> {
let null_count = array.null_count();

if null_count == array.len() || array.len() == 0 {
return None;
}
let value = if array.validity().is_some() {
array.iter().fold(None, |mut acc: Option<&str>, v| {
if let Some(item) = v {
if let Some(acc) = acc.as_mut() {
if cmp(acc, item) {
*acc = item
}
} else {
acc = Some(item)
}
}
acc
})
} else {
array
.values_iter()
.fold(None, |mut acc: Option<&str>, item| {
if let Some(acc) = acc.as_mut() {
if cmp(acc, item) {
*acc = item
}
} else {
acc = Some(item)
}
acc
})
};
value
}

fn nonnull_min_primitive<T>(values: &[T]) -> T
where
T: NativeType + Simd,
Expand Down Expand Up @@ -278,24 +198,52 @@ where
})
}

/// Helper to compute min/max of [`BinaryArray`] and [`Utf8Array`]
macro_rules! min_max_binary_utf8 {
($array: expr, $cmp: expr) => {
if $array.null_count() == $array.len() {
None
} else if $array.validity().is_some() {
$array
.iter()
.reduce(|v1, v2| match (v1, v2) {
(None, v2) => v2,
(v1, None) => v1,
(Some(v1), Some(v2)) => {
if $cmp(v1, v2) {
Some(v2)
} else {
Some(v1)
}
}
})
.unwrap_or(None)
} else {
$array
.values_iter()
.reduce(|v1, v2| if $cmp(v1, v2) { v2 } else { v1 })
}
};
}

/// Returns the maximum value in the binary array, according to the natural order.
pub fn max_binary<O: Offset>(array: &BinaryArray<O>) -> Option<&[u8]> {
min_max_binary(array, |a, b| a < b)
min_max_binary_utf8!(array, |a, b| a < b)
}

/// Returns the minimum value in the binary array, according to the natural order.
pub fn min_binary<O: Offset>(array: &BinaryArray<O>) -> Option<&[u8]> {
min_max_binary(array, |a, b| a > b)
min_max_binary_utf8!(array, |a, b| a > b)
}

/// Returns the maximum value in the string array, according to the natural order.
pub fn max_string<O: Offset>(array: &Utf8Array<O>) -> Option<&str> {
min_max_string(array, |a, b| a < b)
min_max_binary_utf8!(array, |a, b| a < b)
}

/// Returns the minimum value in the string array, according to the natural order.
pub fn min_string<O: Offset>(array: &Utf8Array<O>) -> Option<&str> {
min_max_string(array, |a, b| a > b)
min_max_binary_utf8!(array, |a, b| a > b)
}

/// Returns the minimum value in the boolean array.
Expand Down
28 changes: 23 additions & 5 deletions tests/it/compute/aggregate/min_max.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,11 @@ fn min_max_f64_edge_cases() {
assert_eq!(Some(f64::INFINITY), max_primitive(&a));
}

// todo: convert me
#[test]
fn test_string_min_max_with_nulls() {
let a = Utf8Array::<i32>::from(&[Some("b"), None, None, Some("a"), Some("c")]);
assert_eq!("a", min_string(&a).unwrap());
assert_eq!("c", max_string(&a).unwrap());
assert_eq!(Some("a"), min_string(&a));
assert_eq!(Some("c"), max_string(&a));
}

#[test]
Expand All @@ -127,6 +126,13 @@ fn test_string_min_max_all_nulls() {
assert_eq!(None, max_string(&a));
}

#[test]
fn test_string_min_max_no_null() {
let a = Utf8Array::<i32>::from(&[Some("abc"), Some("abd"), Some("bac"), Some("bbb")]);
assert_eq!(Some("abc"), min_string(&a));
assert_eq!(Some("bbb"), max_string(&a));
}

#[test]
fn test_string_min_max_1() {
let a = Utf8Array::<i32>::from(&[None, None, Some("b"), Some("a")]);
Expand Down Expand Up @@ -192,8 +198,20 @@ fn test_boolean_min_max_smaller() {
#[test]
fn test_binary_min_max_with_nulls() {
let a = BinaryArray::<i32>::from(&[Some(b"b"), None, None, Some(b"a"), Some(b"c")]);
assert_eq!("a".as_bytes(), min_binary(&a).unwrap());
assert_eq!("c".as_bytes(), max_binary(&a).unwrap());
assert_eq!(Some("a".as_bytes()), min_binary(&a));
assert_eq!(Some("c".as_bytes()), max_binary(&a));
}

#[test]
fn test_binary_min_max_no_null() {
let a = BinaryArray::<i32>::from(&[
Some("abc".as_bytes()),
Some(b"acd"),
Some(b"aabd"),
Some(b""),
]);
assert_eq!(Some("".as_bytes()), min_binary(&a));
assert_eq!(Some("acd".as_bytes()), max_binary(&a));
}

#[test]
Expand Down

0 comments on commit a810b03

Please sign in to comment.