Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved performance of sum aggregation via aligned loads (-10%) (#445)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 29, 2021
1 parent fedb19f commit 94fd267
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 14 deletions.
20 changes: 9 additions & 11 deletions src/compute/aggregate/sum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,17 @@ pub trait Sum<T> {
#[clone(target = "x86_64+avx")]
fn nonnull_sum<T>(values: &[T]) -> T
where
T: NativeType + Simd,
T::Simd: Add<Output = T::Simd> + Sum<T>,
T: NativeType + Simd + Add<Output = T> + std::iter::Sum<T>,
T::Simd: Sum<T> + Add<Output = T::Simd>,
{
let mut chunks = values.chunks_exact(T::Simd::LANES);
let (head, simd_vals, tail) = T::Simd::align(values);

let sum = chunks.by_ref().fold(T::Simd::default(), |acc, chunk| {
acc + T::Simd::from_chunk(chunk)
});

let remainder = T::Simd::from_incomplete_chunk(chunks.remainder(), T::default());
let reduced = sum + remainder;
let mut reduced = T::Simd::from_incomplete_chunk(&[], T::default());
for chunk in simd_vals {
reduced = reduced + *chunk;
}

reduced.simd_sum()
reduced.simd_sum() + head.iter().copied().sum() + tail.iter().copied().sum()
}

/// # Panics
Expand Down Expand Up @@ -90,7 +88,7 @@ where
/// Returns `None` if the array is empty or only contains null values.
pub fn sum_primitive<T>(array: &PrimitiveArray<T>) -> Option<T>
where
T: NativeType + Simd,
T: NativeType + Simd + Add<Output = T> + std::iter::Sum<T>,
T::Simd: Add<Output = T::Simd> + Sum<T>,
{
let null_count = array.null_count();
Expand Down
7 changes: 6 additions & 1 deletion src/types/simd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ pub trait FromMaskChunk<T> {
}

/// A struct that lends itself well to be compiled leveraging SIMD
pub trait NativeSimd: Default {
/// # Safety
/// The `NativeType` and the `NativeSimd` must have compatible alignments,
/// e.g. slicing `&[NativeType]` by `align_of::<NativeSimd>()` must be properly aligned/safe.
pub unsafe trait NativeSimd: Default + Copy {
/// Number of lanes
const LANES: usize;
/// The [`NativeType`] of this struct. E.g. `f32` for a `NativeSimd = f32x16`.
Expand All @@ -32,6 +35,8 @@ pub trait NativeSimd: Default {
/// Items from `v` at positions larger than the number of lanes are ignored;
/// remaining items are populated with `remaining`.
fn from_incomplete_chunk(v: &[Self::Native], remaining: Self::Native) -> Self;

fn align(values: &[Self::Native]) -> (&[Self::Native], &[Self], &[Self::Native]);
}

/// Trait implemented by some [`NativeType`] that have a SIMD representation.
Expand Down
8 changes: 7 additions & 1 deletion src/types/simd/native.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ use super::*;
macro_rules! simd {
($name:tt, $type:ty, $lanes:expr, $mask:ty) => {
#[allow(non_camel_case_types)]
#[derive(Copy, Clone)]
pub struct $name(pub [$type; $lanes]);

impl NativeSimd for $name {
unsafe impl NativeSimd for $name {
const LANES: usize = $lanes;
type Native = $type;
type Chunk = $mask;
Expand All @@ -35,6 +36,11 @@ macro_rules! simd {
a.iter_mut().zip(v.iter()).for_each(|(a, b)| *a = *b);
Self(a)
}

#[inline]
fn align(values: &[Self::Native]) -> (&[Self::Native], &[Self], &[Self::Native]) {
unsafe { values.align_to::<Self>() }
}
}

impl std::ops::Index<usize> for $name {
Expand Down
7 changes: 6 additions & 1 deletion src/types/simd/packed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use super::*;

macro_rules! simd {
($name:tt, $type:ty, $lanes:expr, $chunk:ty, $mask:tt) => {
impl NativeSimd for $name {
unsafe impl NativeSimd for $name {
const LANES: usize = $lanes;
type Native = $type;
type Chunk = $chunk;
Expand All @@ -29,6 +29,11 @@ macro_rules! simd {
a.iter_mut().zip(v.iter()).for_each(|(a, b)| *a = *b);
<$name>::from_chunk(a.as_ref())
}

#[inline]
fn align(values: &[Self::Native]) -> (&[Self::Native], &[Self], &[Self::Native]) {
unsafe { values.align_to::<Self>() }
}
}
};
}
Expand Down

0 comments on commit 94fd267

Please sign in to comment.