Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Improved performance of sum aggregation via aligned loads (-10%) #445

Merged
merged 5 commits into from
Sep 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions src/compute/aggregate/sum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,17 @@ pub trait Sum<T> {
#[clone(target = "x86_64+avx")]
fn nonnull_sum<T>(values: &[T]) -> T
where
T: NativeType + Simd,
T::Simd: Add<Output = T::Simd> + Sum<T>,
T: NativeType + Simd + Add<Output = T> + std::iter::Sum<T>,
T::Simd: Sum<T> + Add<Output = T::Simd>,
{
let mut chunks = values.chunks_exact(T::Simd::LANES);
let (head, simd_vals, tail) = T::Simd::align(values);

let sum = chunks.by_ref().fold(T::Simd::default(), |acc, chunk| {
acc + T::Simd::from_chunk(chunk)
});

let remainder = T::Simd::from_incomplete_chunk(chunks.remainder(), T::default());
let reduced = sum + remainder;
let mut reduced = T::Simd::from_incomplete_chunk(&[], T::default());
for chunk in simd_vals {
reduced = reduced + *chunk;
}

reduced.simd_sum()
reduced.simd_sum() + head.iter().copied().sum() + tail.iter().copied().sum()
}

/// # Panics
Expand Down Expand Up @@ -90,7 +88,7 @@ where
/// Returns `None` if the array is empty or only contains null values.
pub fn sum_primitive<T>(array: &PrimitiveArray<T>) -> Option<T>
where
T: NativeType + Simd,
T: NativeType + Simd + Add<Output = T> + std::iter::Sum<T>,
T::Simd: Add<Output = T::Simd> + Sum<T>,
{
let null_count = array.null_count();
Expand Down
7 changes: 6 additions & 1 deletion src/types/simd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ pub trait FromMaskChunk<T> {
}

/// A struct that lends itself well to be compiled leveraging SIMD
pub trait NativeSimd: Default {
/// # Safety
/// The `NativeType` and the `NativeSimd` must have compatible alignments,
/// e.g. re-slicing a `&[NativeType]` at `align_of::<NativeSimd>()` boundaries must yield a properly aligned, safe-to-read `&[NativeSimd]`.
pub unsafe trait NativeSimd: Default + Copy {
/// Number of lanes
const LANES: usize;
/// The [`NativeType`] of this struct. E.g. `f32` for a `NativeSimd = f32x16`.
Expand All @@ -32,6 +35,8 @@ pub trait NativeSimd: Default {
/// Items from `v` at positions larger than the number of lanes are ignored;
/// remaining items are populated with `remaining`.
fn from_incomplete_chunk(v: &[Self::Native], remaining: Self::Native) -> Self;

fn align(values: &[Self::Native]) -> (&[Self::Native], &[Self], &[Self::Native]);
}

/// Trait implemented by some [`NativeType`] that have a SIMD representation.
Expand Down
8 changes: 7 additions & 1 deletion src/types/simd/native.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ use super::*;
macro_rules! simd {
($name:tt, $type:ty, $lanes:expr, $mask:ty) => {
#[allow(non_camel_case_types)]
#[derive(Copy, Clone)]
pub struct $name(pub [$type; $lanes]);

impl NativeSimd for $name {
unsafe impl NativeSimd for $name {
const LANES: usize = $lanes;
type Native = $type;
type Chunk = $mask;
Expand All @@ -35,6 +36,11 @@ macro_rules! simd {
a.iter_mut().zip(v.iter()).for_each(|(a, b)| *a = *b);
Self(a)
}

#[inline]
fn align(values: &[Self::Native]) -> (&[Self::Native], &[Self], &[Self::Native]) {
unsafe { values.align_to::<Self>() }
}
}

impl std::ops::Index<usize> for $name {
Expand Down
7 changes: 6 additions & 1 deletion src/types/simd/packed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use super::*;

macro_rules! simd {
($name:tt, $type:ty, $lanes:expr, $chunk:ty, $mask:tt) => {
impl NativeSimd for $name {
unsafe impl NativeSimd for $name {
const LANES: usize = $lanes;
type Native = $type;
type Chunk = $chunk;
Expand All @@ -29,6 +29,11 @@ macro_rules! simd {
a.iter_mut().zip(v.iter()).for_each(|(a, b)| *a = *b);
<$name>::from_chunk(a.as_ref())
}

#[inline]
fn align(values: &[Self::Native]) -> (&[Self::Native], &[Self], &[Self::Native]) {
unsafe { values.align_to::<Self>() }
}
}
};
}
Expand Down