Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
aligned-load sum aggregation
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 24, 2021
1 parent 1e31977 commit de512f2
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 5 deletions.
30 changes: 25 additions & 5 deletions src/compute/aggregate/sum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,43 @@ pub trait Sum<T> {
fn simd_sum(self) -> T;
}

/// Splits `values` into an unaligned head and a tail whose first element sits on an
/// address aligned to `U`.
///
/// Returns `(head, aligned_values)`. Concatenating the two parts always yields `values`
/// again. `aligned_values` is either empty or starts at an address that is a multiple of
/// `align_of::<U>()`; when no such element exists inside the slice, every element ends up
/// in `head`.
fn split_by_alignment<U, T>(values: &[T]) -> (&[T], &[T]) {
    let alignment = std::mem::align_of::<U>();

    // `align_offset` is already expressed in elements of `T` (not bytes), so it can be
    // used as a slice index directly. It may be larger than the slice (or `usize::MAX`
    // when the pointer cannot be aligned at all), hence the clamp to `values.len()`.
    let type_offset = values.as_ptr().align_offset(alignment).min(values.len());

    values.split_at(type_offset)
}

/// Sums the elements of `values` using SIMD vectors of `T::Simd::LANES` lanes.
///
/// Full chunks of `T::Simd::LANES` elements are accumulated lane-wise, the leftover
/// elements are loaded into one more vector padded with `T::default()`, and the lanes
/// of the accumulated vector are finally reduced with `simd_sum`.
#[multiversion]
#[clone(target = "x86_64+avx")]
fn nonnull_sum<T>(values: &[T]) -> T
where
T: NativeType + Simd,
T: NativeType + Simd + Add<Output = T> + std::iter::Sum<T>,
T::Simd: Add<Output = T::Simd> + Sum<T>,
{
let mut chunks = values.chunks_exact(T::Simd::LANES);
// Elements in `head` precede the aligned part and are summed one by one at the end.
let (head, aligned_values) = split_by_alignment::<T::Simd, _>(values);

let mut chunks = aligned_values.chunks_exact(T::Simd::LANES);

// Safety:
// `aligned_values` is the part of `values` that `split_by_alignment` hands back as
// aligned to `T::Simd`, and `chunks_exact` only yields chunks of exactly
// `T::Simd::LANES` elements.
// NOTE(review): this relies on `split_by_alignment` really returning an aligned slice
// and on every chunk start staying aligned - confirm both.
let sum = chunks.by_ref().fold(T::Simd::default(), |acc, chunk| {
acc + T::Simd::from_chunk(chunk)
acc + unsafe { T::Simd::from_chunk_aligned_unchecked(chunk) }
});

// Fewer than `T::Simd::LANES` elements are left; the missing lanes are filled with `T::default()`.
let remainder = T::Simd::from_incomplete_chunk(chunks.remainder(), T::default());
let reduced = sum + remainder;

reduced.simd_sum()
reduced.simd_sum() + head.iter().copied().sum()
}

/// # Panics
Expand Down Expand Up @@ -90,7 +110,7 @@ where
/// Returns `None` if the array is empty or only contains null values.
pub fn sum_primitive<T>(array: &PrimitiveArray<T>) -> Option<T>
where
T: NativeType + Simd,
T: NativeType + Simd + Add<Output = T> + std::iter::Sum<T>,
T::Simd: Add<Output = T::Simd> + Sum<T>,
{
let null_count = array.null_count();
Expand Down
7 changes: 7 additions & 0 deletions src/types/simd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ pub trait NativeSimd: Default {
/// * iff `v.len()` != `T::LANES`
fn from_chunk(v: &[Self::Native]) -> Self;

/// Convert itself from a slice.
/// # Safety:
/// Caller must ensure:
/// * `v.len() == T::LANES`
/// * slice is aligned to `Self`
unsafe fn from_chunk_aligned_unchecked(v: &[Self::Native]) -> Self;

/// creates a new Self from `v` by populating items from `v` up to its length.
/// Items from `v` at positions larger than the number of lanes are ignored;
/// remaining items are populated with `remaining`.
Expand Down
5 changes: 5 additions & 0 deletions src/types/simd/native.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ macro_rules! simd {
($name)(v.try_into().unwrap())
}

// Builds the vector from `v` through `try_into().unwrap()`, so a failed conversion
// panics instead of being undefined behaviour (presumably the target is a
// fixed-size array and the failure case is a length mismatch - TODO confirm).
// No aligned load is performed here; the function is only `unsafe` to match the
// trait method's signature.
#[inline]
unsafe fn from_chunk_aligned_unchecked(v: &[$type]) -> Self {
($name)(v.try_into().unwrap())
}

#[inline]
fn from_incomplete_chunk(v: &[$type], remaining: $type) -> Self {
let mut a = [remaining; $lanes];
Expand Down
5 changes: 5 additions & 0 deletions src/types/simd/packed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ macro_rules! simd {
<$name>::from_slice_unaligned(v)
}

// Forwards directly to `from_slice_aligned_unchecked`, which performs no checks:
// the caller of this `unsafe fn` is responsible for the length and alignment of `v`
// (see that method's documentation for the exact requirements).
#[inline]
unsafe fn from_chunk_aligned_unchecked(v: &[$type]) -> Self {
<$name>::from_slice_aligned_unchecked(v)
}

#[inline]
fn from_incomplete_chunk(v: &[$type], remaining: $type) -> Self {
let mut a = [remaining; $lanes];
Expand Down

0 comments on commit de512f2

Please sign in to comment.