Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
use align_to
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 27, 2021
1 parent 1a385ff commit 90b4f85
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 47 deletions.
41 changes: 11 additions & 30 deletions src/compute/aggregate/sum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,43 +19,24 @@ pub trait Sum<T> {
fn simd_sum(self) -> T;
}

/// Splits `values` into an unaligned head and a tail whose first element is aligned to `U`.
///
/// Returns `(head, aligned_values)` where `head` holds the leading items that precede the
/// first address aligned to `align_of::<U>()`, and `aligned_values` holds everything from
/// that address onwards. If no item of `values` sits on such an address, every item is
/// returned in `head` and `aligned_values` is empty.
fn split_by_alignment<U, T>(values: &[T]) -> (&[T], &[T]) {
    let alignment = std::mem::align_of::<U>();

    // `align_offset` on a `*const T` is already expressed in *elements of `T`*, not bytes,
    // so it can be used directly as the split index. It may exceed the slice length (or be
    // `usize::MAX` when the pointer cannot be aligned), hence the clamp.
    let type_offset = values.as_ptr().align_offset(alignment).min(values.len());

    values.split_at(type_offset)
}

#[multiversion]
#[clone(target = "x86_64+avx")]
fn nonnull_sum<T>(values: &[T]) -> T
fn nonnull_sum<'a, T>(values: &'a [T]) -> T
where
T: NativeType + Simd + Add<Output = T> + std::iter::Sum<T>,
T::Simd: Add<Output = T::Simd> + Sum<T>,
T::Simd: Sum<T> + Add<Output = T::Simd>,
{
let (head, aligned_values) = split_by_alignment::<T::Simd, _>(values);

let mut chunks = aligned_values.chunks_exact(T::Simd::LANES);

// Safety:
// we just made sure that we work on a slice of data aligned to T::Simd
let sum = chunks.by_ref().fold(T::Simd::default(), |acc, chunk| {
acc + unsafe { T::Simd::from_chunk_aligned_unchecked(chunk) }
});
// `T::Simd` is the vector type of `T`, and aligning to it is similar to aligning to `[T; alignment]`:
// the alignment of `T::Simd` ensures that it also fits `T`.
let (head, simd_vals, tail) = unsafe { values.align_to::<T::Simd>() };

let remainder = T::Simd::from_incomplete_chunk(chunks.remainder(), T::default());
let reduced = sum + remainder;
let mut reduced = T::Simd::from_incomplete_chunk(&[], T::default());
for chunk in simd_vals {
reduced = reduced + chunk.clone()
}

reduced.simd_sum() + head.iter().copied().sum()
reduced.simd_sum() + head.iter().copied().sum() + tail.iter().copied().sum()
}

/// # Panics
Expand Down Expand Up @@ -108,7 +89,7 @@ where
/// Returns the sum of values in the array.
///
/// Returns `None` if the array is empty or only contains null values.
pub fn sum_primitive<T>(array: &PrimitiveArray<T>) -> Option<T>
pub fn sum_primitive<'a, T>(array: &'a PrimitiveArray<T>) -> Option<T>
where
T: NativeType + Simd + Add<Output = T> + std::iter::Sum<T>,
T::Simd: Add<Output = T::Simd> + Sum<T>,
Expand Down
9 changes: 2 additions & 7 deletions src/types/simd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,12 @@ pub trait NativeSimd: Default {
/// * iff `v.len()` != `T::LANES`
fn from_chunk(v: &[Self::Native]) -> Self;

/// Converts a slice into `Self`.
/// # Safety
/// Caller must ensure:
/// * `v.len() == T::LANES`
/// * slice is aligned to `Self`
unsafe fn from_chunk_aligned_unchecked(v: &[Self::Native]) -> Self;

/// Creates a new `Self` from `v` by populating items from `v` up to its length.
/// Items from `v` at positions larger than the number of lanes are ignored;
/// remaining items are populated with `remaining`.
fn from_incomplete_chunk(v: &[Self::Native], remaining: Self::Native) -> Self;

/// Returns a copy of `self`.
fn clone(&self) -> Self;
}

/// Trait implemented by some [`NativeType`] that have a SIMD representation.
Expand Down
9 changes: 4 additions & 5 deletions src/types/simd/native.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,16 @@ macro_rules! simd {
($name)(v.try_into().unwrap())
}

#[inline]
unsafe fn from_chunk_aligned_unchecked(v: &[$type]) -> Self {
// Converts the slice with `try_into`; `unwrap` panics if that conversion fails
// (NOTE(review): presumably when `v.len()` differs from the lane count - confirm).
($name)(v.try_into().unwrap())
}

#[inline]
fn from_incomplete_chunk(v: &[$type], remaining: $type) -> Self {
    // Start with every lane set to `remaining`, then overwrite the leading lanes from `v`.
    // `zip` stops at the shorter side, so surplus items of `v` are ignored.
    let mut lanes = [remaining; $lanes];
    for (lane, value) in lanes.iter_mut().zip(v) {
        *lane = *value;
    }
    Self(lanes)
}
#[inline]
fn clone(&self) -> Self {
self.clone()
}
}

impl std::ops::Index<usize> for $name {
Expand Down
10 changes: 5 additions & 5 deletions src/types/simd/packed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,17 @@ macro_rules! simd {
<$name>::from_slice_unaligned(v)
}

#[inline]
unsafe fn from_chunk_aligned_unchecked(v: &[$type]) -> Self {
// Forwards to `from_slice_aligned_unchecked`; no length or alignment check is made
// in this function, so both are left to the caller (hence `unsafe`).
<$name>::from_slice_aligned_unchecked(v)
}

#[inline]
fn from_incomplete_chunk(v: &[$type], remaining: $type) -> Self {
    // Fill a full-width buffer with `remaining`, overwrite its leading lanes from `v`
    // (`zip` stops at the shorter side), then load the buffer as a vector.
    let mut lanes = [remaining; $lanes];
    for (lane, &value) in lanes.iter_mut().zip(v.iter()) {
        *lane = value;
    }
    <$name>::from_chunk(&lanes[..])
}

#[inline]
fn clone(&self) -> Self {
// Returns a copy of the vector by dereferencing `self`.
*self
}
}
};
}
Expand Down

0 comments on commit 90b4f85

Please sign in to comment.