Skip to content

Commit

Permalink
Documents arithmetic reduction semantics (rust-lang#412)
Browse files Browse the repository at this point in the history
* documents arithmetic reduction semantics
  • Loading branch information
gnzlbg committed Apr 5, 2018
1 parent f750e2a commit 65740ab
Show file tree
Hide file tree
Showing 5 changed files with 691 additions and 60 deletions.
2 changes: 1 addition & 1 deletion ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ set -ex
# Tests are all super fast anyway, and they fault often enough on travis that
# having only one thread increases debuggability to be worth it.
export RUST_TEST_THREADS=1
#export RUST_BACKTRACE=1
#export RUST_BACKTRACE=full
#export RUST_TEST_NOCAPTURE=1

FEATURES="strict,$FEATURES"
Expand Down
106 changes: 76 additions & 30 deletions coresimd/ppsv/api/arithmetic_reductions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,58 +4,104 @@
macro_rules! impl_arithmetic_reductions {
($id:ident, $elem_ty:ident) => {
impl $id {
/// Lane-wise addition of the vector elements.
/// Horizontal sum of the vector elements.
///
/// FIXME: document guarantees with respect to:
/// * integers: overflow behavior
/// * floats: order and NaNs
/// The intrinsic performs a tree-reduction of the vector elements.
/// That is, for an 8 element vector:
///
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
///
/// # Integer vectors
///
/// If an operation overflows it returns the mathematical result
/// modulo `2^n` where `n` is the number of bits of the element type.
///
/// # Floating-point vectors
///
/// If one of the vector elements is `NaN` the reduction returns
/// `NaN`.
#[cfg(not(target_arch = "aarch64"))]
#[inline]
pub fn sum(self) -> $elem_ty {
pub fn wrapping_sum(self) -> $elem_ty {
use coresimd::simd_llvm::simd_reduce_add_ordered;
unsafe { simd_reduce_add_ordered(self, 0 as $elem_ty) }
}
/// Lane-wise addition of the vector elements.
/// Horizontal sum of the vector elements.
///
/// The intrinsic performs a tree-reduction of the vector elements.
/// That is, for an 8 element vector:
///
/// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
///
/// # Integer vectors
///
/// If an operation overflows it returns the mathematical result
/// modulo `2^n` where `n` is the number of bits of the element type.
///
/// # Floating-point vectors
///
/// FIXME: document guarantees with respect to:
/// * integers: overflow behavior
/// * floats: order and NaNs
/// If one of the vector elements is `NaN` the reduction returns
/// `NaN`.
#[cfg(target_arch = "aarch64")]
#[inline]
pub fn sum(self) -> $elem_ty {
pub fn wrapping_sum(self) -> $elem_ty {
// FIXME: broken on AArch64
// https://bugs.llvm.org/show_bug.cgi?id=36796
use super::codegen::wrapping::Wrapping;
let mut x = self.extract(0) as $elem_ty;
for i in 1..$id::lanes() {
x += self.extract(i) as $elem_ty;
x = Wrapping::add(x, self.extract(i) as $elem_ty);
}
x
}

/// Lane-wise multiplication of the vector elements.
/// Horizontal product of the vector elements.
///
/// FIXME: document guarantees with respect to:
/// * integers: overflow behavior
/// * floats: order and NaNs
/// The intrinsic performs a tree-reduction of the vector elements.
/// That is, for an 8 element vector:
///
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
///
/// # Integer vectors
///
/// If an operation overflows it returns the mathematical result
/// modulo `2^n` where `n` is the number of bits of the element type.
///
/// # Floating-point vectors
///
/// If one of the vector elements is `NaN` the reduction returns
/// `NaN`.
#[cfg(not(target_arch = "aarch64"))]
#[inline]
pub fn product(self) -> $elem_ty {
pub fn wrapping_product(self) -> $elem_ty {
use coresimd::simd_llvm::simd_reduce_mul_ordered;
unsafe { simd_reduce_mul_ordered(self, 1 as $elem_ty) }
}
/// Lane-wise multiplication of the vector elements.
/// Horizontal product of the vector elements.
///
/// The intrinsic performs a tree-reduction of the vector elements.
/// That is, for an 8 element vector:
///
/// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
///
/// # Integer vectors
///
/// If an operation overflows it returns the mathematical result
/// modulo `2^n` where `n` is the number of bits of the element type.
///
/// # Floating-point vectors
///
/// FIXME: document guarantees with respect to:
/// * integers: overflow behavior
/// * floats: order and NaNs
/// If one of the vector elements is `NaN` the reduction returns
/// `NaN`.
#[cfg(target_arch = "aarch64")]
#[inline]
pub fn product(self) -> $elem_ty {
pub fn wrapping_product(self) -> $elem_ty {
// FIXME: broken on AArch64
// https://bugs.llvm.org/show_bug.cgi?id=36796
use super::codegen::wrapping::Wrapping;
let mut x = self.extract(0) as $elem_ty;
for i in 1..$id::lanes() {
x *= self.extract(i) as $elem_ty;
x = Wrapping::mul(x, self.extract(i) as $elem_ty);
}
x
}
Expand All @@ -78,25 +124,25 @@ macro_rules! test_arithmetic_reductions {
}

#[test]
fn sum() {
fn wrapping_sum() {
use coresimd::simd::$id;
let v = $id::splat(0 as $elem_ty);
assert_eq!(v.sum(), 0 as $elem_ty);
assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
let v = $id::splat(1 as $elem_ty);
assert_eq!(v.sum(), $id::lanes() as $elem_ty);
assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
let v = alternating(2);
assert_eq!(
v.sum(),
v.wrapping_sum(),
($id::lanes() / 2 + $id::lanes()) as $elem_ty
);
}
#[test]
fn product() {
fn wrapping_product() {
use coresimd::simd::$id;
let v = $id::splat(0 as $elem_ty);
assert_eq!(v.product(), 0 as $elem_ty);
assert_eq!(v.wrapping_product(), 0 as $elem_ty);
let v = $id::splat(1 as $elem_ty);
assert_eq!(v.product(), 1 as $elem_ty);
assert_eq!(v.wrapping_product(), 1 as $elem_ty);
let f = match $id::lanes() {
64 => 16,
32 => 8,
Expand All @@ -105,7 +151,7 @@ macro_rules! test_arithmetic_reductions {
};
let v = alternating(f);
assert_eq!(
v.product(),
v.wrapping_product(),
(2_usize.pow(($id::lanes() / f) as u32) as $elem_ty)
);
}
Expand Down
52 changes: 23 additions & 29 deletions coresimd/ppsv/api/minmax_reductions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,19 @@
macro_rules! impl_minmax_reductions {
($id:ident, $elem_ty:ident) => {
impl $id {
/// Largest vector value.
///
/// FIXME: document behavior for float vectors with NaNs.
#[cfg(not(target_arch = "aarch64"))]
/// Largest vector element value.
#[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
#[inline]
pub fn max(self) -> $elem_ty {
pub fn max_element(self) -> $elem_ty {
use coresimd::simd_llvm::simd_reduce_max;
unsafe { simd_reduce_max(self) }
}
/// Largest vector value.
///
/// FIXME: document behavior for float vectors with NaNs.
#[cfg(target_arch = "aarch64")]

/// Largest vector element value.
#[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
#[allow(unused_imports)]
#[inline]
pub fn max(self) -> $elem_ty {
pub fn max_element(self) -> $elem_ty {
// FIXME: broken on AArch64
// https://bugs.llvm.org/show_bug.cgi?id=36796
use cmp::Ord;
Expand All @@ -31,22 +28,19 @@ macro_rules! impl_minmax_reductions {
x
}

/// Smallest vector value.
///
/// FIXME: document behavior for float vectors with NaNs.
#[cfg(not(target_arch = "aarch64"))]
/// Smallest vector element value.
#[cfg(not(any(target_arch = "aarch64", target_arch = "arm")))]
#[inline]
pub fn min(self) -> $elem_ty {
pub fn min_element(self) -> $elem_ty {
use coresimd::simd_llvm::simd_reduce_min;
unsafe { simd_reduce_min(self) }
}
/// Smallest vector value.
///
/// FIXME: document behavior for float vectors with NaNs.
#[cfg(target_arch = "aarch64")]

/// Smallest vector element value.
#[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
#[allow(unused_imports)]
#[inline]
pub fn min(self) -> $elem_ty {
pub fn min_element(self) -> $elem_ty {
// FIXME: broken on AArch64
// https://bugs.llvm.org/show_bug.cgi?id=36796
use cmp::Ord;
Expand All @@ -65,29 +59,29 @@ macro_rules! impl_minmax_reductions {
macro_rules! test_minmax_reductions {
($id:ident, $elem_ty:ident) => {
#[test]
fn max() {
fn max_element() {
use coresimd::simd::$id;
let v = $id::splat(0 as $elem_ty);
assert_eq!(v.max(), 0 as $elem_ty);
assert_eq!(v.max_element(), 0 as $elem_ty);
let v = v.replace(1, 1 as $elem_ty);
assert_eq!(v.max(), 1 as $elem_ty);
assert_eq!(v.max_element(), 1 as $elem_ty);
let v = v.replace(0, 2 as $elem_ty);
assert_eq!(v.max(), 2 as $elem_ty);
assert_eq!(v.max_element(), 2 as $elem_ty);
}

#[test]
fn min() {
fn min_element() {
use coresimd::simd::$id;
let v = $id::splat(0 as $elem_ty);
assert_eq!(v.min(), 0 as $elem_ty);
assert_eq!(v.min_element(), 0 as $elem_ty);
let v = v.replace(1, 1 as $elem_ty);
assert_eq!(v.min(), 0 as $elem_ty);
assert_eq!(v.min_element(), 0 as $elem_ty);
let v = $id::splat(1 as $elem_ty);
let v = v.replace(0, 2 as $elem_ty);
assert_eq!(v.min(), 1 as $elem_ty);
assert_eq!(v.min_element(), 1 as $elem_ty);
let v = $id::splat(2 as $elem_ty);
let v = v.replace(1, 1 as $elem_ty);
assert_eq!(v.min(), 1 as $elem_ty);
assert_eq!(v.min_element(), 1 as $elem_ty);
}
};
}
49 changes: 49 additions & 0 deletions coresimd/ppsv/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,52 @@ impl<T> FromBits<T> for T {
t
}
}

/// Works around code generation issues.
///
/// This module is only compiled when targeting AArch64.
#[cfg(target_arch = "aarch64")]
mod codegen {
    /// Scalar addition and multiplication that wrap around on integer
    /// overflow.
    #[cfg(target_arch = "aarch64")]
    pub mod wrapping {
        /// Uniform `add` / `mul` interface over the primitive numeric types.
        ///
        /// Integer implementations forward to `wrapping_add` and
        /// `wrapping_mul`; floating-point implementations use the plain `+`
        /// and `*` operators.
        pub trait Wrapping {
            /// Returns the sum of `self` and `other`.
            fn add(self, other: Self) -> Self;
            /// Returns the product of `self` and `other`.
            fn mul(self, other: Self) -> Self;
        }

        // One macro with two arms generates every implementation: the `int`
        // arm uses the wrapping integer operations, the `float` arm uses the
        // ordinary arithmetic operators.
        macro_rules! impl_wrapping {
            (int: $($ty:ident),+) => {
                $(
                    impl Wrapping for $ty {
                        fn add(self, rhs: Self) -> Self {
                            $ty::wrapping_add(self, rhs)
                        }
                        fn mul(self, rhs: Self) -> Self {
                            $ty::wrapping_mul(self, rhs)
                        }
                    }
                )+
            };
            (float: $($ty:ident),+) => {
                $(
                    impl Wrapping for $ty {
                        fn add(self, rhs: Self) -> Self {
                            self + rhs
                        }
                        fn mul(self, rhs: Self) -> Self {
                            self * rhs
                        }
                    }
                )+
            };
        }

        impl_wrapping!(int: i8, i16, i32, i64, u8, u16, u32, u64);
        impl_wrapping!(float: f32, f64);
    }
}
Loading

0 comments on commit 65740ab

Please sign in to comment.