diff --git a/.gitignore b/.gitignore index c16125ed70..97647e1e70 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ Cargo.lock .*.swp target -tags \ No newline at end of file +tags +crates/stdarch-gen/aarch64.rs +crates/stdarch-gen/arm.rs diff --git a/Cargo.toml b/Cargo.toml index 7b4c5ead8a..73f69ca46f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,6 +3,7 @@ members = [ "crates/stdarch-verify", "crates/core_arch", "crates/std_detect", + "crates/stdarch-gen", "examples/" ] exclude = [ diff --git a/crates/core_arch/src/aarch64/mod.rs b/crates/core_arch/src/aarch64/mod.rs index e33dc7eaf5..190383df21 100644 --- a/crates/core_arch/src/aarch64/mod.rs +++ b/crates/core_arch/src/aarch64/mod.rs @@ -29,3 +29,6 @@ use stdarch_test::assert_instr; pub unsafe fn brk() -> ! { crate::intrinsics::abort() } + +#[cfg(test)] +pub(crate) mod test_support; diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs new file mode 100644 index 0000000000..fcb3986350 --- /dev/null +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -0,0 +1,666 @@ +// This code is automatically generated. DO NOT MODIFY. +// +// Instead, modify `crates/stdarch-gen/neon.spec` and run the following command to re-generate this file: +// +// ``` +// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec +// ``` +use super::*; +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmeq))] +pub unsafe fn vceq_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmeq))] +pub unsafe fn vceqq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmeq))] +pub unsafe fn vceq_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmeq))] +pub unsafe fn vceqq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmeq))] +pub unsafe fn vceq_p64(a: poly64x1_t, b: poly64x1_t) -> uint64x1_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmeq))] +pub unsafe fn vceqq_p64(a: poly64x2_t, b: poly64x2_t) -> uint64x2_t { + simd_eq(a, b) +} + +/// Floating-point compare equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmeq))] +pub unsafe fn vceq_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { + simd_eq(a, b) +} + +/// Floating-point compare equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmeq))] +pub unsafe fn vceqq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { + simd_eq(a, b) +} + +/// Compare signed greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmgt))] +pub unsafe fn vcgt_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { + simd_gt(a, b) +} + +/// Compare signed greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmgt))] +pub unsafe fn vcgtq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { + 
simd_gt(a, b) +} + +/// Compare unsigned higher +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmhi))] +pub unsafe fn vcgt_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_gt(a, b) +} + +/// Compare unsigned higher +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmhi))] +pub unsafe fn vcgtq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_gt(a, b) +} + +/// Floating-point compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmgt))] +pub unsafe fn vcgt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { + simd_gt(a, b) +} + +/// Floating-point compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmgt))] +pub unsafe fn vcgtq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { + simd_gt(a, b) +} + +/// Compare signed less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmgt))] +pub unsafe fn vclt_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { + simd_lt(a, b) +} + +/// Compare signed less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmgt))] +pub unsafe fn vcltq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { + simd_lt(a, b) +} + +/// Compare unsigned less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmhi))] +pub unsafe fn vclt_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_lt(a, b) +} + +/// Compare unsigned less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmhi))] +pub unsafe fn vcltq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_lt(a, b) +} + +/// Floating-point compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmgt))] +pub unsafe fn vclt_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { + simd_lt(a, b) +} + +/// Floating-point compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmgt))] +pub unsafe fn vcltq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { + simd_lt(a, b) +} + +/// Compare signed less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmge))] +pub unsafe fn vcle_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { + simd_le(a, b) +} + +/// Compare signed less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmge))] +pub unsafe fn vcleq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { + simd_le(a, b) +} + +/// Compare unsigned less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmhs))] +pub unsafe fn vcle_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_le(a, b) +} + +/// Compare unsigned less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmhs))] +pub unsafe fn vcleq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_le(a, b) +} + +/// Floating-point compare less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmge))] +pub unsafe fn vcle_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { + simd_le(a, b) +} + +/// Floating-point compare less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmge))] +pub unsafe fn vcleq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { + simd_le(a, b) +} + +/// Compare signed greater than or equal +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(test, assert_instr(cmge))] +pub unsafe fn vcge_s64(a: int64x1_t, b: int64x1_t) -> uint64x1_t { + simd_ge(a, b) +} + +/// Compare signed greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmge))] +pub unsafe fn vcgeq_s64(a: int64x2_t, b: int64x2_t) -> uint64x2_t { + simd_ge(a, b) +} + +/// Compare unsigned greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmhs))] +pub unsafe fn vcge_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_ge(a, b) +} + +/// Compare unsigned greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(cmhs))] +pub unsafe fn vcgeq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_ge(a, b) +} + +/// Floating-point compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmge))] +pub unsafe fn vcge_f64(a: float64x1_t, b: float64x1_t) -> uint64x1_t { + simd_ge(a, b) +} + +/// Floating-point compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcmge))] +pub unsafe fn vcgeq_f64(a: float64x2_t, b: float64x2_t) -> uint64x2_t { + simd_ge(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmul))] +pub unsafe fn vmul_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fmul))] +pub unsafe fn vmulq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + simd_mul(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fsub))] +pub unsafe fn vsub_f64(a: float64x1_t, b: float64x1_t) -> float64x1_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fsub))] +pub unsafe fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { + simd_sub(a, b) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::core_arch::simd::*; + use std::mem::transmute; + use stdarch_test::simd_test; + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_u64() { + let a: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let b: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vceq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let b: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vceq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_u64() { + let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0x01); + let b: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0x01); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vceqq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let b: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); + let r: u64x2 = transmute(vceqq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_s64() { + let a: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); + let b: i64x1 = 
i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vceq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); + let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vceq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_s64() { + let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x01); + let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x01); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vceqq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x7F_FF_FF_FF_FF_FF_FF_FF); + let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, -9223372036854775808); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); + let r: u64x2 = transmute(vceqq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_p64() { + let a: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); + let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vceq_p64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); + let b: i64x1 = i64x1::new(0x7F_FF_FF_FF_FF_FF_FF_FF); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vceq_p64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_p64() { + let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x01); + let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x01); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vceqq_p64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x7F_FF_FF_FF_FF_FF_FF_FF); + let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, -9223372036854775808); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); + let r: u64x2 = transmute(vceqq_p64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_f64() { + let a: f64 = 1.2; + let b: f64 = 1.2; + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vceq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_f64() { + let a: f64x2 = f64x2::new(1.2, 3.4); + let b: f64x2 = f64x2::new(1.2, 3.4); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vceqq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s64() { + let a: i64x1 = i64x1::new(1); + let b: i64x1 = i64x1::new(0); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcgt_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s64() { + let a: i64x2 = i64x2::new(1, 2); + let b: i64x2 = i64x2::new(0, 1); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcgtq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + 
unsafe fn test_vcgt_u64() { + let a: u64x1 = u64x1::new(1); + let b: u64x1 = u64x1::new(0); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcgt_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_u64() { + let a: u64x2 = u64x2::new(1, 2); + let b: u64x2 = u64x2::new(0, 1); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcgtq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_f64() { + let a: f64 = 1.2; + let b: f64 = 0.1; + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcgt_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_f64() { + let a: f64x2 = f64x2::new(1.2, 2.3); + let b: f64x2 = f64x2::new(0.1, 1.2); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcgtq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s64() { + let a: i64x1 = i64x1::new(0); + let b: i64x1 = i64x1::new(1); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vclt_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_s64() { + let a: i64x2 = i64x2::new(0, 1); + let b: i64x2 = i64x2::new(1, 2); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcltq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u64() { + let a: u64x1 = u64x1::new(0); + let b: u64x1 = u64x1::new(1); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vclt_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u64() { + let a: u64x2 = u64x2::new(0, 1); + let b: u64x2 = u64x2::new(1, 2); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcltq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_f64() { + let a: f64 = 0.1; + let b: f64 = 1.2; + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vclt_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_f64() { + let a: f64x2 = f64x2::new(0.1, 1.2); + let b: f64x2 = f64x2::new(1.2, 2.3); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcltq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_s64() { + let a: i64x1 = i64x1::new(0); + let b: i64x1 = i64x1::new(1); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcle_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s64() { + let a: i64x2 = i64x2::new(0, 1); + let b: i64x2 = i64x2::new(1, 2); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcleq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u64() { + let a: u64x1 = u64x1::new(0); + let b: u64x1 = 
u64x1::new(1); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcle_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u64() { + let a: u64x2 = u64x2::new(0, 1); + let b: u64x2 = u64x2::new(1, 2); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcleq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_f64() { + let a: f64 = 0.1; + let b: f64 = 1.2; + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcle_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_f64() { + let a: f64x2 = f64x2::new(0.1, 1.2); + let b: f64x2 = f64x2::new(1.2, 2.3); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcleq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_s64() { + let a: i64x1 = i64x1::new(1); + let b: i64x1 = i64x1::new(0); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcge_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s64() { + let a: i64x2 = i64x2::new(1, 2); + let b: i64x2 = i64x2::new(0, 1); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcgeq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_u64() { + let a: u64x1 = u64x1::new(1); + let b: u64x1 = u64x1::new(0); + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcge_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_u64() { + let a: u64x2 = u64x2::new(1, 2); + let b: u64x2 = u64x2::new(0, 1); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcgeq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_f64() { + let a: f64 = 1.2; + let b: f64 = 0.1; + let e: u64x1 = u64x1::new(0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x1 = transmute(vcge_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_f64() { + let a: f64x2 = f64x2::new(1.2, 2.3); + let b: f64x2 = f64x2::new(0.1, 1.2); + let e: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF); + let r: u64x2 = transmute(vcgeq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_f64() { + let a: f64 = 1.0; + let b: f64 = 2.0; + let e: f64 = 2.0; + let r: f64 = transmute(vmul_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_f64() { + let a: f64x2 = f64x2::new(1.0, 2.0); + let b: f64x2 = f64x2::new(2.0, 3.0); + let e: f64x2 = f64x2::new(2.0, 6.0); + let r: f64x2 = transmute(vmulq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_f64() { + let a: f64 = 1.0; + let b: f64 = 1.0; + let e: f64 = 0.0; + let r: f64 = transmute(vsub_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_f64() { + let a: f64x2 = 
f64x2::new(1.0, 4.0); + let b: f64x2 = f64x2::new(1.0, 2.0); + let e: f64x2 = f64x2::new(0.0, 2.0); + let r: f64x2 = transmute(vsubq_f64(transmute(a), transmute(b))); + assert_eq!(r, e); + } +} diff --git a/crates/core_arch/src/aarch64/neon.rs b/crates/core_arch/src/aarch64/neon/mod.rs similarity index 81% rename from crates/core_arch/src/aarch64/neon.rs rename to crates/core_arch/src/aarch64/neon/mod.rs index 2ddd97273c..532c5b4d5a 100644 --- a/crates/core_arch/src/aarch64/neon.rs +++ b/crates/core_arch/src/aarch64/neon/mod.rs @@ -2,6 +2,9 @@ #![allow(non_camel_case_types)] +mod generated; +pub use self::generated::*; + // FIXME: replace neon with asimd use crate::{ @@ -18,8 +21,12 @@ types! { pub struct float64x2_t(f64, f64); /// ARM-specific 64-bit wide vector of one packed `p64`. pub struct poly64x1_t(i64); // FIXME: check this! + /// ARM-specific 64-bit wide vector of one packed `p64`. + pub struct poly64_t(i64); // FIXME: check this! /// ARM-specific 64-bit wide vector of two packed `p64`. pub struct poly64x2_t(i64, i64); // FIXME: check this! + /// ARM-specific 128-bit wide vector of one packed `p64`. + pub struct poly128_t(i128); // FIXME: check this! } /// ARM-specific type containing two `int8x16_t` vectors. @@ -64,6 +71,12 @@ pub struct poly8x16x4_t( #[allow(improper_ctypes)] extern "C" { + #[link_name = "llvm.aarch64.neon.pmull64"] + fn vmull_p64_(a: i64, b: i64) -> int8x16_t; + + #[link_name = "llvm.aarch64.neon.addp.v16i8"] + fn vpaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + #[link_name = "llvm.aarch64.neon.smaxv.i8.v8i8"] fn vmaxv_s8_(a: int8x8_t) -> i8; #[link_name = "llvm.aarch64.neon.smaxv.i8.6i8"] @@ -221,6 +234,7 @@ extern "C" { b3: int8x16_t, c: uint8x8_t, ) -> int8x8_t; + #[link_name = "llvm.aarch64.neon.tbx4.v16i8"] fn vqtbx4q( a: int8x16_t, @@ -232,6 +246,22 @@ extern "C" { ) -> int8x16_t; } +/// Add pairwise +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(addp))] +pub unsafe fn vpaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + vpaddq_u8_(a, b) +} + +/// Polynomial multiply long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(pmull))] +pub unsafe fn vmull_p64(a: poly64_t, b: poly64_t) -> poly128_t { + transmute(vmull_p64_(transmute(a), transmute(b))) +} + /// Vector add. 
#[inline] #[target_feature(enable = "neon")] @@ -1544,10 +1574,53 @@ pub unsafe fn vqtbx4q_p8(a: poly8x16_t, t: poly8x16x4_t, idx: uint8x16_t) -> pol #[cfg(test)] mod tests { - use crate::core_arch::{aarch64::*, simd::*}; + use crate::core_arch::aarch64::test_support::*; + use crate::core_arch::arm::test_support::*; + use crate::core_arch::{aarch64::neon::*, aarch64::*, simd::*}; use std::mem::transmute; use stdarch_test::simd_test; + #[simd_test(enable = "neon")] + unsafe fn test_vpaddq_u8() { + let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = i8x16::new( + 17, 18, 19, 20, 20, 21, 22, 23, 24, 25, 26, 27, 29, 29, 30, 31, + ); + let e = i8x16::new(1, 5, 9, 13, 17, 21, 25, 29, 35, 39, 41, 45, 49, 53, 58, 61); + let r: i8x16 = transmute(vpaddq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_p64() { + // FIXME: I had a hard time writing a test for this as the documentation + // from ARM is a bit thin as to what exactly it does: `vmull_p64` is a + // carry-less (polynomial) multiply over GF(2), so it only agrees with + // integer multiplication when no carries occur, e.g. 8 * 7 = 0b1000 + // clmul 0b0111 = 0b0111000 = 56. The commented-out cases below expect + // integer results (carry-less 5 * 5 is 17, not 25), so they stay disabled. + let a: i64 = 8; + let b: i64 = 7; + let e: i128 = 56; + let r: i128 = transmute(vmull_p64(transmute(a), transmute(b))); + assert_eq!(r, e); + + /* + let a: i64 = 5; + let b: i64 = 5; + let e: i128 = 25; + let r: i128 = transmute(vmull_p64(a, b)); + assert_eq!(r, e); + + let a: i64 = 6; + let b: i64 = 6; + let e: i128 = 36; + let r: i128 = transmute(vmull_p64(a, b)); + assert_eq!(r, e); + + let a: i64 = 7; + let b: i64 = 6; + let e: i128 = 42; + let r: i128 = transmute(vmull_p64(a, b)); + assert_eq!(r, e); + */ + } #[simd_test(enable = "neon")] unsafe fn test_vadd_f64() { let a = 1.; @@ -1980,9 +2053,459 @@ mod tests { test_vcombine!(test_vcombine_u64 => vcombine_u64([3_u64], [13_u64])); test_vcombine!(test_vcombine_p64 => vcombine_p64([3_u64], [13_u64])); test_vcombine!(test_vcombine_f64 => vcombine_f64([-3_f64], [13_f64])); + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_u64() { + test_cmp_u64( + |i, j| vceq_u64(i, j), + |a: u64, b: u64| -> u64 { + if a == b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_u64() { + testq_cmp_u64( + |i, j| vceqq_u64(i, j), + |a: u64, b: u64| -> u64 { + if a == b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_s64() { + test_cmp_s64( + |i, j| vceq_s64(i, j), + |a: i64, b: i64| -> u64 { + if a == b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_s64() { + testq_cmp_s64( + |i, j| vceqq_s64(i, j), + |a: i64, b: i64| -> u64 { + if a == b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_p64() { + test_cmp_p64( + |i, j| vceq_p64(i, j), + |a: u64, b: u64| -> u64 { + if a == b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_p64() { + testq_cmp_p64( + |i, j| vceqq_p64(i, j), + |a: u64, b: u64| -> u64 { + if a == b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_f64() { + test_cmp_f64( + |i, j| vceq_f64(i, j), + |a: f64, b: f64| -> u64 { + if a == b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_f64() { + testq_cmp_f64( + |i, j| vceqq_f64(i, j), + |a: f64, b: f64| -> u64 { + if a == b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s64() { + 
test_cmp_s64( + |i, j| vcgt_s64(i, j), + |a: i64, b: i64| -> u64 { + if a > b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s64() { + testq_cmp_s64( + |i, j| vcgtq_s64(i, j), + |a: i64, b: i64| -> u64 { + if a > b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_u64() { + test_cmp_u64( + |i, j| vcgt_u64(i, j), + |a: u64, b: u64| -> u64 { + if a > b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_u64() { + testq_cmp_u64( + |i, j| vcgtq_u64(i, j), + |a: u64, b: u64| -> u64 { + if a > b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_f64() { + test_cmp_f64( + |i, j| vcgt_f64(i, j), + |a: f64, b: f64| -> u64 { + if a > b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_f64() { + testq_cmp_f64( + |i, j| vcgtq_f64(i, j), + |a: f64, b: f64| -> u64 { + if a > b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s64() { + test_cmp_s64( + |i, j| vclt_s64(i, j), + |a: i64, b: i64| -> u64 { + if a < b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_s64() { + testq_cmp_s64( + |i, j| vcltq_s64(i, j), + |a: i64, b: i64| -> u64 { + if a < b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u64() { + test_cmp_u64( + |i, j| vclt_u64(i, j), + |a: u64, b: u64| -> u64 { + if a < b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u64() { + testq_cmp_u64( + |i, j| vcltq_u64(i, j), + |a: u64, b: u64| -> u64 { + if a < b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_f64() { + test_cmp_f64( + |i, j| vclt_f64(i, j), + |a: f64, b: f64| -> u64 { + if a < b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_f64() { + testq_cmp_f64( + |i, j| vcltq_f64(i, j), + |a: f64, b: f64| -> u64 { + if a < b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_s64() { + test_cmp_s64( + |i, j| vcle_s64(i, j), + |a: i64, b: i64| -> u64 { + if a <= b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s64() { + testq_cmp_s64( + |i, j| vcleq_s64(i, j), + |a: i64, b: i64| -> u64 { + if a <= b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u64() { + test_cmp_u64( + |i, j| vcle_u64(i, j), + |a: u64, b: u64| -> u64 { + if a <= b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u64() { + testq_cmp_u64( + |i, j| vcleq_u64(i, j), + |a: u64, b: u64| -> u64 { + if a <= b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_f64() { + test_cmp_f64( + |i, j| vcle_f64(i, j), + |a: f64, b: f64| -> u64 { + if a <= b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_f64() { + testq_cmp_f64( + |i, j| vcleq_f64(i, j), + |a: f64, b: f64| -> u64 { + if a <= b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + + 
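+ // The test_cmp_*/testq_cmp_* helpers used by these tests come from the new + // test_support modules: they splat each scalar test value into every lane, + // run the intrinsic, and check each lane against the plain scalar closure, + // expecting all-ones (0xFFFFFFFFFFFFFFFF) where the predicate holds and + // all-zeros where it does not.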
#[simd_test(enable = "neon")] + unsafe fn test_vcge_s64() { + test_cmp_s64( + |i, j| vcge_s64(i, j), + |a: i64, b: i64| -> u64 { + if a >= b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s64() { + testq_cmp_s64( + |i, j| vcgeq_s64(i, j), + |a: i64, b: i64| -> u64 { + if a >= b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_u64() { + test_cmp_u64( + |i, j| vcge_u64(i, j), + |a: u64, b: u64| -> u64 { + if a >= b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_u64() { + testq_cmp_u64( + |i, j| vcgeq_u64(i, j), + |a: u64, b: u64| -> u64 { + if a >= b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_f64() { + test_cmp_f64( + |i, j| vcge_f64(i, j), + |a: f64, b: f64| -> u64 { + if a >= b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_f64() { + testq_cmp_f64( + |i, j| vcgeq_f64(i, j), + |a: f64, b: f64| -> u64 { + if a >= b { + 0xFFFFFFFFFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_f64() { + test_ari_f64(|i, j| vmul_f64(i, j), |a: f64, b: f64| -> f64 { a * b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_f64() { + testq_ari_f64(|i, j| vmulq_f64(i, j), |a: f64, b: f64| -> f64 { a * b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_f64() { + test_ari_f64(|i, j| vsub_f64(i, j), |a: f64, b: f64| -> f64 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_f64() { + testq_ari_f64(|i, j| vsubq_f64(i, j), |a: f64, b: f64| -> f64 { a - b }); + } } #[cfg(test)] #[cfg(target_endian = "little")] -#[path = "../arm/table_lookup_tests.rs"] +#[path = "../../arm/neon/table_lookup_tests.rs"] mod table_lookup_tests; diff --git a/crates/core_arch/src/aarch64/test_support.rs b/crates/core_arch/src/aarch64/test_support.rs new file mode 100644 index 0000000000..e08c39a545 --- /dev/null +++ b/crates/core_arch/src/aarch64/test_support.rs @@ -0,0 +1,184 @@ +use crate::core_arch::{aarch64::neon::*, arm::*, simd::*}; +use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec}; + +macro_rules! V_u64 { + () => { + vec![ + 0x0000000000000000u64, + 0x0101010101010101u64, + 0x0202020202020202u64, + 0x0F0F0F0F0F0F0F0Fu64, + 0x8080808080808080u64, + 0xF0F0F0F0F0F0F0F0u64, + 0xFFFFFFFFFFFFFFFFu64, + ] + }; +} + +macro_rules! V_f64 { + () => { + vec![ + 0.0f64, + 1.0f64, + -1.0f64, + 1.2f64, + 2.4f64, + std::f64::MAX, + std::f64::MIN, + std::f64::INFINITY, + std::f64::NEG_INFINITY, + std::f64::NAN, + ] + }; +} + +macro_rules! to64 { + ($t : ident) => { + |v: $t| -> u64 { transmute(v) } + }; +} + +macro_rules! 
to128 { + ($t : ident) => { + |v: $t| -> u128 { transmute(v) } + }; +} + +pub(crate) fn test<T, U, V, W, X>( + vals: Vec<T>, + fill1: fn(T) -> V, + fill2: fn(U) -> W, + cast: fn(W) -> X, + test_fun: fn(V, V) -> W, + verify_fun: fn(T, T) -> U, +) where + T: Copy + core::fmt::Debug, + U: Copy + core::fmt::Debug + std::cmp::PartialEq, + V: Copy + core::fmt::Debug, + W: Copy + core::fmt::Debug, + X: Copy + core::fmt::Debug + std::cmp::PartialEq, +{ + // Check every ordered pair of test values; zipping the list with itself + // would only ever compare each value against itself. + for i in vals.iter() { + for j in vals.iter() { + let a: V = fill1(*i); + let b: V = fill1(*j); + + let actual_pre: W = test_fun(a, b); + let expected_pre: W = fill2(verify_fun(*i, *j)); + + let actual: X = cast(actual_pre); + let expected: X = cast(expected_pre); + + assert_eq!( + actual, expected, + "[{:?}:{:?}] :\nf({:?}, {:?}) = {:?}\ng({:?}, {:?}) = {:?}\n", + *i, *j, &a, &b, actual_pre, &a, &b, expected_pre + ); + } + } +} + +macro_rules! gen_test_fn { + ($n: ident, $t: ident, $u: ident, $v: ident, $w: ident, $x: ident, $vals: expr, $fill1: expr, $fill2: expr, $cast: expr) => { + pub(crate) fn $n(test_fun: fn($v, $v) -> $w, verify_fun: fn($t, $t) -> $u) { + unsafe { + test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun) + }; + } + }; +} + +macro_rules! gen_fill_fn { + ($id: ident, $el_width: expr, $num_els: expr, $in_t : ident, $out_t: ident, $cmp_t: ident) => { + pub(crate) fn $id(val: $in_t) -> $out_t { + let initial: [$in_t; $num_els] = [val; $num_els]; + let result: $cmp_t = unsafe { transmute(initial) }; + let result_out: $out_t = unsafe { transmute(result) }; + + // println!("FILL: {:016x} as {} x {}: {:016x}", val.reverse_bits(), $el_width, $num_els, (result as u64).reverse_bits()); + + result_out + } + }; +} + +gen_fill_fn!(fill_u64, 64, 1, u64, uint64x1_t, u64); +gen_fill_fn!(fillq_u64, 64, 2, u64, uint64x2_t, u128); +gen_fill_fn!(fill_f64, 64, 1, f64, float64x1_t, u64); +gen_fill_fn!(fillq_f64, 64, 2, f64, float64x2_t, u128); +gen_fill_fn!(fill_p64, 64, 1, u64, poly64x1_t, u64); +gen_fill_fn!(fillq_p64, 64, 2, u64, poly64x2_t, u128); + +gen_test_fn!( + test_ari_f64, + f64, + f64, + float64x1_t, + float64x1_t, + u64, + V_f64!(), + fill_f64, + fill_f64, + to64!(float64x1_t) +); +gen_test_fn!( + test_cmp_f64, + f64, + u64, + float64x1_t, + uint64x1_t, + u64, + V_f64!(), + fill_f64, + fill_u64, + to64!(uint64x1_t) +); +gen_test_fn!( + testq_ari_f64, + f64, + f64, + float64x2_t, + float64x2_t, + u128, + V_f64!(), + fillq_f64, + fillq_f64, + to128!(float64x2_t) +); +gen_test_fn!( + testq_cmp_f64, + f64, + u64, + float64x2_t, + uint64x2_t, + u128, + V_f64!(), + fillq_f64, + fillq_u64, + to128!(uint64x2_t) +); + +gen_test_fn!( + test_cmp_p64, + u64, + u64, + poly64x1_t, + uint64x1_t, + u64, + V_u64!(), + fill_p64, + fill_u64, + to64!(uint64x1_t) +); +gen_test_fn!( + testq_cmp_p64, + u64, + u64, + poly64x2_t, + uint64x2_t, + u128, + V_u64!(), + fillq_p64, + fillq_u64, + to128!(uint64x2_t) +); diff --git a/crates/core_arch/src/arm/mod.rs b/crates/core_arch/src/arm/mod.rs index e7b3c67677..bd902dc607 100644 --- a/crates/core_arch/src/arm/mod.rs +++ b/crates/core_arch/src/arm/mod.rs @@ -51,3 +51,7 @@ use stdarch_test::assert_instr; pub unsafe fn udf() -> ! { crate::intrinsics::abort() } + +#[cfg(test)] +#[cfg(any(target_arch = "aarch64", target_feature = "v7"))] +pub(crate) mod test_support; diff --git a/crates/core_arch/src/arm/neon.rs b/crates/core_arch/src/arm/neon.rs deleted file mode 100644 index a5eee5d8b8..0000000000 --- a/crates/core_arch/src/arm/neon.rs +++ /dev/null @@ -1,1687 +0,0 @@ -//! 
ARMv7 NEON intrinsics - -use crate::core_arch::simd_llvm::*; -#[cfg(target_arch = "arm")] -use crate::mem::transmute; -#[cfg(test)] -use stdarch_test::assert_instr; - -types! { - /// ARM-specific 64-bit wide vector of eight packed `i8`. - pub struct int8x8_t(i8, i8, i8, i8, i8, i8, i8, i8); - /// ARM-specific 64-bit wide vector of eight packed `u8`. - pub struct uint8x8_t(u8, u8, u8, u8, u8, u8, u8, u8); - /// ARM-specific 64-bit wide polynomial vector of eight packed `u8`. - pub struct poly8x8_t(u8, u8, u8, u8, u8, u8, u8, u8); - /// ARM-specific 64-bit wide vector of four packed `i16`. - pub struct int16x4_t(i16, i16, i16, i16); - /// ARM-specific 64-bit wide vector of four packed `u16`. - pub struct uint16x4_t(u16, u16, u16, u16); - // FIXME: ARM-specific 64-bit wide vector of four packed `f16`. - // pub struct float16x4_t(f16, f16, f16, f16); - /// ARM-specific 64-bit wide vector of four packed `u16`. - pub struct poly16x4_t(u16, u16, u16, u16); - /// ARM-specific 64-bit wide vector of two packed `i32`. - pub struct int32x2_t(i32, i32); - /// ARM-specific 64-bit wide vector of two packed `u32`. - pub struct uint32x2_t(u32, u32); - /// ARM-specific 64-bit wide vector of two packed `f32`. - pub struct float32x2_t(f32, f32); - /// ARM-specific 64-bit wide vector of one packed `i64`. - pub struct int64x1_t(i64); - /// ARM-specific 64-bit wide vector of one packed `u64`. - pub struct uint64x1_t(u64); - - /// ARM-specific 128-bit wide vector of sixteen packed `i8`. - pub struct int8x16_t( - i8, i8 ,i8, i8, i8, i8 ,i8, i8, - i8, i8 ,i8, i8, i8, i8 ,i8, i8, - ); - /// ARM-specific 128-bit wide vector of sixteen packed `u8`. - pub struct uint8x16_t( - u8, u8 ,u8, u8, u8, u8 ,u8, u8, - u8, u8 ,u8, u8, u8, u8 ,u8, u8, - ); - /// ARM-specific 128-bit wide vector of sixteen packed `u8`. - pub struct poly8x16_t( - u8, u8, u8, u8, u8, u8, u8, u8, - u8, u8, u8, u8, u8, u8, u8, u8 - ); - /// ARM-specific 128-bit wide vector of eight packed `i16`. - pub struct int16x8_t(i16, i16, i16, i16, i16, i16, i16, i16); - /// ARM-specific 128-bit wide vector of eight packed `u16`. - pub struct uint16x8_t(u16, u16, u16, u16, u16, u16, u16, u16); - // FIXME: ARM-specific 128-bit wide vector of eight packed `f16`. - // pub struct float16x8_t(f16, f16, f16, f16, f16, f16, f16); - /// ARM-specific 128-bit wide vector of eight packed `u16`. - pub struct poly16x8_t(u16, u16, u16, u16, u16, u16, u16, u16); - /// ARM-specific 128-bit wide vector of four packed `i32`. - pub struct int32x4_t(i32, i32, i32, i32); - /// ARM-specific 128-bit wide vector of four packed `u32`. - pub struct uint32x4_t(u32, u32, u32, u32); - /// ARM-specific 128-bit wide vector of four packed `f32`. - pub struct float32x4_t(f32, f32, f32, f32); - /// ARM-specific 128-bit wide vector of two packed `i64`. - pub struct int64x2_t(i64, i64); - /// ARM-specific 128-bit wide vector of two packed `u64`. - pub struct uint64x2_t(u64, u64); -} - -/// ARM-specific type containing two `int8x8_t` vectors. -#[derive(Copy, Clone)] -pub struct int8x8x2_t(pub int8x8_t, pub int8x8_t); -/// ARM-specific type containing three `int8x8_t` vectors. -#[derive(Copy, Clone)] -pub struct int8x8x3_t(pub int8x8_t, pub int8x8_t, pub int8x8_t); -/// ARM-specific type containing four `int8x8_t` vectors. -#[derive(Copy, Clone)] -pub struct int8x8x4_t(pub int8x8_t, pub int8x8_t, pub int8x8_t, pub int8x8_t); - -/// ARM-specific type containing two `uint8x8_t` vectors. 
-#[derive(Copy, Clone)] -pub struct uint8x8x2_t(pub uint8x8_t, pub uint8x8_t); -/// ARM-specific type containing three `uint8x8_t` vectors. -#[derive(Copy, Clone)] -pub struct uint8x8x3_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t); -/// ARM-specific type containing four `uint8x8_t` vectors. -#[derive(Copy, Clone)] -pub struct uint8x8x4_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t, pub uint8x8_t); - -/// ARM-specific type containing two `poly8x8_t` vectors. -#[derive(Copy, Clone)] -pub struct poly8x8x2_t(pub poly8x8_t, pub poly8x8_t); -/// ARM-specific type containing three `poly8x8_t` vectors. -#[derive(Copy, Clone)] -pub struct poly8x8x3_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t); -/// ARM-specific type containing four `poly8x8_t` vectors. -#[derive(Copy, Clone)] -pub struct poly8x8x4_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t, pub poly8x8_t); - -#[allow(improper_ctypes)] -extern "C" { - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f32")] - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")] - fn frsqrte_v2f32(a: float32x2_t) -> float32x2_t; - - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v8i8")] - fn vpmins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v4i16")] - fn vpmins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v2i32")] - fn vpmins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v8i8")] - fn vpminu_v8i8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v4i16")] - fn vpminu_v4i16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v2i32")] - fn vpminu_v2i32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminp.v2f32")] - fn vpminf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; - - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v8i8")] - fn vpmaxs_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t; - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v4i16")] - fn vpmaxs_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t; - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v2i32")] - fn vpmaxs_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t; - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v8i8")] - fn vpmaxu_v8i8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; 
- #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v4i16")] - fn vpmaxu_v4i16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v2i32")] - fn vpmaxu_v2i32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")] - #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxp.v2f32")] - fn vpmaxf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; -} - -#[cfg(target_arch = "arm")] -#[allow(improper_ctypes)] -extern "C" { - #[link_name = "llvm.arm.neon.vtbl1"] - fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbl2"] - fn vtbl2(a: int8x8_t, b: int8x8_t, b: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbl3"] - fn vtbl3(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbl4"] - fn vtbl4(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; - - #[link_name = "llvm.arm.neon.vtbx1"] - fn vtbx1(a: int8x8_t, b: int8x8_t, b: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbx2"] - fn vtbx2(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbx3"] - fn vtbx3(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; - #[link_name = "llvm.arm.neon.vtbx4"] - fn vtbx4( - a: int8x8_t, - b: int8x8_t, - b: int8x8_t, - c: int8x8_t, - d: int8x8_t, - e: int8x8_t, - ) -> int8x8_t; -} - -/// Vector add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] -pub unsafe fn vadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - simd_add(a, b) -} - -/// Vector add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] -pub unsafe fn vaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { - simd_add(a, b) -} - -/// Vector add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] -pub unsafe fn vadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - simd_add(a, b) -} - -/// Vector add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] -pub unsafe fn vaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { - simd_add(a, b) -} - -/// Vector add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] -pub unsafe fn vadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - simd_add(a, b) -} - -/// Vector add. 
-#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] -pub unsafe fn vaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { - simd_add(a, b) -} - -/// Vector add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] -pub unsafe fn vaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { - simd_add(a, b) -} - -/// Vector add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] -pub unsafe fn vadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - simd_add(a, b) -} - -/// Vector add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] -pub unsafe fn vaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { - simd_add(a, b) -} - -/// Vector add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] -pub unsafe fn vadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - simd_add(a, b) -} - -/// Vector add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] -pub unsafe fn vaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { - simd_add(a, b) -} - -/// Vector add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] -pub unsafe fn vadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - simd_add(a, b) -} - -/// Vector add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] -pub unsafe fn vaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { - simd_add(a, b) -} - -/// Vector add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] -pub unsafe fn vaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { - simd_add(a, b) -} - -/// Vector add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fadd))] -pub unsafe fn vadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - simd_add(a, b) -} - -/// Vector add. 
-#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fadd))] -pub unsafe fn vaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { - simd_add(a, b) -} - -/// Vector long add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl))] -pub unsafe fn vaddl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { - let a: int16x8_t = simd_cast(a); - let b: int16x8_t = simd_cast(b); - simd_add(a, b) -} - -/// Vector long add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl))] -pub unsafe fn vaddl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { - let a: int32x4_t = simd_cast(a); - let b: int32x4_t = simd_cast(b); - simd_add(a, b) -} - -/// Vector long add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl))] -pub unsafe fn vaddl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { - let a: int64x2_t = simd_cast(a); - let b: int64x2_t = simd_cast(b); - simd_add(a, b) -} - -/// Vector long add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl))] -pub unsafe fn vaddl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { - let a: uint16x8_t = simd_cast(a); - let b: uint16x8_t = simd_cast(b); - simd_add(a, b) -} - -/// Vector long add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl))] -pub unsafe fn vaddl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { - let a: uint32x4_t = simd_cast(a); - let b: uint32x4_t = simd_cast(b); - simd_add(a, b) -} - -/// Vector long add. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl))] -pub unsafe fn vaddl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { - let a: uint64x2_t = simd_cast(a); - let b: uint64x2_t = simd_cast(b); - simd_add(a, b) -} - -/// Vector narrow integer. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] -pub unsafe fn vmovn_s16(a: int16x8_t) -> int8x8_t { - simd_cast(a) -} - -/// Vector narrow integer. 
-#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] -pub unsafe fn vmovn_s32(a: int32x4_t) -> int16x4_t { - simd_cast(a) -} - -/// Vector narrow integer. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] -pub unsafe fn vmovn_s64(a: int64x2_t) -> int32x2_t { - simd_cast(a) -} - -/// Vector narrow integer. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] -pub unsafe fn vmovn_u16(a: uint16x8_t) -> uint8x8_t { - simd_cast(a) -} - -/// Vector narrow integer. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] -pub unsafe fn vmovn_u32(a: uint32x4_t) -> uint16x4_t { - simd_cast(a) -} - -/// Vector narrow integer. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] -pub unsafe fn vmovn_u64(a: uint64x2_t) -> uint32x2_t { - simd_cast(a) -} - -/// Vector long move. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] -pub unsafe fn vmovl_s8(a: int8x8_t) -> int16x8_t { - simd_cast(a) -} - -/// Vector long move. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] -pub unsafe fn vmovl_s16(a: int16x4_t) -> int32x4_t { - simd_cast(a) -} - -/// Vector long move. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] -pub unsafe fn vmovl_s32(a: int32x2_t) -> int64x2_t { - simd_cast(a) -} - -/// Vector long move. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] -pub unsafe fn vmovl_u8(a: uint8x8_t) -> uint16x8_t { - simd_cast(a) -} - -/// Vector long move. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] -pub unsafe fn vmovl_u16(a: uint16x4_t) -> uint32x4_t { - simd_cast(a) -} - -/// Vector long move. 
-#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] -pub unsafe fn vmovl_u32(a: uint32x2_t) -> uint64x2_t { - simd_cast(a) -} - -/// Reciprocal square-root estimate. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrte))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] -pub unsafe fn vrsqrte_f32(a: float32x2_t) -> float32x2_t { - frsqrte_v2f32(a) -} - -/// Vector bitwise not. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] -pub unsafe fn vmvn_s8(a: int8x8_t) -> int8x8_t { - let b = int8x8_t(-1, -1, -1, -1, -1, -1, -1, -1); - simd_xor(a, b) -} - -/// Vector bitwise not. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] -pub unsafe fn vmvnq_s8(a: int8x16_t) -> int8x16_t { - let b = int8x16_t( - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - ); - simd_xor(a, b) -} - -/// Vector bitwise not. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] -pub unsafe fn vmvn_s16(a: int16x4_t) -> int16x4_t { - let b = int16x4_t(-1, -1, -1, -1); - simd_xor(a, b) -} - -/// Vector bitwise not. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] -pub unsafe fn vmvnq_s16(a: int16x8_t) -> int16x8_t { - let b = int16x8_t(-1, -1, -1, -1, -1, -1, -1, -1); - simd_xor(a, b) -} - -/// Vector bitwise not. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] -pub unsafe fn vmvn_s32(a: int32x2_t) -> int32x2_t { - let b = int32x2_t(-1, -1); - simd_xor(a, b) -} - -/// Vector bitwise not. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] -pub unsafe fn vmvnq_s32(a: int32x4_t) -> int32x4_t { - let b = int32x4_t(-1, -1, -1, -1); - simd_xor(a, b) -} - -/// Vector bitwise not. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] -pub unsafe fn vmvn_u8(a: uint8x8_t) -> uint8x8_t { - let b = uint8x8_t(255, 255, 255, 255, 255, 255, 255, 255); - simd_xor(a, b) -} - -/// Vector bitwise not. 
-#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] -pub unsafe fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t { - let b = uint8x16_t( - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ); - simd_xor(a, b) -} - -/// Vector bitwise not. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] -pub unsafe fn vmvn_u16(a: uint16x4_t) -> uint16x4_t { - let b = uint16x4_t(65_535, 65_535, 65_535, 65_535); - simd_xor(a, b) -} - -/// Vector bitwise not. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] -pub unsafe fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t { - let b = uint16x8_t( - 65_535, 65_535, 65_535, 65_535, 65_535, 65_535, 65_535, 65_535, - ); - simd_xor(a, b) -} - -/// Vector bitwise not. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] -pub unsafe fn vmvn_u32(a: uint32x2_t) -> uint32x2_t { - let b = uint32x2_t(4_294_967_295, 4_294_967_295); - simd_xor(a, b) -} - -/// Vector bitwise not. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] -pub unsafe fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t { - let b = uint32x4_t(4_294_967_295, 4_294_967_295, 4_294_967_295, 4_294_967_295); - simd_xor(a, b) -} - -/// Vector bitwise not. -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] -pub unsafe fn vmvn_p8(a: poly8x8_t) -> poly8x8_t { - let b = poly8x8_t(255, 255, 255, 255, 255, 255, 255, 255); - simd_xor(a, b) -} - -/// Vector bitwise not. 
-#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] -pub unsafe fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t { - let b = poly8x16_t( - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ); - simd_xor(a, b) -} - -/// Folding minimum of adjacent pairs -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sminp))] -pub unsafe fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - vpmins_v8i8(a, b) -} - -/// Folding minimum of adjacent pairs -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sminp))] -pub unsafe fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - vpmins_v4i16(a, b) -} - -/// Folding minimum of adjacent pairs -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sminp))] -pub unsafe fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - vpmins_v2i32(a, b) -} - -/// Folding minimum of adjacent pairs -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uminp))] -pub unsafe fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - vpminu_v8i8(a, b) -} - -/// Folding minimum of adjacent pairs -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uminp))] -pub unsafe fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - vpminu_v4i16(a, b) -} - -/// Folding minimum of adjacent pairs -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uminp))] -pub unsafe fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - vpminu_v2i32(a, b) -} - -/// Folding minimum of adjacent pairs -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminp))] -pub unsafe fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - vpminf_v2f32(a, b) -} - -/// Folding maximum of adjacent pairs -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smaxp))] -pub unsafe fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - vpmaxs_v8i8(a, b) -} - -/// Folding maximum of adjacent pairs -#[inline] -#[target_feature(enable 
= "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smaxp))] -pub unsafe fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { - vpmaxs_v4i16(a, b) -} - -/// Folding maximum of adjacent pairs -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smaxp))] -pub unsafe fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { - vpmaxs_v2i32(a, b) -} - -/// Folding maximum of adjacent pairs -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umaxp))] -pub unsafe fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - vpmaxu_v8i8(a, b) -} - -/// Folding maximum of adjacent pairs -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umaxp))] -pub unsafe fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - vpmaxu_v4i16(a, b) -} - -/// Folding maximum of adjacent pairs -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umaxp))] -pub unsafe fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - vpmaxu_v2i32(a, b) -} - -/// Folding maximum of adjacent pairs -#[inline] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] -#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxp))] -pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { - vpmaxf_v2f32(a, b) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { - vtbl1(a, b) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - transmute(vtbl1(transmute(a), transmute(b))) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { - transmute(vtbl1(transmute(a), transmute(b))) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { - vtbl2(a.0, a.1, b) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> 
uint8x8_t { - transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { - transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { - vtbl3(a.0, a.1, a.2, b) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { - transmute(vtbl3( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(b), - )) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { - transmute(vtbl3( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(b), - )) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { - vtbl4(a.0, a.1, a.2, a.3, b) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { - transmute(vtbl4( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(a.3), - transmute(b), - )) -} - -/// Table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbl))] -pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { - transmute(vtbl4( - transmute(a.0), - transmute(a.1), - transmute(a.2), - transmute(a.3), - transmute(b), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { - vtbx1(a, b, c) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { - transmute(vtbx1(transmute(a), transmute(b), transmute(c))) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { - transmute(vtbx1(transmute(a), transmute(b), transmute(c))) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { - vtbx2(a, b.0, b.1, c) -} - -/// 
Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { - transmute(vtbx2( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(c), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { - transmute(vtbx2( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(c), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { - vtbx3(a, b.0, b.1, b.2, c) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { - transmute(vtbx3( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(c), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { - transmute(vtbx3( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(c), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { - vtbx4(a, b.0, b.1, b.2, b.3, c) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { - transmute(vtbx4( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - transmute(c), - )) -} - -/// Extended table look-up -#[inline] -#[cfg(target_arch = "arm")] -#[cfg(target_endian = "little")] -#[target_feature(enable = "neon,v7")] -#[cfg_attr(test, assert_instr(vtbx))] -pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { - transmute(vtbx4( - transmute(a), - transmute(b.0), - transmute(b.1), - transmute(b.2), - transmute(b.3), - transmute(c), - )) -} - -#[cfg(test)] -mod tests { - use crate::core_arch::{arm::*, simd::*}; - use std::mem::transmute; - use stdarch_test::simd_test; - - #[simd_test(enable = "neon")] - unsafe fn test_vadd_s8() { - let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1); - let e = i8x8::new(9, 9, 9, 9, 9, 9, 9, 9); - let r: i8x8 = transmute(vadd_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddq_s8() { - let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - let b = i8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); - let e = i8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9); - let r: 
i8x16 = transmute(vaddq_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vadd_s16() { - let a = i16x4::new(1, 2, 3, 4); - let b = i16x4::new(8, 7, 6, 5); - let e = i16x4::new(9, 9, 9, 9); - let r: i16x4 = transmute(vadd_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddq_s16() { - let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b = i16x8::new(8, 7, 6, 5, 4, 3, 2, 1); - let e = i16x8::new(9, 9, 9, 9, 9, 9, 9, 9); - let r: i16x8 = transmute(vaddq_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vadd_s32() { - let a = i32x2::new(1, 2); - let b = i32x2::new(8, 7); - let e = i32x2::new(9, 9); - let r: i32x2 = transmute(vadd_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddq_s32() { - let a = i32x4::new(1, 2, 3, 4); - let b = i32x4::new(8, 7, 6, 5); - let e = i32x4::new(9, 9, 9, 9); - let r: i32x4 = transmute(vaddq_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vadd_u8() { - let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b = u8x8::new(8, 7, 6, 5, 4, 3, 2, 1); - let e = u8x8::new(9, 9, 9, 9, 9, 9, 9, 9); - let r: u8x8 = transmute(vadd_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddq_u8() { - let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8); - let b = u8x16::new(8, 7, 6, 5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 1); - let e = u8x16::new(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9); - let r: u8x16 = transmute(vaddq_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vadd_u16() { - let a = u16x4::new(1, 2, 3, 4); - let b = u16x4::new(8, 7, 6, 5); - let e = u16x4::new(9, 9, 9, 9); - let r: u16x4 = transmute(vadd_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddq_u16() { - let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b = u16x8::new(8, 7, 6, 5, 4, 3, 2, 1); - let e = u16x8::new(9, 9, 9, 9, 9, 9, 9, 9); - let r: u16x8 = transmute(vaddq_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vadd_u32() { - let a = u32x2::new(1, 2); - let b = u32x2::new(8, 7); - let e = u32x2::new(9, 9); - let r: u32x2 = transmute(vadd_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddq_u32() { - let a = u32x4::new(1, 2, 3, 4); - let b = u32x4::new(8, 7, 6, 5); - let e = u32x4::new(9, 9, 9, 9); - let r: u32x4 = transmute(vaddq_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vadd_f32() { - let a = f32x2::new(1., 2.); - let b = f32x2::new(8., 7.); - let e = f32x2::new(9., 9.); - let r: f32x2 = transmute(vadd_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddq_f32() { - let a = f32x4::new(1., 2., 3., 4.); - let b = f32x4::new(8., 7., 6., 5.); - let e = f32x4::new(9., 9., 9., 9.); - let r: f32x4 = transmute(vaddq_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddl_s8() { - let v = i8::MAX; - let a = i8x8::new(v, v, v, v, v, v, v, v); - let v = 2 * (v as i16); - let e = i16x8::new(v, v, v, v, v, v, v, v); - let r: i16x8 = 
transmute(vaddl_s8(transmute(a), transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddl_s16() { - let v = i16::MAX; - let a = i16x4::new(v, v, v, v); - let v = 2 * (v as i32); - let e = i32x4::new(v, v, v, v); - let r: i32x4 = transmute(vaddl_s16(transmute(a), transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddl_s32() { - let v = i32::MAX; - let a = i32x2::new(v, v); - let v = 2 * (v as i64); - let e = i64x2::new(v, v); - let r: i64x2 = transmute(vaddl_s32(transmute(a), transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddl_u8() { - let v = u8::MAX; - let a = u8x8::new(v, v, v, v, v, v, v, v); - let v = 2 * (v as u16); - let e = u16x8::new(v, v, v, v, v, v, v, v); - let r: u16x8 = transmute(vaddl_u8(transmute(a), transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddl_u16() { - let v = u16::MAX; - let a = u16x4::new(v, v, v, v); - let v = 2 * (v as u32); - let e = u32x4::new(v, v, v, v); - let r: u32x4 = transmute(vaddl_u16(transmute(a), transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vaddl_u32() { - let v = u32::MAX; - let a = u32x2::new(v, v); - let v = 2 * (v as u64); - let e = u64x2::new(v, v); - let r: u64x2 = transmute(vaddl_u32(transmute(a), transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmvn_s8() { - let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e = i8x8::new(-1, -2, -3, -4, -5, -6, -7, -8); - let r: i8x8 = transmute(vmvn_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmvnq_s8() { - let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e = i8x16::new( - -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, - ); - let r: i8x16 = transmute(vmvnq_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmvn_s16() { - let a = i16x4::new(0, 1, 2, 3); - let e = i16x4::new(-1, -2, -3, -4); - let r: i16x4 = transmute(vmvn_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmvnq_s16() { - let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e = i16x8::new(-1, -2, -3, -4, -5, -6, -7, -8); - let r: i16x8 = transmute(vmvnq_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmvn_s32() { - let a = i32x2::new(0, 1); - let e = i32x2::new(-1, -2); - let r: i32x2 = transmute(vmvn_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmvnq_s32() { - let a = i32x4::new(0, 1, 2, 3); - let e = i32x4::new(-1, -2, -3, -4); - let r: i32x4 = transmute(vmvnq_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmvn_u8() { - let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e = u8x8::new(255, 254, 253, 252, 251, 250, 249, 248); - let r: u8x8 = transmute(vmvn_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmvnq_u8() { - let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e = u8x16::new( - 255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240, - ); - let r: u8x16 = transmute(vmvnq_u8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmvn_u16() { - let a = u16x4::new(0, 1, 2, 3); - let e = u16x4::new(65_535, 65_534, 65_533, 
65_532); - let r: u16x4 = transmute(vmvn_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmvnq_u16() { - let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e = u16x8::new( - 65_535, 65_534, 65_533, 65_532, 65_531, 65_530, 65_529, 65_528, - ); - let r: u16x8 = transmute(vmvnq_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmvn_u32() { - let a = u32x2::new(0, 1); - let e = u32x2::new(4_294_967_295, 4_294_967_294); - let r: u32x2 = transmute(vmvn_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmvnq_u32() { - let a = u32x4::new(0, 1, 2, 3); - let e = u32x4::new(4_294_967_295, 4_294_967_294, 4_294_967_293, 4_294_967_292); - let r: u32x4 = transmute(vmvnq_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmvn_p8() { - let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); - let e = u8x8::new(255, 254, 253, 252, 251, 250, 249, 248); - let r: u8x8 = transmute(vmvn_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmvnq_p8() { - let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); - let e = u8x16::new( - 255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240, - ); - let r: u8x16 = transmute(vmvnq_p8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovn_s16() { - let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: i8x8 = transmute(vmovn_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovn_s32() { - let a = i32x4::new(1, 2, 3, 4); - let e = i16x4::new(1, 2, 3, 4); - let r: i16x4 = transmute(vmovn_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovn_s64() { - let a = i64x2::new(1, 2); - let e = i32x2::new(1, 2); - let r: i32x2 = transmute(vmovn_s64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovn_u16() { - let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: u8x8 = transmute(vmovn_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovn_u32() { - let a = u32x4::new(1, 2, 3, 4); - let e = u16x4::new(1, 2, 3, 4); - let r: u16x4 = transmute(vmovn_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovn_u64() { - let a = u64x2::new(1, 2); - let e = u32x2::new(1, 2); - let r: u32x2 = transmute(vmovn_u64(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovl_s8() { - let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: i16x8 = transmute(vmovl_s8(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovl_s16() { - let e = i32x4::new(1, 2, 3, 4); - let a = i16x4::new(1, 2, 3, 4); - let r: i32x4 = transmute(vmovl_s16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovl_s32() { - let e = i64x2::new(1, 2); - let a = i32x2::new(1, 2); - let r: i64x2 = transmute(vmovl_s32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovl_u8() { - let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let r: u16x8 = transmute(vmovl_u8(transmute(a))); - assert_eq!(r, 
e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovl_u16() { - let e = u32x4::new(1, 2, 3, 4); - let a = u16x4::new(1, 2, 3, 4); - let r: u32x4 = transmute(vmovl_u16(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vmovl_u32() { - let e = u64x2::new(1, 2); - let a = u32x2::new(1, 2); - let r: u64x2 = transmute(vmovl_u32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vrsqrt_f32() { - let a = f32x2::new(1.0, 2.0); - let e = f32x2::new(0.9980469, 0.7050781); - let r: f32x2 = transmute(vrsqrte_f32(transmute(a))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vpmin_s8() { - let a = i8x8::new(1, -2, 3, -4, 5, 6, 7, 8); - let b = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9); - let e = i8x8::new(-2, -4, 5, 7, 0, 2, 4, 6); - let r: i8x8 = transmute(vpmin_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vpmin_s16() { - let a = i16x4::new(1, 2, 3, -4); - let b = i16x4::new(0, 3, 2, 5); - let e = i16x4::new(1, -4, 0, 2); - let r: i16x4 = transmute(vpmin_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vpmin_s32() { - let a = i32x2::new(1, -2); - let b = i32x2::new(0, 3); - let e = i32x2::new(-2, 0); - let r: i32x2 = transmute(vpmin_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vpmin_u8() { - let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b = u8x8::new(0, 3, 2, 5, 4, 7, 6, 9); - let e = u8x8::new(1, 3, 5, 7, 0, 2, 4, 6); - let r: u8x8 = transmute(vpmin_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vpmin_u16() { - let a = u16x4::new(1, 2, 3, 4); - let b = u16x4::new(0, 3, 2, 5); - let e = u16x4::new(1, 3, 0, 2); - let r: u16x4 = transmute(vpmin_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vpmin_u32() { - let a = u32x2::new(1, 2); - let b = u32x2::new(0, 3); - let e = u32x2::new(1, 0); - let r: u32x2 = transmute(vpmin_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vpmin_f32() { - let a = f32x2::new(1., -2.); - let b = f32x2::new(0., 3.); - let e = f32x2::new(-2., 0.); - let r: f32x2 = transmute(vpmin_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vpmax_s8() { - let a = i8x8::new(1, -2, 3, -4, 5, 6, 7, 8); - let b = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9); - let e = i8x8::new(1, 3, 6, 8, 3, 5, 7, 9); - let r: i8x8 = transmute(vpmax_s8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vpmax_s16() { - let a = i16x4::new(1, 2, 3, -4); - let b = i16x4::new(0, 3, 2, 5); - let e = i16x4::new(2, 3, 3, 5); - let r: i16x4 = transmute(vpmax_s16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vpmax_s32() { - let a = i32x2::new(1, -2); - let b = i32x2::new(0, 3); - let e = i32x2::new(1, 3); - let r: i32x2 = transmute(vpmax_s32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vpmax_u8() { - let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); - let b = u8x8::new(0, 3, 2, 5, 4, 7, 6, 9); - let e = u8x8::new(2, 4, 6, 8, 3, 5, 7, 9); - let r: u8x8 = transmute(vpmax_u8(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - 
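Each of the tests above follows the same shape: build portable SIMD values (`u8x8`, `i16x4`, ...), `transmute` them into the corresponding NEON vector types, call the intrinsic, and `transmute` the result back for a lane-by-lane comparison. As a free-standing sketch of that pattern outside the `#[simd_test]` harness — using the inputs and expected lanes from `test_vpmax_u8` above, and assuming an AArch64 toolchain that exports these intrinsics through `core::arch::aarch64` (the demo function name is illustrative only):

```rust
// A minimal sketch of the transmute-based test pattern used in this module.
// Assumes target_arch = "aarch64", where the NEON intrinsics are available;
// values are the ones from test_vpmax_u8 above.
#[cfg(target_arch = "aarch64")]
fn vpmax_u8_demo() {
    use core::arch::aarch64::{uint8x8_t, vpmax_u8};
    use core::mem::transmute;

    unsafe {
        // Plain [u8; 8] arrays stand in for the portable u8x8 vectors:
        // both are 8 bytes with the same lane layout.
        let a: uint8x8_t = transmute([1u8, 2, 3, 4, 5, 6, 7, 8]);
        let b: uint8x8_t = transmute([0u8, 3, 2, 5, 4, 7, 6, 9]);
        // vpmax folds adjacent pairs: output lanes 0..4 are the pairwise
        // maxima of `a`, lanes 4..8 the pairwise maxima of `b`.
        let r: [u8; 8] = transmute(vpmax_u8(a, b));
        assert_eq!(r, [2, 4, 6, 8, 3, 5, 7, 9]);
    }
}
```

The transmutes rely on the portable and NEON vector types sharing size and lane order, which holds on little-endian targets; that is also why the table-lookup tests below are additionally gated on `target_endian = "little"`.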
#[simd_test(enable = "neon")] - unsafe fn test_vpmax_u16() { - let a = u16x4::new(1, 2, 3, 4); - let b = u16x4::new(0, 3, 2, 5); - let e = u16x4::new(2, 4, 3, 5); - let r: u16x4 = transmute(vpmax_u16(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vpmax_u32() { - let a = u32x2::new(1, 2); - let b = u32x2::new(0, 3); - let e = u32x2::new(2, 3); - let r: u32x2 = transmute(vpmax_u32(transmute(a), transmute(b))); - assert_eq!(r, e); - } - - #[simd_test(enable = "neon")] - unsafe fn test_vpmax_f32() { - let a = f32x2::new(1., -2.); - let b = f32x2::new(0., 3.); - let e = f32x2::new(1., 3.); - let r: f32x2 = transmute(vpmax_f32(transmute(a), transmute(b))); - assert_eq!(r, e); - } -} - -#[cfg(test)] -#[cfg(target_endian = "little")] -#[path = "table_lookup_tests.rs"] -mod table_lookup_tests; diff --git a/crates/core_arch/src/arm/neon/generated.rs b/crates/core_arch/src/arm/neon/generated.rs new file mode 100644 index 0000000000..fcf73e71d2 --- /dev/null +++ b/crates/core_arch/src/arm/neon/generated.rs @@ -0,0 +1,4537 @@ +// This code is automatically generated. DO NOT MODIFY. +// +// Instead, modify `crates/stdarch-gen/neon.spec` and run the following command to re-generate this file: +// +// ``` +// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec +// ``` +use super::*; +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +pub unsafe fn vand_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +pub unsafe fn vandq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +pub unsafe fn vand_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +pub unsafe fn vandq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +pub unsafe fn vand_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +pub unsafe fn vandq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { 
+ simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +pub unsafe fn vand_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +pub unsafe fn vandq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +pub unsafe fn vand_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +pub unsafe fn vandq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +pub unsafe fn vand_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +pub unsafe fn vandq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +pub unsafe fn vand_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +pub unsafe fn vandq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +pub unsafe fn vand_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +pub unsafe fn 
vandq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_and(a, b) +} + +/// Vector bitwise inclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +pub unsafe fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_or(a, b) +} + +/// Vector bitwise inclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +pub unsafe fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_or(a, b) +} + +/// Vector bitwise inclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +pub unsafe fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_or(a, b) +} + +/// Vector bitwise inclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +pub unsafe fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_or(a, b) +} + +/// Vector bitwise inclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +pub unsafe fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_or(a, b) +} + +/// Vector bitwise inclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +pub unsafe fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_or(a, b) +} + +/// Vector bitwise inclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +pub unsafe fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_or(a, b) +} + +/// Vector bitwise inclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +pub unsafe fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_or(a, b) +} + +/// Vector bitwise inclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +pub unsafe fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_or(a, b) +} + +/// Vector bitwise inclusive or (vector) +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +pub unsafe fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_or(a, b) +} + +/// Vector bitwise inclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +pub unsafe fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_or(a, b) +} + +/// Vector bitwise inclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +pub unsafe fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_or(a, b) +} + +/// Vector bitwise inclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +pub unsafe fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + simd_or(a, b) +} + +/// Vector bitwise inclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +pub unsafe fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_or(a, b) +} + +/// Vector bitwise inclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +pub unsafe fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_or(a, b) +} + +/// Vector bitwise inclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +pub unsafe fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_or(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +pub unsafe fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +pub unsafe fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +pub unsafe fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +pub unsafe fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +pub unsafe fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +pub unsafe fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +pub unsafe fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +pub unsafe fn veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +pub unsafe fn veor_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +pub unsafe fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +pub unsafe fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +pub unsafe fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> 
uint32x4_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +pub unsafe fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +pub unsafe fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +pub unsafe fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +pub unsafe fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_xor(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +pub unsafe fn vceq_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +pub unsafe fn vceqq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +pub unsafe fn vceq_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +pub unsafe fn vceqq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +pub unsafe fn vceq_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +pub unsafe fn vceqq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +pub unsafe fn vceq_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +pub unsafe fn vceqq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +pub unsafe fn vceq_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +pub unsafe fn vceqq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +pub unsafe fn vceq_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +pub unsafe fn vceqq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + simd_eq(a, b) +} + +/// Floating-point compare equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmeq))] +pub unsafe fn vceq_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + simd_eq(a, b) +} + +/// Floating-point compare equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmeq))] +pub unsafe fn vceqq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + simd_eq(a, b) +} + +/// Compare signed greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(cmgt))] +pub unsafe fn vcgt_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + simd_gt(a, b) +} + +/// Compare signed greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +pub unsafe fn vcgtq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + simd_gt(a, b) +} + +/// Compare signed greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +pub unsafe fn vcgt_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + simd_gt(a, b) +} + +/// Compare signed greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +pub unsafe fn vcgtq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + simd_gt(a, b) +} + +/// Compare signed greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +pub unsafe fn vcgt_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + simd_gt(a, b) +} + +/// Compare signed greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +pub unsafe fn vcgtq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + simd_gt(a, b) +} + +/// Compare unsigned higher +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +pub unsafe fn vcgt_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_gt(a, b) +} + +/// Compare unsigned higher +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +pub unsafe fn vcgtq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_gt(a, b) +} + +/// Compare unsigned higher +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +pub unsafe fn vcgt_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_gt(a, b) +} + +/// Compare unsigned higher +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +pub unsafe fn vcgtq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_gt(a, b) +} + +/// Compare unsigned higher +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch
= "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +pub unsafe fn vcgt_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_gt(a, b) +} + +/// Compare unsigned highe +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +pub unsafe fn vcgtq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_gt(a, b) +} + +/// Floating-point compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmgt))] +pub unsafe fn vcgt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + simd_gt(a, b) +} + +/// Floating-point compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmgt))] +pub unsafe fn vcgtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + simd_gt(a, b) +} + +/// Compare signed less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +pub unsafe fn vclt_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + simd_lt(a, b) +} + +/// Compare signed less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +pub unsafe fn vcltq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + simd_lt(a, b) +} + +/// Compare signed less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +pub unsafe fn vclt_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + simd_lt(a, b) +} + +/// Compare signed less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +pub unsafe fn vcltq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + simd_lt(a, b) +} + +/// Compare signed less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +pub unsafe fn vclt_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + simd_lt(a, b) +} + +/// Compare signed less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] 
+pub unsafe fn vcltq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + simd_lt(a, b) +} + +/// Compare unsigned less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +pub unsafe fn vclt_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_lt(a, b) +} + +/// Compare unsigned less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +pub unsafe fn vcltq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_lt(a, b) +} + +/// Compare unsigned less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +pub unsafe fn vclt_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_lt(a, b) +} + +/// Compare unsigned less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +pub unsafe fn vcltq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_lt(a, b) +} + +/// Compare unsigned less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +pub unsafe fn vclt_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_lt(a, b) +} + +/// Compare unsigned less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +pub unsafe fn vcltq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_lt(a, b) +} + +/// Floating-point compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmgt))] +pub unsafe fn vclt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + simd_lt(a, b) +} + +/// Floating-point compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmgt))] +pub unsafe fn vcltq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + simd_lt(a, b) +} + +/// Compare signed less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +pub unsafe fn vcle_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + simd_le(a, b) +} + +/// Compare signed less than or equal +#[inline] +#[target_feature(enable 
= "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +pub unsafe fn vcleq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + simd_le(a, b) +} + +/// Compare signed less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +pub unsafe fn vcle_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + simd_le(a, b) +} + +/// Compare signed less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +pub unsafe fn vcleq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + simd_le(a, b) +} + +/// Compare signed less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +pub unsafe fn vcle_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + simd_le(a, b) +} + +/// Compare signed less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +pub unsafe fn vcleq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + simd_le(a, b) +} + +/// Compare unsigned less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +pub unsafe fn vcle_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_le(a, b) +} + +/// Compare unsigned less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +pub unsafe fn vcleq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_le(a, b) +} + +/// Compare unsigned less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +pub unsafe fn vcle_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_le(a, b) +} + +/// Compare unsigned less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +pub unsafe fn vcleq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_le(a, b) +} + +/// Compare unsigned less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vcge.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +pub unsafe fn vcle_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_le(a, b) +} + +/// Compare unsigned less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +pub unsafe fn vcleq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_le(a, b) +} + +/// Floating-point compare less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmge))] +pub unsafe fn vcle_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + simd_le(a, b) +} + +/// Floating-point compare less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmge))] +pub unsafe fn vcleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + simd_le(a, b) +} + +/// Compare signed greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +pub unsafe fn vcge_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + simd_ge(a, b) +} + +/// Compare signed greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +pub unsafe fn vcgeq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + simd_ge(a, b) +} + +/// Compare signed greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +pub unsafe fn vcge_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + simd_ge(a, b) +} + +/// Compare signed greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +pub unsafe fn vcgeq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + simd_ge(a, b) +} + +/// Compare signed greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +pub unsafe fn vcge_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + simd_ge(a, b) +} + +/// Compare signed greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(cmge))] +pub unsafe fn vcgeq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + simd_ge(a, b) +} + +/// Compare unsigned greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +pub unsafe fn vcge_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_ge(a, b) +} + +/// Compare unsigned greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +pub unsafe fn vcgeq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_ge(a, b) +} + +/// Compare unsigned greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +pub unsafe fn vcge_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_ge(a, b) +} + +/// Compare unsigned greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +pub unsafe fn vcgeq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_ge(a, b) +} + +/// Compare unsigned greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +pub unsafe fn vcge_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_ge(a, b) +} + +/// Compare unsigned greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +pub unsafe fn vcgeq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_ge(a, b) +} + +/// Floating-point compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmge))] +pub unsafe fn vcge_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + simd_ge(a, b) +} + +/// Floating-point compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmge))] +pub unsafe fn vcgeq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + simd_ge(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))] +pub unsafe fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> 
uint8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v8i8")] + fn vqsub_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } +vqsub_u8_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))] +pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v16i8")] + fn vqsubq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } +vqsubq_u8_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))] +pub unsafe fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v4i16")] + fn vqsub_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } +vqsub_u16_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))] +pub unsafe fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v8i16")] + fn vqsubq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } +vqsubq_u16_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))] +pub unsafe fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v2i32")] + fn vqsub_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } +vqsub_u32_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))] +pub unsafe fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v4i32")] + fn vqsubq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } +vqsubq_u32_(a, b) +} + +/// Saturating subtract +#[inline] 
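+// NOTE (editorial): "saturating" means the result is clamped to the lane
+// type's range instead of wrapping, matching Rust's scalar `saturating_sub`;
+// for example `10u8.saturating_sub(20) == 0` and
+// `(-100i8).saturating_sub(100) == -128`. Unlike the comparisons above, which
+// lower through generic `simd_*` intrinsics, each of these functions binds
+// the corresponding LLVM intrinsic via `link_name` and forwards `a` and `b`.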
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] +pub unsafe fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubs.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v8i8")] + fn vqsub_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vqsub_s8_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] +pub unsafe fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubs.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v16i8")] + fn vqsubq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vqsubq_s8_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] +pub unsafe fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubs.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v4i16")] + fn vqsub_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vqsub_s16_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] +pub unsafe fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubs.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v8i16")] + fn vqsubq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vqsubq_s16_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] +pub unsafe fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqsubs.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v2i32")] + fn vqsub_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vqsub_s32_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] +pub unsafe fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = 
"arm", link_name = "llvm.arm.neon.vqsubs.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v4i32")] + fn vqsubq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vqsubq_s32_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))] +pub unsafe fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v8i8")] + fn vhadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } +vhadd_u8_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))] +pub unsafe fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v16i8")] + fn vhaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } +vhaddq_u8_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))] +pub unsafe fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v4i16")] + fn vhadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } +vhadd_u16_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))] +pub unsafe fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v8i16")] + fn vhaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } +vhaddq_u16_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))] +pub unsafe fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v2i32")] + fn vhadd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } +vhadd_u32_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vhadd.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))] +pub unsafe fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v4i32")] + fn vhaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } +vhaddq_u32_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))] +pub unsafe fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v8i8")] + fn vhadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vhadd_s8_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))] +pub unsafe fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v16i8")] + fn vhaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vhaddq_s8_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))] +pub unsafe fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v4i16")] + fn vhadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vhadd_s16_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))] +pub unsafe fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v8i16")] + fn vhaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vhaddq_s16_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))] +pub unsafe fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v2i32")] + fn vhadd_s32_(a: int32x2_t, b: int32x2_t) 
-> int32x2_t; + } +vhadd_s32_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))] +pub unsafe fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v4i32")] + fn vhaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vhaddq_s32_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))] +pub unsafe fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v8i8")] + fn vrhadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } +vrhadd_u8_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))] +pub unsafe fn vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v16i8")] + fn vrhaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } +vrhaddq_u8_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))] +pub unsafe fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v4i16")] + fn vrhadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } +vrhadd_u16_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))] +pub unsafe fn vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v8i16")] + fn vrhaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } +vrhaddq_u16_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))] +pub 
unsafe fn vrhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v2i32")] + fn vrhadd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } +vrhadd_u32_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))] +pub unsafe fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v4i32")] + fn vrhaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } +vrhaddq_u32_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))] +pub unsafe fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v8i8")] + fn vrhadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vrhadd_s8_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))] +pub unsafe fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v16i8")] + fn vrhaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vrhaddq_s8_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))] +pub unsafe fn vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v4i16")] + fn vrhadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vrhadd_s16_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))] +pub unsafe fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v8i16")] + fn vrhaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + 
} +vrhaddq_s16_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))] +pub unsafe fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v2i32")] + fn vrhadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vrhadd_s32_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))] +pub unsafe fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v4i32")] + fn vrhaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vrhaddq_s32_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +pub unsafe fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v8i8")] + fn vqadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } +vqadd_u8_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +pub unsafe fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v16i8")] + fn vqaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } +vqaddq_u8_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +pub unsafe fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v4i16")] + fn vqadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } +vqadd_u16_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +pub unsafe fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> 
uint16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v8i16")] + fn vqaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } +vqaddq_u16_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +pub unsafe fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v2i32")] + fn vqadd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } +vqadd_u32_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +pub unsafe fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqaddu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v4i32")] + fn vqaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } +vqaddq_u32_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +pub unsafe fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v8i8")] + fn vqadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vqadd_s8_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +pub unsafe fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v16i8")] + fn vqaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vqaddq_s8_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +pub unsafe fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v4i16")] + fn vqadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vqadd_s16_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch 
= "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +pub unsafe fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v8i16")] + fn vqaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vqaddq_s16_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +pub unsafe fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v2i32")] + fn vqadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vqadd_s32_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +pub unsafe fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqadds.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v4i32")] + fn vqaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vqaddq_s32_(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + 
simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +pub unsafe fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +pub unsafe fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_mul(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsub_s8(a: int8x8_t, b: int8x8_t) -> 
int8x8_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsub_u32(a: uint32x2_t, b: uint32x2_t) -> 
uint32x2_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +pub unsafe fn vsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fsub))] +pub unsafe fn vsub_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fsub))] +pub unsafe fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_sub(a, b) +} + +/// Unsigned halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] +pub unsafe fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v8i8")] + fn vhsub_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } + vhsub_u8_(a, b) +} + +/// Unsigned halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] +pub unsafe fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] 
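+ // NOTE: unlike the `simd_sub`-based intrinsics above, halving subtract has no
+ // generic `simd_*` lowering: each lane is effectively `(a - b) >> 1`, with the
+ // difference taken at full precision before the shift, so the function is
+ // bound directly to the per-architecture LLVM intrinsic named by `link_name`
+ // below. The NEON vector types are not FFI-safe, hence `improper_ctypes` is
+ // allowed on the `extern` block.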
+ extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v16i8")] + fn vhsubq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } +vhsubq_u8_(a, b) +} + +/// Signed halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] +pub unsafe fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v4i16")] + fn vhsub_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } +vhsub_u16_(a, b) +} + +/// Signed halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] +pub unsafe fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v8i16")] + fn vhsubq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } +vhsubq_u16_(a, b) +} + +/// Signed halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] +pub unsafe fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v2i32")] + fn vhsub_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } +vhsub_u32_(a, b) +} + +/// Signed halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] +pub unsafe fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v4i32")] + fn vhsubq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } +vhsubq_u32_(a, b) +} + +/// Signed halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +pub unsafe fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v8i8")] + fn vhsub_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vhsub_s8_(a, b) +} + +/// Signed halving subtract +#[inline] +#[target_feature(enable = 
"neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +pub unsafe fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v16i8")] + fn vhsubq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vhsubq_s8_(a, b) +} + +/// Signed halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +pub unsafe fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v4i16")] + fn vhsub_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vhsub_s16_(a, b) +} + +/// Signed halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +pub unsafe fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v8i16")] + fn vhsubq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vhsubq_s16_(a, b) +} + +/// Signed halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +pub unsafe fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v2i32")] + fn vhsub_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vhsub_s32_(a, b) +} + +/// Signed halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +pub unsafe fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v4i32")] + fn vhsubq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vhsubq_s32_(a, b) +} + +#[cfg(test)] +#[allow(overflowing_literals)] +mod test { + use super::*; + use crate::core_arch::simd::*; + use std::mem::transmute; + use stdarch_test::simd_test; + + #[simd_test(enable = "neon")] + unsafe fn test_vand_s8() { + let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: i8x8 = i8x8::new(0x00, 0x01, 
0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i8x8 = transmute(vand_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: i8x8 = transmute(vand_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s8() { + let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let b: i8x16 = i8x16::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let r: i8x16 = transmute(vandq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: i8x16 = transmute(vandq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_s16() { + let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let b: i16x4 = i16x4::new(0x0F, 0x0F, 0x0F, 0x0F); + let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let r: i16x4 = transmute(vand_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); + let e: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); + let r: i16x4 = transmute(vand_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s16() { + let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i16x8 = transmute(vandq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: i16x8 = transmute(vandq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_s32() { + let a: i32x2 = i32x2::new(0x00, 0x01); + let b: i32x2 = i32x2::new(0x0F, 0x0F); + let e: i32x2 = i32x2::new(0x00, 0x01); + let r: i32x2 = transmute(vand_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i32x2 = i32x2::new(0x00, 0x01); + let b: i32x2 = i32x2::new(0x00, 0x00); + let e: i32x2 = i32x2::new(0x00, 0x00); + let r: i32x2 = transmute(vand_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s32() { + let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let b: i32x4 = i32x4::new(0x0F, 0x0F, 0x0F, 0x0F); + let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let r: i32x4 = transmute(vandq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let 
b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); + let e: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); + let r: i32x4 = transmute(vandq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_u8() { + let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u8x8 = transmute(vand_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: u8x8 = transmute(vand_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u8() { + let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let b: u8x16 = u8x16::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let r: u8x16 = transmute(vandq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: u8x16 = transmute(vandq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_u16() { + let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0x0F, 0x0F, 0x0F, 0x0F); + let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let r: u16x4 = transmute(vand_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); + let e: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); + let r: u16x4 = transmute(vand_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u16() { + let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u16x8 = transmute(vandq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: u16x8 = transmute(vandq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_u32() { + let a: u32x2 = u32x2::new(0x00, 0x01); + let b: u32x2 = u32x2::new(0x0F, 0x0F); + let e: u32x2 = u32x2::new(0x00, 0x01); + let r: u32x2 = transmute(vand_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u32x2 = u32x2::new(0x00, 0x01); + let b: u32x2 = u32x2::new(0x00, 0x00); + let e: u32x2 = u32x2::new(0x00, 0x00); + let r: u32x2 = 
transmute(vand_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u32() { + let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0x0F, 0x0F, 0x0F, 0x0F); + let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let r: u32x4 = transmute(vandq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); + let e: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); + let r: u32x4 = transmute(vandq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_s64() { + let a: i64x1 = i64x1::new(0x00); + let b: i64x1 = i64x1::new(0x0F); + let e: i64x1 = i64x1::new(0x00); + let r: i64x1 = transmute(vand_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i64x1 = i64x1::new(0x00); + let b: i64x1 = i64x1::new(0x00); + let e: i64x1 = i64x1::new(0x00); + let r: i64x1 = transmute(vand_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s64() { + let a: i64x2 = i64x2::new(0x00, 0x01); + let b: i64x2 = i64x2::new(0x0F, 0x0F); + let e: i64x2 = i64x2::new(0x00, 0x01); + let r: i64x2 = transmute(vandq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i64x2 = i64x2::new(0x00, 0x01); + let b: i64x2 = i64x2::new(0x00, 0x00); + let e: i64x2 = i64x2::new(0x00, 0x00); + let r: i64x2 = transmute(vandq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_u64() { + let a: u64x1 = u64x1::new(0x00); + let b: u64x1 = u64x1::new(0x0F); + let e: u64x1 = u64x1::new(0x00); + let r: u64x1 = transmute(vand_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u64x1 = u64x1::new(0x00); + let b: u64x1 = u64x1::new(0x00); + let e: u64x1 = u64x1::new(0x00); + let r: u64x1 = transmute(vand_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u64() { + let a: u64x2 = u64x2::new(0x00, 0x01); + let b: u64x2 = u64x2::new(0x0F, 0x0F); + let e: u64x2 = u64x2::new(0x00, 0x01); + let r: u64x2 = transmute(vandq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u64x2 = u64x2::new(0x00, 0x01); + let b: u64x2 = u64x2::new(0x00, 0x00); + let e: u64x2 = u64x2::new(0x00, 0x00); + let r: u64x2 = transmute(vandq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s8() { + let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i8x8 = transmute(vorr_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s8() { + let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let r: i8x16 = transmute(vorrq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s16() { + let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let b: 
i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); + let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let r: i16x4 = transmute(vorr_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s16() { + let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i16x8 = transmute(vorrq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s32() { + let a: i32x2 = i32x2::new(0x00, 0x01); + let b: i32x2 = i32x2::new(0x00, 0x00); + let e: i32x2 = i32x2::new(0x00, 0x01); + let r: i32x2 = transmute(vorr_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s32() { + let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); + let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let r: i32x4 = transmute(vorrq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u8() { + let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u8x8 = transmute(vorr_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u8() { + let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let r: u8x16 = transmute(vorrq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u16() { + let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); + let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let r: u16x4 = transmute(vorr_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u16() { + let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u16x8 = transmute(vorrq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u32() { + let a: u32x2 = u32x2::new(0x00, 0x01); + let b: u32x2 = u32x2::new(0x00, 0x00); + let e: u32x2 = u32x2::new(0x00, 0x01); + let r: u32x2 = transmute(vorr_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u32() { + let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); + let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let r: u32x4 = transmute(vorrq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s64() { + let a: i64x1 = i64x1::new(0x00); + let b: i64x1 = i64x1::new(0x00); + let e: i64x1 = i64x1::new(0x00); + let r: i64x1 = 
transmute(vorr_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s64() { + let a: i64x2 = i64x2::new(0x00, 0x01); + let b: i64x2 = i64x2::new(0x00, 0x00); + let e: i64x2 = i64x2::new(0x00, 0x01); + let r: i64x2 = transmute(vorrq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u64() { + let a: u64x1 = u64x1::new(0x00); + let b: u64x1 = u64x1::new(0x00); + let e: u64x1 = u64x1::new(0x00); + let r: u64x1 = transmute(vorr_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u64() { + let a: u64x2 = u64x2::new(0x00, 0x01); + let b: u64x2 = u64x2::new(0x00, 0x00); + let e: u64x2 = u64x2::new(0x00, 0x01); + let r: u64x2 = transmute(vorrq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_s8() { + let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i8x8 = transmute(veor_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s8() { + let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let r: i8x16 = transmute(veorq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_s16() { + let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); + let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let r: i16x4 = transmute(veor_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s16() { + let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i16x8 = transmute(veorq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_s32() { + let a: i32x2 = i32x2::new(0x00, 0x01); + let b: i32x2 = i32x2::new(0x00, 0x00); + let e: i32x2 = i32x2::new(0x00, 0x01); + let r: i32x2 = transmute(veor_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s32() { + let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); + let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let r: i32x4 = transmute(veorq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_u8() { + let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u8x8 = transmute(veor_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u8() { + let a: u8x16 = 
u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let r: u8x16 = transmute(veorq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_u16() { + let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); + let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let r: u16x4 = transmute(veor_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u16() { + let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u16x8 = transmute(veorq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_u32() { + let a: u32x2 = u32x2::new(0x00, 0x01); + let b: u32x2 = u32x2::new(0x00, 0x00); + let e: u32x2 = u32x2::new(0x00, 0x01); + let r: u32x2 = transmute(veor_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u32() { + let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); + let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let r: u32x4 = transmute(veorq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_s64() { + let a: i64x1 = i64x1::new(0x00); + let b: i64x1 = i64x1::new(0x00); + let e: i64x1 = i64x1::new(0x00); + let r: i64x1 = transmute(veor_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s64() { + let a: i64x2 = i64x2::new(0x00, 0x01); + let b: i64x2 = i64x2::new(0x00, 0x00); + let e: i64x2 = i64x2::new(0x00, 0x01); + let r: i64x2 = transmute(veorq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_u64() { + let a: u64x1 = u64x1::new(0x00); + let b: u64x1 = u64x1::new(0x00); + let e: u64x1 = u64x1::new(0x00); + let r: u64x1 = transmute(veor_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u64() { + let a: u64x2 = u64x2::new(0x00, 0x01); + let b: u64x2 = u64x2::new(0x00, 0x00); + let e: u64x2 = u64x2::new(0x00, 0x01); + let r: u64x2 = transmute(veorq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_u8() { + let a: u8x8 = u8x8::new(0xFF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0xFF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vceq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u8x8 = u8x8::new(0xFF, 0xFF, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0xFF, 0, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x8 = transmute(vceq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_u8() { + 
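// Pattern used by these generated tests: operands are built as the portable + // `u8x16`-style tuples and `transmute`d to the NEON types at the call; lanes + // that compare equal come back as all ones, unequal lanes as zero. + 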
let a: u8x16 = u8x16::new(0xFF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0); + let b: u8x16 = u8x16::new(0xFF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vceqq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u8x16 = u8x16::new(0xFF, 0xFF, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0); + let b: u8x16 = u8x16::new(0xFF, 0, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, 0xFF); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x16 = transmute(vceqq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_u16() { + let a: u16x4 = u16x4::new(0xFF_FF, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0xFF_FF, 0x01, 0x02, 0x03); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vceq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0x02, 0x03); + let b: u16x4 = u16x4::new(0xFF_FF, 0, 0x02, 0x04); + let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0); + let r: u16x4 = transmute(vceq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_u16() { + let a: u16x8 = u16x8::new(0xFF_FF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0xFF_FF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vceqq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0xFF_FF, 0, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0); + let r: u16x8 = transmute(vceqq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_u32() { + let a: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0x01); + let b: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0x01); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vceq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let b: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let r: u32x2 = transmute(vceq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_u32() { + let a: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0x01, 0x02, 0x03); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vceqq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0x02, 0x03); + let b: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0x02, 0x04); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); + let r: u32x4 = transmute(vceqq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_s8() { + let a: i8x8 = i8x8::new(0x7F, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 
0x07); + let b: i8x8 = i8x8::new(0x7F, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vceq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i8x8 = i8x8::new(0x7F, 0x7F, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(0x7F, -128, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x8 = transmute(vceq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_s8() { + let a: i8x16 = i8x16::new(0x7F, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, -128); + let b: i8x16 = i8x16::new(0x7F, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, -128); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vceqq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i8x16 = i8x16::new(0x7F, 0x7F, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, -128); + let b: i8x16 = i8x16::new(0x7F, -128, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, 0x7F); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x16 = transmute(vceqq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_s16() { + let a: i16x4 = i16x4::new(0x7F_FF, 0x01, 0x02, 0x03); + let b: i16x4 = i16x4::new(0x7F_FF, 0x01, 0x02, 0x03); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vceq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x02, 0x03); + let b: i16x4 = i16x4::new(0x7F_FF, -32768, 0x02, 0x04); + let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0); + let r: u16x4 = transmute(vceq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_s16() { + let a: i16x8 = i16x8::new(0x7F_FF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(0x7F_FF, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vceqq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(0x7F_FF, -32768, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0); + let r: u16x8 = transmute(vceqq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_s32() { + let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x01); + let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x01); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vceq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); + let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, -2147483648); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let r: u32x2 = transmute(vceq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_s32() { + let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x01, 0x02, 0x03); + let b: i32x4 
= i32x4::new(0x7F_FF_FF_FF, 0x01, 0x02, 0x03); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vceqq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x02, 0x03); + let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, -2147483648, 0x02, 0x04); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); + let r: u32x4 = transmute(vceqq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_f32() { + let a: f32x2 = f32x2::new(1.2, 3.4); + let b: f32x2 = f32x2::new(1.2, 3.4); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vceq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_f32() { + let a: f32x4 = f32x4::new(1.2, 3.4, 5.6, 7.8); + let b: f32x4 = f32x4::new(1.2, 3.4, 5.6, 7.8); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vceqq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcgt_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcgtq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(0, 1, 2, 3); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcgt_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcgtq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(0, 1); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcgt_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(0, 1, 2, 3); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgtq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcgt_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable 
= "neon")] + unsafe fn test_vcgtq_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcgtq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(0, 1, 2, 3); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcgt_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcgtq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(0, 1); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcgt_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u32x4 = u32x4::new(0, 1, 2, 3); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgtq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_f32() { + let a: f32x2 = f32x2::new(1.2, 2.3); + let b: f32x2 = f32x2::new(0.1, 1.2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcgt_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_f32() { + let a: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); + let b: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgtq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s8() { + let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vclt_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_s8() { + let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcltq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vclt_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(1, 
2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcltq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(1, 2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vclt_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcltq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u8() { + let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vclt_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u8() { + let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcltq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vclt_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcltq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vclt_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcltq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_f32() { + let a: f32x2 = f32x2::new(0.1, 1.2); + let b: f32x2 = f32x2::new(1.2, 2.3); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vclt_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_f32() { + let a: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); + let b: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcltq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vcle_s8() { + let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcle_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s8() { + let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcleq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcle_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcleq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(1, 2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcle_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcleq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u8() { + let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcle_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u8() { + let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcleq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcle_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcleq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 
= u32x2::new(1, 2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcle_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcleq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_f32() { + let a: f32x2 = f32x2::new(0.1, 1.2); + let b: f32x2 = f32x2::new(1.2, 2.3); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcle_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_f32() { + let a: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); + let b: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcleq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcge_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcgeq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(0, 1, 2, 3); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcge_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcgeq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(0, 1); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcge_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(0, 1, 2, 3); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgeq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcge_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vcgeq_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcgeq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(0, 1, 2, 3); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcge_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcgeq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(0, 1); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcge_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u32x4 = u32x4::new(0, 1, 2, 3); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgeq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_f32() { + let a: f32x2 = f32x2::new(1.2, 2.3); + let b: f32x2 = f32x2::new(0.1, 1.2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcge_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_f32() { + let a: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); + let b: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgeq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_u8() { + let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(41, 40, 39, 38, 37, 36, 35, 34); + let r: u8x8 = transmute(vqsub_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_u8() { + let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26); + let r: u8x16 = transmute(vqsubq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_u16() { + let a: u16x4 = u16x4::new(42, 42, 42, 42); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(41, 40, 39, 38); + let r: u16x4 = transmute(vqsub_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_u16() { + let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(41, 
40, 39, 38, 37, 36, 35, 34); + let r: u16x8 = transmute(vqsubq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_u32() { + let a: u32x2 = u32x2::new(42, 42); + let b: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(41, 40); + let r: u32x2 = transmute(vqsub_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_u32() { + let a: u32x4 = u32x4::new(42, 42, 42, 42); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(41, 40, 39, 38); + let r: u32x4 = transmute(vqsubq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_s8() { + let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(41, 40, 39, 38, 37, 36, 35, 34); + let r: i8x8 = transmute(vqsub_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_s8() { + let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26); + let r: i8x16 = transmute(vqsubq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_s16() { + let a: i16x4 = i16x4::new(42, 42, 42, 42); + let b: i16x4 = i16x4::new(1, 2, 3, 4); + let e: i16x4 = i16x4::new(41, 40, 39, 38); + let r: i16x4 = transmute(vqsub_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_s16() { + let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i16x8 = i16x8::new(41, 40, 39, 38, 37, 36, 35, 34); + let r: i16x8 = transmute(vqsubq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_s32() { + let a: i32x2 = i32x2::new(42, 42); + let b: i32x2 = i32x2::new(1, 2); + let e: i32x2 = i32x2::new(41, 40); + let r: i32x2 = transmute(vqsub_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_s32() { + let a: i32x4 = i32x4::new(42, 42, 42, 42); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: i32x4 = i32x4::new(41, 40, 39, 38); + let r: i32x4 = transmute(vqsubq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_u8() { + let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(21, 22, 22, 23, 23, 24, 24, 25); + let r: u8x8 = transmute(vhadd_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_u8() { + let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29); + let r: u8x16 = transmute(vhaddq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_u16() { + let a: u16x4 = u16x4::new(42, 42, 42, 42); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(21, 22, 22, 23); + let r: 
u16x4 = transmute(vhadd_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_u16() { + let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(21, 22, 22, 23, 23, 24, 24, 25); + let r: u16x8 = transmute(vhaddq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_u32() { + let a: u32x2 = u32x2::new(42, 42); + let b: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(21, 22); + let r: u32x2 = transmute(vhadd_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_u32() { + let a: u32x4 = u32x4::new(42, 42, 42, 42); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(21, 22, 22, 23); + let r: u32x4 = transmute(vhaddq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_s8() { + let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(21, 22, 22, 23, 23, 24, 24, 25); + let r: i8x8 = transmute(vhadd_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_s8() { + let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29); + let r: i8x16 = transmute(vhaddq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_s16() { + let a: i16x4 = i16x4::new(42, 42, 42, 42); + let b: i16x4 = i16x4::new(1, 2, 3, 4); + let e: i16x4 = i16x4::new(21, 22, 22, 23); + let r: i16x4 = transmute(vhadd_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_s16() { + let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i16x8 = i16x8::new(21, 22, 22, 23, 23, 24, 24, 25); + let r: i16x8 = transmute(vhaddq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_s32() { + let a: i32x2 = i32x2::new(42, 42); + let b: i32x2 = i32x2::new(1, 2); + let e: i32x2 = i32x2::new(21, 22); + let r: i32x2 = transmute(vhadd_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_s32() { + let a: i32x4 = i32x4::new(42, 42, 42, 42); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: i32x4 = i32x4::new(21, 22, 22, 23); + let r: i32x4 = transmute(vhaddq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_u8() { + let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(22, 22, 23, 23, 24, 24, 25, 25); + let r: u8x8 = transmute(vrhadd_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_u8() { + let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29); + 
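+        // Editorial note (annotation, not part of the generated output): vrhadd
+        // is a *rounding* halving add, computing (a + b + 1) >> 1 per lane, which
+        // is why 42 and 1 yield 22 here while the plain vhadd tests above get 21.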
let r: u8x16 = transmute(vrhaddq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_u16() { + let a: u16x4 = u16x4::new(42, 42, 42, 42); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(22, 22, 23, 23); + let r: u16x4 = transmute(vrhadd_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_u16() { + let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(22, 22, 23, 23, 24, 24, 25, 25); + let r: u16x8 = transmute(vrhaddq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_u32() { + let a: u32x2 = u32x2::new(42, 42); + let b: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(22, 22); + let r: u32x2 = transmute(vrhadd_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_u32() { + let a: u32x4 = u32x4::new(42, 42, 42, 42); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(22, 22, 23, 23); + let r: u32x4 = transmute(vrhaddq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_s8() { + let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(22, 22, 23, 23, 24, 24, 25, 25); + let r: i8x8 = transmute(vrhadd_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_s8() { + let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29); + let r: i8x16 = transmute(vrhaddq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_s16() { + let a: i16x4 = i16x4::new(42, 42, 42, 42); + let b: i16x4 = i16x4::new(1, 2, 3, 4); + let e: i16x4 = i16x4::new(22, 22, 23, 23); + let r: i16x4 = transmute(vrhadd_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_s16() { + let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i16x8 = i16x8::new(22, 22, 23, 23, 24, 24, 25, 25); + let r: i16x8 = transmute(vrhaddq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_s32() { + let a: i32x2 = i32x2::new(42, 42); + let b: i32x2 = i32x2::new(1, 2); + let e: i32x2 = i32x2::new(22, 22); + let r: i32x2 = transmute(vrhadd_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_s32() { + let a: i32x4 = i32x4::new(42, 42, 42, 42); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: i32x4 = i32x4::new(22, 22, 23, 23); + let r: i32x4 = transmute(vrhaddq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_u8() { + let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(43, 44, 45, 46, 47, 48, 49, 50); + let r: u8x8 = transmute(vqadd_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = 
"neon")] + unsafe fn test_vqaddq_u8() { + let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58); + let r: u8x16 = transmute(vqaddq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_u16() { + let a: u16x4 = u16x4::new(42, 42, 42, 42); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(43, 44, 45, 46); + let r: u16x4 = transmute(vqadd_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_u16() { + let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(43, 44, 45, 46, 47, 48, 49, 50); + let r: u16x8 = transmute(vqaddq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_u32() { + let a: u32x2 = u32x2::new(42, 42); + let b: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(43, 44); + let r: u32x2 = transmute(vqadd_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_u32() { + let a: u32x4 = u32x4::new(42, 42, 42, 42); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(43, 44, 45, 46); + let r: u32x4 = transmute(vqaddq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_s8() { + let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(43, 44, 45, 46, 47, 48, 49, 50); + let r: i8x8 = transmute(vqadd_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_s8() { + let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58); + let r: i8x16 = transmute(vqaddq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_s16() { + let a: i16x4 = i16x4::new(42, 42, 42, 42); + let b: i16x4 = i16x4::new(1, 2, 3, 4); + let e: i16x4 = i16x4::new(43, 44, 45, 46); + let r: i16x4 = transmute(vqadd_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_s16() { + let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i16x8 = i16x8::new(43, 44, 45, 46, 47, 48, 49, 50); + let r: i16x8 = transmute(vqaddq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_s32() { + let a: i32x2 = i32x2::new(42, 42); + let b: i32x2 = i32x2::new(1, 2); + let e: i32x2 = i32x2::new(43, 44); + let r: i32x2 = transmute(vqadd_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_s32() { + let a: i32x4 = i32x4::new(42, 42, 42, 42); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: i32x4 = i32x4::new(43, 44, 45, 46); + let r: i32x4 = transmute(vqaddq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vmul_s8() { + let a: i8x8 = i8x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(1, 4, 3, 8, 5, 12, 7, 16); + let r: i8x8 = transmute(vmul_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_s8() { + let a: i8x16 = i8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32); + let r: i8x16 = transmute(vmulq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_s16() { + let a: i16x4 = i16x4::new(1, 2, 1, 2); + let b: i16x4 = i16x4::new(1, 2, 3, 4); + let e: i16x4 = i16x4::new(1, 4, 3, 8); + let r: i16x4 = transmute(vmul_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_s16() { + let a: i16x8 = i16x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i16x8 = i16x8::new(1, 4, 3, 8, 5, 12, 7, 16); + let r: i16x8 = transmute(vmulq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(1, 2); + let e: i32x2 = i32x2::new(1, 4); + let r: i32x2 = transmute(vmul_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_s32() { + let a: i32x4 = i32x4::new(1, 2, 1, 2); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: i32x4 = i32x4::new(1, 4, 3, 8); + let r: i32x4 = transmute(vmulq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_u8() { + let a: u8x8 = u8x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(1, 4, 3, 8, 5, 12, 7, 16); + let r: u8x8 = transmute(vmul_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_u8() { + let a: u8x16 = u8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32); + let r: u8x16 = transmute(vmulq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_u16() { + let a: u16x4 = u16x4::new(1, 2, 1, 2); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(1, 4, 3, 8); + let r: u16x4 = transmute(vmul_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_u16() { + let a: u16x8 = u16x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(1, 4, 3, 8, 5, 12, 7, 16); + let r: u16x8 = transmute(vmulq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(1, 4); + let r: u32x2 = transmute(vmul_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_u32() { + let a: u32x4 = u32x4::new(1, 2, 1, 2); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(1, 4, 3, 8); + let r: 
u32x4 = transmute(vmulq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_f32() { + let a: f32x2 = f32x2::new(1.0, 2.0); + let b: f32x2 = f32x2::new(2.0, 3.0); + let e: f32x2 = f32x2::new(2.0, 6.0); + let r: f32x2 = transmute(vmul_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_f32() { + let a: f32x4 = f32x4::new(1.0, 2.0, 1.0, 2.0); + let b: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0); + let e: f32x4 = f32x4::new(2.0, 6.0, 4.0, 10.0); + let r: f32x4 = transmute(vmulq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let e: i8x8 = i8x8::new(0, 0, 2, 2, 4, 4, 6, 6); + let r: i8x8 = transmute(vsub_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2); + let e: i8x16 = i8x16::new(0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); + let r: i8x16 = transmute(vsubq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(1, 2, 1, 2); + let e: i16x4 = i16x4::new(0, 0, 2, 2); + let r: i16x4 = transmute(vsub_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let e: i16x8 = i16x8::new(0, 0, 2, 2, 4, 4, 6, 6); + let r: i16x8 = transmute(vsubq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(1, 2); + let e: i32x2 = i32x2::new(0, 0); + let r: i32x2 = transmute(vsub_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(1, 2, 1, 2); + let e: i32x4 = i32x4::new(0, 0, 2, 2); + let r: i32x4 = transmute(vsubq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u8x8 = u8x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let e: u8x8 = u8x8::new(0, 0, 2, 2, 4, 4, 6, 6); + let r: u8x8 = transmute(vsub_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: u8x16 = u8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2); + let e: u8x16 = u8x16::new(0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); + let r: u8x16 = transmute(vsubq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(1, 2, 1, 2); + let e: u16x4 = u16x4::new(0, 0, 2, 2); + let r: u16x4 = transmute(vsub_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_u16() { + let a: u16x8 = 
u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let e: u16x8 = u16x8::new(0, 0, 2, 2, 4, 4, 6, 6); + let r: u16x8 = transmute(vsubq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(0, 0); + let r: u32x2 = transmute(vsub_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u32x4 = u32x4::new(1, 2, 1, 2); + let e: u32x4 = u32x4::new(0, 0, 2, 2); + let r: u32x4 = transmute(vsubq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_s64() { + let a: i64x1 = i64x1::new(1); + let b: i64x1 = i64x1::new(1); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vsub_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_s64() { + let a: i64x2 = i64x2::new(1, 2); + let b: i64x2 = i64x2::new(1, 2); + let e: i64x2 = i64x2::new(0, 0); + let r: i64x2 = transmute(vsubq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_u64() { + let a: u64x1 = u64x1::new(1); + let b: u64x1 = u64x1::new(1); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vsub_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_u64() { + let a: u64x2 = u64x2::new(1, 2); + let b: u64x2 = u64x2::new(1, 2); + let e: u64x2 = u64x2::new(0, 0); + let r: u64x2 = transmute(vsubq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_f32() { + let a: f32x2 = f32x2::new(1.0, 4.0); + let b: f32x2 = f32x2::new(1.0, 2.0); + let e: f32x2 = f32x2::new(0.0, 2.0); + let r: f32x2 = transmute(vsub_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_f32() { + let a: f32x4 = f32x4::new(1.0, 4.0, 3.0, 8.0); + let b: f32x4 = f32x4::new(1.0, 2.0, 3.0, 4.0); + let e: f32x4 = f32x4::new(0.0, 2.0, 0.0, 4.0); + let r: f32x4 = transmute(vsubq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u8x8 = u8x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let e: u8x8 = u8x8::new(0, 0, 1, 1, 2, 2, 3, 3); + let r: u8x8 = transmute(vhsub_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: u8x16 = u8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2); + let e: u8x16 = u8x16::new(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7); + let r: u8x16 = transmute(vhsubq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(1, 2, 1, 2); + let e: u16x4 = u16x4::new(0, 0, 1, 1); + let r: u16x4 = transmute(vhsub_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let e: u16x8 = u16x8::new(0, 0, 1, 
1, 2, 2, 3, 3); + let r: u16x8 = transmute(vhsubq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(0, 0); + let r: u32x2 = transmute(vhsub_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u32x4 = u32x4::new(1, 2, 1, 2); + let e: u32x4 = u32x4::new(0, 0, 1, 1); + let r: u32x4 = transmute(vhsubq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let e: i8x8 = i8x8::new(0, 0, 1, 1, 2, 2, 3, 3); + let r: i8x8 = transmute(vhsub_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2); + let e: i8x16 = i8x16::new(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7); + let r: i8x16 = transmute(vhsubq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(1, 2, 1, 2); + let e: i16x4 = i16x4::new(0, 0, 1, 1); + let r: i16x4 = transmute(vhsub_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let e: i16x8 = i16x8::new(0, 0, 1, 1, 2, 2, 3, 3); + let r: i16x8 = transmute(vhsubq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(1, 2); + let e: i32x2 = i32x2::new(0, 0); + let r: i32x2 = transmute(vhsub_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(1, 2, 1, 2); + let e: i32x4 = i32x4::new(0, 0, 1, 1); + let r: i32x4 = transmute(vhsubq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } +} diff --git a/crates/core_arch/src/arm/neon/mod.rs b/crates/core_arch/src/arm/neon/mod.rs new file mode 100644 index 0000000000..5c3ccc9fc2 --- /dev/null +++ b/crates/core_arch/src/arm/neon/mod.rs @@ -0,0 +1,3952 @@ +//! ARMv7 NEON intrinsics + +#[rustfmt::skip] +mod generated; +#[rustfmt::skip] +pub use self::generated::*; + +use crate::{core_arch::simd_llvm::*, hint::unreachable_unchecked, mem::transmute, ptr}; +#[cfg(test)] +use stdarch_test::assert_instr; + +types! { + /// ARM-specific 64-bit wide vector of eight packed `i8`. + pub struct int8x8_t(i8, i8, i8, i8, i8, i8, i8, i8); + /// ARM-specific 64-bit wide vector of eight packed `u8`. + pub struct uint8x8_t(u8, u8, u8, u8, u8, u8, u8, u8); + /// ARM-specific 64-bit wide polynomial vector of eight packed `u8`. + pub struct poly8x8_t(u8, u8, u8, u8, u8, u8, u8, u8); + /// ARM-specific 64-bit wide vector of four packed `i16`. + pub struct int16x4_t(i16, i16, i16, i16); + /// ARM-specific 64-bit wide vector of four packed `u16`. 
+    pub struct uint16x4_t(u16, u16, u16, u16);
+    // FIXME: ARM-specific 64-bit wide vector of four packed `f16`.
+    // pub struct float16x4_t(f16, f16, f16, f16);
+    /// ARM-specific 64-bit wide polynomial vector of four packed `u16`.
+    pub struct poly16x4_t(u16, u16, u16, u16);
+    /// ARM-specific 64-bit wide vector of two packed `i32`.
+    pub struct int32x2_t(i32, i32);
+    /// ARM-specific 64-bit wide vector of two packed `u32`.
+    pub struct uint32x2_t(u32, u32);
+    /// ARM-specific 64-bit wide vector of two packed `f32`.
+    pub struct float32x2_t(f32, f32);
+    /// ARM-specific 64-bit wide vector of one packed `i64`.
+    pub struct int64x1_t(i64);
+    /// ARM-specific 64-bit wide vector of one packed `u64`.
+    pub struct uint64x1_t(u64);
+
+    /// ARM-specific 128-bit wide vector of sixteen packed `i8`.
+    pub struct int8x16_t(
+        i8, i8, i8, i8, i8, i8, i8, i8,
+        i8, i8, i8, i8, i8, i8, i8, i8,
+    );
+    /// ARM-specific 128-bit wide vector of sixteen packed `u8`.
+    pub struct uint8x16_t(
+        u8, u8, u8, u8, u8, u8, u8, u8,
+        u8, u8, u8, u8, u8, u8, u8, u8,
+    );
+    /// ARM-specific 128-bit wide polynomial vector of sixteen packed `u8`.
+    pub struct poly8x16_t(
+        u8, u8, u8, u8, u8, u8, u8, u8,
+        u8, u8, u8, u8, u8, u8, u8, u8,
+    );
+    /// ARM-specific 128-bit wide vector of eight packed `i16`.
+    pub struct int16x8_t(i16, i16, i16, i16, i16, i16, i16, i16);
+    /// ARM-specific 128-bit wide vector of eight packed `u16`.
+    pub struct uint16x8_t(u16, u16, u16, u16, u16, u16, u16, u16);
+    // FIXME: ARM-specific 128-bit wide vector of eight packed `f16`.
+    // pub struct float16x8_t(f16, f16, f16, f16, f16, f16, f16, f16);
+    /// ARM-specific 128-bit wide polynomial vector of eight packed `u16`.
+    pub struct poly16x8_t(u16, u16, u16, u16, u16, u16, u16, u16);
+    /// ARM-specific 128-bit wide vector of four packed `i32`.
+    pub struct int32x4_t(i32, i32, i32, i32);
+    /// ARM-specific 128-bit wide vector of four packed `u32`.
+    pub struct uint32x4_t(u32, u32, u32, u32);
+    /// ARM-specific 128-bit wide vector of four packed `f32`.
+    pub struct float32x4_t(f32, f32, f32, f32);
+    /// ARM-specific 128-bit wide vector of two packed `i64`.
+    pub struct int64x2_t(i64, i64);
+    /// ARM-specific 128-bit wide vector of two packed `u64`.
+    pub struct uint64x2_t(u64, u64);
+}
+
+/// ARM-specific type containing two `int8x8_t` vectors.
+#[derive(Copy, Clone)]
+pub struct int8x8x2_t(pub int8x8_t, pub int8x8_t);
+/// ARM-specific type containing three `int8x8_t` vectors.
+#[derive(Copy, Clone)]
+pub struct int8x8x3_t(pub int8x8_t, pub int8x8_t, pub int8x8_t);
+/// ARM-specific type containing four `int8x8_t` vectors.
+#[derive(Copy, Clone)]
+pub struct int8x8x4_t(pub int8x8_t, pub int8x8_t, pub int8x8_t, pub int8x8_t);
+
+/// ARM-specific type containing two `uint8x8_t` vectors.
+#[derive(Copy, Clone)]
+pub struct uint8x8x2_t(pub uint8x8_t, pub uint8x8_t);
+/// ARM-specific type containing three `uint8x8_t` vectors.
+#[derive(Copy, Clone)]
+pub struct uint8x8x3_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t);
+/// ARM-specific type containing four `uint8x8_t` vectors.
+#[derive(Copy, Clone)]
+pub struct uint8x8x4_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t, pub uint8x8_t);
+
+/// ARM-specific type containing two `poly8x8_t` vectors.
+#[derive(Copy, Clone)]
+pub struct poly8x8x2_t(pub poly8x8_t, pub poly8x8_t);
+/// ARM-specific type containing three `poly8x8_t` vectors.
+#[derive(Copy, Clone)]
+pub struct poly8x8x3_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t);
+/// ARM-specific type containing four `poly8x8_t` vectors.
+#[derive(Copy, Clone)] +pub struct poly8x8x4_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t, pub poly8x8_t); + +#[allow(improper_ctypes)] +extern "C" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f32")] + fn frsqrte_v2f32(a: float32x2_t) -> float32x2_t; + + //uint32x2_t vqmovn_u64 (uint64x2_t a) + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqxtn.v2i32")] + fn vqmovn_u64_(a: uint64x2_t) -> uint32x2_t; + + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v8i8")] + fn vpmins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v4i16")] + fn vpmins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v2i32")] + fn vpmins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v8i8")] + fn vpminu_v8i8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v4i16")] + fn vpminu_v4i16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v2i32")] + fn vpminu_v2i32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminp.v2f32")] + fn vpminf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v8i8")] + fn vpmaxs_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v4i16")] + fn vpmaxs_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v2i32")] + fn vpmaxs_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v8i8")] + fn vpmaxu_v8i8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v4i16")] + fn vpmaxu_v4i16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v2i32")] + fn vpmaxu_v2i32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vpmaxs.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxp.v2f32")] + fn vpmaxf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; +} + +#[cfg(target_arch = "arm")] +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.arm.neon.vtbl1"] + fn vtbl1(a: int8x8_t, b: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vtbl2"] + fn vtbl2(a: int8x8_t, b: int8x8_t, b: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vtbl3"] + fn vtbl3(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vtbl4"] + fn vtbl4(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; + + #[link_name = "llvm.arm.neon.vtbx1"] + fn vtbx1(a: int8x8_t, b: int8x8_t, b: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vtbx2"] + fn vtbx2(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vtbx3"] + fn vtbx3(a: int8x8_t, b: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t) -> int8x8_t; + #[link_name = "llvm.arm.neon.vtbx4"] + fn vtbx4( + a: int8x8_t, + b: int8x8_t, + b: int8x8_t, + c: int8x8_t, + d: int8x8_t, + e: int8x8_t, + ) -> int8x8_t; +} + +/// Unsigned saturating extract narrow. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn.u64))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqxtn))] +pub unsafe fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { + vqmovn_u64_(a) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +pub unsafe fn vadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +pub unsafe fn vaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +pub unsafe fn vadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +pub unsafe fn vaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +pub unsafe fn vadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_add(a, b) +} + +/// Vector add. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +pub unsafe fn vaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +pub unsafe fn vaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +pub unsafe fn vadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +pub unsafe fn vaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +pub unsafe fn vadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +pub unsafe fn vaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +pub unsafe fn vadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +pub unsafe fn vaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +pub unsafe fn vaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fadd))] +pub unsafe fn vadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_add(a, b) +} + +/// Vector add. 
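+// Editorial note: the vaddl_* intrinsics further below widen both operands to
+// twice their lane width (via simd_cast) before adding, so the long add cannot
+// overflow the result lanes.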
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fadd))] +pub unsafe fn vaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_add(a, b) +} + +/// Vector long add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl))] +pub unsafe fn vaddl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { + let a: int16x8_t = simd_cast(a); + let b: int16x8_t = simd_cast(b); + simd_add(a, b) +} + +/// Vector long add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl))] +pub unsafe fn vaddl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + let a: int32x4_t = simd_cast(a); + let b: int32x4_t = simd_cast(b); + simd_add(a, b) +} + +/// Vector long add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl))] +pub unsafe fn vaddl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + let a: int64x2_t = simd_cast(a); + let b: int64x2_t = simd_cast(b); + simd_add(a, b) +} + +/// Vector long add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl))] +pub unsafe fn vaddl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { + let a: uint16x8_t = simd_cast(a); + let b: uint16x8_t = simd_cast(b); + simd_add(a, b) +} + +/// Vector long add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl))] +pub unsafe fn vaddl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + let a: uint32x4_t = simd_cast(a); + let b: uint32x4_t = simd_cast(b); + simd_add(a, b) +} + +/// Vector long add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl))] +pub unsafe fn vaddl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + let a: uint64x2_t = simd_cast(a); + let b: uint64x2_t = simd_cast(b); + simd_add(a, b) +} + +/// Vector narrow integer. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +pub unsafe fn vmovn_s16(a: int16x8_t) -> int8x8_t { + simd_cast(a) +} + +/// Vector narrow integer. 
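+// Editorial note: the vmovn_* intrinsics narrow by plain truncation, keeping
+// the low half of each lane; vqmovn_u64 above is the saturating counterpart.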
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +pub unsafe fn vmovn_s32(a: int32x4_t) -> int16x4_t { + simd_cast(a) +} + +/// Vector narrow integer. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +pub unsafe fn vmovn_s64(a: int64x2_t) -> int32x2_t { + simd_cast(a) +} + +/// Vector narrow integer. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +pub unsafe fn vmovn_u16(a: uint16x8_t) -> uint8x8_t { + simd_cast(a) +} + +/// Vector narrow integer. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +pub unsafe fn vmovn_u32(a: uint32x4_t) -> uint16x4_t { + simd_cast(a) +} + +/// Vector narrow integer. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +pub unsafe fn vmovn_u64(a: uint64x2_t) -> uint32x2_t { + simd_cast(a) +} + +/// Vector long move. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] +pub unsafe fn vmovl_s8(a: int8x8_t) -> int16x8_t { + simd_cast(a) +} + +/// Vector long move. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] +pub unsafe fn vmovl_s16(a: int16x4_t) -> int32x4_t { + simd_cast(a) +} + +/// Vector long move. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] +pub unsafe fn vmovl_s32(a: int32x2_t) -> int64x2_t { + simd_cast(a) +} + +/// Vector long move. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] +pub unsafe fn vmovl_u8(a: uint8x8_t) -> uint16x8_t { + simd_cast(a) +} + +/// Vector long move. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] +pub unsafe fn vmovl_u16(a: uint16x4_t) -> uint32x4_t { + simd_cast(a) +} + +/// Vector long move. 
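+// Editorial note: the vmovl_* intrinsics widen each lane to twice its width,
+// sign-extending the signed variants (sxtl on AArch64) and zero-extending the
+// unsigned ones (uxtl), as the assert_instr attributes around here show.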
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] +pub unsafe fn vmovl_u32(a: uint32x2_t) -> uint64x2_t { + simd_cast(a) +} + +/// Reciprocal square-root estimate. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrte))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] +pub unsafe fn vrsqrte_f32(a: float32x2_t) -> float32x2_t { + frsqrte_v2f32(a) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +pub unsafe fn vmvn_s8(a: int8x8_t) -> int8x8_t { + let b = int8x8_t(-1, -1, -1, -1, -1, -1, -1, -1); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +pub unsafe fn vmvnq_s8(a: int8x16_t) -> int8x16_t { + let b = int8x16_t( + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + ); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +pub unsafe fn vmvn_s16(a: int16x4_t) -> int16x4_t { + let b = int16x4_t(-1, -1, -1, -1); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +pub unsafe fn vmvnq_s16(a: int16x8_t) -> int16x8_t { + let b = int16x8_t(-1, -1, -1, -1, -1, -1, -1, -1); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +pub unsafe fn vmvn_s32(a: int32x2_t) -> int32x2_t { + let b = int32x2_t(-1, -1); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +pub unsafe fn vmvnq_s32(a: int32x4_t) -> int32x4_t { + let b = int32x4_t(-1, -1, -1, -1); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +pub unsafe fn vmvn_u8(a: uint8x8_t) -> uint8x8_t { + let b = uint8x8_t(255, 255, 255, 255, 255, 255, 255, 255); + simd_xor(a, b) +} + +/// Vector bitwise not. 
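+// Editorial note: there is no generic simd "not" operation, so vmvn is written
+// as an XOR with an all-ones vector; codegen still selects a single vmvn/mvn
+// instruction, as asserted above.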
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +pub unsafe fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t { + let b = uint8x16_t( + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +pub unsafe fn vmvn_u16(a: uint16x4_t) -> uint16x4_t { + let b = uint16x4_t(65_535, 65_535, 65_535, 65_535); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +pub unsafe fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t { + let b = uint16x8_t( + 65_535, 65_535, 65_535, 65_535, 65_535, 65_535, 65_535, 65_535, + ); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +pub unsafe fn vmvn_u32(a: uint32x2_t) -> uint32x2_t { + let b = uint32x2_t(4_294_967_295, 4_294_967_295); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +pub unsafe fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t { + let b = uint32x4_t(4_294_967_295, 4_294_967_295, 4_294_967_295, 4_294_967_295); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +pub unsafe fn vmvn_p8(a: poly8x8_t) -> poly8x8_t { + let b = poly8x8_t(255, 255, 255, 255, 255, 255, 255, 255); + simd_xor(a, b) +} + +/// Vector bitwise not. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +pub unsafe fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t { + let b = poly8x16_t( + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ); + simd_xor(a, b) +} + +/// Folding minimum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sminp))] +pub unsafe fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + vpmins_v8i8(a, b) +} + +/// Folding minimum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sminp))] +pub unsafe fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + vpmins_v4i16(a, b) +} + +/// Folding minimum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sminp))] +pub unsafe fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + vpmins_v2i32(a, b) +} + +/// Folding minimum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uminp))] +pub unsafe fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + vpminu_v8i8(a, b) +} + +/// Folding minimum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uminp))] +pub unsafe fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + vpminu_v4i16(a, b) +} + +/// Folding minimum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uminp))] +pub unsafe fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + vpminu_v2i32(a, b) +} + +/// Folding minimum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminp))] +pub unsafe fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + vpminf_v2f32(a, b) +} + +/// Folding maximum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smaxp))] +pub unsafe fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + vpmaxs_v8i8(a, b) +} + +/// Folding maximum of adjacent pairs +#[inline] +#[target_feature(enable 
= "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smaxp))] +pub unsafe fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + vpmaxs_v4i16(a, b) +} + +/// Folding maximum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smaxp))] +pub unsafe fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + vpmaxs_v2i32(a, b) +} + +/// Folding maximum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umaxp))] +pub unsafe fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + vpmaxu_v8i8(a, b) +} + +/// Folding maximum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umaxp))] +pub unsafe fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + vpmaxu_v4i16(a, b) +} + +/// Folding maximum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umaxp))] +pub unsafe fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + vpmaxu_v2i32(a, b) +} + +/// Folding maximum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxp))] +pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + vpmaxf_v2f32(a, b) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl1_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + vtbl1(a, b) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl1_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + transmute(vtbl1(transmute(a), transmute(b))) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl1_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x8_t { + transmute(vtbl1(transmute(a), transmute(b))) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl2_s8(a: int8x8x2_t, b: int8x8_t) -> int8x8_t { + vtbl2(a.0, a.1, b) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl2_u8(a: uint8x8x2_t, b: uint8x8_t) -> 
uint8x8_t { + transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl2_p8(a: poly8x8x2_t, b: uint8x8_t) -> poly8x8_t { + transmute(vtbl2(transmute(a.0), transmute(a.1), transmute(b))) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl3_s8(a: int8x8x3_t, b: int8x8_t) -> int8x8_t { + vtbl3(a.0, a.1, a.2, b) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl3_u8(a: uint8x8x3_t, b: uint8x8_t) -> uint8x8_t { + transmute(vtbl3( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(b), + )) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl3_p8(a: poly8x8x3_t, b: uint8x8_t) -> poly8x8_t { + transmute(vtbl3( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(b), + )) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl4_s8(a: int8x8x4_t, b: int8x8_t) -> int8x8_t { + vtbl4(a.0, a.1, a.2, a.3, b) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl4_u8(a: uint8x8x4_t, b: uint8x8_t) -> uint8x8_t { + transmute(vtbl4( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + transmute(b), + )) +} + +/// Table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbl))] +pub unsafe fn vtbl4_p8(a: poly8x8x4_t, b: uint8x8_t) -> poly8x8_t { + transmute(vtbl4( + transmute(a.0), + transmute(a.1), + transmute(a.2), + transmute(a.3), + transmute(b), + )) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx1_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + vtbx1(a, b, c) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx1_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + transmute(vtbx1(transmute(a), transmute(b), transmute(c))) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx1_p8(a: poly8x8_t, b: poly8x8_t, c: uint8x8_t) -> poly8x8_t { + transmute(vtbx1(transmute(a), transmute(b), transmute(c))) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx2_s8(a: int8x8_t, b: int8x8x2_t, c: int8x8_t) -> int8x8_t { + vtbx2(a, b.0, b.1, c) +} + +/// 
Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx2_u8(a: uint8x8_t, b: uint8x8x2_t, c: uint8x8_t) -> uint8x8_t { + transmute(vtbx2( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(c), + )) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx2_p8(a: poly8x8_t, b: poly8x8x2_t, c: uint8x8_t) -> poly8x8_t { + transmute(vtbx2( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(c), + )) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx3_s8(a: int8x8_t, b: int8x8x3_t, c: int8x8_t) -> int8x8_t { + vtbx3(a, b.0, b.1, b.2, c) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx3_u8(a: uint8x8_t, b: uint8x8x3_t, c: uint8x8_t) -> uint8x8_t { + transmute(vtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(c), + )) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx3_p8(a: poly8x8_t, b: poly8x8x3_t, c: uint8x8_t) -> poly8x8_t { + transmute(vtbx3( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(c), + )) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx4_s8(a: int8x8_t, b: int8x8x4_t, c: int8x8_t) -> int8x8_t { + vtbx4(a, b.0, b.1, b.2, b.3, c) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx4_u8(a: uint8x8_t, b: uint8x8x4_t, c: uint8x8_t) -> uint8x8_t { + transmute(vtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + transmute(c), + )) +} + +/// Extended table look-up +#[inline] +#[cfg(target_arch = "arm")] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vtbx))] +pub unsafe fn vtbx4_p8(a: poly8x8_t, b: poly8x8x4_t, c: uint8x8_t) -> poly8x8_t { + transmute(vtbx4( + transmute(a), + transmute(b.0), + transmute(b.1), + transmute(b.2), + transmute(b.3), + transmute(c), + )) +} + +/// Move vector element to general-purpose register +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_args_required_const(1)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", imm5 = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, imm5 = 1))] +// Based on the discussion in https://github.com/rust-lang/stdarch/pull/792 +// `mov` seems to be an acceptable intrinsic to compile to +// #[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(vmov, imm5 = 1))] +pub unsafe fn vgetq_lane_u64(v: uint64x2_t, imm5: i32) -> u64 { + if (imm5) < 0 || (imm5) > 1 { + unreachable_unchecked() + 
} + let imm5 = (imm5 & 0b1) as u32; + simd_extract(v, imm5) +} + +/// Move vector element to general-purpose register +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_args_required_const(1)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", imm5 = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov, imm5 = 0))] +// FIXME: on 32-bit ARM this seems to be turned into two vmov.32 instructions; +// validate correctness +pub unsafe fn vget_lane_u64(v: uint64x1_t, imm5: i32) -> u64 { + if imm5 != 0 { + unreachable_unchecked() + } + simd_extract(v, 0) +} + +/// Move vector element to general-purpose register +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_args_required_const(1)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u16", imm5 = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, imm5 = 2))] +pub unsafe fn vgetq_lane_u16(v: uint16x8_t, imm5: i32) -> u16 { + if (imm5) < 0 || (imm5) > 7 { + unreachable_unchecked() + } + let imm5 = (imm5 & 0b111) as u32; + simd_extract(v, imm5) +} + +/// Move vector element to general-purpose register +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_args_required_const(1)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.32", imm5 = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mov, imm5 = 2))] +pub unsafe fn vgetq_lane_u32(v: uint32x4_t, imm5: i32) -> u32 { + if (imm5) < 0 || (imm5) > 3 { + unreachable_unchecked() + } + let imm5 = (imm5 & 0b11) as u32; + simd_extract(v, imm5) +} + +/// Move vector element to general-purpose register +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_args_required_const(1)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov.u8", imm5 = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umov, imm5 = 2))] +pub unsafe fn vget_lane_u8(v: uint8x8_t, imm5: i32) -> u8 { + if (imm5) < 0 || (imm5) > 7 { + unreachable_unchecked() + } + let imm5 = (imm5 & 7) as u32; + simd_extract(v, imm5) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +pub unsafe fn vdupq_n_s8(value: i8) -> int8x16_t { + int8x16_t( + value, value, value, value, value, value, value, value, value, value, value, value, value, + value, value, value, + ) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +pub unsafe fn vdupq_n_u8(value: u8) -> uint8x16_t { + uint8x16_t( + value, value, value, value, value, value, value, value, value, value, value, value, value, + value, value, value, + ) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(dup))] +pub unsafe fn vmovq_n_u8(value: u8) -> uint8x16_t { + vdupq_n_u8(value) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { + transmute(a) +} + +/// Unsigned shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", imm3 = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("ushr", imm3 = 1))] +#[rustc_args_required_const(1)] +pub unsafe fn vshrq_n_u8(a: uint8x16_t, imm3: i32) -> uint8x16_t { + if imm3 < 0 || imm3 > 7 { + unreachable_unchecked(); + } else { + uint8x16_t( + a.0 >> imm3, + a.1 >> imm3, + a.2 >> imm3, + a.3 >> imm3, + a.4 >> imm3, + a.5 >> imm3, + a.6 >> imm3, + a.7 >> imm3, + a.8 >> imm3, + a.9 >> imm3, + a.10 >> imm3, + a.11 >> imm3, + a.12 >> imm3, + a.13 >> imm3, + a.14 >> imm3, + a.15 >> imm3, + ) + } +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshl.s8", imm3 = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, imm3 = 1))] +#[rustc_args_required_const(1)] +pub unsafe fn vshlq_n_u8(a: uint8x16_t, imm3: i32) -> uint8x16_t { + if imm3 < 0 || imm3 > 7 { + unreachable_unchecked(); + } else { + uint8x16_t( + a.0 << imm3, + a.1 << imm3, + a.2 << imm3, + a.3 << imm3, + a.4 << imm3, + a.5 << imm3, + a.6 << imm3, + a.7 << imm3, + a.8 << imm3, + a.9 << imm3, + a.10 << imm3, + a.11 << imm3, + a.12 << imm3, + a.13 << imm3, + a.14 << imm3, + a.15 << imm3, + ) + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", n = 3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, n = 3))] +#[rustc_args_required_const(2)] 
+pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t, n: i32) -> int8x16_t { + if n < 0 || n > 15 { + unreachable_unchecked(); + }; + match n & 0b1111 { + 0 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16( + a, + b, + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ), + 2 => simd_shuffle16( + a, + b, + [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], + ), + 3 => simd_shuffle16( + a, + b, + [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + ), + 4 => simd_shuffle16( + a, + b, + [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + ), + 5 => simd_shuffle16( + a, + b, + [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], + ), + 6 => simd_shuffle16( + a, + b, + [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21], + ), + 7 => simd_shuffle16( + a, + b, + [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22], + ), + 8 => simd_shuffle16( + a, + b, + [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], + ), + 9 => simd_shuffle16( + a, + b, + [ + 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + ], + ), + 10 => simd_shuffle16( + a, + b, + [ + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + ], + ), + 11 => simd_shuffle16( + a, + b, + [ + 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + ], + ), + 12 => simd_shuffle16( + a, + b, + [ + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, + ], + ), + 13 => simd_shuffle16( + a, + b, + [ + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + ], + ), + 14 => simd_shuffle16( + a, + b, + [ + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + ], + ), + 15 => simd_shuffle16( + a, + b, + [ + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + ], + ), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", n = 3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, n = 3))] +#[rustc_args_required_const(2)] +pub unsafe fn vextq_u8(a: uint8x16_t, b: uint8x16_t, n: i32) -> uint8x16_t { + if n < 0 || n > 15 { + unreachable_unchecked(); + }; + match n & 0b1111 { + 0 => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16( + a, + b, + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ), + 2 => simd_shuffle16( + a, + b, + [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17], + ), + 3 => simd_shuffle16( + a, + b, + [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + ), + 4 => simd_shuffle16( + a, + b, + [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], + ), + 5 => simd_shuffle16( + a, + b, + [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], + ), + 6 => simd_shuffle16( + a, + b, + [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21], + ), + 7 => simd_shuffle16( + a, + b, + [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22], + ), + 8 => simd_shuffle16( + a, + b, + [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], + ), + 9 => simd_shuffle16( + a, + b, + [ + 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + ], + ), + 10 => simd_shuffle16( + a, + b, + [ + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + ], + ), + 11 => simd_shuffle16( + a, + b, + [ + 11, 12, 13, 14, 15, 16, 17, 18, 
19, 20, 21, 22, 23, 24, 25, 26, + ], + ), + 12 => simd_shuffle16( + a, + b, + [ + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, + ], + ), + 13 => simd_shuffle16( + a, + b, + [ + 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, + ], + ), + 14 => simd_shuffle16( + a, + b, + [ + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + ], + ), + 15 => simd_shuffle16( + a, + b, + [ + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + ], + ), + _ => unreachable_unchecked(), + } +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(ldr))] +// even gcc compiles this to ldr: https://clang.godbolt.org/z/1bvH2x +// #[cfg_attr(test, assert_instr(ld1))] +pub unsafe fn vld1q_s8(addr: *const i8) -> int8x16_t { + ptr::read(addr as *const int8x16_t) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(ldr))] +// even gcc compiles this to ldr: https://clang.godbolt.org/z/1bvH2x +// #[cfg_attr(test, assert_instr(ld1))] +pub unsafe fn vld1q_u8(addr: *const u8) -> uint8x16_t { + ptr::read(addr as *const uint8x16_t) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::core_arch::arm::test_support::*; + use crate::core_arch::{arm::*, simd::*}; + use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec}; + use stdarch_test::simd_test; + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s8() { + let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e = a; + let r: i8x16 = transmute(vld1q_s8(transmute(&a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u8() { + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e = a; + let r: u8x16 = transmute(vld1q_u8(transmute(&a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_u8() { + let v = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r = vget_lane_u8(transmute(v), 1); + assert_eq!(r, 2); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_u32() { + let v = i32x4::new(1, 2, 3, 4); + let r = vgetq_lane_u32(transmute(v), 1); + assert_eq!(r, 2); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_u64() { + let v: u64 = 1; + let r = vget_lane_u64(transmute(v), 0); + assert_eq!(r, 1); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_u16() { + let v = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r = vgetq_lane_u16(transmute(v), 1); + assert_eq!(r, 2); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_s8() { + let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = i8x16::new( + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ); + let e = i8x16::new(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19); + let r: i8x16 = transmute(vextq_s8(transmute(a), transmute(b), 3)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_u8() { + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b = u8x16::new( + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + ); + let e = u8x16::new(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19); + let r: u8x16 = 
transmute(vextq_u8(transmute(a), transmute(b), 3)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrq_n_u8() { + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e = u8x16::new(0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4); + let r: u8x16 = transmute(vshrq_n_u8(transmute(a), 2)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_n_u8() { + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let r: u8x16 = transmute(vshlq_n_u8(transmute(a), 2)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqmovn_u64() { + let a = u64x2::new(1, 2); + let e = u32x2::new(1, 2); + let r: u32x2 = transmute(vqmovn_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_u64_u32() { + let a = u32x2::new(1, 2); + // expected value assumes little-endian lane order within the u64 + let e: u64 = 0x0000_0002_0000_0001; + let r: u64 = transmute(vreinterpret_u64_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_s8() { + let v: i8 = 42; + let e = i8x16::new( + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + ); + let r: i8x16 = transmute(vdupq_n_s8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_u8() { + let v: u8 = 42; + let e = u8x16::new( + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + ); + let r: u8x16 = transmute(vdupq_n_u8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_u8() { + let v: u8 = 42; + let e = u8x16::new( + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + ); + let r: u8x16 = transmute(vmovq_n_u8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_u64() { + let v = i64x2::new(1, 2); + let r = vgetq_lane_u64(transmute(v), 1); + assert_eq!(r, 2); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vadd_s8() { + test_ari_s8( + |i, j| vadd_s8(i, j), + |a: i8, b: i8| -> i8 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_s8() { + testq_ari_s8( + |i, j| vaddq_s8(i, j), + |a: i8, b: i8| -> i8 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vadd_s16() { + test_ari_s16( + |i, j| vadd_s16(i, j), + |a: i16, b: i16| -> i16 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_s16() { + testq_ari_s16( + |i, j| vaddq_s16(i, j), + |a: i16, b: i16| -> i16 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vadd_s32() { + test_ari_s32( + |i, j| vadd_s32(i, j), + |a: i32, b: i32| -> i32 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_s32() { + testq_ari_s32( + |i, j| vaddq_s32(i, j), + |a: i32, b: i32| -> i32 { a.overflowing_add(b).0 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vadd_u8() { + test_ari_u8( + |i, j| vadd_u8(i, j), + |a: u8, b: u8| -> u8 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_u8() { + testq_ari_u8( + |i, j| vaddq_u8(i, j), + |a: u8, b: u8| -> u8 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vadd_u16() { + test_ari_u16( + |i, j| vadd_u16(i, j), + |a: u16, b: u16| -> u16 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + 
unsafe fn test_vaddq_u16() { + testq_ari_u16( + |i, j| vaddq_u16(i, j), + |a: u16, b: u16| -> u16 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vadd_u32() { + test_ari_u32( + |i, j| vadd_u32(i, j), + |a: u32, b: u32| -> u32 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_u32() { + testq_ari_u32( + |i, j| vaddq_u32(i, j), + |a: u32, b: u32| -> u32 { a.overflowing_add(b).0 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vadd_f32() { + test_ari_f32(|i, j| vadd_f32(i, j), |a: f32, b: f32| -> f32 { a + b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_f32() { + testq_ari_f32(|i, j| vaddq_f32(i, j), |a: f32, b: f32| -> f32 { a + b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_s8() { + let v = i8::MAX; + let a = i8x8::new(v, v, v, v, v, v, v, v); + let v = 2 * (v as i16); + let e = i16x8::new(v, v, v, v, v, v, v, v); + let r: i16x8 = transmute(vaddl_s8(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_s16() { + let v = i16::MAX; + let a = i16x4::new(v, v, v, v); + let v = 2 * (v as i32); + let e = i32x4::new(v, v, v, v); + let r: i32x4 = transmute(vaddl_s16(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_s32() { + let v = i32::MAX; + let a = i32x2::new(v, v); + let v = 2 * (v as i64); + let e = i64x2::new(v, v); + let r: i64x2 = transmute(vaddl_s32(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_u8() { + let v = u8::MAX; + let a = u8x8::new(v, v, v, v, v, v, v, v); + let v = 2 * (v as u16); + let e = u16x8::new(v, v, v, v, v, v, v, v); + let r: u16x8 = transmute(vaddl_u8(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_u16() { + let v = u16::MAX; + let a = u16x4::new(v, v, v, v); + let v = 2 * (v as u32); + let e = u32x4::new(v, v, v, v); + let r: u32x4 = transmute(vaddl_u16(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_u32() { + let v = u32::MAX; + let a = u32x2::new(v, v); + let v = 2 * (v as u64); + let e = u64x2::new(v, v); + let r: u64x2 = transmute(vaddl_u32(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_s8() { + let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e = i8x8::new(-1, -2, -3, -4, -5, -6, -7, -8); + let r: i8x8 = transmute(vmvn_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_s8() { + let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e = i8x16::new( + -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, + ); + let r: i8x16 = transmute(vmvnq_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_s16() { + let a = i16x4::new(0, 1, 2, 3); + let e = i16x4::new(-1, -2, -3, -4); + let r: i16x4 = transmute(vmvn_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_s16() { + let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e = i16x8::new(-1, -2, -3, -4, -5, -6, -7, -8); + let r: i16x8 = transmute(vmvnq_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_s32() { + let a = i32x2::new(0, 1); + let e = i32x2::new(-1, -2); + let r: i32x2 = 
transmute(vmvn_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_s32() { + let a = i32x4::new(0, 1, 2, 3); + let e = i32x4::new(-1, -2, -3, -4); + let r: i32x4 = transmute(vmvnq_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_u8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e = u8x8::new(255, 254, 253, 252, 251, 250, 249, 248); + let r: u8x8 = transmute(vmvn_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e = u8x16::new( + 255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240, + ); + let r: u8x16 = transmute(vmvnq_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_u16() { + let a = u16x4::new(0, 1, 2, 3); + let e = u16x4::new(65_535, 65_534, 65_533, 65_532); + let r: u16x4 = transmute(vmvn_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_u16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e = u16x8::new( + 65_535, 65_534, 65_533, 65_532, 65_531, 65_530, 65_529, 65_528, + ); + let r: u16x8 = transmute(vmvnq_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_u32() { + let a = u32x2::new(0, 1); + let e = u32x2::new(4_294_967_295, 4_294_967_294); + let r: u32x2 = transmute(vmvn_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_u32() { + let a = u32x4::new(0, 1, 2, 3); + let e = u32x4::new(4_294_967_295, 4_294_967_294, 4_294_967_293, 4_294_967_292); + let r: u32x4 = transmute(vmvnq_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_p8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e = u8x8::new(255, 254, 253, 252, 251, 250, 249, 248); + let r: u8x8 = transmute(vmvn_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_p8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e = u8x16::new( + 255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240, + ); + let r: u8x16 = transmute(vmvnq_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_s16() { + let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vmovn_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_s32() { + let a = i32x4::new(1, 2, 3, 4); + let e = i16x4::new(1, 2, 3, 4); + let r: i16x4 = transmute(vmovn_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_s64() { + let a = i64x2::new(1, 2); + let e = i32x2::new(1, 2); + let r: i32x2 = transmute(vmovn_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_u16() { + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vmovn_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_u32() { + let a = u32x4::new(1, 2, 3, 4); + let e = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vmovn_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_u64() { + let a = 
u64x2::new(1, 2); + let e = u32x2::new(1, 2); + let r: u32x2 = transmute(vmovn_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_s8() { + let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i16x8 = transmute(vmovl_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_s16() { + let e = i32x4::new(1, 2, 3, 4); + let a = i16x4::new(1, 2, 3, 4); + let r: i32x4 = transmute(vmovl_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_s32() { + let e = i64x2::new(1, 2); + let a = i32x2::new(1, 2); + let r: i64x2 = transmute(vmovl_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_u8() { + let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u16x8 = transmute(vmovl_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_u16() { + let e = u32x4::new(1, 2, 3, 4); + let a = u16x4::new(1, 2, 3, 4); + let r: u32x4 = transmute(vmovl_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_u32() { + let e = u64x2::new(1, 2); + let a = u32x2::new(1, 2); + let r: u64x2 = transmute(vmovl_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsqrte_f32() { + let a = f32x2::new(1.0, 2.0); + let e = f32x2::new(0.9980469, 0.7050781); + let r: f32x2 = transmute(vrsqrte_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmin_s8() { + let a = i8x8::new(1, -2, 3, -4, 5, 6, 7, 8); + let b = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9); + let e = i8x8::new(-2, -4, 5, 7, 0, 2, 4, 6); + let r: i8x8 = transmute(vpmin_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmin_s16() { + let a = i16x4::new(1, 2, 3, -4); + let b = i16x4::new(0, 3, 2, 5); + let e = i16x4::new(1, -4, 0, 2); + let r: i16x4 = transmute(vpmin_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmin_s32() { + let a = i32x2::new(1, -2); + let b = i32x2::new(0, 3); + let e = i32x2::new(-2, 0); + let r: i32x2 = transmute(vpmin_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmin_u8() { + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b = u8x8::new(0, 3, 2, 5, 4, 7, 6, 9); + let e = u8x8::new(1, 3, 5, 7, 0, 2, 4, 6); + let r: u8x8 = transmute(vpmin_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmin_u16() { + let a = u16x4::new(1, 2, 3, 4); + let b = u16x4::new(0, 3, 2, 5); + let e = u16x4::new(1, 3, 0, 2); + let r: u16x4 = transmute(vpmin_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmin_u32() { + let a = u32x2::new(1, 2); + let b = u32x2::new(0, 3); + let e = u32x2::new(1, 0); + let r: u32x2 = transmute(vpmin_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmin_f32() { + let a = f32x2::new(1., -2.); + let b = f32x2::new(0., 3.); + let e = f32x2::new(-2., 0.); + let r: f32x2 = transmute(vpmin_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmax_s8() { + let a = i8x8::new(1, -2, 3, -4, 5, 6, 
7, 8); + let b = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9); + let e = i8x8::new(1, 3, 6, 8, 3, 5, 7, 9); + let r: i8x8 = transmute(vpmax_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmax_s16() { + let a = i16x4::new(1, 2, 3, -4); + let b = i16x4::new(0, 3, 2, 5); + let e = i16x4::new(2, 3, 3, 5); + let r: i16x4 = transmute(vpmax_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmax_s32() { + let a = i32x2::new(1, -2); + let b = i32x2::new(0, 3); + let e = i32x2::new(1, 3); + let r: i32x2 = transmute(vpmax_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmax_u8() { + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b = u8x8::new(0, 3, 2, 5, 4, 7, 6, 9); + let e = u8x8::new(2, 4, 6, 8, 3, 5, 7, 9); + let r: u8x8 = transmute(vpmax_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmax_u16() { + let a = u16x4::new(1, 2, 3, 4); + let b = u16x4::new(0, 3, 2, 5); + let e = u16x4::new(2, 4, 3, 5); + let r: u16x4 = transmute(vpmax_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmax_u32() { + let a = u32x2::new(1, 2); + let b = u32x2::new(0, 3); + let e = u32x2::new(2, 3); + let r: u32x2 = transmute(vpmax_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmax_f32() { + let a = f32x2::new(1., -2.); + let b = f32x2::new(0., 3.); + let e = f32x2::new(1., 3.); + let r: f32x2 = transmute(vpmax_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_s8() { + test_bit_s8(|i, j| vand_s8(i, j), |a: i8, b: i8| -> i8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s8() { + testq_bit_s8(|i, j| vandq_s8(i, j), |a: i8, b: i8| -> i8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vand_s16() { + test_bit_s16(|i, j| vand_s16(i, j), |a: i16, b: i16| -> i16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s16() { + testq_bit_s16(|i, j| vandq_s16(i, j), |a: i16, b: i16| -> i16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vand_s32() { + test_bit_s32(|i, j| vand_s32(i, j), |a: i32, b: i32| -> i32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s32() { + testq_bit_s32(|i, j| vandq_s32(i, j), |a: i32, b: i32| -> i32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vand_s64() { + test_bit_s64(|i, j| vand_s64(i, j), |a: i64, b: i64| -> i64 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s64() { + testq_bit_s64(|i, j| vandq_s64(i, j), |a: i64, b: i64| -> i64 { a & b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_u8() { + test_bit_u8(|i, j| vand_u8(i, j), |a: u8, b: u8| -> u8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u8() { + testq_bit_u8(|i, j| vandq_u8(i, j), |a: u8, b: u8| -> u8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vand_u16() { + test_bit_u16(|i, j| vand_u16(i, j), |a: u16, b: u16| -> u16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u16() { + testq_bit_u16(|i, j| vandq_u16(i, j), |a: u16, b: u16| -> u16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vand_u32() { + test_bit_u32(|i, j| vand_u32(i, j), |a: u32, b: u32| -> u32 { a & b }); + } + 
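// The test_ari_*, test_bit_*, and test_cmp_* helpers come from + // crate::core_arch::arm::test_support: the first closure wraps the intrinsic + // under test and the second supplies the scalar reference behaviour it is + // checked against. + 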
#[simd_test(enable = "neon")] + unsafe fn test_vandq_u32() { + testq_bit_u32(|i, j| vandq_u32(i, j), |a: u32, b: u32| -> u32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vand_u64() { + test_bit_u64(|i, j| vand_u64(i, j), |a: u64, b: u64| -> u64 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u64() { + testq_bit_u64(|i, j| vandq_u64(i, j), |a: u64, b: u64| -> u64 { a & b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s8() { + test_bit_s8(|i, j| vorr_s8(i, j), |a: i8, b: i8| -> i8 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s8() { + testq_bit_s8(|i, j| vorrq_s8(i, j), |a: i8, b: i8| -> i8 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s16() { + test_bit_s16(|i, j| vorr_s16(i, j), |a: i16, b: i16| -> i16 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s16() { + testq_bit_s16(|i, j| vorrq_s16(i, j), |a: i16, b: i16| -> i16 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s32() { + test_bit_s32(|i, j| vorr_s32(i, j), |a: i32, b: i32| -> i32 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s32() { + testq_bit_s32(|i, j| vorrq_s32(i, j), |a: i32, b: i32| -> i32 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s64() { + test_bit_s64(|i, j| vorr_s64(i, j), |a: i64, b: i64| -> i64 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s64() { + testq_bit_s64(|i, j| vorrq_s64(i, j), |a: i64, b: i64| -> i64 { a | b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u8() { + test_bit_u8(|i, j| vorr_u8(i, j), |a: u8, b: u8| -> u8 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u8() { + testq_bit_u8(|i, j| vorrq_u8(i, j), |a: u8, b: u8| -> u8 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u16() { + test_bit_u16(|i, j| vorr_u16(i, j), |a: u16, b: u16| -> u16 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u16() { + testq_bit_u16(|i, j| vorrq_u16(i, j), |a: u16, b: u16| -> u16 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u32() { + test_bit_u32(|i, j| vorr_u32(i, j), |a: u32, b: u32| -> u32 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u32() { + testq_bit_u32(|i, j| vorrq_u32(i, j), |a: u32, b: u32| -> u32 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u64() { + test_bit_u64(|i, j| vorr_u64(i, j), |a: u64, b: u64| -> u64 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u64() { + testq_bit_u64(|i, j| vorrq_u64(i, j), |a: u64, b: u64| -> u64 { a | b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_s8() { + test_bit_s8(|i, j| veor_s8(i, j), |a: i8, b: i8| -> i8 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s8() { + testq_bit_s8(|i, j| veorq_s8(i, j), |a: i8, b: i8| -> i8 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_s16() { + test_bit_s16(|i, j| veor_s16(i, j), |a: i16, b: i16| -> i16 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s16() { + testq_bit_s16(|i, j| veorq_s16(i, j), |a: i16, b: i16| -> i16 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_s32() { + test_bit_s32(|i, j| veor_s32(i, j), |a: i32, b: i32| -> i32 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s32() { + testq_bit_s32(|i, j| veorq_s32(i, j), |a: i32, b: i32| -> i32 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn 
test_veor_s64() { + test_bit_s64(|i, j| veor_s64(i, j), |a: i64, b: i64| -> i64 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s64() { + testq_bit_s64(|i, j| veorq_s64(i, j), |a: i64, b: i64| -> i64 { a ^ b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_u8() { + test_bit_u8(|i, j| veor_u8(i, j), |a: u8, b: u8| -> u8 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u8() { + testq_bit_u8(|i, j| veorq_u8(i, j), |a: u8, b: u8| -> u8 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_u16() { + test_bit_u16(|i, j| veor_u16(i, j), |a: u16, b: u16| -> u16 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u16() { + testq_bit_u16(|i, j| veorq_u16(i, j), |a: u16, b: u16| -> u16 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_u32() { + test_bit_u32(|i, j| veor_u32(i, j), |a: u32, b: u32| -> u32 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u32() { + testq_bit_u32(|i, j| veorq_u32(i, j), |a: u32, b: u32| -> u32 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_u64() { + test_bit_u64(|i, j| veor_u64(i, j), |a: u64, b: u64| -> u64 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u64() { + testq_bit_u64(|i, j| veorq_u64(i, j), |a: u64, b: u64| -> u64 { a ^ b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_s8() { + test_cmp_s8( + |i, j| vceq_s8(i, j), + |a: i8, b: i8| -> u8 { + if a == b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_s8() { + testq_cmp_s8( + |i, j| vceqq_s8(i, j), + |a: i8, b: i8| -> u8 { + if a == b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceq_s16() { + test_cmp_s16( + |i, j| vceq_s16(i, j), + |a: i16, b: i16| -> u16 { + if a == b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_s16() { + testq_cmp_s16( + |i, j| vceqq_s16(i, j), + |a: i16, b: i16| -> u16 { + if a == b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceq_s32() { + test_cmp_s32( + |i, j| vceq_s32(i, j), + |a: i32, b: i32| -> u32 { + if a == b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_s32() { + testq_cmp_s32( + |i, j| vceqq_s32(i, j), + |a: i32, b: i32| -> u32 { + if a == b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_u8() { + test_cmp_u8( + |i, j| vceq_u8(i, j), + |a: u8, b: u8| -> u8 { + if a == b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_u8() { + testq_cmp_u8( + |i, j| vceqq_u8(i, j), + |a: u8, b: u8| -> u8 { + if a == b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceq_u16() { + test_cmp_u16( + |i, j| vceq_u16(i, j), + |a: u16, b: u16| -> u16 { + if a == b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_u16() { + testq_cmp_u16( + |i, j| vceqq_u16(i, j), + |a: u16, b: u16| -> u16 { + if a == b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceq_u32() { + test_cmp_u32( + |i, j| vceq_u32(i, j), + |a: u32, b: u32| -> u32 { + if a == b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_u32() { + testq_cmp_u32( + |i, j| vceqq_u32(i, j), + |a: u32, b: u32| -> u32 { + 
if a == b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_f32() { + test_cmp_f32( + |i, j| vceq_f32(i, j), + |a: f32, b: f32| -> u32 { + if a == b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_f32() { + testq_cmp_f32( + |i, j| vceqq_f32(i, j), + |a: f32, b: f32| -> u32 { + if a == b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s8() { + test_cmp_s8( + |i, j| vcgt_s8(i, j), + |a: i8, b: i8| -> u8 { + if a > b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s8() { + testq_cmp_s8( + |i, j| vcgtq_s8(i, j), + |a: i8, b: i8| -> u8 { + if a > b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s16() { + test_cmp_s16( + |i, j| vcgt_s16(i, j), + |a: i16, b: i16| -> u16 { + if a > b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s16() { + testq_cmp_s16( + |i, j| vcgtq_s16(i, j), + |a: i16, b: i16| -> u16 { + if a > b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s32() { + test_cmp_s32( + |i, j| vcgt_s32(i, j), + |a: i32, b: i32| -> u32 { + if a > b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s32() { + testq_cmp_s32( + |i, j| vcgtq_s32(i, j), + |a: i32, b: i32| -> u32 { + if a > b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_u8() { + test_cmp_u8( + |i, j| vcgt_u8(i, j), + |a: u8, b: u8| -> u8 { + if a > b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_u8() { + testq_cmp_u8( + |i, j| vcgtq_u8(i, j), + |a: u8, b: u8| -> u8 { + if a > b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_u16() { + test_cmp_u16( + |i, j| vcgt_u16(i, j), + |a: u16, b: u16| -> u16 { + if a > b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_u16() { + testq_cmp_u16( + |i, j| vcgtq_u16(i, j), + |a: u16, b: u16| -> u16 { + if a > b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_u32() { + test_cmp_u32( + |i, j| vcgt_u32(i, j), + |a: u32, b: u32| -> u32 { + if a > b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_u32() { + testq_cmp_u32( + |i, j| vcgtq_u32(i, j), + |a: u32, b: u32| -> u32 { + if a > b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_f32() { + test_cmp_f32( + |i, j| vcgt_f32(i, j), + |a: f32, b: f32| -> u32 { + if a > b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_f32() { + testq_cmp_f32( + |i, j| vcgtq_f32(i, j), + |a: f32, b: f32| -> u32 { + if a > b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s8() { + test_cmp_s8( + |i, j| vclt_s8(i, j), + |a: i8, b: i8| -> u8 { + if a < b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_s8() { + testq_cmp_s8( + |i, j| vcltq_s8(i, j), + |a: i8, b: i8| -> u8 { + if a < b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s16() { + test_cmp_s16( + |i, j| vclt_s16(i, j), + |a: i16, b: i16| -> 
u16 { + if a < b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_s16() { + testq_cmp_s16( + |i, j| vcltq_s16(i, j), + |a: i16, b: i16| -> u16 { + if a < b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s32() { + test_cmp_s32( + |i, j| vclt_s32(i, j), + |a: i32, b: i32| -> u32 { + if a < b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_s32() { + testq_cmp_s32( + |i, j| vcltq_s32(i, j), + |a: i32, b: i32| -> u32 { + if a < b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u8() { + test_cmp_u8( + |i, j| vclt_u8(i, j), + |a: u8, b: u8| -> u8 { + if a < b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u8() { + testq_cmp_u8( + |i, j| vcltq_u8(i, j), + |a: u8, b: u8| -> u8 { + if a < b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u16() { + test_cmp_u16( + |i, j| vclt_u16(i, j), + |a: u16, b: u16| -> u16 { + if a < b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u16() { + testq_cmp_u16( + |i, j| vcltq_u16(i, j), + |a: u16, b: u16| -> u16 { + if a < b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u32() { + test_cmp_u32( + |i, j| vclt_u32(i, j), + |a: u32, b: u32| -> u32 { + if a < b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u32() { + testq_cmp_u32( + |i, j| vcltq_u32(i, j), + |a: u32, b: u32| -> u32 { + if a < b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_f32() { + test_cmp_f32( + |i, j| vclt_f32(i, j), + |a: f32, b: f32| -> u32 { + if a < b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_f32() { + testq_cmp_f32( + |i, j| vcltq_f32(i, j), + |a: f32, b: f32| -> u32 { + if a < b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_s8() { + test_cmp_s8( + |i, j| vcle_s8(i, j), + |a: i8, b: i8| -> u8 { + if a <= b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s8() { + testq_cmp_s8( + |i, j| vcleq_s8(i, j), + |a: i8, b: i8| -> u8 { + if a <= b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcle_s16() { + test_cmp_s16( + |i, j| vcle_s16(i, j), + |a: i16, b: i16| -> u16 { + if a <= b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s16() { + testq_cmp_s16( + |i, j| vcleq_s16(i, j), + |a: i16, b: i16| -> u16 { + if a <= b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcle_s32() { + test_cmp_s32( + |i, j| vcle_s32(i, j), + |a: i32, b: i32| -> u32 { + if a <= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s32() { + testq_cmp_s32( + |i, j| vcleq_s32(i, j), + |a: i32, b: i32| -> u32 { + if a <= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u8() { + test_cmp_u8( + |i, j| vcle_u8(i, j), + |a: u8, b: u8| -> u8 { + if a <= b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u8() { + testq_cmp_u8( + |i, j| vcleq_u8(i, j), + |a: u8, 
b: u8| -> u8 { + if a <= b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u16() { + test_cmp_u16( + |i, j| vcle_u16(i, j), + |a: u16, b: u16| -> u16 { + if a <= b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u16() { + testq_cmp_u16( + |i, j| vcleq_u16(i, j), + |a: u16, b: u16| -> u16 { + if a <= b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u32() { + test_cmp_u32( + |i, j| vcle_u32(i, j), + |a: u32, b: u32| -> u32 { + if a <= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u32() { + testq_cmp_u32( + |i, j| vcleq_u32(i, j), + |a: u32, b: u32| -> u32 { + if a <= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_f32() { + test_cmp_f32( + |i, j| vcle_f32(i, j), + |a: f32, b: f32| -> u32 { + if a <= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_f32() { + testq_cmp_f32( + |i, j| vcleq_f32(i, j), + |a: f32, b: f32| -> u32 { + if a <= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_s8() { + test_cmp_s8( + |i, j| vcge_s8(i, j), + |a: i8, b: i8| -> u8 { + if a >= b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s8() { + testq_cmp_s8( + |i, j| vcgeq_s8(i, j), + |a: i8, b: i8| -> u8 { + if a >= b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcge_s16() { + test_cmp_s16( + |i, j| vcge_s16(i, j), + |a: i16, b: i16| -> u16 { + if a >= b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s16() { + testq_cmp_s16( + |i, j| vcgeq_s16(i, j), + |a: i16, b: i16| -> u16 { + if a >= b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcge_s32() { + test_cmp_s32( + |i, j| vcge_s32(i, j), + |a: i32, b: i32| -> u32 { + if a >= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s32() { + testq_cmp_s32( + |i, j| vcgeq_s32(i, j), + |a: i32, b: i32| -> u32 { + if a >= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_u8() { + test_cmp_u8( + |i, j| vcge_u8(i, j), + |a: u8, b: u8| -> u8 { + if a >= b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_u8() { + testq_cmp_u8( + |i, j| vcgeq_u8(i, j), + |a: u8, b: u8| -> u8 { + if a >= b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcge_u16() { + test_cmp_u16( + |i, j| vcge_u16(i, j), + |a: u16, b: u16| -> u16 { + if a >= b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_u16() { + testq_cmp_u16( + |i, j| vcgeq_u16(i, j), + |a: u16, b: u16| -> u16 { + if a >= b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcge_u32() { + test_cmp_u32( + |i, j| vcge_u32(i, j), + |a: u32, b: u32| -> u32 { + if a >= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_u32() { + testq_cmp_u32( + |i, j| vcgeq_u32(i, j), + |a: u32, b: u32| -> u32 { + if a >= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_f32() { + test_cmp_f32( + |i, 
j| vcge_f32(i, j), + |a: f32, b: f32| -> u32 { + if a >= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_f32() { + testq_cmp_f32( + |i, j| vcgeq_f32(i, j), + |a: f32, b: f32| -> u32 { + if a >= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_s8() { + test_ari_s8( + |i, j| vqsub_s8(i, j), + |a: i8, b: i8| -> i8 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_s8() { + testq_ari_s8( + |i, j| vqsubq_s8(i, j), + |a: i8, b: i8| -> i8 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_s16() { + test_ari_s16( + |i, j| vqsub_s16(i, j), + |a: i16, b: i16| -> i16 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_s16() { + testq_ari_s16( + |i, j| vqsubq_s16(i, j), + |a: i16, b: i16| -> i16 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_s32() { + test_ari_s32( + |i, j| vqsub_s32(i, j), + |a: i32, b: i32| -> i32 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_s32() { + testq_ari_s32( + |i, j| vqsubq_s32(i, j), + |a: i32, b: i32| -> i32 { a.saturating_sub(b) }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_u8() { + test_ari_u8( + |i, j| vqsub_u8(i, j), + |a: u8, b: u8| -> u8 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_u8() { + testq_ari_u8( + |i, j| vqsubq_u8(i, j), + |a: u8, b: u8| -> u8 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_u16() { + test_ari_u16( + |i, j| vqsub_u16(i, j), + |a: u16, b: u16| -> u16 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_u16() { + testq_ari_u16( + |i, j| vqsubq_u16(i, j), + |a: u16, b: u16| -> u16 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_u32() { + test_ari_u32( + |i, j| vqsub_u32(i, j), + |a: u32, b: u32| -> u32 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_u32() { + testq_ari_u32( + |i, j| vqsubq_u32(i, j), + |a: u32, b: u32| -> u32 { a.saturating_sub(b) }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_s8() { + test_ari_s8(|i, j| vhadd_s8(i, j), |a: i8, b: i8| -> i8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_s8() { + testq_ari_s8(|i, j| vhaddq_s8(i, j), |a: i8, b: i8| -> i8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_s16() { + test_ari_s16(|i, j| vhadd_s16(i, j), |a: i16, b: i16| -> i16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_s16() { + testq_ari_s16(|i, j| vhaddq_s16(i, j), |a: i16, b: i16| -> i16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_s32() { + test_ari_s32(|i, j| vhadd_s32(i, j), |a: i32, b: i32| -> i32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_s32() { + testq_ari_s32(|i, j| vhaddq_s32(i, j), |a: i32, b: i32| -> i32 { a & b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_u8() { + test_ari_u8(|i, j| vhadd_u8(i, j), |a: u8, b: u8| -> u8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_u8() { + testq_ari_u8(|i, j| vhaddq_u8(i, j), |a: u8, b: u8| -> u8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_u16() { + test_ari_u16(|i, j| vhadd_u16(i, j), |a: u16, b: u16| -> u16 { a & b }); + } + 
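A note on the `a & b` closures used as the scalar reference in the vhadd/vrhadd tests above and below: they are only valid because the `test` harness in `test_support.rs` zips `vals` with itself, so every pair satisfies `a == b`, and a (rounding) halving add of two identical values is the identity (`hadd(x, x) == x == x & x`). A general scalar reference would widen and shift instead; the following is a minimal sketch under that assumption, not part of the patch:

```rust
// Scalar reference for NEON halving adds, sketched for i8.
// shadd/uhadd compute (a + b) >> 1; widening first avoids overflow,
// and the arithmetic shift gives the floor the instruction uses.
fn hadd_i8(a: i8, b: i8) -> i8 {
    (((a as i16) + (b as i16)) >> 1) as i8
}

// srhadd/urhadd round instead of truncating: (a + b + 1) >> 1.
fn rhadd_i8(a: i8, b: i8) -> i8 {
    (((a as i16) + (b as i16) + 1) >> 1) as i8
}

fn main() {
    // Matches the `validate` rows in neon.spec: hadd(42, 1) == 21, rhadd(42, 1) == 22.
    assert_eq!(hadd_i8(42, 1), 21);
    assert_eq!(rhadd_i8(42, 1), 22);
    // For identical inputs both collapse to the identity, so `a & b` works there.
    assert_eq!(hadd_i8(7, 7), 7 & 7);
}
```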
#[simd_test(enable = "neon")] + unsafe fn test_vhaddq_u16() { + testq_ari_u16(|i, j| vhaddq_u16(i, j), |a: u16, b: u16| -> u16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_u32() { + test_ari_u32(|i, j| vhadd_u32(i, j), |a: u32, b: u32| -> u32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_u32() { + testq_ari_u32(|i, j| vhaddq_u32(i, j), |a: u32, b: u32| -> u32 { a & b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_s8() { + test_ari_s8(|i, j| vrhadd_s8(i, j), |a: i8, b: i8| -> i8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_s8() { + testq_ari_s8(|i, j| vrhaddq_s8(i, j), |a: i8, b: i8| -> i8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_s16() { + test_ari_s16(|i, j| vrhadd_s16(i, j), |a: i16, b: i16| -> i16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_s16() { + testq_ari_s16(|i, j| vrhaddq_s16(i, j), |a: i16, b: i16| -> i16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_s32() { + test_ari_s32(|i, j| vrhadd_s32(i, j), |a: i32, b: i32| -> i32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_s32() { + testq_ari_s32(|i, j| vrhaddq_s32(i, j), |a: i32, b: i32| -> i32 { a & b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_u8() { + test_ari_u8(|i, j| vrhadd_u8(i, j), |a: u8, b: u8| -> u8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_u8() { + testq_ari_u8(|i, j| vrhaddq_u8(i, j), |a: u8, b: u8| -> u8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_u16() { + test_ari_u16(|i, j| vrhadd_u16(i, j), |a: u16, b: u16| -> u16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_u16() { + testq_ari_u16(|i, j| vrhaddq_u16(i, j), |a: u16, b: u16| -> u16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_u32() { + test_ari_u32(|i, j| vrhadd_u32(i, j), |a: u32, b: u32| -> u32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_u32() { + testq_ari_u32(|i, j| vrhaddq_u32(i, j), |a: u32, b: u32| -> u32 { a & b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_s8() { + test_ari_s8( + |i, j| vqadd_s8(i, j), + |a: i8, b: i8| -> i8 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_s8() { + testq_ari_s8( + |i, j| vqaddq_s8(i, j), + |a: i8, b: i8| -> i8 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_s16() { + test_ari_s16( + |i, j| vqadd_s16(i, j), + |a: i16, b: i16| -> i16 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_s16() { + testq_ari_s16( + |i, j| vqaddq_s16(i, j), + |a: i16, b: i16| -> i16 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_s32() { + test_ari_s32( + |i, j| vqadd_s32(i, j), + |a: i32, b: i32| -> i32 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_s32() { + testq_ari_s32( + |i, j| vqaddq_s32(i, j), + |a: i32, b: i32| -> i32 { a.saturating_add(b) }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_u8() { + test_ari_u8( + |i, j| vqadd_u8(i, j), + |a: u8, b: u8| -> u8 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_u8() { + testq_ari_u8( + |i, j| vqaddq_u8(i, j), + |a: u8, b: u8| -> u8 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_u16() { + test_ari_u16( + |i, 
j| vqadd_u16(i, j), + |a: u16, b: u16| -> u16 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_u16() { + testq_ari_u16( + |i, j| vqaddq_u16(i, j), + |a: u16, b: u16| -> u16 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_u32() { + test_ari_u32( + |i, j| vqadd_u32(i, j), + |a: u32, b: u32| -> u32 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_u32() { + testq_ari_u32( + |i, j| vqaddq_u32(i, j), + |a: u32, b: u32| -> u32 { a.saturating_add(b) }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_s8() { + test_ari_s8( + |i, j| vmul_s8(i, j), + |a: i8, b: i8| -> i8 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_s8() { + testq_ari_s8( + |i, j| vmulq_s8(i, j), + |a: i8, b: i8| -> i8 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmul_s16() { + test_ari_s16( + |i, j| vmul_s16(i, j), + |a: i16, b: i16| -> i16 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_s16() { + testq_ari_s16( + |i, j| vmulq_s16(i, j), + |a: i16, b: i16| -> i16 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmul_s32() { + test_ari_s32( + |i, j| vmul_s32(i, j), + |a: i32, b: i32| -> i32 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_s32() { + testq_ari_s32( + |i, j| vmulq_s32(i, j), + |a: i32, b: i32| -> i32 { a.overflowing_mul(b).0 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_u8() { + test_ari_u8( + |i, j| vmul_u8(i, j), + |a: u8, b: u8| -> u8 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_u8() { + testq_ari_u8( + |i, j| vmulq_u8(i, j), + |a: u8, b: u8| -> u8 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmul_u16() { + test_ari_u16( + |i, j| vmul_u16(i, j), + |a: u16, b: u16| -> u16 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_u16() { + testq_ari_u16( + |i, j| vmulq_u16(i, j), + |a: u16, b: u16| -> u16 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmul_u32() { + test_ari_u32( + |i, j| vmul_u32(i, j), + |a: u32, b: u32| -> u32 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_u32() { + testq_ari_u32( + |i, j| vmulq_u32(i, j), + |a: u32, b: u32| -> u32 { a.overflowing_mul(b).0 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_f32() { + test_ari_f32(|i, j| vmul_f32(i, j), |a: f32, b: f32| -> f32 { a * b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_f32() { + testq_ari_f32(|i, j| vmulq_f32(i, j), |a: f32, b: f32| -> f32 { a * b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_s8() { + test_ari_s8(|i, j| vsub_s8(i, j), |a: i8, b: i8| -> i8 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_s8() { + testq_ari_s8(|i, j| vsubq_s8(i, j), |a: i8, b: i8| -> i8 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsub_s16() { + test_ari_s16(|i, j| vsub_s16(i, j), |a: i16, b: i16| -> i16 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_s16() { + testq_ari_s16(|i, j| vsubq_s16(i, j), |a: i16, b: i16| -> i16 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsub_s32() { + test_ari_s32(|i, j| vsub_s32(i, j), |a: i32, b: i32| -> i32 { a - b }); + } + 
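The multiply tests above use `overflowing_mul(b).0` as the scalar reference because NEON's `vmul` truncates each product to the element width. A quick standalone check, not part of the patch, showing that `wrapping_mul` computes the same value:

```rust
fn main() {
    let (a, b): (u8, u8) = (200, 3);
    // 200 * 3 = 600 = 2 * 256 + 88, so only the low eight bits (88) survive.
    assert_eq!(a.overflowing_mul(b), (88, true));
    assert_eq!(a.overflowing_mul(b).0, a.wrapping_mul(b));
}
```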
#[simd_test(enable = "neon")] + unsafe fn test_vsubq_s32() { + testq_ari_s32(|i, j| vsubq_s32(i, j), |a: i32, b: i32| -> i32 { a - b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_u8() { + test_ari_u8(|i, j| vsub_u8(i, j), |a: u8, b: u8| -> u8 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_u8() { + testq_ari_u8(|i, j| vsubq_u8(i, j), |a: u8, b: u8| -> u8 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsub_u16() { + test_ari_u16(|i, j| vsub_u16(i, j), |a: u16, b: u16| -> u16 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_u16() { + testq_ari_u16(|i, j| vsubq_u16(i, j), |a: u16, b: u16| -> u16 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsub_u32() { + test_ari_u32(|i, j| vsub_u32(i, j), |a: u32, b: u32| -> u32 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_u32() { + testq_ari_u32(|i, j| vsubq_u32(i, j), |a: u32, b: u32| -> u32 { a - b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_f32() { + test_ari_f32(|i, j| vsub_f32(i, j), |a: f32, b: f32| -> f32 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_f32() { + testq_ari_f32(|i, j| vsubq_f32(i, j), |a: f32, b: f32| -> f32 { a - b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_s8() { + test_ari_s8( + |i, j| vhsub_s8(i, j), + |a: i8, b: i8| -> i8 { (((a as i16) - (b as i16)) / 2) as i8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_s8() { + testq_ari_s8( + |i, j| vhsubq_s8(i, j), + |a: i8, b: i8| -> i8 { (((a as i16) - (b as i16)) / 2) as i8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_s16() { + test_ari_s16( + |i, j| vhsub_s16(i, j), + |a: i16, b: i16| -> i16 { (((a as i32) - (b as i32)) / 2) as i16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_s16() { + testq_ari_s16( + |i, j| vhsubq_s16(i, j), + |a: i16, b: i16| -> i16 { (((a as i32) - (b as i32)) / 2) as i16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_s32() { + test_ari_s32( + |i, j| vhsub_s32(i, j), + |a: i32, b: i32| -> i32 { (((a as i64) - (b as i64)) / 2) as i32 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_s32() { + testq_ari_s32( + |i, j| vhsubq_s32(i, j), + |a: i32, b: i32| -> i32 { (((a as i64) - (b as i64)) / 2) as i32 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_u8() { + test_ari_u8( + |i, j| vhsub_u8(i, j), + |a: u8, b: u8| -> u8 { (((a as u16) - (b as u16)) / 2) as u8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_u8() { + testq_ari_u8( + |i, j| vhsubq_u8(i, j), + |a: u8, b: u8| -> u8 { (((a as u16) - (b as u16)) / 2) as u8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_u16() { + test_ari_u16( + |i, j| vhsub_u16(i, j), + |a: u16, b: u16| -> u16 { (((a as u32) - (b as u32)) / 2) as u16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_u16() { + testq_ari_u16( + |i, j| vhsubq_u16(i, j), + |a: u16, b: u16| -> u16 { (((a as u32) - (b as u32)) / 2) as u16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_u32() { + test_ari_u32( + |i, j| vhsub_u32(i, j), + |a: u32, b: u32| -> u32 { (((a as u64) - (b as u64)) / 2) as u32 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_u32() { + testq_ari_u32( + |i, j| vhsubq_u32(i, j), + |a: u32, b: u32| -> u32 { (((a as u64) - (b as u64)) / 2) as u32 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_s8_u8() { + let a =
i8x16::new(-1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: u8x16 = transmute(vreinterpretq_s8_u8(transmute(a))); + let e = u8x16::new(0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + assert_eq!(r, e) + } + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_u16_u8() { + let a = u16x8::new( + 0x01_00, 0x03_02, 0x05_04, 0x07_06, 0x09_08, 0x0B_0A, 0x0D_0C, 0x0F_0E, + ); + let r: u8x16 = transmute(vreinterpretq_u16_u8(transmute(a))); + let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq!(r, e) + } + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_u32_u8() { + let a = u32x4::new(0x03_02_01_00, 0x07_06_05_04, 0x0B_0A_09_08, 0x0F_0E_0D_0C); + let r: u8x16 = transmute(vreinterpretq_u32_u8(transmute(a))); + let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq!(r, e) + } + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_u64_u8() { + let a: u64x2 = u64x2::new(0x07_06_05_04_03_02_01_00, 0x0F_0E_0D_0C_0B_0A_09_08); + let r: u8x16 = transmute(vreinterpretq_u64_u8(transmute(a))); + let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq!(r, e) + } + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_u8_s8() { + let a = u8x16::new(0xFF, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: i8x16 = transmute(vreinterpretq_u8_s8(transmute(a))); + let e = i8x16::new(-1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + assert_eq!(r, e) + } +} + +#[cfg(test)] +#[cfg(target_endian = "little")] +mod table_lookup_tests; diff --git a/crates/core_arch/src/arm/table_lookup_tests.rs b/crates/core_arch/src/arm/neon/table_lookup_tests.rs similarity index 100% rename from crates/core_arch/src/arm/table_lookup_tests.rs rename to crates/core_arch/src/arm/neon/table_lookup_tests.rs diff --git a/crates/core_arch/src/arm/test_support.rs b/crates/core_arch/src/arm/test_support.rs new file mode 100644 index 0000000000..337a270e40 --- /dev/null +++ b/crates/core_arch/src/arm/test_support.rs @@ -0,0 +1,830 @@ +use crate::core_arch::{arm::*, simd::*}; +use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec}; + +macro_rules! V_u8 { + () => { + vec![0x00u8, 0x01u8, 0x02u8, 0x0Fu8, 0x80u8, 0xF0u8, 0xFFu8] + }; +} +macro_rules! V_u16 { + () => { + vec![ + 0x0000u16, 0x0101u16, 0x0202u16, 0x0F0Fu16, 0x8000u16, 0xF0F0u16, 0xFFFFu16, + ] + }; +} +macro_rules! V_u32 { + () => { + vec![ + 0x00000000u32, + 0x01010101u32, + 0x02020202u32, + 0x0F0F0F0Fu32, + 0x80000000u32, + 0xF0F0F0F0u32, + 0xFFFFFFFFu32, + ] + }; +} +macro_rules! V_u64 { + () => { + vec![ + 0x0000000000000000u64, + 0x0101010101010101u64, + 0x0202020202020202u64, + 0x0F0F0F0F0F0F0F0Fu64, + 0x8080808080808080u64, + 0xF0F0F0F0F0F0F0F0u64, + 0xFFFFFFFFFFFFFFFFu64, + ] + }; +} + +macro_rules! V_i8 { + () => { + vec![ + 0x00i8, 0x01i8, 0x02i8, 0x0Fi8, -128i8, /* 0x80 */ + -16i8, /* 0xF0 */ + -1i8, /* 0xFF */ + ] + }; +} +macro_rules! V_i16 { + () => { + vec![ + 0x0000i16, 0x0101i16, 0x0202i16, 0x0F0Fi16, -32768i16, /* 0x8000 */ + -3856i16, /* 0xF0F0 */ + -1i16, /* 0xFFFF */ + ] + }; +} +macro_rules! V_i32 { + () => { + vec![ + 0x00000000i32, + 0x01010101i32, + 0x02020202i32, + 0x0F0F0F0Fi32, + -2139062144i32, /* 0x80808080 */ + -252645136i32, /* 0xF0F0F0F0 */ + -1i32, /* 0xFFFFFFFF */ + ] + }; +} + +macro_rules!
V_i64 { + () => { + vec![ + 0x0000000000000000i64, + 0x0101010101010101i64, + 0x0202020202020202i64, + 0x0F0F0F0F0F0F0F0Fi64, + -9223372036854775808i64, /* 0x8000000000000000 */ + -1152921504606846976i64, /* 0xF000000000000000 */ + -1i64, /* 0xFFFFFFFFFFFFFFFF */ + ] + }; +} + +macro_rules! V_f32 { + () => { + vec![ + 0.0f32, + 1.0f32, + -1.0f32, + 1.2f32, + 2.4f32, + std::f32::MAX, + std::f32::MIN, + std::f32::INFINITY, + std::f32::NEG_INFINITY, + std::f32::NAN, + ] + }; +} + +macro_rules! to64 { + ($t : ident) => { + |v: $t| -> u64 { transmute(v) } + }; +} + +macro_rules! to128 { + ($t : ident) => { + |v: $t| -> u128 { transmute(v) } + }; +} + +pub(crate) fn test<T, U, V, W, X>( + vals: Vec<T>, + fill1: fn(T) -> V, + fill2: fn(U) -> W, + cast: fn(W) -> X, + test_fun: fn(V, V) -> W, + verify_fun: fn(T, T) -> U, +) where + T: Copy + core::fmt::Debug + std::cmp::PartialEq, + U: Copy + core::fmt::Debug + std::cmp::PartialEq, + V: Copy + core::fmt::Debug, + W: Copy + core::fmt::Debug, + X: Copy + core::fmt::Debug + std::cmp::PartialEq, +{ + let pairs = vals.iter().zip(vals.iter()); + + for (i, j) in pairs { + let a: V = fill1(*i); + let b: V = fill1(*j); + + let actual_pre: W = test_fun(a, b); + let expected_pre: W = fill2(verify_fun(*i, *j)); + + let actual: X = cast(actual_pre); + let expected: X = cast(expected_pre); + + assert_eq!( + actual, expected, + "[{:?}:{:?}] :\nf({:?}, {:?}) = {:?}\ng({:?}, {:?}) = {:?}\n", + *i, *j, &a, &b, actual_pre, &a, &b, expected_pre + ); + } +} + +macro_rules! gen_test_fn { + ($n: ident, $t: ident, $u: ident, $v: ident, $w: ident, $x: ident, $vals: expr, $fill1: expr, $fill2: expr, $cast: expr) => { + pub(crate) fn $n(test_fun: fn($v, $v) -> $w, verify_fun: fn($t, $t) -> $u) { + unsafe { + test::<$t, $u, $v, $w, $x>($vals, $fill1, $fill2, $cast, test_fun, verify_fun) + }; + } + }; +} + +macro_rules!
gen_fill_fn { + ($id: ident, $el_width: expr, $num_els: expr, $in_t : ident, $out_t: ident, $cmp_t: ident) => { + pub(crate) fn $id(val: $in_t) -> $out_t { + let initial: [$in_t; $num_els] = [val; $num_els]; + let result: $cmp_t = unsafe { transmute(initial) }; + let result_out: $out_t = unsafe { transmute(result) }; + + // println!("FILL: {:016x} as {} x {}: {:016x}", val.reverse_bits(), $el_width, $num_els, (result as u64).reverse_bits()); + + result_out + } + }; +} + +gen_fill_fn!(fill_u8, 8, 8, u8, uint8x8_t, u64); +gen_fill_fn!(fill_s8, 8, 8, i8, int8x8_t, u64); +gen_fill_fn!(fillq_u8, 8, 16, u8, uint8x16_t, u128); +gen_fill_fn!(fillq_s8, 8, 16, i8, int8x16_t, u128); + +gen_fill_fn!(fill_u16, 16, 4, u16, uint16x4_t, u64); +gen_fill_fn!(fill_s16, 16, 4, i16, int16x4_t, u64); +gen_fill_fn!(fillq_u16, 16, 8, u16, uint16x8_t, u128); +gen_fill_fn!(fillq_s16, 16, 8, i16, int16x8_t, u128); + +gen_fill_fn!(fill_u32, 32, 2, u32, uint32x2_t, u64); +gen_fill_fn!(fill_s32, 32, 2, i32, int32x2_t, u64); +gen_fill_fn!(fillq_u32, 32, 4, u32, uint32x4_t, u128); +gen_fill_fn!(fillq_s32, 32, 4, i32, int32x4_t, u128); + +gen_fill_fn!(fill_u64, 64, 1, u64, uint64x1_t, u64); +gen_fill_fn!(fill_s64, 64, 1, i64, int64x1_t, u64); +gen_fill_fn!(fillq_u64, 64, 2, u64, uint64x2_t, u128); +gen_fill_fn!(fillq_s64, 64, 2, i64, int64x2_t, u128); + +gen_fill_fn!(fill_f32, 32, 2, f32, float32x2_t, u64); +gen_fill_fn!(fillq_f32, 32, 4, f32, float32x4_t, u128); + +gen_test_fn!( + test_ari_u8, + u8, + u8, + uint8x8_t, + uint8x8_t, + u64, + V_u8!(), + fill_u8, + fill_u8, + to64!(uint8x8_t) +); +gen_test_fn!( + test_bit_u8, + u8, + u8, + uint8x8_t, + uint8x8_t, + u64, + V_u8!(), + fill_u8, + fill_u8, + to64!(uint8x8_t) +); +gen_test_fn!( + test_cmp_u8, + u8, + u8, + uint8x8_t, + uint8x8_t, + u64, + V_u8!(), + fill_u8, + fill_u8, + to64!(uint8x8_t) +); +gen_test_fn!( + testq_ari_u8, + u8, + u8, + uint8x16_t, + uint8x16_t, + u128, + V_u8!(), + fillq_u8, + fillq_u8, + to128!(uint8x16_t) +); +gen_test_fn!( + testq_bit_u8, + u8, + u8, + uint8x16_t, + uint8x16_t, + u128, + V_u8!(), + fillq_u8, + fillq_u8, + to128!(uint8x16_t) +); +gen_test_fn!( + testq_cmp_u8, + u8, + u8, + uint8x16_t, + uint8x16_t, + u128, + V_u8!(), + fillq_u8, + fillq_u8, + to128!(uint8x16_t) +); + +gen_test_fn!( + test_ari_s8, + i8, + i8, + int8x8_t, + int8x8_t, + u64, + V_i8!(), + fill_s8, + fill_s8, + to64!(int8x8_t) +); +gen_test_fn!( + test_bit_s8, + i8, + i8, + int8x8_t, + int8x8_t, + u64, + V_i8!(), + fill_s8, + fill_s8, + to64!(int8x8_t) +); +gen_test_fn!( + test_cmp_s8, + i8, + u8, + int8x8_t, + uint8x8_t, + u64, + V_i8!(), + fill_s8, + fill_u8, + to64!(uint8x8_t) +); +gen_test_fn!( + testq_ari_s8, + i8, + i8, + int8x16_t, + int8x16_t, + u128, + V_i8!(), + fillq_s8, + fillq_s8, + to128!(int8x16_t) +); +gen_test_fn!( + testq_bit_s8, + i8, + i8, + int8x16_t, + int8x16_t, + u128, + V_i8!(), + fillq_s8, + fillq_s8, + to128!(int8x16_t) +); +gen_test_fn!( + testq_cmp_s8, + i8, + u8, + int8x16_t, + uint8x16_t, + u128, + V_i8!(), + fillq_s8, + fillq_u8, + to128!(uint8x16_t) +); + +gen_test_fn!( + test_ari_u16, + u16, + u16, + uint16x4_t, + uint16x4_t, + u64, + V_u16!(), + fill_u16, + fill_u16, + to64!(uint16x4_t) +); +gen_test_fn!( + test_bit_u16, + u16, + u16, + uint16x4_t, + uint16x4_t, + u64, + V_u16!(), + fill_u16, + fill_u16, + to64!(uint16x4_t) +); +gen_test_fn!( + test_cmp_u16, + u16, + u16, + uint16x4_t, + uint16x4_t, + u64, + V_u16!(), + fill_u16, + fill_u16, + to64!(uint16x4_t) +); +gen_test_fn!( + testq_ari_u16, + u16, + u16, + uint16x8_t, + 
uint16x8_t, + u128, + V_u16!(), + fillq_u16, + fillq_u16, + to128!(uint16x8_t) +); +gen_test_fn!( + testq_bit_u16, + u16, + u16, + uint16x8_t, + uint16x8_t, + u128, + V_u16!(), + fillq_u16, + fillq_u16, + to128!(uint16x8_t) +); +gen_test_fn!( + testq_cmp_u16, + u16, + u16, + uint16x8_t, + uint16x8_t, + u128, + V_u16!(), + fillq_u16, + fillq_u16, + to128!(uint16x8_t) +); + +gen_test_fn!( + test_ari_s16, + i16, + i16, + int16x4_t, + int16x4_t, + u64, + V_i16!(), + fill_s16, + fill_s16, + to64!(int16x4_t) +); +gen_test_fn!( + test_bit_s16, + i16, + i16, + int16x4_t, + int16x4_t, + u64, + V_i16!(), + fill_s16, + fill_s16, + to64!(int16x4_t) +); +gen_test_fn!( + test_cmp_s16, + i16, + u16, + int16x4_t, + uint16x4_t, + u64, + V_i16!(), + fill_s16, + fill_u16, + to64!(uint16x4_t) +); +gen_test_fn!( + testq_ari_s16, + i16, + i16, + int16x8_t, + int16x8_t, + u128, + V_i16!(), + fillq_s16, + fillq_s16, + to128!(int16x8_t) +); +gen_test_fn!( + testq_bit_s16, + i16, + i16, + int16x8_t, + int16x8_t, + u128, + V_i16!(), + fillq_s16, + fillq_s16, + to128!(int16x8_t) +); +gen_test_fn!( + testq_cmp_s16, + i16, + u16, + int16x8_t, + uint16x8_t, + u128, + V_i16!(), + fillq_s16, + fillq_u16, + to128!(uint16x8_t) +); + +gen_test_fn!( + test_ari_u32, + u32, + u32, + uint32x2_t, + uint32x2_t, + u64, + V_u32!(), + fill_u32, + fill_u32, + to64!(uint32x2_t) +); +gen_test_fn!( + test_bit_u32, + u32, + u32, + uint32x2_t, + uint32x2_t, + u64, + V_u32!(), + fill_u32, + fill_u32, + to64!(uint32x2_t) +); +gen_test_fn!( + test_cmp_u32, + u32, + u32, + uint32x2_t, + uint32x2_t, + u64, + V_u32!(), + fill_u32, + fill_u32, + to64!(uint32x2_t) +); +gen_test_fn!( + testq_ari_u32, + u32, + u32, + uint32x4_t, + uint32x4_t, + u128, + V_u32!(), + fillq_u32, + fillq_u32, + to128!(uint32x4_t) +); +gen_test_fn!( + testq_bit_u32, + u32, + u32, + uint32x4_t, + uint32x4_t, + u128, + V_u32!(), + fillq_u32, + fillq_u32, + to128!(uint32x4_t) +); +gen_test_fn!( + testq_cmp_u32, + u32, + u32, + uint32x4_t, + uint32x4_t, + u128, + V_u32!(), + fillq_u32, + fillq_u32, + to128!(uint32x4_t) +); + +gen_test_fn!( + test_ari_s32, + i32, + i32, + int32x2_t, + int32x2_t, + u64, + V_i32!(), + fill_s32, + fill_s32, + to64!(int32x2_t) +); +gen_test_fn!( + test_bit_s32, + i32, + i32, + int32x2_t, + int32x2_t, + u64, + V_i32!(), + fill_s32, + fill_s32, + to64!(int32x2_t) +); +gen_test_fn!( + test_cmp_s32, + i32, + u32, + int32x2_t, + uint32x2_t, + u64, + V_i32!(), + fill_s32, + fill_u32, + to64!(uint32x2_t) +); +gen_test_fn!( + testq_ari_s32, + i32, + i32, + int32x4_t, + int32x4_t, + u128, + V_i32!(), + fillq_s32, + fillq_s32, + to128!(int32x4_t) +); +gen_test_fn!( + testq_bit_s32, + i32, + i32, + int32x4_t, + int32x4_t, + u128, + V_i32!(), + fillq_s32, + fillq_s32, + to128!(int32x4_t) +); +gen_test_fn!( + testq_cmp_s32, + i32, + u32, + int32x4_t, + uint32x4_t, + u128, + V_i32!(), + fillq_s32, + fillq_u32, + to128!(uint32x4_t) +); + +gen_test_fn!( + test_ari_u64, + u64, + u64, + uint64x1_t, + uint64x1_t, + u64, + V_u64!(), + fill_u64, + fill_u64, + to64!(uint64x1_t) +); +gen_test_fn!( + test_bit_u64, + u64, + u64, + uint64x1_t, + uint64x1_t, + u64, + V_u64!(), + fill_u64, + fill_u64, + to64!(uint64x1_t) +); +gen_test_fn!( + test_cmp_u64, + u64, + u64, + uint64x1_t, + uint64x1_t, + u64, + V_u64!(), + fill_u64, + fill_u64, + to64!(uint64x1_t) +); +gen_test_fn!( + testq_ari_u64, + u64, + u64, + uint64x2_t, + uint64x2_t, + u128, + V_u64!(), + fillq_u64, + fillq_u64, + to128!(uint64x2_t) +); +gen_test_fn!( + testq_bit_u64, + u64, + u64, + uint64x2_t, + 
uint64x2_t, + u128, + V_u64!(), + fillq_u64, + fillq_u64, + to128!(uint64x2_t) +); +gen_test_fn!( + testq_cmp_u64, + u64, + u64, + uint64x2_t, + uint64x2_t, + u128, + V_u64!(), + fillq_u64, + fillq_u64, + to128!(uint64x2_t) +); + +gen_test_fn!( + test_ari_s64, + i64, + i64, + int64x1_t, + int64x1_t, + u64, + V_i64!(), + fill_s64, + fill_s64, + to64!(int64x1_t) +); +gen_test_fn!( + test_bit_s64, + i64, + i64, + int64x1_t, + int64x1_t, + u64, + V_i64!(), + fill_s64, + fill_s64, + to64!(int64x1_t) +); +gen_test_fn!( + test_cmp_s64, + i64, + u64, + int64x1_t, + uint64x1_t, + u64, + V_i64!(), + fill_s64, + fill_u64, + to64!(uint64x1_t) +); +gen_test_fn!( + testq_ari_s64, + i64, + i64, + int64x2_t, + int64x2_t, + u128, + V_i64!(), + fillq_s64, + fillq_s64, + to128!(int64x2_t) +); +gen_test_fn!( + testq_bit_s64, + i64, + i64, + int64x2_t, + int64x2_t, + u128, + V_i64!(), + fillq_s64, + fillq_s64, + to128!(int64x2_t) +); +gen_test_fn!( + testq_cmp_s64, + i64, + u64, + int64x2_t, + uint64x2_t, + u128, + V_i64!(), + fillq_s64, + fillq_u64, + to128!(uint64x2_t) +); + +gen_test_fn!( + test_ari_f32, + f32, + f32, + float32x2_t, + float32x2_t, + u64, + V_f32!(), + fill_f32, + fill_f32, + to64!(float32x2_t) +); +gen_test_fn!( + test_cmp_f32, + f32, + u32, + float32x2_t, + uint32x2_t, + u64, + V_f32!(), + fill_f32, + fill_u32, + to64!(uint32x2_t) +); +gen_test_fn!( + testq_ari_f32, + f32, + f32, + float32x4_t, + float32x4_t, + u128, + V_f32!(), + fillq_f32, + fillq_f32, + to128!(float32x4_t) +); +gen_test_fn!( + testq_cmp_f32, + f32, + u32, + float32x4_t, + uint32x4_t, + u128, + V_f32!(), + fillq_f32, + fillq_u32, + to128!(uint32x4_t) +); diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index 00d369d997..7ebff27e8c 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -349,6 +349,50 @@ macro_rules! constify_imm5 { }; } +//immediate value: 0:15 +#[allow(unused)] +macro_rules! constify_imm4 { + ($imm8:expr, $expand:ident) => { + #[allow(overflowing_literals)] + match ($imm8) & 0b1111 { + 0 => $expand!(0), + 1 => $expand!(1), + 2 => $expand!(2), + 3 => $expand!(3), + 4 => $expand!(4), + 5 => $expand!(5), + 6 => $expand!(6), + 7 => $expand!(7), + 8 => $expand!(8), + 9 => $expand!(9), + 10 => $expand!(10), + 11 => $expand!(11), + 12 => $expand!(12), + 13 => $expand!(13), + 14 => $expand!(14), + _ => $expand!(15), + } + }; +} + +//immediate value: 0:7 +#[allow(unused)] +macro_rules! constify_imm3 { + ($imm8:expr, $expand:ident) => { + #[allow(overflowing_literals)] + match ($imm8) & 0b111 { + 0 => $expand!(0), + 1 => $expand!(1), + 2 => $expand!(2), + 3 => $expand!(3), + 4 => $expand!(4), + 5 => $expand!(5), + 6 => $expand!(6), + _ => $expand!(7), + } + }; +} + #[allow(unused)] macro_rules! types { ($( diff --git a/crates/stdarch-gen/Cargo.toml b/crates/stdarch-gen/Cargo.toml new file mode 100644 index 0000000000..b339672f4e --- /dev/null +++ b/crates/stdarch-gen/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "stdarch-gen" +version = "0.1.0" +authors = ["Heinz Gies "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/crates/stdarch-gen/README.md b/crates/stdarch-gen/README.md new file mode 100644 index 0000000000..54b602cdd3 --- /dev/null +++ b/crates/stdarch-gen/README.md @@ -0,0 +1,11 @@ +# Neon intrinsic code generator + +A small tool that allows you to quickly generate intrinsics for the NEON architecture.
+ +The specification for the intrinsics can be found in `neon.spec`. + +To re-generate the code, run the following from the root of the `stdarch` crate: + +``` +OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec +``` \ No newline at end of file diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec new file mode 100644 index 0000000000..0343a7232e --- /dev/null +++ b/crates/stdarch-gen/neon.spec @@ -0,0 +1,469 @@ +// ARM Neon intrinsic specification. +// +// This file contains the specification for a number of +// intrinsics, which allows us to generate them along with +// their test cases. +// +// A note on the syntax of this file: it is not very intelligently parsed! +// +// # Comments +// Comments start with AT LEAST two slashes (four or more work +// as well), so // is a comment and /////// is too. +// +// # Sections +// Sections start with EXACTLY three slashes followed +// by AT LEAST one space. Sections are used for two things: +// +// 1) they serve as the doc comment for the given intrinsics. +// 2) they reset all variables (name, fn, etc.) +// +// # Variables +// +// name - The prefix of the function; suffixes are auto +// generated from the type they get passed. +// +// fn - The function to call in rust-land. +// +// aarch64 - The intrinsic to check on the aarch64 architecture. +// If this is given but no arm intrinsic is provided, +// the function will exclusively be generated for +// aarch64. +// This is used to generate both aarch64-specific and +// shared intrinsics, by first specifying only the aarch64 +// variant and then the arm variant. +// +// arm - The ARMv7 intrinsic used to check arm code +// generation. All neon functions available on arm are +// also available on aarch64. If no aarch64 intrinsic was +// set, they are assumed to be the same. +// Intrinsics ending with a `.` will have a size suffix +// added (such as `i8` or `i64`) that is not sign-specific. +// Intrinsics ending with a `.s` will have a size suffix +// added (such as `s8` or `u64`) that is sign-specific. +// +// a - First input for tests; it gets scaled to the size of +// the type. +// +// b - Second input for tests; it gets scaled to the size of +// the type. +// +// # special values +// +// TRUE - 'true', all bits are set to 1 +// FALSE - 'false', all bits are set to 0 +// FF - same as 'true' +// MIN - minimal value (either 0 or the lowest negative number) +// MAX - maximal value, just prior to overflow +// +// # validate +// Validates the result of running the intrinsic on a and b +// against the expected result of the test. The special values +// 'TRUE' and 'FALSE' can be used to represent the correct NEON +// representation of true or false values. They too get scaled +// to the type. +// +// validate needs to be called before generate, as it sets +// up the rules for validation that get generated for each +// type. +// # generate +// The generate command generates the intrinsics; it uses the +// variables currently set and can be called multiple times, +// with some of the variables overwritten in between.
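To make the mapping concrete: the `vand` entry that opens the spec below, expanded for the first type of its `generate` line (`int8x8_t`), produces a function of roughly the following shape via the `gen_arm` template in `src/main.rs` (the names `int8x8_t` and `simd_and` are in scope in the generated file through `use super::*`):

```rust
/// Vector bitwise and
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))]
pub unsafe fn vand_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
    simd_and(a, b)
}
```

Because `fn = simd_and` is set, no `extern "C"` LLVM link block is emitted; entries that use `link-arm`/`link-aarch64` instead get one.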
+ +/// Vector bitwise and +name = vand +fn = simd_and +arm = vand +aarch64 = and +a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00 +b = 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F +validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00 +b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +validate 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +generate int*_t, uint*_t, int64x*_t, uint64x*_t + +/// Vector bitwise or (immediate, inclusive) +name = vorr +fn = simd_or +arm = vorr +aarch64 = orr +a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F +b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F +generate int*_t, uint*_t, int64x*_t, uint64x*_t + + +/// Vector bitwise exclusive or (vector) +name = veor +fn = simd_xor +arm = veor +aarch64 = eor +a = 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F +b = 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +validate 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F +generate int*_t, uint*_t, int64x*_t, uint64x*_t + +//////////////////// +// equality +//////////////////// + +/// Compare bitwise Equal (vector) +name = vceq +fn = simd_eq +a = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX +b = MIN, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, MAX +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE +a = MIN, MIN, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, MAX +b = MIN, MAX, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, MIN +validate TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE, FALSE + +aarch64 = cmeq +generate uint64x*_t, int64x1_t:uint64x1_t, int64x2_t:uint64x2_t, poly64x1_t:uint64x1_t, poly64x2_t:uint64x2_t + +arm = vceq. +generate uint*_t, int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t + +/// Floating-point compare equal +name = vceq +fn = simd_eq +a = 1.2, 3.4, 5.6, 7.8 +b = 1.2, 3.4, 5.6, 7.8 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = fcmeq +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +arm = vceq. 
+// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +//////////////////// +// greater than +//////////////////// + +/// Compare signed greater than +name = vcgt +fn = simd_gt +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE +aarch64 = cmgt +generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t + +arm = vcgt.s +generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t + +/// Compare unsigned higher +name = vcgt +fn = simd_gt +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = cmhi +generate uint64x*_t + +arm = vcgt.s +generate uint*_t + +/// Floating-point compare greater than +name = vcgt +fn = simd_gt +a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9 +b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = fcmgt +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +arm = vcgt.s +// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +//////////////////// +// less than +//////////////////// + +/// Compare signed less than +name = vclt +fn = simd_lt +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE +aarch64 = cmgt +generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t + +arm = vcgt.s +generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t + +/// Compare unsigned less than +name = vclt +fn = simd_lt +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = cmhi +generate uint64x*_t + +arm = vcgt.s +generate uint*_t + +/// Floating-point compare less than +name = vclt +fn = simd_lt +a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8 +b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = fcmgt +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +arm = vcgt.s +// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +//////////////////// +// less than or equal +//////////////////// + +/// Compare signed less than or equal +name = vcle +fn = simd_le +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = cmge +generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t + +arm = vcge.s +generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t + +/// Compare unsigned less than or equal +name = vcle +fn = simd_le +a = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = cmhs +generate uint64x*_t + +arm = vcge.s +generate uint*_t + +/// Floating-point compare less than or equal +name = vcle +fn = simd_le +a = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8 +b = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE +aarch64 = fcmge +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t +arm = vcge.s +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +//////////////////// +// greater than or equal +//////////////////// + +/// Compare signed greater than or equal +name = vcge +fn = simd_ge +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = cmge +generate int64x1_t:uint64x1_t, int64x2_t:uint64x2_t + +arm = vcge.s +generate int8x8_t:uint8x8_t, int8x16_t:uint8x16_t, int16x4_t:uint16x4_t, int16x8_t:uint16x8_t, int32x2_t:uint32x2_t, int32x4_t:uint32x4_t + +/// Compare unsigned greater than or equal +name = vcge +fn = simd_ge +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = cmhs +generate uint64x*_t + +arm = vcge.s +generate uint*_t + +/// Floating-point compare greater than or equal +name = vcge +fn = simd_ge +a = 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8, 8.9 +b = 0.1, 1.2, 2.3, 3.4, 4.5, 5.6, 6.7, 7.8 +validate TRUE, TRUE, TRUE, TRUE, TRUE, TRUE + +aarch64 = fcmge +generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t + +arm = vcge.s +// we are missing float16x4_t:uint16x4_t, float16x8_t:uint16x8_t +generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t + +/// Saturating subtract +name = vqsub +a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26 + +arm = vqsub.s +aarch64 = uqsub +link-arm = vqsubu._EXT_ +link-aarch64 = uqsub._EXT_ +generate uint*_t + +arm = vqsub.s +aarch64 = sqsub +link-arm = vqsubs._EXT_ +link-aarch64 = sqsub._EXT_ +generate int*_t + +/// Halving add +name = vhadd +a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29 + + +arm = vhadd.s +aarch64 = uhadd +link-aarch64 = uhadd._EXT_ +link-arm = vhaddu._EXT_ +generate uint*_t + + +arm = vhadd.s +aarch64 = shadd +link-aarch64 = shadd._EXT_ +link-arm = vhadds._EXT_ +generate int*_t + +/// Rounding halving add +name = vrhadd +a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29 + +arm = vrhadd.s +aarch64 = urhadd +link-arm = vrhaddu._EXT_ +link-aarch64 = urhadd._EXT_ +generate uint*_t + +arm = vrhadd.s +aarch64 = srhadd +link-arm = vrhadds._EXT_ +link-aarch64 = srhadd._EXT_ +generate int*_t + +/// Saturating add +name = vqadd +a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58 + +arm = vqadd.s +aarch64 = uqadd +link-arm = vqaddu._EXT_ +link-aarch64 = uqadd._EXT_ +generate uint*_t + +arm = vqadd.s +aarch64 = sqadd +link-arm = vqadds._EXT_ +link-aarch64 = sqadd._EXT_ +generate int*_t + +// requires the first and second argument to be different; this is not implemented yet +// /// Signed saturating accumulate of unsigned value +// +// name = vuqadd +// a = 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 +// b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +// e = 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58 + +// it seems like we don't have those in rustland :( +// aarch64 = suqadd +// link-aarch64 = usqadd._EXT_ +// generate int64x*_t + +// arm = suqadd +// link-arm = vuqadds._EXT_ +// link-aarch64 = suqadd._EXT_ +// generate int*_t + + +/// Multiply +name = vmul +a = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 +b = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +validate 1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32 +arm = vmul. +aarch64 = mul +fn = simd_mul +generate int*_t, uint*_t + +/// Multiply +name = vmul +fn = simd_mul +a = 1.0, 2.0, 1.0, 2.0 +b = 2.0, 3.0, 4.0, 5.0 +validate 2.0, 6.0, 4.0, 10.0 + +aarch64 = fmul +generate float64x*_t + +arm = vmul. +generate float*_t + + +/// Subtract +name = vsub +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 +validate 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14 +arm = vsub. +aarch64 = sub +fn = simd_sub +generate int*_t, uint*_t, int64x*_t, uint64x*_t + +/// Subtract +name = vsub +fn = simd_sub +a = 1.0, 4.0, 3.0, 8.0 +b = 1.0, 2.0, 3.0, 4.0 +validate 0.0, 2.0, 0.0, 4.0 + +aarch64 = fsub +generate float64x*_t + +arm = vsub.
+generate float*_t + + +/// Signed halving subtract +name = vhsub +a = 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 +b = 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2 +validate 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7 + +arm = vhsub.s +aarch64 = uhsub +link-arm = vhsubu._EXT_ +link-aarch64 = uhsub._EXT_ +generate uint*_t + +arm = vhsub.s +aarch64 = shsub +link-arm = vhsubs._EXT_ +link-aarch64 = shsub._EXT_ +generate int*_t diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs new file mode 100644 index 0000000000..8a9d9f25c0 --- /dev/null +++ b/crates/stdarch-gen/src/main.rs @@ -0,0 +1,750 @@ +use std::env; +use std::fs::File; +use std::io::prelude::*; +use std::io::{self, BufReader}; +use std::path::PathBuf; + +const IN: &str = "neon.spec"; +const ARM_OUT: &str = "generated.rs"; +const AARCH64_OUT: &str = "generated.rs"; + +const UINT_TYPES: [&str; 6] = [ + "uint8x8_t", + "uint8x16_t", + "uint16x4_t", + "uint16x8_t", + "uint32x2_t", + "uint32x4_t", +]; + +const UINT_TYPES_64: [&str; 2] = ["uint64x1_t", "uint64x2_t"]; + +const INT_TYPES: [&str; 6] = [ + "int8x8_t", + "int8x16_t", + "int16x4_t", + "int16x8_t", + "int32x2_t", + "int32x4_t", +]; + +const INT_TYPES_64: [&str; 2] = ["int64x1_t", "int64x2_t"]; + +const FLOAT_TYPES: [&str; 2] = [ + //"float8x8_t", not supported by rust + //"float8x16_t", not supported by rust + //"float16x4_t", not supported by rust + //"float16x8_t", not supported by rust + "float32x2_t", + "float32x4_t", +]; + +const FLOAT_TYPES_64: [&str; 2] = [ + //"float8x8_t", not supported by rust + //"float8x16_t", not supported by rust + //"float16x4_t", not supported by rust + //"float16x8_t", not supported by rust + "float64x1_t", + "float64x2_t", +]; + +fn type_len(t: &str) -> usize { + match t { + "int8x8_t" => 8, + "int8x16_t" => 16, + "int16x4_t" => 4, + "int16x8_t" => 8, + "int32x2_t" => 2, + "int32x4_t" => 4, + "int64x1_t" => 1, + "int64x2_t" => 2, + "uint8x8_t" => 8, + "uint8x16_t" => 16, + "uint16x4_t" => 4, + "uint16x8_t" => 8, + "uint32x2_t" => 2, + "uint32x4_t" => 4, + "uint64x1_t" => 1, + "uint64x2_t" => 2, + "float16x4_t" => 4, + "float16x8_t" => 8, + "float32x2_t" => 2, + "float32x4_t" => 4, + "float64x1_t" => 1, + "float64x2_t" => 2, + "poly64x1_t" => 1, + "poly64x2_t" => 2, + _ => panic!("unknown type: {}", t), + } +} + +fn type_to_suffix(t: &str) -> &str { + match t { + "int8x8_t" => "_s8", + "int8x16_t" => "q_s8", + "int16x4_t" => "_s16", + "int16x8_t" => "q_s16", + "int32x2_t" => "_s32", + "int32x4_t" => "q_s32", + "int64x1_t" => "_s64", + "int64x2_t" => "q_s64", + "uint8x8_t" => "_u8", + "uint8x16_t" => "q_u8", + "uint16x4_t" => "_u16", + "uint16x8_t" => "q_u16", + "uint32x2_t" => "_u32", + "uint32x4_t" => "q_u32", + "uint64x1_t" => "_u64", + "uint64x2_t" => "q_u64", + "float16x4_t" => "_f16", + "float16x8_t" => "q_f16", + "float32x2_t" => "_f32", + "float32x4_t" => "q_f32", + "float64x1_t" => "_f64", + "float64x2_t" => "q_f64", + "poly64x1_t" => "_p64", + "poly64x2_t" => "q_p64", + _ => panic!("unknown type: {}", t), + } +} + +fn type_to_global_type(t: &str) -> &str { + match t { + "int8x8_t" => "i8x8", + "int8x16_t" => "i8x16", + "int16x4_t" => "i16x4", + "int16x8_t" => "i16x8", + "int32x2_t" => "i32x2", + "int32x4_t" => "i32x4", + "int64x1_t" => "i64x1", + "int64x2_t" => "i64x2", + "uint8x8_t" => "u8x8", + "uint8x16_t" => "u8x16", + "uint16x4_t" => "u16x4", + "uint16x8_t" => "u16x8", + "uint32x2_t" => "u32x2", + "uint32x4_t" => "u32x4", + "uint64x1_t" => "u64x1", + "uint64x2_t" => "u64x2", + "float16x4_t" => 
"f16x4", + "float16x8_t" => "f16x8", + "float32x2_t" => "f32x2", + "float32x4_t" => "f32x4", + "float64x1_t" => "f64", + "float64x2_t" => "f64x2", + "poly64x1_t" => "i64x1", + "poly64x2_t" => "i64x2", + _ => panic!("unknown type: {}", t), + } +} + +// fn type_to_native_type(t: &str) -> &str { +// match t { +// "int8x8_t" => "i8", +// "int8x16_t" => "i8", +// "int16x4_t" => "i16", +// "int16x8_t" => "i16", +// "int32x2_t" => "i32", +// "int32x4_t" => "i32", +// "int64x1_t" => "i64", +// "int64x2_t" => "i64", +// "uint8x8_t" => "u8", +// "uint8x16_t" => "u8", +// "uint16x4_t" => "u16", +// "uint16x8_t" => "u16", +// "uint32x2_t" => "u32", +// "uint32x4_t" => "u32", +// "uint64x1_t" => "u64", +// "uint64x2_t" => "u64", +// "float16x4_t" => "f16", +// "float16x8_t" => "f16", +// "float32x2_t" => "f32", +// "float32x4_t" => "f32", +// "float64x1_t" => "f64", +// "float64x2_t" => "f64", +// "poly64x1_t" => "i64", +// "poly64x2_t" => "i64", +// _ => panic!("unknown type: {}", t), +// } +// } + +fn type_to_ext(t: &str) -> &str { + match t { + "int8x8_t" => "v8i8", + "int8x16_t" => "v16i8", + "int16x4_t" => "v4i16", + "int16x8_t" => "v8i16", + "int32x2_t" => "v2i32", + "int32x4_t" => "v4i32", + "int64x1_t" => "v1i64", + "int64x2_t" => "v2i64", + "uint8x8_t" => "v8i8", + "uint8x16_t" => "v16i8", + "uint16x4_t" => "v4i16", + "uint16x8_t" => "v8i16", + "uint32x2_t" => "v2i32", + "uint32x4_t" => "v4i32", + "uint64x1_t" => "v1i64", + "uint64x2_t" => "v2i64", + "float16x4_t" => "v4f16", + "float16x8_t" => "v8f16", + "float32x2_t" => "v2f32", + "float32x4_t" => "v4f32", + "float64x1_t" => "v1f64", + "float64x2_t" => "v2f64", + /* + "poly64x1_t" => "i64x1", + "poly64x2_t" => "i64x2", + */ + _ => panic!("unknown type for extension: {}", t), + } +} + +fn values(t: &str, vs: &[String]) -> String { + if vs.len() == 1 && !t.contains('x') { + format!(": {} = {}", t, vs[0]) + } else if vs.len() == 1 && type_to_global_type(t) == "f64" { + format!(": {} = {}", type_to_global_type(t), vs[0]) + } else { + format!( + ": {} = {}::new({})", + type_to_global_type(t), + type_to_global_type(t), + vs.iter() + .map(|v| map_val(type_to_global_type(t), v)) + //.map(|v| format!("{}{}", v, type_to_native_type(t))) + .collect::>() + .join(", ") + ) + } +} + +fn max_val(t: &str) -> &'static str { + match &t[..3] { + "u8x" => "0xFF", + "u16" => "0xFF_FF", + "u32" => "0xFF_FF_FF_FF", + "u64" => "0xFF_FF_FF_FF_FF_FF_FF_FF", + "i8x" => "0x7F", + "i16" => "0x7F_FF", + "i32" => "0x7F_FF_FF_FF", + "i64" => "0x7F_FF_FF_FF_FF_FF_FF_FF", + "f32" => "3.40282347e+38", + "f64" => "1.7976931348623157e+308", + _ => panic!("No TRUE for type {}", t), + } +} + +fn min_val(t: &str) -> &'static str { + match &t[..3] { + "u8x" => "0", + "u16" => "0", + "u32" => "0", + "u64" => "0", + "i8x" => "-128", + "i16" => "-32768", + "i32" => "-2147483648", + "i64" => "-9223372036854775808", + "f32" => "-3.40282347e+38", + "f64" => "-1.7976931348623157e+308", + _ => panic!("No TRUE for type {}", t), + } +} + +fn true_val(t: &str) -> &'static str { + match &t[..3] { + "u8x" => "0xFF", + "u16" => "0xFF_FF", + "u32" => "0xFF_FF_FF_FF", + "u64" => "0xFF_FF_FF_FF_FF_FF_FF_FF", + _ => panic!("No TRUE for type {}", t), + } +} + +fn ff_val(t: &str) -> &'static str { + match &t[..3] { + "u8x" => "0xFF", + "u16" => "0xFF_FF", + "u32" => "0xFF_FF_FF_FF", + "u64" => "0xFF_FF_FF_FF_FF_FF_FF_FF", + "i8x" => "0xFF", + "i16" => "0xFF_FF", + "i32" => "0xFF_FF_FF_FF", + "i64" => "0xFF_FF_FF_FF_FF_FF_FF_FF", + _ => panic!("No TRUE for type {}", t), + } +} + +fn false_val(_t: &str) 
-> &'static str { + "0" +} +fn map_val<'v>(t: &str, v: &'v str) -> &'v str { + match v { + "FALSE" => false_val(t), + "TRUE" => true_val(t), + "MAX" => min_val(t), + "MIN" => max_val(t), + "FF" => ff_val(t), + o => o, + } +} + +#[allow(clippy::too_many_arguments)] +fn gen_aarch64( + current_comment: &str, + current_fn: &Option, + name: &str, + current_aarch64: &Option, + link_aarch64: &Option, + in_t: &str, + out_t: &str, + current_tests: &[(Vec, Vec, Vec)], +) -> (String, String) { + let _global_t = type_to_global_type(in_t); + let _global_ret_t = type_to_global_type(out_t); + let current_fn = if let Some(current_fn) = current_fn.clone() { + if link_aarch64.is_some() { + panic!("[{}] Can't specify link and fn at the same time.", name) + } + current_fn + } else { + if link_aarch64.is_none() { + panic!("[{}] Either fn or link-aarch have to be specified.", name) + } + format!("{}_", name) + }; + let current_aarch64 = current_aarch64.clone().unwrap(); + let ext_c = if let Some(link_aarch64) = link_aarch64.clone() { + let ext = type_to_ext(in_t); + + format!( + r#" + #[allow(improper_ctypes)] + extern "C" {{ + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.{}")] + fn {}(a: {}, a: {}) -> {}; + }} +"#, + link_aarch64.replace("_EXT_", ext), + current_fn, + in_t, + in_t, + out_t + ) + } else { + String::new() + }; + let function = format!( + r#" +{} +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr({}))] +pub unsafe fn {}(a: {}, b: {}) -> {} {{ + {}{}(a, b) +}} +"#, + current_comment, current_aarch64, name, in_t, in_t, out_t, ext_c, current_fn, + ); + + let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t)); + (function, test) +} + +fn gen_test( + name: &str, + in_t: &str, + out_t: &str, + current_tests: &[(Vec, Vec, Vec)], + len: usize, +) -> String { + let mut test = format!( + r#" + #[simd_test(enable = "neon")] + unsafe fn test_{}() {{"#, + name, + ); + for (a, b, e) in current_tests { + let a: Vec = a.iter().take(len).cloned().collect(); + let b: Vec = b.iter().take(len).cloned().collect(); + let e: Vec = e.iter().take(len).cloned().collect(); + let t = format!( + r#" + let a{}; + let b{}; + let e{}; + let r: {} = transmute({}(transmute(a), transmute(b))); + assert_eq!(r, e); +"#, + values(in_t, &a), + values(in_t, &b), + values(out_t, &e), + type_to_global_type(out_t), + name + ); + test.push_str(&t); + } + test.push_str(" }\n"); + test +} + +#[allow(clippy::too_many_arguments)] +fn gen_arm( + current_comment: &str, + current_fn: &Option, + name: &str, + current_arm: &str, + link_arm: &Option, + current_aarch64: &Option, + link_aarch64: &Option, + in_t: &str, + out_t: &str, + current_tests: &[(Vec, Vec, Vec)], +) -> (String, String) { + let _global_t = type_to_global_type(in_t); + let _global_ret_t = type_to_global_type(out_t); + let current_aarch64 = current_aarch64 + .clone() + .unwrap_or_else(|| current_arm.to_string()); + + let current_fn = if let Some(current_fn) = current_fn.clone() { + if link_aarch64.is_some() || link_arm.is_some() { + panic!( + "[{}] Can't specify link and function at the same time. 
{} / {:?} / {:?}", + name, current_fn, link_aarch64, link_arm + ) + } + current_fn + } else { + if link_aarch64.is_none() || link_arm.is_none() { + panic!( + "[{}] Either fn or link-arm and link-aarch have to be specified.", + name + ) + } + format!("{}_", name) + }; + + let ext_c = + if let (Some(link_arm), Some(link_aarch64)) = (link_arm.clone(), link_aarch64.clone()) { + let ext = type_to_ext(in_t); + + format!( + r#"#[allow(improper_ctypes)] + extern "C" {{ + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.{}")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.{}")] + fn {}(a: {}, b: {}) -> {}; + }} +"#, + link_arm.replace("_EXT_", ext), + link_aarch64.replace("_EXT_", ext), + current_fn, + in_t, + in_t, + out_t + ) + } else { + String::new() + }; + + let function = format!( + r#" +{} +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr({}))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr({}))] +pub unsafe fn {}(a: {}, b: {}) -> {} {{ + {}{}(a, b) +}} +"#, + current_comment, + expand_intrinsic(¤t_arm, in_t), + expand_intrinsic(¤t_aarch64, in_t), + name, + in_t, + in_t, + out_t, + ext_c, + current_fn, + ); + let test = gen_test(name, &in_t, &out_t, current_tests, type_len(in_t)); + + (function, test) +} + +fn expand_intrinsic(intr: &str, t: &str) -> String { + if intr.ends_with(".") { + let ext = match t { + "int8x8_t" => "i8", + "int8x16_t" => "i8", + "int16x4_t" => "i16", + "int16x8_t" => "i16", + "int32x2_t" => "i32", + "int32x4_t" => "i32", + "int64x1_t" => "i64", + "int64x2_t" => "i64", + "uint8x8_t" => "i8", + "uint8x16_t" => "i8", + "uint16x4_t" => "i16", + "uint16x8_t" => "i16", + "uint32x2_t" => "i32", + "uint32x4_t" => "i32", + "uint64x1_t" => "i64", + "uint64x2_t" => "i64", + "float16x4_t" => "f16", + "float16x8_t" => "f16", + "float32x2_t" => "f32", + "float32x4_t" => "f32", + "float64x1_t" => "f64", + "float64x2_t" => "f64", + /* + "poly64x1_t" => "i64x1", + "poly64x2_t" => "i64x2", + */ + _ => panic!("unknown type for extension: {}", t), + }; + format!(r#""{}{}""#, intr, ext) + } else if intr.ends_with(".s") { + let ext = match t { + "int8x8_t" => "s8", + "int8x16_t" => "s8", + "int16x4_t" => "s16", + "int16x8_t" => "s16", + "int32x2_t" => "s32", + "int32x4_t" => "s32", + "int64x1_t" => "s64", + "int64x2_t" => "s64", + "uint8x8_t" => "u8", + "uint8x16_t" => "u8", + "uint16x4_t" => "u16", + "uint16x8_t" => "u16", + "uint32x2_t" => "u32", + "uint32x4_t" => "u32", + "uint64x1_t" => "u64", + "uint64x2_t" => "u64", + "float16x4_t" => "f16", + "float16x8_t" => "f16", + "float32x2_t" => "f32", + "float32x4_t" => "f32", + "float64x1_t" => "f64", + "float64x2_t" => "f64", + /* + "poly64x1_t" => "i64x1", + "poly64x2_t" => "i64x2", + */ + _ => panic!("unknown type for extension: {}", t), + }; + format!(r#""{}{}""#, &intr[..intr.len() - 1], ext) + } else { + intr.to_string() + } +} + +fn main() -> io::Result<()> { + let args: Vec = env::args().collect(); + let in_file = args.get(1).cloned().unwrap_or_else(|| IN.to_string()); + + let f = File::open(in_file).expect("Failed to open neon.spec"); + let f = BufReader::new(f); + + let mut current_comment = String::new(); + let mut current_name: Option = None; + let mut current_fn: Option = None; + let mut current_arm: Option = None; + let mut current_aarch64: Option = None; + let mut link_arm: Option = None; + let mut link_aarch64: Option = None; + let mut a: Vec = Vec::new(); + let 
+fn expand_intrinsic(intr: &str, t: &str) -> String {
+    if intr.ends_with(".") {
+        let ext = match t {
+            "int8x8_t" => "i8",
+            "int8x16_t" => "i8",
+            "int16x4_t" => "i16",
+            "int16x8_t" => "i16",
+            "int32x2_t" => "i32",
+            "int32x4_t" => "i32",
+            "int64x1_t" => "i64",
+            "int64x2_t" => "i64",
+            "uint8x8_t" => "i8",
+            "uint8x16_t" => "i8",
+            "uint16x4_t" => "i16",
+            "uint16x8_t" => "i16",
+            "uint32x2_t" => "i32",
+            "uint32x4_t" => "i32",
+            "uint64x1_t" => "i64",
+            "uint64x2_t" => "i64",
+            "float16x4_t" => "f16",
+            "float16x8_t" => "f16",
+            "float32x2_t" => "f32",
+            "float32x4_t" => "f32",
+            "float64x1_t" => "f64",
+            "float64x2_t" => "f64",
+            /*
+            "poly64x1_t" => "i64x1",
+            "poly64x2_t" => "i64x2",
+            */
+            _ => panic!("unknown type for extension: {}", t),
+        };
+        format!(r#""{}{}""#, intr, ext)
+    } else if intr.ends_with(".s") {
+        let ext = match t {
+            "int8x8_t" => "s8",
+            "int8x16_t" => "s8",
+            "int16x4_t" => "s16",
+            "int16x8_t" => "s16",
+            "int32x2_t" => "s32",
+            "int32x4_t" => "s32",
+            "int64x1_t" => "s64",
+            "int64x2_t" => "s64",
+            "uint8x8_t" => "u8",
+            "uint8x16_t" => "u8",
+            "uint16x4_t" => "u16",
+            "uint16x8_t" => "u16",
+            "uint32x2_t" => "u32",
+            "uint32x4_t" => "u32",
+            "uint64x1_t" => "u64",
+            "uint64x2_t" => "u64",
+            "float16x4_t" => "f16",
+            "float16x8_t" => "f16",
+            "float32x2_t" => "f32",
+            "float32x4_t" => "f32",
+            "float64x1_t" => "f64",
+            "float64x2_t" => "f64",
+            /*
+            "poly64x1_t" => "i64x1",
+            "poly64x2_t" => "i64x2",
+            */
+            _ => panic!("unknown type for extension: {}", t),
+        };
+        format!(r#""{}{}""#, &intr[..intr.len() - 1], ext)
+    } else {
+        intr.to_string()
+    }
+}
+
+fn main() -> io::Result<()> {
+    let args: Vec<String> = env::args().collect();
+    let in_file = args.get(1).cloned().unwrap_or_else(|| IN.to_string());
+
+    let f = File::open(in_file).expect("Failed to open neon.spec");
+    let f = BufReader::new(f);
+
+    let mut current_comment = String::new();
+    let mut current_name: Option<String> = None;
+    let mut current_fn: Option<String> = None;
+    let mut current_arm: Option<String> = None;
+    let mut current_aarch64: Option<String> = None;
+    let mut link_arm: Option<String> = None;
+    let mut link_aarch64: Option<String> = None;
+    let mut a: Vec<String> = Vec::new();
+    let mut b: Vec<String> = Vec::new();
+    let mut current_tests: Vec<(Vec<String>, Vec<String>, Vec<String>)> = Vec::new();
+
+    //
+    // THIS FILE IS GENERATED FROM neon.spec, DO NOT CHANGE IT MANUALLY
+    //
+    let mut out_arm = String::from(
+        r#"// This code is automatically generated. DO NOT MODIFY.
+//
+// Instead, modify `crates/stdarch-gen/neon.spec` and run the following command to re-generate this file:
+//
+// ```
+// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec
+// ```
+use super::*;
+#[cfg(test)]
+use stdarch_test::assert_instr;
+"#,
+    );
+    let mut tests_arm = String::from(
+        r#"
+#[cfg(test)]
+#[allow(overflowing_literals)]
+mod test {
+    use super::*;
+    use crate::core_arch::simd::*;
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
+"#,
+    );
+    //
+    // THIS FILE IS GENERATED FROM neon.spec, DO NOT CHANGE IT MANUALLY
+    //
+    let mut out_aarch64 = String::from(
+        r#"// This code is automatically generated. DO NOT MODIFY.
+//
+// Instead, modify `crates/stdarch-gen/neon.spec` and run the following command to re-generate this file:
+//
+// ```
+// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec
+// ```
+use super::*;
+#[cfg(test)]
+use stdarch_test::assert_instr;
+"#,
+    );
+    let mut tests_aarch64 = String::from(
+        r#"
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::core_arch::simd::*;
+    use std::mem::transmute;
+    use stdarch_test::simd_test;
+"#,
+    );
+
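+    // For orientation, this is roughly what a neon.spec entry looks like to
+    // the parser below. The entry is made up for illustration only; the real
+    // entries live in crates/stdarch-gen/neon.spec:
+    //
+    //     /// Compare bitwise Equal (vector)
+    //     name = vceq
+    //     fn = simd_eq
+    //     aarch64 = cmeq
+    //     a = 1, 2
+    //     b = 1, 2
+    //     validate 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
+    //     generate uint64x*_t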
+    for line in f.lines() {
+        let line = line.unwrap();
+        if line.is_empty() {
+            continue;
+        }
+        if line.starts_with("/// ") {
+            current_comment = line;
+            current_name = None;
+            current_fn = None;
+            current_arm = None;
+            current_aarch64 = None;
+            link_aarch64 = None;
+            link_arm = None;
+            current_tests = Vec::new();
+        } else if line.starts_with("//") {
+            // Plain comment lines are skipped.
+        } else if line.starts_with("name = ") {
+            current_name = Some(String::from(&line[7..]));
+        } else if line.starts_with("fn = ") {
+            current_fn = Some(String::from(&line[5..]));
+        } else if line.starts_with("arm = ") {
+            current_arm = Some(String::from(&line[6..]));
+        } else if line.starts_with("aarch64 = ") {
+            current_aarch64 = Some(String::from(&line[10..]));
+        } else if line.starts_with("a = ") {
+            a = line[4..].split(',').map(|v| v.trim().to_string()).collect();
+        } else if line.starts_with("b = ") {
+            b = line[4..].split(',').map(|v| v.trim().to_string()).collect();
+        } else if line.starts_with("validate ") {
+            let e = line[9..].split(',').map(|v| v.trim().to_string()).collect();
+            current_tests.push((a.clone(), b.clone(), e));
+        } else if line.starts_with("link-aarch64 = ") {
+            link_aarch64 = Some(String::from(&line[15..]));
+        } else if line.starts_with("link-arm = ") {
+            link_arm = Some(String::from(&line[11..]));
+        } else if line.starts_with("generate ") {
+            let line = &line[9..];
+            let types: Vec<String> = line
+                .split(',')
+                .map(|v| v.trim().to_string())
+                .flat_map(|v| match v.as_str() {
+                    "uint*_t" => UINT_TYPES.iter().map(|v| v.to_string()).collect(),
+                    "uint64x*_t" => UINT_TYPES_64.iter().map(|v| v.to_string()).collect(),
+                    "int*_t" => INT_TYPES.iter().map(|v| v.to_string()).collect(),
+                    "int64x*_t" => INT_TYPES_64.iter().map(|v| v.to_string()).collect(),
+                    "float*_t" => FLOAT_TYPES.iter().map(|v| v.to_string()).collect(),
+                    "float64x*_t" => FLOAT_TYPES_64.iter().map(|v| v.to_string()).collect(),
+                    _ => vec![v],
+                })
+                .collect();
+
+            for line in types {
+                let spec: Vec<&str> = line.split(':').map(|e| e.trim()).collect();
+                let in_t;
+                let out_t;
+                if spec.len() == 1 {
+                    in_t = spec[0];
+                    out_t = spec[0];
+                } else if spec.len() == 2 {
+                    in_t = spec[0];
+                    out_t = spec[1];
+                } else {
+                    panic!("Bad spec: {}", line)
+                }
+                let current_name = current_name.clone().unwrap();
+                let name = format!("{}{}", current_name, type_to_suffix(in_t));
+
+                if let Some(current_arm) = current_arm.clone() {
+                    let (function, test) = gen_arm(
+                        &current_comment,
+                        &current_fn,
+                        &name,
+                        &current_arm,
+                        &link_arm,
+                        &current_aarch64,
+                        &link_aarch64,
+                        &in_t,
+                        &out_t,
+                        &current_tests,
+                    );
+                    out_arm.push_str(&function);
+                    tests_arm.push_str(&test);
+                } else {
+                    let (function, test) = gen_aarch64(
+                        &current_comment,
+                        &current_fn,
+                        &name,
+                        &current_aarch64,
+                        &link_aarch64,
+                        &in_t,
+                        &out_t,
+                        &current_tests,
+                    );
+                    out_aarch64.push_str(&function);
+                    tests_aarch64.push_str(&test);
+                }
+            }
+        }
+    }
+    tests_arm.push('}');
+    tests_arm.push('\n');
+    tests_aarch64.push('}');
+    tests_aarch64.push('\n');
+
+    let arm_out_path: PathBuf = PathBuf::from(env::var("OUT_DIR").unwrap())
+        .join("src")
+        .join("arm")
+        .join("neon");
+    std::fs::create_dir_all(&arm_out_path)?;
+
+    let mut file_arm = File::create(arm_out_path.join(ARM_OUT))?;
+    file_arm.write_all(out_arm.as_bytes())?;
+    file_arm.write_all(tests_arm.as_bytes())?;
+
+    let aarch64_out_path: PathBuf = PathBuf::from(env::var("OUT_DIR").unwrap())
+        .join("src")
+        .join("aarch64")
+        .join("neon");
+    std::fs::create_dir_all(&aarch64_out_path)?;
+
+    let mut file_aarch = File::create(aarch64_out_path.join(AARCH64_OUT))?;
+    file_aarch.write_all(out_aarch64.as_bytes())?;
+    file_aarch.write_all(tests_aarch64.as_bytes())?;
+    /*
+    if let Err(e) = Command::new("rustfmt")
+        .arg(&arm_out_path)
+        .arg(&aarch64_out_path)
+        .status() {
+            eprintln!("Could not format `{}`: {}", arm_out_path.to_str().unwrap(), e);
+            eprintln!("Could not format `{}`: {}", aarch64_out_path.to_str().unwrap(), e);
+    };
+    */
+    Ok(())
+}
diff --git a/crates/stdarch-test/src/lib.rs b/crates/stdarch-test/src/lib.rs
index 4e25d2a02d..fa73a7bba6 100644
--- a/crates/stdarch-test/src/lib.rs
+++ b/crates/stdarch-test/src/lib.rs
@@ -88,6 +88,12 @@ pub fn assert(_fnptr: usize, fnname: &str, expected: &str) {
         instrs = &instrs[..instrs.len() - 1];
     }
 
+    // If the expected instruction is a nop, it is compiled away, so we
+    // can't check for it: the intrinsic does not generate any code.
+    if expected == "nop" {
+        return;
+    }
+
     // Look for `expected` as the first part of any instruction in this
     // function, e.g., tzcntl in tzcntl %rax,%rax.
     let found = instrs.iter().any(|s| s.starts_with(expected));
diff --git a/crates/stdarch-verify/src/lib.rs b/crates/stdarch-verify/src/lib.rs
index d71623c7f3..c56fb0de7e 100644
--- a/crates/stdarch-verify/src/lib.rs
+++ b/crates/stdarch-verify/src/lib.rs
@@ -204,11 +204,13 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
             "poly8x16x2_t" => quote! { &POLY8X16X2 },
             "poly8x16x3_t" => quote! { &POLY8X16X3 },
             "poly8x16x4_t" => quote! { &POLY8X16X4 },
+            "poly64_t" => quote! { &P64 },
             "poly64x1_t" => quote! { &POLY64X1 },
             "poly64x2_t" => quote! { &POLY64X2 },
             "poly8x16_t" => quote! { &POLY8X16 },
             "poly16x4_t" => quote! { &POLY16X4 },
             "poly16x8_t" => quote! { &POLY16X8 },
+            "poly128_t" => quote! { &P128 },
 
             "v16i8" => quote! { &v16i8 },
             "v8i16" => quote! { &v8i16 },
@@ -222,7 +224,7 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
             "v4f32" => quote! { &v4f32 },
             "v2f64" => quote! { &v2f64 },
 
-            s => panic!("unspported type: \"{}\"", s),
+            s => panic!("unsupported type: \"{}\"", s),
         },
         syn::Type::Ptr(syn::TypePtr {
             ref elem,