From 5b1fda15f148337f62d5c3b68b369f50a9184f91 Mon Sep 17 00:00:00 2001 From: reucru01 Date: Fri, 24 Oct 2025 10:15:01 +0100 Subject: [PATCH 1/3] Fixes generator, neon intrinics now build in debug This fixes build issues associated with failing LLVM const param assertions --- .../src/arm_shared/neon/generated.rs | 1128 ++++++----------- .../spec/neon/arm_shared.spec.yml | 335 ++--- crates/stdarch-gen-arm/src/expression.rs | 17 + crates/stdarch-gen-arm/src/intrinsic.rs | 3 +- 4 files changed, 565 insertions(+), 918 deletions(-) diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index b5ba792b18..c1bd70175c 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -15137,268 +15137,104 @@ pub unsafe fn vld1q_f16_x4(a: *const f16) -> float16x8x4_t { #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { - transmute(vld1_v2f32( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -pub unsafe fn vld1_f32(ptr: *const f32) -> float32x2_t { - let ret_val: float32x2_t = transmute(vld1_v2f32( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [1, 0]) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v2f32::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { - transmute(vld1q_v4f32( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_f32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] -pub unsafe fn vld1q_f32(ptr: *const f32) -> float32x4_t { - let ret_val: float32x4_t = transmute(vld1q_v4f32( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "little")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { - transmute(vld1_v8i8( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v4f32::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] pub unsafe fn vld1_u8(ptr: *const u8) -> uint8x8_t { - let ret_val: uint8x8_t = transmute(vld1_v8i8( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v8i8::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { - transmute(vld1q_v16i8( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u8)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1q_u8(ptr: *const u8) -> uint8x16_t { - let ret_val: uint8x16_t = transmute(vld1q_v16i8( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "little")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { - transmute(vld1_v4i16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v16i8::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] pub unsafe fn vld1_u16(ptr: *const u16) -> uint16x4_t { - let ret_val: uint16x4_t = transmute(vld1_v4i16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v4i16::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { - transmute(vld1q_v8i16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1q_u16(ptr: *const u16) -> uint16x8_t { - let ret_val: uint16x8_t = transmute(vld1q_v8i16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "little")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] -pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { - transmute(vld1_v2i32( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v8i16::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] pub unsafe fn vld1_u32(ptr: *const u32) -> uint32x2_t { - let ret_val: uint32x2_t = transmute(vld1_v2i32( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [1, 0]) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v2i32::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { - transmute(vld1q_v4i32( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] -pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { - let ret_val: uint32x4_t = transmute(vld1q_v4i32( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v4i32::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_u64)"] @@ -15410,212 +15246,86 @@ pub unsafe fn vld1q_u32(ptr: *const u32) -> uint32x4_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] pub unsafe fn vld1_u64(ptr: *const u64) -> uint64x1_t { - transmute(vld1_v1i64( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "little")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] -pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { - transmute(vld1q_v2i64( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v1i64::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_u64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] pub unsafe fn vld1q_u64(ptr: *const u64) -> uint64x2_t { - let ret_val: uint64x2_t = transmute(vld1q_v2i64( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [1, 0]) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v2i64::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { - transmute(vld1_v8i8( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1_p8(ptr: *const p8) -> poly8x8_t { - let ret_val: poly8x8_t = transmute(vld1_v8i8( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "little")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] -pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { - transmute(vld1q_v16i8( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v8i8::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p8)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] pub unsafe fn vld1q_p8(ptr: *const p8) -> poly8x16_t { - let ret_val: poly8x16_t = transmute(vld1q_v16i8( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!( - ret_val, - ret_val, - [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0] - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v16i8::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { - transmute(vld1_v4i16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1_p16(ptr: *const p16) -> poly16x4_t { - let ret_val: poly16x4_t = transmute(vld1_v4i16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "little")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,v7")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] -pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { - transmute(vld1q_v8i16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1_v4i16::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p16)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "big")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] pub unsafe fn vld1q_p16(ptr: *const p16) -> poly16x8_t { - let ret_val: poly16x8_t = transmute(vld1q_v8i16( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v8i16::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"] #[doc = "## Safety"] #[doc = " * Neon instrinsic unsafe"] #[inline] -#[cfg(target_endian = "little")] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,aes")] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t { - transmute(vld1q_v2i64( - ptr as *const i8, - crate::mem::align_of::() as i32, - )) -} -#[doc = "Load multiple single-element structures to one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_p64)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] -#[inline] -#[cfg(target_endian = "big")] -#[cfg(target_arch = "arm")] -#[target_feature(enable = "neon,aes")] -#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] -pub unsafe fn vld1q_p64(ptr: *const p64) -> poly64x2_t { - let ret_val: poly64x2_t = transmute(vld1q_v2i64( - ptr as *const i8, - crate::mem::align_of::() as i32, - )); - simd_shuffle!(ret_val, ret_val, [1, 0]) + const ALIGN: i32 = crate::mem::align_of::() as i32; + transmute(vld1q_v2i64::(ptr as *const i8)) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_f32_x2)"] @@ -16694,7 +16404,8 @@ pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { - vld1_v8i8(ptr as *const i8, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1_v8i8::(ptr as *const i8) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s8)"] @@ -16706,7 +16417,8 @@ pub unsafe fn vld1_s8(ptr: *const i8) -> int8x8_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { - vld1q_v16i8(ptr as *const i8, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1q_v16i8::(ptr as *const i8) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s16)"] @@ -16718,7 +16430,8 @@ pub unsafe fn vld1q_s8(ptr: *const i8) -> int8x16_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { - vld1_v4i16(ptr as *const i8, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1_v4i16::(ptr as *const i8) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s16)"] @@ -16730,7 +16443,8 @@ pub unsafe fn vld1_s16(ptr: *const i16) -> int16x4_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { - vld1q_v8i16(ptr as *const i8, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1q_v8i16::(ptr as *const i8) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s32)"] @@ -16742,7 +16456,8 @@ pub unsafe fn vld1q_s16(ptr: *const i16) -> int16x8_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { - vld1_v2i32(ptr as *const i8, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1_v2i32::(ptr as *const i8) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s32)"] @@ -16754,7 +16469,8 @@ pub unsafe fn vld1_s32(ptr: *const i32) -> int32x2_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { - vld1q_v4i32(ptr as *const i8, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1q_v4i32::(ptr as *const i8) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s64)"] @@ -16766,7 +16482,8 @@ pub unsafe fn vld1q_s32(ptr: *const i32) -> int32x4_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vldr))] pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { - vld1_v1i64(ptr as *const i8, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1_v1i64::(ptr as *const i8) } #[doc = "Load multiple single-element structures to one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_s64)"] @@ -16778,7 +16495,8 @@ pub unsafe fn vld1_s64(ptr: *const i64) -> int64x1_t { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.64"))] pub unsafe fn vld1q_s64(ptr: *const i64) -> int64x2_t { - vld1q_v2i64(ptr as *const i8, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vld1q_v2i64::(ptr as *const i8) } #[doc = "Load multiple single-element structures to one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_s8_x2)"] @@ -19417,160 +19135,136 @@ pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t { ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [7, 6, 5, 4, 3, 2, 1, 0]) }; ret_val } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v1i64)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v1i64(a: *const i8, b: i32) -> int64x1_t { +unsafe fn vld1_v1i64(a: *const i8) -> int64x1_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v1i64")] fn _vld1_v1i64(a: *const i8, b: i32) -> int64x1_t; } - _vld1_v1i64(a, b) + _vld1_v1i64(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v2f32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v2f32(a: *const i8, b: i32) -> float32x2_t { +unsafe fn vld1_v2f32(a: *const i8) -> float32x2_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2f32")] fn _vld1_v2f32(a: *const i8, b: i32) -> float32x2_t; } - _vld1_v2f32(a, b) + _vld1_v2f32(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v2i32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v2i32(a: *const i8, b: i32) -> int32x2_t { +unsafe fn vld1_v2i32(a: *const i8) -> int32x2_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i32")] fn _vld1_v2i32(a: *const i8, b: i32) -> int32x2_t; } - _vld1_v2i32(a, b) + _vld1_v2i32(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v4i16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v4i16(a: *const i8, b: i32) -> int16x4_t { +unsafe fn vld1_v4i16(a: *const i8) -> int16x4_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i16")] fn _vld1_v4i16(a: *const i8, b: i32) -> int16x4_t; } - _vld1_v4i16(a, b) + _vld1_v4i16(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v8i8)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1_v8i8(a: *const i8, b: i32) -> int8x8_t { +unsafe fn vld1_v8i8(a: *const i8) -> int8x8_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i8")] fn _vld1_v8i8(a: *const i8, b: i32) -> int8x8_t; } - _vld1_v8i8(a, b) + _vld1_v8i8(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v16i8)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v16i8(a: *const i8, b: i32) -> int8x16_t { +unsafe fn vld1q_v16i8(a: *const i8) -> int8x16_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v16i8")] fn _vld1q_v16i8(a: *const i8, b: i32) -> int8x16_t; } - _vld1q_v16i8(a, b) + _vld1q_v16i8(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v2i64)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v2i64(a: *const i8, b: i32) -> int64x2_t { +unsafe fn vld1q_v2i64(a: *const i8) -> int64x2_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v2i64")] fn _vld1q_v2i64(a: *const i8, b: i32) -> int64x2_t; } - _vld1q_v2i64(a, b) + _vld1q_v2i64(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v4f32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v4f32(a: *const i8, b: i32) -> float32x4_t { +unsafe fn vld1q_v4f32(a: *const i8) -> float32x4_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4f32")] fn _vld1q_v4f32(a: *const i8, b: i32) -> float32x4_t; } - _vld1q_v4f32(a, b) + _vld1q_v4f32(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v4i32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v4i32(a: *const i8, b: i32) -> int32x4_t { +unsafe fn vld1q_v4i32(a: *const i8) -> int32x4_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v4i32")] fn _vld1q_v4i32(a: *const i8, b: i32) -> int32x4_t; } - _vld1q_v4i32(a, b) + _vld1q_v4i32(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v8i16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] +#[rustc_legacy_const_generics(1)] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", ALIGN = 0))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -unsafe fn vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t { +unsafe fn vld1q_v8i16(a: *const i8) -> int16x8_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1.v8i16")] fn _vld1q_v8i16(a: *const i8, b: i32) -> int16x8_t; } - _vld1q_v8i16(a, b) + _vld1q_v8i16(a, ALIGN) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1_v4f16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] @@ -19585,10 +19279,6 @@ unsafe fn vld1_v4f16(a: *const i8, b: i32) -> float16x4_t { } _vld1_v4f16(a, b) } -#[doc = "Load multiple single-element structures to one, two, three, or four registers"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vld1q_v8f16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[cfg(target_arch = "arm")] #[target_feature(enable = "neon,v7")] @@ -61060,117 +60750,229 @@ pub fn vsha256su1q_u32(tw0_3: uint32x4_t, w8_11: uint32x4_t, w12_15: uint32x4_t) } unsafe { _vsha256su1q_u32(tw0_3, w8_11, w12_15) } } +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v16i8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")] + fn _vshiftlins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; + } + unsafe { _vshiftlins_v16i8(a, b, const { int8x16_t([N as i8; 16]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v1i64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")] + fn _vshiftlins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t; + } + unsafe { _vshiftlins_v1i64(a, b, const { int64x1_t([N as i64; 1]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")] + fn _vshiftlins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; + } + unsafe { _vshiftlins_v2i32(a, b, const { int32x2_t([N; 2]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v2i64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")] + fn _vshiftlins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t; + } + unsafe { _vshiftlins_v2i64(a, b, const { int64x2_t([N as i64; 2]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")] + fn _vshiftlins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; + } + unsafe { _vshiftlins_v4i16(a, b, const { int16x4_t([N as i16; 4]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v4i32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")] + fn _vshiftlins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + } + unsafe { _vshiftlins_v4i32(a, b, const { int32x4_t([N; 4]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v8i16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")] + fn _vshiftlins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; + } + unsafe { _vshiftlins_v8i16(a, b, const { int16x8_t([N as i16; 8]) }) } +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg(target_arch = "arm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] +#[rustc_legacy_const_generics(2)] +fn vshiftlins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + unsafe extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")] + fn _vshiftlins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + } + unsafe { _vshiftlins_v8i8(a, b, const { int8x8_t([N as i8; 8]) }) } +} #[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v16i8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v16i8)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -fn vshiftins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v16i8(a: int8x16_t, b: int8x16_t) -> int8x16_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v16i8")] - fn _vshiftins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; + fn _vshiftrins_v16i8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t; } - unsafe { _vshiftins_v16i8(a, b, c) } + unsafe { _vshiftrins_v16i8(a, b, const { int8x16_t([-N as i8; 16]) }) } } #[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v1i64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v1i64)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -fn vshiftins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t { +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v1i64(a: int64x1_t, b: int64x1_t) -> int64x1_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v1i64")] - fn _vshiftins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t; + fn _vshiftrins_v1i64(a: int64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t; } - unsafe { _vshiftins_v1i64(a, b, c) } + unsafe { _vshiftrins_v1i64(a, b, const { int64x1_t([-N as i64; 1]) }) } } #[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v2i32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i32)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -fn vshiftins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i32")] - fn _vshiftins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; + fn _vshiftrins_v2i32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t; } - unsafe { _vshiftins_v2i32(a, b, c) } + unsafe { _vshiftrins_v2i32(a, b, const { int32x2_t([-N; 2]) }) } } #[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v2i64)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v2i64)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -fn vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t { +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v2i64(a: int64x2_t, b: int64x2_t) -> int64x2_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v2i64")] - fn _vshiftins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t; + fn _vshiftrins_v2i64(a: int64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t; } - unsafe { _vshiftins_v2i64(a, b, c) } + unsafe { _vshiftrins_v2i64(a, b, const { int64x2_t([-N as i64; 2]) }) } } #[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i16)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -fn vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i16")] - fn _vshiftins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; + fn _vshiftrins_v4i16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t; } - unsafe { _vshiftins_v4i16(a, b, c) } + unsafe { _vshiftrins_v4i16(a, b, const { int16x4_t([-N as i16; 4]) }) } } #[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v4i32)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v4i32)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -fn vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v4i32(a: int32x4_t, b: int32x4_t) -> int32x4_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v4i32")] - fn _vshiftins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; + fn _vshiftrins_v4i32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t; } - unsafe { _vshiftins_v4i32(a, b, c) } + unsafe { _vshiftrins_v4i32(a, b, const { int32x4_t([-N; 4]) }) } } #[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i16)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i16)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -fn vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v8i16(a: int16x8_t, b: int16x8_t) -> int16x8_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i16")] - fn _vshiftins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; + fn _vshiftrins_v8i16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t; } - unsafe { _vshiftins_v8i16(a, b, c) } + unsafe { _vshiftrins_v8i16(a, b, const { int16x8_t([-N as i16; 8]) }) } } #[doc = "Shift Right and Insert (immediate)"] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftins_v8i8)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshiftrins_v8i8)"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -fn vshiftins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { +#[rustc_legacy_const_generics(2)] +fn vshiftrins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftins.v8i8")] - fn _vshiftins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; + fn _vshiftrins_v8i8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t; } - unsafe { _vshiftins_v8i8(a, b, c) } + unsafe { _vshiftrins_v8i8(a, b, const { int8x8_t([-N as i8; 8]) }) } } #[doc = "Shift left"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vshl_n_s8)"] @@ -62706,7 +62508,7 @@ pub fn vshrn_n_u64(a: uint64x2_t) -> uint32x2_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { static_assert_uimm_bits!(N, 3); - vshiftins_v8i8(a, b, int8x8_t::splat(N as i8)) + vshiftlins_v8i8::(a, b) } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s8)"] @@ -62718,7 +62520,7 @@ pub fn vsli_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { static_assert_uimm_bits!(N, 3); - vshiftins_v16i8(a, b, int8x16_t::splat(N as i8)) + vshiftlins_v16i8::(a, b) } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s16)"] @@ -62730,7 +62532,7 @@ pub fn vsliq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { static_assert_uimm_bits!(N, 4); - vshiftins_v4i16(a, b, int16x4_t::splat(N as i16)) + vshiftlins_v4i16::(a, b) } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s16)"] @@ -62742,7 +62544,7 @@ pub fn vsli_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { static_assert_uimm_bits!(N, 4); - vshiftins_v8i16(a, b, int16x8_t::splat(N as i16)) + vshiftlins_v8i16::(a, b) } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s32)"] @@ -62754,7 +62556,7 @@ pub fn vsliq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { static_assert!(N >= 0 && N <= 31); - vshiftins_v2i32(a, b, int32x2_t::splat(N)) + vshiftlins_v2i32::(a, b) } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s32)"] @@ -62766,7 +62568,7 @@ pub fn vsli_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { static_assert!(N >= 0 && N <= 31); - vshiftins_v4i32(a, b, int32x4_t::splat(N)) + vshiftlins_v4i32::(a, b) } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_s64)"] @@ -62778,7 +62580,7 @@ pub fn vsliq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { static_assert!(N >= 0 && N <= 63); - vshiftins_v1i64(a, b, int64x1_t::splat(N as i64)) + vshiftlins_v1i64::(a, b) } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_s64)"] @@ -62790,7 +62592,7 @@ pub fn vsli_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { static_assert!(N >= 0 && N <= 63); - vshiftins_v2i64(a, b, int64x2_t::splat(N as i64)) + vshiftlins_v2i64::(a, b) } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u8)"] @@ -62802,13 +62604,7 @@ pub fn vsliq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { static_assert_uimm_bits!(N, 3); - unsafe { - transmute(vshiftins_v8i8( - transmute(a), - transmute(b), - int8x8_t::splat(N as i8), - )) - } + unsafe { transmute(vshiftlins_v8i8::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u8)"] @@ -62820,13 +62616,7 @@ pub fn vsli_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { static_assert_uimm_bits!(N, 3); - unsafe { - transmute(vshiftins_v16i8( - transmute(a), - transmute(b), - int8x16_t::splat(N as i8), - )) - } + unsafe { transmute(vshiftlins_v16i8::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u16)"] @@ -62838,13 +62628,7 @@ pub fn vsliq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { static_assert_uimm_bits!(N, 4); - unsafe { - transmute(vshiftins_v4i16( - transmute(a), - transmute(b), - int16x4_t::splat(N as i16), - )) - } + unsafe { transmute(vshiftlins_v4i16::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u16)"] @@ -62856,13 +62640,7 @@ pub fn vsli_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { static_assert_uimm_bits!(N, 4); - unsafe { - transmute(vshiftins_v8i16( - transmute(a), - transmute(b), - int16x8_t::splat(N as i16), - )) - } + unsafe { transmute(vshiftlins_v8i16::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u32)"] @@ -62874,13 +62652,7 @@ pub fn vsliq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { static_assert!(N >= 0 && N <= 31); - unsafe { - transmute(vshiftins_v2i32( - transmute(a), - transmute(b), - int32x2_t::splat(N), - )) - } + unsafe { transmute(vshiftlins_v2i32::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u32)"] @@ -62892,13 +62664,7 @@ pub fn vsli_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { static_assert!(N >= 0 && N <= 31); - unsafe { - transmute(vshiftins_v4i32( - transmute(a), - transmute(b), - int32x4_t::splat(N), - )) - } + unsafe { transmute(vshiftlins_v4i32::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_u64)"] @@ -62910,13 +62676,7 @@ pub fn vsliq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { static_assert!(N >= 0 && N <= 63); - unsafe { - transmute(vshiftins_v1i64( - transmute(a), - transmute(b), - int64x1_t::splat(N as i64), - )) - } + unsafe { transmute(vshiftlins_v1i64::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_u64)"] @@ -62928,13 +62688,7 @@ pub fn vsli_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { static_assert!(N >= 0 && N <= 63); - unsafe { - transmute(vshiftins_v2i64( - transmute(a), - transmute(b), - int64x2_t::splat(N as i64), - )) - } + unsafe { transmute(vshiftlins_v2i64::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p8)"] @@ -62946,13 +62700,7 @@ pub fn vsliq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { static_assert_uimm_bits!(N, 3); - unsafe { - transmute(vshiftins_v8i8( - transmute(a), - transmute(b), - int8x8_t::splat(N as i8), - )) - } + unsafe { transmute(vshiftlins_v8i8::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p8)"] @@ -62964,13 +62712,7 @@ pub fn vsli_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { static_assert_uimm_bits!(N, 3); - unsafe { - transmute(vshiftins_v16i8( - transmute(a), - transmute(b), - int8x16_t::splat(N as i8), - )) - } + unsafe { transmute(vshiftlins_v16i8::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsli_n_p16)"] @@ -62982,13 +62724,7 @@ pub fn vsliq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { #[rustc_legacy_const_generics(2)] pub fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { static_assert_uimm_bits!(N, 4); - unsafe { - transmute(vshiftins_v4i16( - transmute(a), - transmute(b), - int16x4_t::splat(N as i16), - )) - } + unsafe { transmute(vshiftlins_v4i16::(transmute(a), transmute(b))) } } #[doc = "Shift Left and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsliq_n_p16)"] @@ -63000,13 +62736,7 @@ pub fn vsli_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { #[rustc_legacy_const_generics(2)] pub fn vsliq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { static_assert_uimm_bits!(N, 4); - unsafe { - transmute(vshiftins_v8i16( - transmute(a), - transmute(b), - int16x8_t::splat(N as i16), - )) - } + unsafe { transmute(vshiftlins_v8i16::(transmute(a), transmute(b))) } } #[doc = "Signed shift right and accumulate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsra_n_s8)"] @@ -63386,7 +63116,7 @@ pub fn vsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { static_assert!(1 <= N && N <= 8); - vshiftins_v8i8(a, b, int8x8_t::splat(-N as i8)) + vshiftrins_v8i8::(a, b) } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s8)"] @@ -63398,7 +63128,7 @@ pub fn vsri_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { static_assert!(1 <= N && N <= 8); - vshiftins_v16i8(a, b, int8x16_t::splat(-N as i8)) + vshiftrins_v16i8::(a, b) } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s16)"] @@ -63410,7 +63140,7 @@ pub fn vsriq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { static_assert!(1 <= N && N <= 16); - vshiftins_v4i16(a, b, int16x4_t::splat(-N as i16)) + vshiftrins_v4i16::(a, b) } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s16)"] @@ -63422,7 +63152,7 @@ pub fn vsri_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { static_assert!(1 <= N && N <= 16); - vshiftins_v8i16(a, b, int16x8_t::splat(-N as i16)) + vshiftrins_v8i16::(a, b) } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s32)"] @@ -63434,7 +63164,7 @@ pub fn vsriq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { static_assert!(1 <= N && N <= 32); - vshiftins_v2i32(a, b, int32x2_t::splat(-N)) + vshiftrins_v2i32::(a, b) } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s32)"] @@ -63446,7 +63176,7 @@ pub fn vsri_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { static_assert!(1 <= N && N <= 32); - vshiftins_v4i32(a, b, int32x4_t::splat(-N)) + vshiftrins_v4i32::(a, b) } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_s64)"] @@ -63458,7 +63188,7 @@ pub fn vsriq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { static_assert!(1 <= N && N <= 64); - vshiftins_v1i64(a, b, int64x1_t::splat(-N as i64)) + vshiftrins_v1i64::(a, b) } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_s64)"] @@ -63470,7 +63200,7 @@ pub fn vsri_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { static_assert!(1 <= N && N <= 64); - vshiftins_v2i64(a, b, int64x2_t::splat(-N as i64)) + vshiftrins_v2i64::(a, b) } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u8)"] @@ -63482,13 +63212,7 @@ pub fn vsriq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { static_assert!(1 <= N && N <= 8); - unsafe { - transmute(vshiftins_v8i8( - transmute(a), - transmute(b), - int8x8_t::splat(-N as i8), - )) - } + unsafe { transmute(vshiftrins_v8i8::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u8)"] @@ -63500,13 +63224,7 @@ pub fn vsri_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { static_assert!(1 <= N && N <= 8); - unsafe { - transmute(vshiftins_v16i8( - transmute(a), - transmute(b), - int8x16_t::splat(-N as i8), - )) - } + unsafe { transmute(vshiftrins_v16i8::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u16)"] @@ -63518,13 +63236,7 @@ pub fn vsriq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { static_assert!(1 <= N && N <= 16); - unsafe { - transmute(vshiftins_v4i16( - transmute(a), - transmute(b), - int16x4_t::splat(-N as i16), - )) - } + unsafe { transmute(vshiftrins_v4i16::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u16)"] @@ -63536,13 +63248,7 @@ pub fn vsri_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { static_assert!(1 <= N && N <= 16); - unsafe { - transmute(vshiftins_v8i16( - transmute(a), - transmute(b), - int16x8_t::splat(-N as i16), - )) - } + unsafe { transmute(vshiftrins_v8i16::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u32)"] @@ -63554,13 +63260,7 @@ pub fn vsriq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { static_assert!(1 <= N && N <= 32); - unsafe { - transmute(vshiftins_v2i32( - transmute(a), - transmute(b), - int32x2_t::splat(-N), - )) - } + unsafe { transmute(vshiftrins_v2i32::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u32)"] @@ -63572,13 +63272,7 @@ pub fn vsri_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { static_assert!(1 <= N && N <= 32); - unsafe { - transmute(vshiftins_v4i32( - transmute(a), - transmute(b), - int32x4_t::splat(-N), - )) - } + unsafe { transmute(vshiftrins_v4i32::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_u64)"] @@ -63590,13 +63284,7 @@ pub fn vsriq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { static_assert!(1 <= N && N <= 64); - unsafe { - transmute(vshiftins_v1i64( - transmute(a), - transmute(b), - int64x1_t::splat(-N as i64), - )) - } + unsafe { transmute(vshiftrins_v1i64::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_u64)"] @@ -63608,13 +63296,7 @@ pub fn vsri_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { static_assert!(1 <= N && N <= 64); - unsafe { - transmute(vshiftins_v2i64( - transmute(a), - transmute(b), - int64x2_t::splat(-N as i64), - )) - } + unsafe { transmute(vshiftrins_v2i64::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p8)"] @@ -63626,13 +63308,7 @@ pub fn vsriq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { static_assert!(1 <= N && N <= 8); - unsafe { - transmute(vshiftins_v8i8( - transmute(a), - transmute(b), - int8x8_t::splat(-N as i8), - )) - } + unsafe { transmute(vshiftrins_v8i8::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p8)"] @@ -63644,13 +63320,7 @@ pub fn vsri_n_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { static_assert!(1 <= N && N <= 8); - unsafe { - transmute(vshiftins_v16i8( - transmute(a), - transmute(b), - int8x16_t::splat(-N as i8), - )) - } + unsafe { transmute(vshiftrins_v16i8::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsri_n_p16)"] @@ -63662,13 +63332,7 @@ pub fn vsriq_n_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { #[rustc_legacy_const_generics(2)] pub fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { static_assert!(1 <= N && N <= 16); - unsafe { - transmute(vshiftins_v4i16( - transmute(a), - transmute(b), - int16x4_t::splat(-N as i16), - )) - } + unsafe { transmute(vshiftrins_v4i16::(transmute(a), transmute(b))) } } #[doc = "Shift Right and Insert (immediate)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsriq_n_p16)"] @@ -63680,13 +63344,7 @@ pub fn vsri_n_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { #[rustc_legacy_const_generics(2)] pub fn vsriq_n_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { static_assert!(1 <= N && N <= 16); - unsafe { - transmute(vshiftins_v8i16( - transmute(a), - transmute(b), - int16x8_t::splat(-N as i16), - )) - } + unsafe { transmute(vshiftrins_v8i16::(transmute(a), transmute(b))) } } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f16)"] @@ -63987,11 +63645,8 @@ pub unsafe fn vst1q_f16_x4(a: *mut f16, b: float16x8x4_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { - vst1_v2f32( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v2f32::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_f32)"] @@ -64004,11 +63659,8 @@ pub unsafe fn vst1_f32(ptr: *mut f32, a: float32x2_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { - vst1q_v4f32( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v4f32::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s8)"] @@ -64021,7 +63673,8 @@ pub unsafe fn vst1q_f32(ptr: *mut f32, a: float32x4_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { - vst1_v8i8(ptr as *const i8, a, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v8i8::(ptr as *const i8, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s8)"] @@ -64034,7 +63687,8 @@ pub unsafe fn vst1_s8(ptr: *mut i8, a: int8x8_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { - vst1q_v16i8(ptr as *const i8, a, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v16i8::(ptr as *const i8, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s16)"] @@ -64047,7 +63701,8 @@ pub unsafe fn vst1q_s8(ptr: *mut i8, a: int8x16_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { - vst1_v4i16(ptr as *const i8, a, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v4i16::(ptr as *const i8, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s16)"] @@ -64060,7 +63715,8 @@ pub unsafe fn vst1_s16(ptr: *mut i16, a: int16x4_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { - vst1q_v8i16(ptr as *const i8, a, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v8i16::(ptr as *const i8, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s32)"] @@ -64073,7 +63729,8 @@ pub unsafe fn vst1q_s16(ptr: *mut i16, a: int16x8_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { - vst1_v2i32(ptr as *const i8, a, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v2i32::(ptr as *const i8, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s32)"] @@ -64086,7 +63743,8 @@ pub unsafe fn vst1_s32(ptr: *mut i32, a: int32x2_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { - vst1q_v4i32(ptr as *const i8, a, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v4i32::(ptr as *const i8, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_s64)"] @@ -64099,7 +63757,8 @@ pub unsafe fn vst1q_s32(ptr: *mut i32, a: int32x4_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { - vst1_v1i64(ptr as *const i8, a, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v1i64::(ptr as *const i8, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_s64)"] @@ -64112,7 +63771,8 @@ pub unsafe fn vst1_s64(ptr: *mut i64, a: int64x1_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { - vst1q_v2i64(ptr as *const i8, a, crate::mem::align_of::() as i32) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v2i64::(ptr as *const i8, a) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u8)"] @@ -64125,11 +63785,8 @@ pub unsafe fn vst1q_s64(ptr: *mut i64, a: int64x2_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { - vst1_v8i8( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v8i8::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u8)"] @@ -64142,11 +63799,8 @@ pub unsafe fn vst1_u8(ptr: *mut u8, a: uint8x8_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { - vst1q_v16i8( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v16i8::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u16)"] @@ -64159,11 +63813,8 @@ pub unsafe fn vst1q_u8(ptr: *mut u8, a: uint8x16_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { - vst1_v4i16( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v4i16::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u16)"] @@ -64176,11 +63827,8 @@ pub unsafe fn vst1_u16(ptr: *mut u16, a: uint16x4_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { - vst1q_v8i16( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v8i16::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u32)"] @@ -64193,11 +63841,8 @@ pub unsafe fn vst1q_u16(ptr: *mut u16, a: uint16x8_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { - vst1_v2i32( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v2i32::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u32)"] @@ -64210,11 +63855,8 @@ pub unsafe fn vst1_u32(ptr: *mut u32, a: uint32x2_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { - vst1q_v4i32( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v4i32::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_u64)"] @@ -64227,11 +63869,8 @@ pub unsafe fn vst1q_u32(ptr: *mut u32, a: uint32x4_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { - vst1_v1i64( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v1i64::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_u64)"] @@ -64244,11 +63883,8 @@ pub unsafe fn vst1_u64(ptr: *mut u64, a: uint64x1_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { - vst1q_v2i64( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v2i64::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p8)"] @@ -64261,11 +63897,8 @@ pub unsafe fn vst1q_u64(ptr: *mut u64, a: uint64x2_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { - vst1_v8i8( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v8i8::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p8)"] @@ -64278,11 +63911,8 @@ pub unsafe fn vst1_p8(ptr: *mut p8, a: poly8x8_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { - vst1q_v16i8( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v16i8::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p16)"] @@ -64295,11 +63925,8 @@ pub unsafe fn vst1q_p8(ptr: *mut p8, a: poly8x16_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { - vst1_v4i16( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v4i16::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p16)"] @@ -64312,11 +63939,8 @@ pub unsafe fn vst1_p16(ptr: *mut p16, a: poly16x4_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { - vst1q_v8i16( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v8i16::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_p64)"] @@ -64329,11 +63953,8 @@ pub unsafe fn vst1q_p16(ptr: *mut p16, a: poly16x8_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { - vst1_v1i64( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1_v1i64::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_p64)"] @@ -64346,11 +63967,8 @@ pub unsafe fn vst1_p64(ptr: *mut p64, a: poly64x1_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] pub unsafe fn vst1q_p64(ptr: *mut p64, a: poly64x2_t) { - vst1q_v2i64( - ptr as *const i8, - transmute(a), - crate::mem::align_of::() as i32, - ) + const ALIGN: i32 = crate::mem::align_of::() as i32; + vst1q_v2i64::(ptr as *const i8, transmute(a)) } #[doc = "Store multiple single-element structures to one, two, three, or four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_f32_x2)"] @@ -66973,175 +66591,145 @@ pub unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t) { pub unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t) { vst1q_s16_x4(transmute(a), transmute(b)) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v1i64)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] -unsafe fn vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v1i64(addr: *const i8, val: int64x1_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v1i64.p0")] fn _vst1_v1i64(addr: *const i8, val: int64x1_t, align: i32); } - _vst1_v1i64(addr, val, align) + _vst1_v1i64(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v2f32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -unsafe fn vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v2f32(addr: *const i8, val: float32x2_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2f32.p0")] fn _vst1_v2f32(addr: *const i8, val: float32x2_t, align: i32); } - _vst1_v2f32(addr, val, align) + _vst1_v2f32(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v2i32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -unsafe fn vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v2i32(addr: *const i8, val: int32x2_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i32.p0")] fn _vst1_v2i32(addr: *const i8, val: int32x2_t, align: i32); } - _vst1_v2i32(addr, val, align) + _vst1_v2i32(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v4i16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -unsafe fn vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v4i16(addr: *const i8, val: int16x4_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i16.p0")] fn _vst1_v4i16(addr: *const i8, val: int16x4_t, align: i32); } - _vst1_v4i16(addr, val, align) + _vst1_v4i16(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v8i8)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] -unsafe fn vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1_v8i8(addr: *const i8, val: int8x8_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i8.p0")] fn _vst1_v8i8(addr: *const i8, val: int8x8_t, align: i32); } - _vst1_v8i8(addr, val, align) + _vst1_v8i8(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v16i8)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8"))] -unsafe fn vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.8", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v16i8(addr: *const i8, val: int8x16_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v16i8.p0")] fn _vst1q_v16i8(addr: *const i8, val: int8x16_t, align: i32); } - _vst1q_v16i8(addr, val, align) + _vst1q_v16i8(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v2i64)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64"))] -unsafe fn vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.64", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v2i64(addr: *const i8, val: int64x2_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v2i64.p0")] fn _vst1q_v2i64(addr: *const i8, val: int64x2_t, align: i32); } - _vst1q_v2i64(addr, val, align) + _vst1q_v2i64(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v4f32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -unsafe fn vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v4f32(addr: *const i8, val: float32x4_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4f32.p0")] fn _vst1q_v4f32(addr: *const i8, val: float32x4_t, align: i32); } - _vst1q_v4f32(addr, val, align) + _vst1q_v4f32(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v4i32)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32"))] -unsafe fn vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.32", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v4i32(addr: *const i8, val: int32x4_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v4i32.p0")] fn _vst1q_v4i32(addr: *const i8, val: int32x4_t, align: i32); } - _vst1q_v4i32(addr, val, align) + _vst1q_v4i32(addr, val, ALIGN) } -#[doc = "Store multiple single-element structures from one, two, three, or four registers."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1q_v8i16)"] -#[doc = "## Safety"] -#[doc = " * Neon instrinsic unsafe"] #[inline] #[target_feature(enable = "neon")] #[cfg(target_arch = "arm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16"))] -unsafe fn vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32) { +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vst1.16", ALIGN = 0))] +#[rustc_legacy_const_generics(2)] +unsafe fn vst1q_v8i16(addr: *const i8, val: int16x8_t) { unsafe extern "unadjusted" { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1.v8i16.p0")] fn _vst1q_v8i16(addr: *const i8, val: int16x8_t, align: i32); } - _vst1q_v8i16(addr, val, align) + _vst1q_v8i16(addr, val, ALIGN) } #[doc = "Store multiple single-element structures from one, two, three, or four registers."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst1_v4f16)"] diff --git a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index 43dd3b9031..4816d17bd6 100644 --- a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -11787,39 +11787,39 @@ intrinsics: - name: "vld1{type[0]}" visibility: private - doc: "Load multiple single-element structures to one, two, three, or four registers" - arguments: ["a: {type[1]}", "b: {type[2]}"] - return_type: "{neon_type[3]}" + arguments: ["a: {type[1]}"] + static_defs: ["const ALIGN: i32"] + return_type: "{neon_type[2]}" attr: + - FnCall: [rustc_legacy_const_generics, ['1']] - *target-is-arm - *enable-v7 - # - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vld1]]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vld1.8"', "ALIGN = 0"]]}]] - *neon-arm-unstable safety: unsafe: [neon] types: - - ["_v8i8", "*const i8", "i32", "int8x8_t"] - - ["q_v16i8", "*const i8", "i32", "int8x16_t"] - - ["_v4i16", "*const i8", "i32", "int16x4_t"] - - ["q_v8i16", "*const i8", "i32", "int16x8_t"] - - ["_v2i32", "*const i8", "i32", "int32x2_t"] - - ["q_v4i32", "*const i8", "i32", "int32x4_t"] - - ["_v1i64", "*const i8", "i32", "int64x1_t"] - - ["q_v2i64", "*const i8", "i32", "int64x2_t"] - - ["_v2f32", "*const i8", "i32", "float32x2_t"] - - ["q_v4f32", "*const i8", "i32", "float32x4_t"] + - ["_v8i8", "*const i8", "int8x8_t" ] + - ["q_v16i8", "*const i8", "int8x16_t" ] + - ["_v4i16", "*const i8", "int16x4_t" ] + - ["q_v8i16", "*const i8", "int16x8_t" ] + - ["_v2i32", "*const i8", "int32x2_t" ] + - ["q_v4i32", "*const i8", "int32x4_t" ] + - ["_v1i64", "*const i8", "int64x1_t" ] + - ["q_v2i64", "*const i8", "int64x2_t" ] + - ["_v2f32", "*const i8", "float32x2_t"] + - ["q_v4f32", "*const i8", "float32x4_t"] compose: - LLVMLink: name: "vld1.{type[0]}" + arguments: ["a: {type[1]}", "b: i32"] links: - - link: "llvm.arm.neon.vld1.{neon_type[3]}" + - link: "llvm.arm.neon.vld1.{neon_type[2]}" arch: arm - - FnCall: ["_vld1{type[0]}", [a, b]] - + - FnCall: ["_vld1{type[0]}", [a, ALIGN]] - name: "vld1{type[0]}" visibility: private - doc: "Load multiple single-element structures to one, two, three, or four registers" arguments: ["a: {type[1]}", "b: {type[2]}"] return_type: "{neon_type[3]}" attr: @@ -11855,19 +11855,23 @@ intrinsics: - *neon-arm-unstable - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]] types: - - ['*const i8', int8x8_t, '"vld1.8"', 'crate::mem::align_of::() as i32', '_v8i8'] - - ['*const i8', int8x16_t, '"vld1.8"', 'crate::mem::align_of::() as i32', 'q_v16i8'] - - ['*const i16', int16x4_t, '"vld1.16"', 'crate::mem::align_of::() as i32', '_v4i16'] - - ['*const i16', int16x8_t, '"vld1.16"', 'crate::mem::align_of::() as i32', 'q_v8i16'] - - ['*const i32', int32x2_t, 'vldr', 'crate::mem::align_of::() as i32', '_v2i32'] - - ['*const i32', int32x4_t, '"vld1.32"', 'crate::mem::align_of::() as i32', 'q_v4i32'] - - ['*const i64', int64x1_t, 'vldr', 'crate::mem::align_of::() as i32', '_v1i64'] - - ['*const i64', int64x2_t, '"vld1.64"', 'crate::mem::align_of::() as i32', 'q_v2i64'] + - ['*const i8', int8x8_t, '"vld1.8"', 'crate::mem::align_of::()', '_v8i8' ] + - ['*const i8', int8x16_t, '"vld1.8"', 'crate::mem::align_of::()', 'q_v16i8'] + - ['*const i16', int16x4_t, '"vld1.16"', 'crate::mem::align_of::()', '_v4i16' ] + - ['*const i16', int16x8_t, '"vld1.16"', 'crate::mem::align_of::()', 'q_v8i16'] + - ['*const i32', int32x2_t, 'vldr', 'crate::mem::align_of::()', '_v2i32' ] + - ['*const i32', int32x4_t, '"vld1.32"', 'crate::mem::align_of::()', 'q_v4i32'] + - ['*const i64', int64x1_t, 'vldr', 'crate::mem::align_of::()', '_v1i64' ] + - ['*const i64', int64x2_t, '"vld1.64"', 'crate::mem::align_of::()', 'q_v2i64'] compose: + - Const: + - ALIGN + - "i32" + - "{type[3]} as i32" - FnCall: - "vld1{type[4]}" - - - 'ptr as *const i8' - - '{type[3]}' + - ['ptr as *const i8'] + - ['ALIGN'] - name: "vld1{neon_type[1].no}" doc: "Load multiple single-element structures to one, two, three, or four registers." @@ -11881,28 +11885,32 @@ intrinsics: - *neon-arm-unstable - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['{type[2]}']]}]] types: - - ['*const u8', uint8x8_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::() as i32', '_v8i8'] - - ['*const u8', uint8x16_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v16i8'] - - ['*const u16', uint16x4_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::() as i32', '_v4i16'] - - ['*const u16', uint16x8_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v8i16'] - - ['*const u32', uint32x2_t, 'vldr', 'neon,v7', 'crate::mem::align_of::() as i32', '_v2i32'] - - ['*const u32', uint32x4_t, '"vld1.32"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v4i32'] - - ['*const u64', uint64x1_t, 'vldr', 'neon,v7', 'crate::mem::align_of::() as i32', '_v1i64'] - - ['*const u64', uint64x2_t, '"vld1.64"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v2i64'] - - ['*const p8', poly8x8_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::() as i32', '_v8i8'] - - ['*const p8', poly8x16_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v16i8'] - - ['*const p16', poly16x4_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::() as i32', '_v4i16'] - - ['*const p16', poly16x8_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v8i16'] - - ['*const p64', poly64x2_t, '"vld1.64"', 'neon,aes', 'crate::mem::align_of::() as i32', 'q_v2i64'] - - ['*const f32', float32x2_t, 'vldr', 'neon,v7', 'crate::mem::align_of::() as i32', '_v2f32'] - - ['*const f32', float32x4_t, '"vld1.32"', 'neon,v7', 'crate::mem::align_of::() as i32', 'q_v4f32'] - compose: + - ['*const u8', uint8x8_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::()', '_v8i8' ] + - ['*const u8', uint8x16_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::()', 'q_v16i8'] + - ['*const u16', uint16x4_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::()', '_v4i16' ] + - ['*const u16', uint16x8_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::()', 'q_v8i16'] + - ['*const u32', uint32x2_t, 'vldr', 'neon,v7', 'crate::mem::align_of::()', '_v2i32' ] + - ['*const u32', uint32x4_t, '"vld1.32"', 'neon,v7', 'crate::mem::align_of::()', 'q_v4i32'] + - ['*const u64', uint64x1_t, 'vldr', 'neon,v7', 'crate::mem::align_of::()', '_v1i64' ] + - ['*const u64', uint64x2_t, '"vld1.64"', 'neon,v7', 'crate::mem::align_of::()', 'q_v2i64'] + - ['*const p8', poly8x8_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::()', '_v8i8' ] + - ['*const p8', poly8x16_t, '"vld1.8"', 'neon,v7', 'crate::mem::align_of::()', 'q_v16i8'] + - ['*const p16', poly16x4_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::()', '_v4i16' ] + - ['*const p16', poly16x8_t, '"vld1.16"', 'neon,v7', 'crate::mem::align_of::()', 'q_v8i16'] + - ['*const p64', poly64x2_t, '"vld1.64"', 'neon,aes', 'crate::mem::align_of::()', 'q_v2i64'] + - ['*const f32', float32x2_t, 'vldr', 'neon,v7', 'crate::mem::align_of::()', '_v2f32' ] + - ['*const f32', float32x4_t, '"vld1.32"', 'neon,v7', 'crate::mem::align_of::()', 'q_v4f32'] + compose: + - Const: + - ALIGN + - "i32" + - "{type[4]} as i32" - FnCall: - transmute - - FnCall: - "vld1{type[5]}" - - - 'ptr as *const i8' - - '{type[4]}' + - ['ptr as *const i8'] + - ['ALIGN'] - name: "vld1{neon_type[1].no}" doc: "Load multiple single-element structures to one, two, three, or four registers." @@ -13724,32 +13732,35 @@ intrinsics: - name: "vst1{type[0]}" visibility: private - doc: "Store multiple single-element structures from one, two, three, or four registers." - arguments: ["addr: {type[1]}", "val: {neon_type[2]}", "align: {type[3]}"] + arguments: ["addr: {type[1]}", "val: {neon_type[2]}"] + static_defs: ["const ALIGN: i32"] safety: unsafe: [neon] attr: - *target-is-arm - *neon-v7 - *neon-arm-unstable - - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[4]}"']]}]] + - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[3]}"', "ALIGN = 0"]]}]] + - FnCall: ["rustc_legacy_const_generics", ['2']] types: - - ['_v8i8', '* const i8', int8x8_t, i32, '8'] - - ['q_v16i8', '* const i8', int8x16_t, i32, '8'] - - ['_v4i16', '* const i8', int16x4_t, i32, '16'] - - ['q_v8i16', '* const i8', int16x8_t, i32, '16'] - - ['_v2i32', '* const i8', int32x2_t, i32, '32'] - - ['q_v4i32', '* const i8', int32x4_t, i32, '32'] - - ['_v1i64', '* const i8', int64x1_t, i32, '64'] - - ['q_v2i64', '* const i8', int64x2_t, i32, '64'] - - ['_v2f32', '* const i8', float32x2_t, i32, '32'] - - ['q_v4f32', '* const i8', float32x4_t, i32, '32'] + - ['_v8i8', '* const i8', int8x8_t, '8' ] + - ['q_v16i8', '* const i8', int8x16_t, '8' ] + - ['_v4i16', '* const i8', int16x4_t, '16'] + - ['q_v8i16', '* const i8', int16x8_t, '16'] + - ['_v2i32', '* const i8', int32x2_t, '32'] + - ['q_v4i32', '* const i8', int32x4_t, '32'] + - ['_v1i64', '* const i8', int64x1_t, '64'] + - ['q_v2i64', '* const i8', int64x2_t, '64'] + - ['_v2f32', '* const i8', float32x2_t, '32'] + - ['q_v4f32', '* const i8', float32x4_t, '32'] compose: - LLVMLink: name: "_vst1{type[0]}" + arguments: ["addr: {type[1]}", "val: {neon_type[2]}", "align: i32"] links: - link: "llvm.arm.neon.vst1.{neon_type[2]}.p0" arch: arm + - FnCall: ["_vst1{type[0]}",[addr, val, ALIGN]] - name: "vst1{type[0]}" visibility: private @@ -13785,37 +13796,39 @@ intrinsics: - *neon-arm-unstable - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, ['"vst1.{type[2]}"']]}]] types: - - ['*mut i8', int8x8_t, '8', 'a', 'crate::mem::align_of::() as i32', '_v8i8'] - - ['*mut i8', int8x16_t, '8', 'a', 'crate::mem::align_of::() as i32', 'q_v16i8'] - - ['*mut i16', int16x4_t, '16', 'a', 'crate::mem::align_of::() as i32', '_v4i16'] - - ['*mut i16', int16x8_t, '16', 'a', 'crate::mem::align_of::() as i32', 'q_v8i16'] - - ['*mut i32', int32x2_t, '32', 'a', 'crate::mem::align_of::() as i32', '_v2i32'] - - ['*mut i32', int32x4_t, '32', 'a', 'crate::mem::align_of::() as i32', 'q_v4i32'] - - ['*mut i64', int64x1_t, '64', 'a', 'crate::mem::align_of::() as i32', '_v1i64'] - - ['*mut i64', int64x2_t, '64', 'a', 'crate::mem::align_of::() as i32', 'q_v2i64'] - - ['*mut u8', uint8x8_t, '8', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v8i8'] - - ['*mut u8', uint8x16_t, '8', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v16i8'] - - ['*mut u16', uint16x4_t, '16', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v4i16'] - - ['*mut u16', uint16x8_t, '16', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v8i16'] - - ['*mut u32', uint32x2_t, '32', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v2i32'] - - ['*mut u32', uint32x4_t, '32', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v4i32'] - - ['*mut u64', uint64x1_t, '64', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v1i64'] - - ['*mut u64', uint64x2_t, '64', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v2i64'] - - ['*mut p8', poly8x8_t, '8', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v8i8'] - - ['*mut p8', poly8x16_t, '8', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v16i8'] - - ['*mut p16', poly16x4_t, '16', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v4i16'] - - ['*mut p16', poly16x8_t, '16', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v8i16'] - - ['*mut p64', poly64x1_t, '64', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v1i64'] - - ['*mut p64', poly64x2_t, '64', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v2i64'] - - ['*mut f32', float32x2_t, '32', 'transmute(a)', 'crate::mem::align_of::() as i32', '_v2f32'] - - ['*mut f32', float32x4_t, '32', 'transmute(a)', 'crate::mem::align_of::() as i32', 'q_v4f32'] - compose: + - ['*mut i8', int8x8_t, '8', 'a', 'crate::mem::align_of::()', '_v8i8' ] + - ['*mut i8', int8x16_t, '8', 'a', 'crate::mem::align_of::()', 'q_v16i8'] + - ['*mut i16', int16x4_t, '16', 'a', 'crate::mem::align_of::()', '_v4i16' ] + - ['*mut i16', int16x8_t, '16', 'a', 'crate::mem::align_of::()', 'q_v8i16'] + - ['*mut i32', int32x2_t, '32', 'a', 'crate::mem::align_of::()', '_v2i32' ] + - ['*mut i32', int32x4_t, '32', 'a', 'crate::mem::align_of::()', 'q_v4i32'] + - ['*mut i64', int64x1_t, '64', 'a', 'crate::mem::align_of::()', '_v1i64' ] + - ['*mut i64', int64x2_t, '64', 'a', 'crate::mem::align_of::()', 'q_v2i64'] + - ['*mut u8', uint8x8_t, '8', 'transmute(a)', 'crate::mem::align_of::()', '_v8i8' ] + - ['*mut u8', uint8x16_t, '8', 'transmute(a)', 'crate::mem::align_of::()', 'q_v16i8'] + - ['*mut u16', uint16x4_t, '16', 'transmute(a)', 'crate::mem::align_of::()', '_v4i16' ] + - ['*mut u16', uint16x8_t, '16', 'transmute(a)', 'crate::mem::align_of::()', 'q_v8i16'] + - ['*mut u32', uint32x2_t, '32', 'transmute(a)', 'crate::mem::align_of::()', '_v2i32' ] + - ['*mut u32', uint32x4_t, '32', 'transmute(a)', 'crate::mem::align_of::()', 'q_v4i32'] + - ['*mut u64', uint64x1_t, '64', 'transmute(a)', 'crate::mem::align_of::()', '_v1i64' ] + - ['*mut u64', uint64x2_t, '64', 'transmute(a)', 'crate::mem::align_of::()', 'q_v2i64'] + - ['*mut p8', poly8x8_t, '8', 'transmute(a)', 'crate::mem::align_of::()', '_v8i8' ] + - ['*mut p8', poly8x16_t, '8', 'transmute(a)', 'crate::mem::align_of::()', 'q_v16i8'] + - ['*mut p16', poly16x4_t, '16', 'transmute(a)', 'crate::mem::align_of::()', '_v4i16' ] + - ['*mut p16', poly16x8_t, '16', 'transmute(a)', 'crate::mem::align_of::()', 'q_v8i16'] + - ['*mut p64', poly64x1_t, '64', 'transmute(a)', 'crate::mem::align_of::()', '_v1i64' ] + - ['*mut p64', poly64x2_t, '64', 'transmute(a)', 'crate::mem::align_of::()', 'q_v2i64'] + - ['*mut f32', float32x2_t, '32', 'transmute(a)', 'crate::mem::align_of::()', '_v2f32' ] + - ['*mut f32', float32x4_t, '32', 'transmute(a)', 'crate::mem::align_of::()', 'q_v4f32'] + compose: + - Const: + - ALIGN + - "i32" + - "{type[4]} as i32" - FnCall: - "vst1{type[5]}" - - - 'ptr as *const i8' - - '{type[3]}' - - '{type[4]}' - + - ['ptr as *const i8','{type[3]}'] + - ['ALIGN'] - name: "vst1{neon_type[1].no}" doc: "Store multiple single-element structures from one, two, three, or four registers." @@ -13839,32 +13852,64 @@ intrinsics: - '{type[3]}' - '{type[4]}' - - - name: "vshiftins{type[0]}" + - name: "vshiftlins{type[0]}" visibility: private + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] + return_type: "{neon_type[1]}" + safety: safe + attr: + - *target-is-arm + - *neon-v7 + - *neon-arm-unstable + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const N: i32'] + types: + - ['_v8i8', 'int8x8_t', '8', 'int8x8_t([N as i8; 8 ])' ] + - ['_v16i8', 'int8x16_t', '8', 'int8x16_t([N as i8; 16])'] + - ['_v4i16', 'int16x4_t', '16', 'int16x4_t([N as i16; 4])'] + - ['_v8i16', 'int16x8_t', '16', 'int16x8_t([N as i16; 8])'] + - ['_v2i32', 'int32x2_t', '32', 'int32x2_t([N; 2])' ] + - ['_v4i32', 'int32x4_t', '32', 'int32x4_t([N; 4])' ] + - ['_v1i64', 'int64x1_t', '64', 'int64x1_t([N as i64; 1])'] + - ['_v2i64', 'int64x2_t', '64', 'int64x2_t([N as i64; 2])'] + compose: + - LLVMLink: + name: "_vshiftins{type[0]}" + arguments: ["a: {type[1]}", "b: {type[1]}", "c: {type[1]}"] + links: + - link: "llvm.arm.neon.vshiftins.{neon_type[1]}" + arch: arm + - FnCall: ["_vshiftlins{type[0]}", [a,b, "const {{ {type[3]} }}"], [], true] + + - name: "vshiftrins{type[0]}" doc: "Shift Right and Insert (immediate)" - arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}", "c: {neon_type[1]}"] + visibility: private + arguments: ["a: {neon_type[1]}", "b: {neon_type[1]}"] return_type: "{neon_type[1]}" safety: safe attr: - *target-is-arm - *neon-v7 - *neon-arm-unstable + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ['const N: i32'] types: - - ['_v8i8', "int8x8_t", '8'] - - ['_v16i8', 'int8x16_t', '8'] - - ['_v4i16', 'int16x4_t', '16'] - - ['_v8i16', 'int16x8_t', '16'] - - ['_v2i32', 'int32x2_t', '32'] - - ['_v4i32', 'int32x4_t', '32'] - - ['_v1i64', 'int64x1_t', '64'] - - ['_v2i64', 'int64x2_t', '64'] + - ['_v8i8', 'int8x8_t', '8', 'int8x8_t([-N as i8; 8 ])' ] + - ['_v16i8', 'int8x16_t', '8', 'int8x16_t([-N as i8; 16])'] + - ['_v4i16', 'int16x4_t', '16', 'int16x4_t([-N as i16; 4])'] + - ['_v8i16', 'int16x8_t', '16', 'int16x8_t([-N as i16; 8])'] + - ['_v2i32', 'int32x2_t', '32', 'int32x2_t([-N; 2])' ] + - ['_v4i32', 'int32x4_t', '32', 'int32x4_t([-N; 4])' ] + - ['_v1i64', 'int64x1_t', '64', 'int64x1_t([-N as i64; 1])'] + - ['_v2i64', 'int64x2_t', '64', 'int64x2_t([-N as i64; 2])'] compose: - LLVMLink: name: "_vshiftins{type[0]}" + arguments: ["a: {type[1]}", "b: {type[1]}", "c: {type[1]}"] links: - link: "llvm.arm.neon.vshiftins.{neon_type[1]}" arch: arm + - FnCall: ["_vshiftrins{type[0]}", [a,b, "const {{ {type[3]} }}"], [], true] - name: "vsri{neon_type[0].N}" doc: "Shift Right and Insert (immediate)" @@ -13879,18 +13924,18 @@ intrinsics: - FnCall: [rustc_legacy_const_generics, ['2']] static_defs: ['const N: i32'] types: - - [uint8x8_t, "neon,v7", '8', '1 <= N && N <= 8', 'v8i8', 'int8x8_t::splat', '-N as i8'] - - [uint8x16_t, "neon,v7", '8', '1 <= N && N <= 8', 'v16i8', 'int8x16_t::splat', '-N as i8'] - - [uint16x4_t, "neon,v7", '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16'] - - [uint16x8_t, "neon,v7", '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16'] - - [uint32x2_t, "neon,v7", '32', '1 <= N && N <= 32', 'v2i32', 'int32x2_t::splat', '-N'] - - [uint32x4_t, "neon,v7", '32', '1 <= N && N <= 32', 'v4i32', 'int32x4_t::splat', '-N'] - - [uint64x1_t, "neon,v7", '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64'] - - [uint64x2_t, "neon,v7", '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64'] - - [poly8x8_t, "neon,v7", '8', '1 <= N && N <= 8', 'v8i8', 'int8x8_t::splat', '-N as i8'] - - [poly8x16_t, "neon,v7", '8', '1 <= N && N <= 8', 'v16i8', 'int8x16_t::splat', '-N as i8'] - - [poly16x4_t, "neon,v7", '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16'] - - [poly16x8_t, "neon,v7", '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16'] + - [uint8x8_t, "neon,v7", '8', '1 <= N && N <= 8', 'v8i8' ] + - [uint8x16_t, "neon,v7", '8', '1 <= N && N <= 8', 'v16i8'] + - [uint16x4_t, "neon,v7", '16', '1 <= N && N <= 16', 'v4i16'] + - [uint16x8_t, "neon,v7", '16', '1 <= N && N <= 16', 'v8i16'] + - [uint32x2_t, "neon,v7", '32', '1 <= N && N <= 32', 'v2i32'] + - [uint32x4_t, "neon,v7", '32', '1 <= N && N <= 32', 'v4i32'] + - [uint64x1_t, "neon,v7", '64', '1 <= N && N <= 64', 'v1i64'] + - [uint64x2_t, "neon,v7", '64', '1 <= N && N <= 64', 'v2i64'] + - [poly8x8_t, "neon,v7", '8', '1 <= N && N <= 8', 'v8i8' ] + - [poly8x16_t, "neon,v7", '8', '1 <= N && N <= 8', 'v16i8'] + - [poly16x4_t, "neon,v7", '16', '1 <= N && N <= 16', 'v4i16'] + - [poly16x8_t, "neon,v7", '16', '1 <= N && N <= 16', 'v8i16'] ## These live in ./crates/core_arch/src/arm/neon.rs #- [poly64x1_t, "neon,v7,aes", '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64'] #- [poly64x2_t, "neon,v7,aes", '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64'] @@ -13899,10 +13944,9 @@ intrinsics: - FnCall: - 'transmute' - - FnCall: - - "vshiftins_{type[4]}" + - "vshiftrins_{type[4]}::" - - FnCall: [transmute, [a]] - FnCall: [transmute, [b]] - - FnCall: ["{type[5]}", ["{type[6]}"]] - name: "vsri{neon_type[0].N}" doc: "Shift Right and Insert (immediate)" @@ -13917,21 +13961,20 @@ intrinsics: - FnCall: [rustc_legacy_const_generics, ['2']] safety: safe types: - - [int8x8_t, '8', '1 <= N && N <= 8', 'v8i8', 'int8x8_t::splat', '-N as i8'] - - [int8x16_t, '8', '1 <= N && N <= 8', 'v16i8', 'int8x16_t::splat', '-N as i8'] - - [int16x4_t, '16', '1 <= N && N <= 16', 'v4i16', 'int16x4_t::splat', '-N as i16'] - - [int16x8_t, '16', '1 <= N && N <= 16', 'v8i16', 'int16x8_t::splat', '-N as i16'] - - [int32x2_t, '32', '1 <= N && N <= 32', 'v2i32', 'int32x2_t::splat', '-N'] - - [int32x4_t, '32', '1 <= N && N <= 32', 'v4i32', 'int32x4_t::splat', '-N'] - - [int64x1_t, '64', '1 <= N && N <= 64', 'v1i64', 'int64x1_t::splat', '-N as i64'] - - [int64x2_t, '64', '1 <= N && N <= 64', 'v2i64', 'int64x2_t::splat', '-N as i64'] + - [int8x8_t, '8', '1 <= N && N <= 8', 'v8i8' ] + - [int8x16_t, '8', '1 <= N && N <= 8', 'v16i8'] + - [int16x4_t, '16', '1 <= N && N <= 16', 'v4i16'] + - [int16x8_t, '16', '1 <= N && N <= 16', 'v8i16'] + - [int32x2_t, '32', '1 <= N && N <= 32', 'v2i32'] + - [int32x4_t, '32', '1 <= N && N <= 32', 'v4i32'] + - [int64x1_t, '64', '1 <= N && N <= 64', 'v1i64'] + - [int64x2_t, '64', '1 <= N && N <= 64', 'v2i64'] compose: - FnCall: ["static_assert!", ['{type[2]}']] - FnCall: - - "vshiftins_{type[3]}" + - "vshiftrins_{type[3]}::" - - a - b - - FnCall: ["{type[4]}", ["{type[5]}"]] - name: "vsli{neon_type[0].N}" doc: "Shift Left and Insert (immediate)" @@ -13946,18 +13989,18 @@ intrinsics: - FnCall: [rustc_legacy_const_generics, ['2']] static_defs: ['const N: i32'] types: - - [uint8x8_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v8i8', 'int8x8_t::splat', 'N as i8'] - - [uint8x16_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v16i8', 'int8x16_t::splat', 'N as i8'] - - [uint16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v4i16', 'int16x4_t::splat', 'N as i16'] - - [uint16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v8i16', 'int16x8_t::splat', 'N as i16'] - - [uint32x2_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32', 'int32x2_t::splat', 'N'] - - [uint32x4_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32', 'int32x4_t::splat', 'N'] - - [uint64x1_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64'] - - [uint64x2_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64'] - - [poly8x8_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v8i8', 'int8x8_t::splat', 'N as i8'] - - [poly8x16_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v16i8', 'int8x16_t::splat', 'N as i8'] - - [poly16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v4i16', 'int16x4_t::splat', 'N as i16'] - - [poly16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v8i16', 'int16x8_t::splat', 'N as i16'] + - [uint8x8_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v8i8'] + - [uint8x16_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v16i8'] + - [uint16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v4i16'] + - [uint16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v8i16'] + - [uint32x2_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32'] + - [uint32x4_t, "neon,v7", '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32'] + - [uint64x1_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64'] + - [uint64x2_t, "neon,v7", '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64'] + - [poly8x8_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v8i8'] + - [poly8x16_t, "neon,v7", '8', 'static_assert_uimm_bits!', 'N, 3', 'v16i8'] + - [poly16x4_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v4i16'] + - [poly16x8_t, "neon,v7", '16', 'static_assert_uimm_bits!', 'N, 4', 'v8i16'] ## These live in ./crates/core_arch/src/arm/neon.rs #- [poly64x1_t, "neon,v7,aes", '"vsli.64"', 'static_assert!', '0 <= N && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64'] #- [poly64x2_t, "neon,v7,aes", '"vsli.64"', 'static_assert!', '0 <= N && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64'] @@ -13966,10 +14009,9 @@ intrinsics: - FnCall: - 'transmute' - - FnCall: - - "vshiftins_{type[5]}" + - "vshiftlins_{type[5]}::" - - FnCall: [transmute, [a]] - FnCall: [transmute, [b]] - - FnCall: ["{type[6]}", ["{type[7]}"]] - name: "vsli{neon_type[0].N}" doc: "Shift Left and Insert (immediate)" @@ -13984,21 +14026,20 @@ intrinsics: - FnCall: [rustc_legacy_const_generics, ['2']] static_defs: ['const N: i32'] types: - - [int8x8_t, '8', 'static_assert_uimm_bits!', 'N, 3', 'v8i8', 'int8x8_t::splat', 'N as i8'] - - [int8x16_t, '8', 'static_assert_uimm_bits!', 'N, 3', 'v16i8', 'int8x16_t::splat', 'N as i8'] - - [int16x4_t, '16', 'static_assert_uimm_bits!', 'N, 4', 'v4i16', 'int16x4_t::splat', 'N as i16'] - - [int16x8_t, '16', 'static_assert_uimm_bits!', 'N, 4', 'v8i16', 'int16x8_t::splat', 'N as i16'] - - [int32x2_t, '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32', 'int32x2_t::splat', 'N'] - - [int32x4_t, '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32', 'int32x4_t::splat', 'N'] - - [int64x1_t, '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64', 'int64x1_t::splat', 'N as i64'] - - [int64x2_t, '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64', 'int64x2_t::splat', 'N as i64'] + - [int8x8_t, '8', 'static_assert_uimm_bits!', 'N, 3', 'v8i8' ] + - [int8x16_t, '8', 'static_assert_uimm_bits!', 'N, 3', 'v16i8'] + - [int16x4_t, '16', 'static_assert_uimm_bits!', 'N, 4', 'v4i16'] + - [int16x8_t, '16', 'static_assert_uimm_bits!', 'N, 4', 'v8i16'] + - [int32x2_t, '32', 'static_assert!', 'N >= 0 && N <= 31', 'v2i32'] + - [int32x4_t, '32', 'static_assert!', 'N >= 0 && N <= 31', 'v4i32'] + - [int64x1_t, '64', 'static_assert!', 'N >= 0 && N <= 63', 'v1i64'] + - [int64x2_t, '64', 'static_assert!', 'N >= 0 && N <= 63', 'v2i64'] compose: - FnCall: ["{type[2]}", ['{type[3]}']] - FnCall: - - "vshiftins_{type[4]}" + - "vshiftlins_{type[4]}::" - - a - b - - FnCall: ["{type[5]}", ["{type[6]}"]] - name: "vcombine{neon_type[0].no}" doc: Join two smaller vectors into a single larger vector diff --git a/crates/stdarch-gen-arm/src/expression.rs b/crates/stdarch-gen-arm/src/expression.rs index d5644ef27d..bf48f0dab7 100644 --- a/crates/stdarch-gen-arm/src/expression.rs +++ b/crates/stdarch-gen-arm/src/expression.rs @@ -108,6 +108,8 @@ impl ToTokens for FnCall { pub enum Expression { /// (Re)Defines a variable Let(LetVariant), + /// Defines a const + Const(WildString, TypeKind, Box), /// Performs a variable assignment operation Assign(String, Box), /// Performs a macro call @@ -169,6 +171,7 @@ impl Expression { | LetVariant::WithType(_, _, ex) | LetVariant::MutWithType(_, _, ex), ) => ex.pre_build(ctx), + Self::Const(_, _, ex) => ex.pre_build(ctx), Self::CastAs(ex, _) => ex.pre_build(ctx), Self::Multiply(lhs, rhs) | Self::Xor(lhs, rhs) => { lhs.pre_build(ctx)?; @@ -245,6 +248,15 @@ impl Expression { ); ex.build(intrinsic, ctx) } + Self::Const(var_name, ty, ex) => { + var_name.build_acle(ctx.local)?; + ctx.local.variables.insert( + var_name.to_string(), + (ty.to_owned(), VariableType::Internal), + ); + ex.build(intrinsic, ctx) + } + Self::CastAs(ex, _) => ex.build(intrinsic, ctx), Self::Multiply(lhs, rhs) | Self::Xor(lhs, rhs) => { lhs.build(intrinsic, ctx)?; @@ -303,6 +315,7 @@ impl Expression { | LetVariant::WithType(_, _, exp) | LetVariant::MutWithType(_, _, exp), ) => exp.requires_unsafe_wrapper(ctx_fn), + Self::Const(_, _, exp) => exp.requires_unsafe_wrapper(ctx_fn), Self::Array(exps) => exps.iter().any(|exp| exp.requires_unsafe_wrapper(ctx_fn)), Self::Multiply(lhs, rhs) | Self::Xor(lhs, rhs) => { lhs.requires_unsafe_wrapper(ctx_fn) || rhs.requires_unsafe_wrapper(ctx_fn) @@ -462,6 +475,10 @@ impl ToTokens for Expression { let var_ident = format_ident!("{}", var_name.to_string()); tokens.append_all(quote! { let mut #var_ident: #ty = #exp }) } + Self::Const(var_name, ty, exp) => { + let var_ident = format_ident!("{}", var_name.to_string()); + tokens.append_all(quote! { const #var_ident: #ty = #exp }) + } Self::Assign(var_name, exp) => { /* If we are dereferencing a variable to assign a value \ * the 'format_ident!' macro does not like the asterix */ diff --git a/crates/stdarch-gen-arm/src/intrinsic.rs b/crates/stdarch-gen-arm/src/intrinsic.rs index efaa9e1418..0cdff6ff6c 100644 --- a/crates/stdarch-gen-arm/src/intrinsic.rs +++ b/crates/stdarch-gen-arm/src/intrinsic.rs @@ -1730,7 +1730,8 @@ fn create_tokens(intrinsic: &Intrinsic, endianness: Endianness, tokens: &mut Tok "{fn_name} needs to be private, or to have documentation." ); assert!( - !safety.has_doc_comments(), + !safety.has_doc_comments() + || matches!(intrinsic.visibility, FunctionVisibility::Private), "{fn_name} needs a documentation section for its safety comments." ); } From 51f3dabce33248e6d89f8fd45447330b55fb8a3a Mon Sep 17 00:00:00 2001 From: reucru01 Date: Thu, 6 Nov 2025 10:01:41 +0000 Subject: [PATCH 2/3] Fixes differences found between clang & rust The intrinsics test was flagging differences in aarch64_be between rust in debug and clang in O2. It was found that rust was correct in debug, but incorrect in release, and in both cases were being compared against clang in O2 which was also incorrect. The vdot intrinsics were fixed and are now correct in rust for both release and debug. However the vcmla ones could not be as the issue lies with LLVM. Both the vdot and vcmla intrinsics were added to the skiplist as clang is still incorrect for both. LLVM issue: https://github.com/llvm/llvm-project/issues/166190 --- ci/intrinsic-test.sh | 2 +- .../core_arch/src/aarch64/neon/generated.rs | 24 ++-- .../src/arm_shared/neon/generated.rs | 32 +++--- crates/intrinsic-test/missing_aarch64_be.txt | 105 ++++++++++++++++++ .../spec/neon/aarch64.spec.yml | 30 ++--- .../spec/neon/arm_shared.spec.yml | 36 +++--- 6 files changed, 171 insertions(+), 58 deletions(-) create mode 100644 crates/intrinsic-test/missing_aarch64_be.txt diff --git a/ci/intrinsic-test.sh b/ci/intrinsic-test.sh index be63f0c0c6..a8f96897bc 100755 --- a/ci/intrinsic-test.sh +++ b/ci/intrinsic-test.sh @@ -56,7 +56,7 @@ case ${TARGET} in aarch64_be-unknown-linux-gnu*) TEST_CPPFLAGS="-fuse-ld=lld" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt + TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64_be.txt TEST_CXX_COMPILER="clang++" TEST_RUNNER="${CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER}" : "${TEST_SAMPLE_INTRINSICS_PERCENTAGE:=100}" diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index 09cf381804..ce864cc7d9 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -9500,10 +9500,10 @@ pub fn vdivh_f16(a: f16, b: f16) -> f16 { #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] pub fn vdot_laneq_s32(a: int32x2_t, b: int8x8_t, c: int8x16_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 2); + let c: int32x4_t = vreinterpretq_s32_s8(c); unsafe { - let c: int32x4_t = transmute(c); let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_s32(a, b, transmute(c)) + vdot_s32(a, b, vreinterpret_s8_s32(c)) } } #[doc = "Dot product arithmetic (indexed)"] @@ -9515,11 +9515,11 @@ pub fn vdot_laneq_s32(a: int32x2_t, b: int8x8_t, c: int8x16_t) #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] pub fn vdotq_laneq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); + let c: int32x4_t = vreinterpretq_s32_s8(c); unsafe { - let c: int32x4_t = transmute(c); let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_s32(a, b, transmute(c)) + vdotq_s32(a, b, vreinterpretq_s8_s32(c)) } } #[doc = "Dot product arithmetic (indexed)"] @@ -9531,10 +9531,10 @@ pub fn vdotq_laneq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] pub fn vdot_laneq_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x16_t) -> uint32x2_t { static_assert_uimm_bits!(LANE, 2); + let c: uint32x4_t = vreinterpretq_u32_u8(c); unsafe { - let c: uint32x4_t = transmute(c); let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_u32(a, b, transmute(c)) + vdot_u32(a, b, vreinterpret_u8_u32(c)) } } #[doc = "Dot product arithmetic (indexed)"] @@ -9546,11 +9546,11 @@ pub fn vdot_laneq_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x16_ #[unstable(feature = "stdarch_neon_dotprod", issue = "117224")] pub fn vdotq_laneq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 2); + let c: uint32x4_t = vreinterpretq_u32_u8(c); unsafe { - let c: uint32x4_t = transmute(c); let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_u32(a, b, transmute(c)) + vdotq_u32(a, b, vreinterpretq_u8_u32(c)) } } #[doc = "Set all vector lanes to the same value"] @@ -28283,10 +28283,10 @@ pub fn vuqadds_s32(a: i32, b: u32) -> i32 { #[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] pub fn vusdot_laneq_s32(a: int32x2_t, b: uint8x8_t, c: int8x16_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 2); + let c: int32x4_t = vreinterpretq_s32_s8(c); unsafe { - let c: int32x4_t = transmute(c); let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, b, transmute(c)) + vusdot_s32(a, b, vreinterpret_s8_s32(c)) } } #[doc = "Dot product index form with unsigned and signed integers"] @@ -28298,11 +28298,11 @@ pub fn vusdot_laneq_s32(a: int32x2_t, b: uint8x8_t, c: int8x16_ #[unstable(feature = "stdarch_neon_i8mm", issue = "117223")] pub fn vusdotq_laneq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); + let c: int32x4_t = vreinterpretq_s32_s8(c); unsafe { - let c: int32x4_t = transmute(c); let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, b, transmute(c)) + vusdotq_s32(a, b, vreinterpretq_s8_s32(c)) } } #[doc = "Unzip vectors"] diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index c1bd70175c..7229f33f61 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -9201,10 +9201,10 @@ pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { )] pub fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); + let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { - let c: int32x2_t = transmute(c); let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_s32(a, b, transmute(c)) + vdot_s32(a, b, vreinterpret_s8_s32(c)) } } #[doc = "Dot product arithmetic (indexed)"] @@ -9228,11 +9228,11 @@ pub fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> )] pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); + let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { - let c: int32x2_t = transmute(c); let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_s32(a, b, transmute(c)) + vdotq_s32(a, b, vreinterpretq_s8_s32(c)) } } #[doc = "Dot product arithmetic (indexed)"] @@ -9256,10 +9256,10 @@ pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) )] pub fn vdot_lane_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t { static_assert_uimm_bits!(LANE, 1); + let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { - let c: uint32x2_t = transmute(c); let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_u32(a, b, transmute(c)) + vdot_u32(a, b, vreinterpret_u8_u32(c)) } } #[doc = "Dot product arithmetic (indexed)"] @@ -9283,11 +9283,11 @@ pub fn vdot_lane_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) )] pub fn vdotq_lane_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 1); + let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { - let c: uint32x2_t = transmute(c); let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_u32(a, b, transmute(c)) + vdotq_u32(a, b, vreinterpretq_u8_u32(c)) } } #[doc = "Dot product arithmetic (vector)"] @@ -71710,10 +71710,10 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { )] pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); + let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { - let c: uint32x2_t = transmute(c); let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, transmute(c), b) + vusdot_s32(a, vreinterpret_u8_u32(c), b) } } #[doc = "Dot product index form with signed and unsigned integers"] @@ -71737,11 +71737,11 @@ pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) )] pub fn vsudotq_lane_s32(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); + let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { - let c: uint32x2_t = transmute(c); let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, transmute(c), b) + vusdotq_s32(a, vreinterpretq_u8_u32(c), b) } } #[doc = "Table look-up"] @@ -73630,10 +73630,10 @@ pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { )] pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); + let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { - let c: int32x2_t = transmute(c); let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, b, transmute(c)) + vusdot_s32(a, b, vreinterpret_s8_s32(c)) } } #[doc = "Dot product index form with unsigned and signed integers"] @@ -73657,11 +73657,11 @@ pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) )] pub fn vusdotq_lane_s32(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); + let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { - let c: int32x2_t = transmute(c); let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, b, transmute(c)) + vusdotq_s32(a, b, vreinterpretq_s8_s32(c)) } } #[doc = "Dot product vector form with unsigned and signed integers"] diff --git a/crates/intrinsic-test/missing_aarch64_be.txt b/crates/intrinsic-test/missing_aarch64_be.txt new file mode 100644 index 0000000000..28e1d9203e --- /dev/null +++ b/crates/intrinsic-test/missing_aarch64_be.txt @@ -0,0 +1,105 @@ +# Bad LLVM codegen for BE in O2 in clang, and release in rust (https://github.com/llvm/llvm-project/issues/166190) +vcmla_lane_f16 +vcmla_laneq_f16 +vcmla_rot180_lane_f16 +vcmla_rot180_laneq_f16 +vcmla_rot270_lane_f16 +vcmla_rot270_laneq_f16 +vcmla_rot90_lane_f16 +vcmla_rot90_laneq_f16 +vcmlaq_lane_f16 +vcmlaq_laneq_f16 +vcmlaq_laneq_f32 +vcmlaq_rot180_lane_f16 +vcmlaq_rot180_laneq_f16 +vcmlaq_rot180_laneq_f32 +vcmlaq_rot270_lane_f16 +vcmlaq_rot270_laneq_f16 +vcmlaq_rot270_laneq_f32 +vcmlaq_rot90_lane_f16 +vcmlaq_rot90_laneq_f16 +vcmlaq_rot90_laneq_f32 +# Bad codegen for BE in O2 in clang, correct in rust. Same cause as above issue. +vdot_lane_s32 +vdot_lane_u32 +vdot_laneq_s32 +vdot_laneq_u32 +vdotq_lane_s32 +vdotq_lane_u32 +vdotq_laneq_s32 +vdotq_laneq_u32 +vsudot_lane_s32 +vsudot_laneq_s32 +vsudotq_lane_s32 +vsudotq_laneq_s32 +vusdot_lane_s32 +vusdot_laneq_s32 +vusdotq_lane_s32 +vusdotq_laneq_s32 + +# Below are in common to missing_aarch64.txt + +# Not implemented in stdarch yet +vbfdot_f32 +vbfdot_lane_f32 +vbfdot_laneq_f32 +vbfdotq_f32 +vbfdotq_lane_f32 +vbfdotq_laneq_f32 +vbfmlalbq_f32 +vbfmlalbq_lane_f32 +vbfmlalbq_laneq_f32 +vbfmlaltq_f32 +vbfmlaltq_lane_f32 +vbfmlaltq_laneq_f32 +vbfmmlaq_f32 + + +# Implemented in stdarch, but missing in Clang. +vrnd32xq_f64 +vrnd32zq_f64 +vrnd64xq_f64 +vrnd64zq_f64 +vamin_f32 +vaminq_f32 +vaminq_f64 +vamax_f32 +vamaxq_f32 +vamaxq_f64 +# LLVM select error, and missing in Clang. +vrnd32x_f64 +vrnd32z_f64 +vrnd64x_f64 +vrnd64z_f64 +vluti2_lane_p16 +vluti2_lane_p8 +vluti2_lane_s16 +vluti2_lane_s8 +vluti2_lane_u16 +vluti2_lane_u8 +vluti2q_lane_p16 +vluti2q_lane_p8 +vluti2q_lane_s16 +vluti2q_lane_s8 +vluti2q_lane_u16 +vluti2q_lane_u8 +vluti4q_lane_f16_x2 +vluti4q_lane_p16_x2 +vluti4q_lane_p8 +vluti4q_lane_s16_x2 +vluti4q_lane_s8 +vluti4q_lane_u16_x2 +vluti4q_lane_u8 +vluti4q_laneq_f16_x2 +vluti4q_laneq_p16_x2 +vluti4q_laneq_p8 +vluti4q_laneq_s16_x2 +vluti4q_laneq_s8 +vluti4q_laneq_u16_x2 +vluti4q_laneq_u8 + +# Broken in Clang +vcvth_s16_f16 +# FIXME: Broken output due to missing f16 printing support in Rust +vmulh_lane_f16 +vmulh_laneq_f16 diff --git a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index fe64f9d786..48c12779a8 100644 --- a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -5091,13 +5091,13 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [int32x2_t, uint8x8_t, int8x16_t, '[LANE as u32, LANE as u32]'] - - [int32x4_t, uint8x16_t, int8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x2_t, uint8x8_t, int8x16_t, '[LANE as u32, LANE as u32]',''] + - [int32x4_t, uint8x16_t, int8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '2']] - - Let: [c, int32x4_t, {FnCall: [transmute, [c]]}] + - Let: [c, int32x4_t, {FnCall: ['vreinterpretq_s32_s8', [c]]}] - Let: [c, "{neon_type[0]}", {FnCall: [simd_shuffle!, [c, c, "{type[3]}"]]}] - - FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: [transmute, [c]]}]] + - FnCall: ["vusdot{neon_type[0].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]] - name: "vsudot{neon_type[0].laneq_nox}" doc: "Dot product index form with signed and unsigned integers" @@ -5123,7 +5123,11 @@ intrinsics: - c - "{type[4]}" - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] - - FnCall: ["vusdot{neon_type[0].no}", [a, {FnCall: [transmute, [c]]}, b]] + - FnCall: + - "vusdot{neon_type[0].no}" + - - a + - FnCall: [transmute, [c]] + - b - name: "vmul{neon_type.no}" doc: Multiply @@ -6580,14 +6584,14 @@ intrinsics: - FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']] safety: safe types: - - [int32x2_t, int8x8_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32]'] - - [int32x4_t, int8x16_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x2_t, int8x8_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32]', ''] + - [int32x4_t, int8x16_t, int8x16_t, int32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '2']] - Let: - c - "{neon_type[3]}" - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]] - Let: - c - "{neon_type[0]}" @@ -6596,7 +6600,7 @@ intrinsics: - "vdot{neon_type[0].no}" - - a - b - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] - name: "vdot{neon_type[0].laneq_nox}" doc: Dot product arithmetic (indexed) @@ -6610,14 +6614,14 @@ intrinsics: - FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']] safety: safe types: - - [uint32x2_t, uint8x8_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32]'] - - [uint32x4_t, uint8x16_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x2_t, uint8x8_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32]',''] + - [uint32x4_t, uint8x16_t, uint8x16_t, uint32x4_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '2']] - Let: - c - "{neon_type[3]}" - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpretq_{neon_type[0]}_{neon_type[1]}', [c]] - Let: - c - "{neon_type[0]}" @@ -6626,7 +6630,7 @@ intrinsics: - "vdot{neon_type[0].no}" - - a - b - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] - name: "vmax{neon_type.no}" doc: Maximum (vector) diff --git a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index 4816d17bd6..bb44aab66b 100644 --- a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -6261,19 +6261,19 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - ['_lane_s32', int32x2_t, uint8x8_t, '[LANE as u32, LANE as u32]'] - - ['q_lane_s32', int32x4_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - ['_lane_s32', int32x2_t, uint8x8_t, '[LANE as u32, LANE as u32]',''] + - ['q_lane_s32', int32x4_t, uint8x16_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '1']] - Let: - c - int32x2_t - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret_s32_s8', [c]] - Let: - c - "{type[1]}" - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] - - FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: [transmute, [c]]}]] + - FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]] - name: "vsudot{neon_type[0].lane_nox}" doc: "Dot product index form with signed and unsigned integers" @@ -6290,19 +6290,23 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - [int32x2_t, int8x8_t, uint8x8_t, '[LANE as u32, LANE as u32]', uint32x2_t] - - [int32x4_t, int8x16_t, uint8x8_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t] + - [int32x2_t, int8x8_t, uint8x8_t, '[LANE as u32, LANE as u32]', uint32x2_t,''] + - [int32x4_t, int8x16_t, uint8x8_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]', uint32x4_t,'q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '1']] - Let: - c - uint32x2_t - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret_u32_u8', [c]] - Let: - c - "{type[4]}" - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] - - FnCall: ["vusdot{neon_type[0].no}", [a, {FnCall: [transmute, [c]]}, b]] + - FnCall: + - "vusdot{neon_type[0].no}" + - - a + - FnCall: ['vreinterpret{type[5]}_u8_u32', [c]] + - b - name: "vmul{neon_type[1].no}" doc: Multiply @@ -7113,14 +7117,14 @@ intrinsics: - *neon-cfg-arm-unstable safety: safe types: - - [int32x2_t, int8x8_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32]'] - - [int32x4_t, int8x16_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x2_t, int8x8_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32]',''] + - [int32x4_t, int8x16_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '1']] - Let: - c - "{neon_type[3]}" - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]] - Let: - c - "{neon_type[0]}" @@ -7129,7 +7133,7 @@ intrinsics: - "vdot{neon_type[0].no}" - - a - b - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] - name: "vdot{neon_type[0].lane_nox}" doc: Dot product arithmetic (indexed) @@ -7146,14 +7150,14 @@ intrinsics: - *neon-cfg-arm-unstable safety: safe types: - - [uint32x2_t, uint8x8_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32]'] - - [uint32x4_t, uint8x16_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x2_t, uint8x8_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32]',''] + - [uint32x4_t, uint8x16_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '1']] - Let: - c - "{neon_type[3]}" - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]] - Let: - c - "{neon_type[0]}" @@ -7162,7 +7166,7 @@ intrinsics: - "vdot{neon_type[0].no}" - - a - b - - FnCall: [transmute, [c]] + - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] - name: "vmax{neon_type.no}" doc: Maximum (vector) From f564a013ef6da9dd541406f6499b5831325cb4da Mon Sep 17 00:00:00 2001 From: reucru01 Date: Mon, 24 Nov 2025 14:34:03 +0000 Subject: [PATCH 3/3] Provides work-around for vreinterpret inline fail --- .../src/arm_shared/neon/generated.rs | 304 +++++++++++++++++- .../spec/neon/arm_shared.spec.yml | 20 +- 2 files changed, 300 insertions(+), 24 deletions(-) diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index 7229f33f61..d45454c696 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -9183,6 +9183,7 @@ pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] @@ -9201,15 +9202,77 @@ pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { )] pub fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { + let c: int32x2_t = transmute(c); let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_s32(a, b, vreinterpret_s8_s32(c)) + vdot_s32(a, b, transmute(c)) + } +} +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + let b: int8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: int8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: int32x2_t = transmute(c); + let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + let ret_val: int32x2_t = vdot_s32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: int32x2_t = transmute(c); + let c: int32x4_t = + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vdotq_s32(a, b, transmute(c)) } } #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"] #[inline] +#[cfg(target_endian = "big")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsdot, LANE = 0))] @@ -9228,16 +9291,22 @@ pub fn vdot_lane_s32(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> )] pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = vreinterpret_s32_s8(c); + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + let b: int8x16_t = + unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: int8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { + let c: int32x2_t = transmute(c); let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_s32(a, b, vreinterpretq_s8_s32(c)) + let ret_val: int32x4_t = vdotq_s32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] @@ -9256,15 +9325,48 @@ pub fn vdotq_lane_s32(a: int32x4_t, b: int8x16_t, c: int8x8_t) )] pub fn vdot_lane_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t { static_assert_uimm_bits!(LANE, 1); - let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { + let c: uint32x2_t = transmute(c); let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vdot_u32(a, b, vreinterpret_u8_u32(c)) + vdot_u32(a, b, transmute(c)) + } +} +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(udot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdot_lane_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t { + static_assert_uimm_bits!(LANE, 1); + let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: uint32x2_t = transmute(c); + let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + let ret_val: uint32x2_t = vdot_u32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Dot product arithmetic (indexed)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"] #[inline] +#[cfg(target_endian = "little")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[target_feature(enable = "neon,dotprod")] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] @@ -9283,11 +9385,45 @@ pub fn vdot_lane_u32(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) )] pub fn vdotq_lane_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 1); - let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { + let c: uint32x2_t = transmute(c); let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vdotq_u32(a, b, vreinterpretq_u8_u32(c)) + vdotq_u32(a, b, transmute(c)) + } +} +#[doc = "Dot product arithmetic (indexed)"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"] +#[inline] +#[cfg(target_endian = "big")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[target_feature(enable = "neon,dotprod")] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(udot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_dotprod", issue = "117224") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vdotq_lane_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t { + static_assert_uimm_bits!(LANE, 1); + let a: uint32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + let b: uint8x16_t = + unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: uint32x2_t = transmute(c); + let c: uint32x4_t = + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let ret_val: uint32x4_t = vdotq_u32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } #[doc = "Dot product arithmetic (vector)"] @@ -71692,6 +71828,7 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { #[doc = "Dot product index form with signed and unsigned integers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] @@ -71710,15 +71847,77 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { )] pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - let c: uint32x2_t = vreinterpret_u32_u8(c); unsafe { + let c: uint32x2_t = transmute(c); let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, vreinterpret_u8_u32(c), b) + vusdot_s32(a, transmute(c), b) + } +} +#[doc = "Dot product index form with signed and unsigned integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_lane_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sudot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + let b: int8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: uint32x2_t = transmute(c); + let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + let ret_val: int32x2_t = vusdot_s32(a, transmute(c), b); + simd_shuffle!(ret_val, ret_val, [1, 0]) + } +} +#[doc = "Dot product index form with signed and unsigned integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"] +#[inline] +#[cfg(target_endian = "little")] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(sudot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vsudotq_lane_s32(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + unsafe { + let c: uint32x2_t = transmute(c); + let c: uint32x4_t = + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + vusdotq_s32(a, transmute(c), b) } } #[doc = "Dot product index form with signed and unsigned integers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"] #[inline] +#[cfg(target_endian = "big")] #[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsudot, LANE = 0))] @@ -71737,11 +71936,16 @@ pub fn vsudot_lane_s32(a: int32x2_t, b: int8x8_t, c: uint8x8_t) )] pub fn vsudotq_lane_s32(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); - let c: uint32x2_t = vreinterpret_u32_u8(c); + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + let b: int8x16_t = + unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: uint8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; unsafe { + let c: uint32x2_t = transmute(c); let c: uint32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, vreinterpretq_u8_u32(c), b) + let ret_val: int32x4_t = vusdotq_s32(a, transmute(c), b); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } #[doc = "Table look-up"] @@ -73612,6 +73816,7 @@ pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { #[doc = "Dot product index form with unsigned and signed integers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] @@ -73630,15 +73835,48 @@ pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { )] pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { + let c: int32x2_t = transmute(c); let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); - vusdot_s32(a, b, vreinterpret_s8_s32(c)) + vusdot_s32(a, b, transmute(c)) + } +} +#[doc = "Dot product index form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_lane_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t { + static_assert_uimm_bits!(LANE, 1); + let a: int32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; + let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: int8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: int32x2_t = transmute(c); + let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + let ret_val: int32x2_t = vusdot_s32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [1, 0]) } } #[doc = "Dot product index form with unsigned and signed integers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"] #[inline] +#[cfg(target_endian = "little")] #[target_feature(enable = "neon,i8mm")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] @@ -73657,11 +73895,45 @@ pub fn vusdot_lane_s32(a: int32x2_t, b: uint8x8_t, c: int8x8_t) )] pub fn vusdotq_lane_s32(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); - let c: int32x2_t = vreinterpret_s32_s8(c); unsafe { + let c: int32x2_t = transmute(c); let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); - vusdotq_s32(a, b, vreinterpretq_s8_s32(c)) + vusdotq_s32(a, b, transmute(c)) + } +} +#[doc = "Dot product index form with unsigned and signed integers"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"] +#[inline] +#[cfg(target_endian = "big")] +#[target_feature(enable = "neon,i8mm")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vusdot, LANE = 0))] +#[cfg_attr( + all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), + assert_instr(usdot, LANE = 0) +)] +#[rustc_legacy_const_generics(3)] +#[cfg_attr( + not(target_arch = "arm"), + unstable(feature = "stdarch_neon_i8mm", issue = "117223") +)] +#[cfg_attr( + target_arch = "arm", + unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") +)] +pub fn vusdotq_lane_s32(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t { + static_assert_uimm_bits!(LANE, 1); + let a: int32x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; + let b: uint8x16_t = + unsafe { simd_shuffle!(b, b, [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]) }; + let c: int8x8_t = unsafe { simd_shuffle!(c, c, [7, 6, 5, 4, 3, 2, 1, 0]) }; + unsafe { + let c: int32x2_t = transmute(c); + let c: int32x4_t = + simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let ret_val: int32x4_t = vusdotq_s32(a, b, transmute(c)); + simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) } } #[doc = "Dot product vector form with unsigned and signed integers"] diff --git a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index bb44aab66b..9ebdc4334c 100644 --- a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -6259,6 +6259,7 @@ intrinsics: - *neon-unstable-i8mm - *neon-cfg-arm-unstable static_defs: ["const LANE: i32"] + big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. safety: safe types: - ['_lane_s32', int32x2_t, uint8x8_t, '[LANE as u32, LANE as u32]',''] @@ -6268,12 +6269,12 @@ intrinsics: - Let: - c - int32x2_t - - FnCall: ['vreinterpret_s32_s8', [c]] + - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_s32_s8', [c]] - Let: - c - "{type[1]}" - FnCall: [simd_shuffle!, [c, c, "{type[3]}"]] - - FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: ['vreinterpret{type[4]}_s8_s32', [c]]}]] + - FnCall: ["vusdot{neon_type[1].no}", [a, b, {FnCall: [transmute, [c]]}]] #'vreinterpret{type[4]}_s8_s32' - name: "vsudot{neon_type[0].lane_nox}" doc: "Dot product index form with signed and unsigned integers" @@ -6288,6 +6289,7 @@ intrinsics: - *neon-unstable-i8mm - *neon-cfg-arm-unstable static_defs: ["const LANE: i32"] + big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. safety: safe types: - [int32x2_t, int8x8_t, uint8x8_t, '[LANE as u32, LANE as u32]', uint32x2_t,''] @@ -6297,7 +6299,7 @@ intrinsics: - Let: - c - uint32x2_t - - FnCall: ['vreinterpret_u32_u8', [c]] + - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_u32_u8', [c]] - Let: - c - "{type[4]}" @@ -6305,7 +6307,7 @@ intrinsics: - FnCall: - "vusdot{neon_type[0].no}" - - a - - FnCall: ['vreinterpret{type[5]}_u8_u32', [c]] + - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret{type[5]}_u8_u32', [c]] - b - name: "vmul{neon_type[1].no}" @@ -7115,6 +7117,7 @@ intrinsics: - FnCall: [rustc_legacy_const_generics, ['3']] - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]] - *neon-cfg-arm-unstable + big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. safety: safe types: - [int32x2_t, int8x8_t, int8x8_t, int32x2_t, '[LANE as u32, LANE as u32]',''] @@ -7124,7 +7127,7 @@ intrinsics: - Let: - c - "{neon_type[3]}" - - FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]] + - FnCall: [transmute, [c]] - Let: - c - "{neon_type[0]}" @@ -7133,7 +7136,7 @@ intrinsics: - "vdot{neon_type[0].no}" - - a - b - - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] + - FnCall: [transmute, [c]] - name: "vdot{neon_type[0].lane_nox}" doc: Dot product arithmetic (indexed) @@ -7149,6 +7152,7 @@ intrinsics: - FnCall: [cfg_attr, [{FnCall: [not, ['target_arch = "arm"']]}, {FnCall: [unstable, ['feature = "stdarch_neon_dotprod"', 'issue = "117224"']]}]] - *neon-cfg-arm-unstable safety: safe + big_endian_inverse: true # TODO: Remove this attribute, and replace transmute with vreinterpret when https://github.com/llvm/llvm-project/pull/169337 is merged, LLVM inlining issue causing assertion failure. types: - [uint32x2_t, uint8x8_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32]',''] - [uint32x4_t, uint8x16_t, uint8x8_t, uint32x2_t, '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]','q'] @@ -7157,7 +7161,7 @@ intrinsics: - Let: - c - "{neon_type[3]}" - - FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]] + - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret_{neon_type[0]}_{neon_type[1]}', [c]] - Let: - c - "{neon_type[0]}" @@ -7166,7 +7170,7 @@ intrinsics: - "vdot{neon_type[0].no}" - - a - b - - FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] + - FnCall: [transmute, [c]] #- FnCall: ['vreinterpret{type[5]}_{neon_type[1]}_{neon_type[0]}', [c]] - name: "vmax{neon_type.no}" doc: Maximum (vector)