diff --git a/crates/core_arch/src/arm_shared/neon/generated.rs b/crates/core_arch/src/arm_shared/neon/generated.rs index cf4d10162e..106c814749 100644 --- a/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/crates/core_arch/src/arm_shared/neon/generated.rs @@ -33834,7 +33834,6 @@ pub fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { #[doc = "Add pairwise."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u8)"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] @@ -33854,36 +33853,8 @@ pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { unsafe { transmute(vpadd_s8(transmute(a), transmute(b))) } } #[doc = "Add pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u8)"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(addp) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { - let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) }; - let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) }; - unsafe { - let ret_val: uint8x8_t = transmute(vpadd_s8(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0]) - } -} -#[doc = "Add pairwise."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u16)"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] @@ -33903,36 +33874,8 @@ pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { unsafe { transmute(vpadd_s16(transmute(a), transmute(b))) } } #[doc = "Add pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u16)"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(addp) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { - let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) }; - let b: uint16x4_t = unsafe { simd_shuffle!(b, b, [3, 2, 1, 0]) }; - unsafe { - let ret_val: uint16x4_t = transmute(vpadd_s16(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0]) - } -} -#[doc = "Add pairwise."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u32)"] #[inline(always)] -#[cfg(target_endian = "little")] #[target_feature(enable = "neon")] #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] @@ -33951,33 +33894,6 @@ pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { unsafe { transmute(vpadd_s32(transmute(a), transmute(b))) } } -#[doc = "Add pairwise."] -#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u32)"] -#[inline(always)] -#[cfg(target_endian = "big")] -#[target_feature(enable = "neon")] -#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] -#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] -#[cfg_attr( - all(test, any(target_arch = "aarch64", target_arch = "arm64ec")), - assert_instr(addp) -)] -#[cfg_attr( - not(target_arch = "arm"), - stable(feature = "neon_intrinsics", since = "1.59.0") -)] -#[cfg_attr( - target_arch = "arm", - unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800") -)] -pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { - let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) }; - let b: uint32x2_t = unsafe { simd_shuffle!(b, b, [1, 0]) }; - unsafe { - let ret_val: uint32x2_t = transmute(vpadd_s32(transmute(a), transmute(b))); - simd_shuffle!(ret_val, ret_val, [1, 0]) - } -} #[doc = "Signed Add and Accumulate Long Pairwise."] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s8)"] #[inline(always)] diff --git a/crates/intrinsic-test/src/arm/types.rs b/crates/intrinsic-test/src/arm/types.rs index 4be8d1e48b..18468bd558 100644 --- a/crates/intrinsic-test/src/arm/types.rs +++ b/crates/intrinsic-test/src/arm/types.rs @@ -7,11 +7,10 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { /// Gets a string containing the typename for this type in C format. fn c_type(&self) -> String { let prefix = self.kind.c_prefix(); - let const_prefix = if self.constant { "const " } else { "" }; if let (Some(bit_len), simd_len, vec_len) = (self.bit_len, self.simd_len, self.vec_len) { match (simd_len, vec_len) { - (None, None) => format!("{const_prefix}{prefix}{bit_len}_t"), + (None, None) => format!("{prefix}{bit_len}_t"), (Some(simd), None) => format!("{prefix}{bit_len}x{simd}_t"), (Some(simd), Some(vec)) => format!("{prefix}{bit_len}x{simd}x{vec}_t"), (None, Some(_)) => todo!("{self:#?}"), // Likely an invalid case diff --git a/crates/intrinsic-test/src/common/argument.rs b/crates/intrinsic-test/src/common/argument.rs index 385cf32d3b..413d5314d3 100644 --- a/crates/intrinsic-test/src/common/argument.rs +++ b/crates/intrinsic-test/src/common/argument.rs @@ -30,8 +30,7 @@ where } pub fn to_c_type(&self) -> String { - let prefix = if self.ty.constant { "const " } else { "" }; - format!("{prefix}{}", self.ty.c_type()) + self.ty.c_type() } pub fn generate_name(&self) -> String { @@ -176,9 +175,10 @@ where pub fn load_values_c(&self, indentation: Indentation) -> String { self.iter() .filter(|&arg| !arg.has_constraint()) - .map(|arg| { + .enumerate() + .map(|(idx, arg)| { format!( - "{indentation}{ty} {name} = cast<{ty}>({load}(&{name}_vals[i]));\n", + "{indentation}{ty} {name} = cast<{ty}>({load}(&{name}_vals[i+{idx}]));\n", ty = arg.to_c_type(), name = arg.generate_name(), load = if arg.is_simd() { @@ -197,7 +197,8 @@ where pub fn load_values_rust(&self, indentation: Indentation) -> String { self.iter() .filter(|&arg| !arg.has_constraint()) - .map(|arg| { + .enumerate() + .map(|(idx, arg)| { let load = if arg.is_simd() { arg.ty.get_load_function(Language::Rust) } else { @@ -205,7 +206,7 @@ where }; let typecast = if load.len() > 2 { "as _" } else { "" }; format!( - "{indentation}let {name} = {load}({vals_name}.as_ptr().offset(i){typecast});\n", + "{indentation}let {name} = {load}({vals_name}.as_ptr().offset(i+{idx}){typecast});\n", name = arg.generate_name(), vals_name = arg.rust_vals_array_name(), ) diff --git a/crates/intrinsic-test/src/common/compare.rs b/crates/intrinsic-test/src/common/compare.rs index c22d7fd4ec..c1438d1bbf 100644 --- a/crates/intrinsic-test/src/common/compare.rs +++ b/crates/intrinsic-test/src/common/compare.rs @@ -78,6 +78,8 @@ pub fn compare_outputs( .filter_map(|output| output.trim().split_once("\n")) .collect::>(); + assert!(!c_output_map.is_empty(), "No C intrinsic output found!"); + let intrinsics = c_output_map .keys() .chain(rust_output_map.keys()) diff --git a/crates/intrinsic-test/src/common/gen_c.rs b/crates/intrinsic-test/src/common/gen_c.rs index a95b4c36b7..12b5a600d5 100644 --- a/crates/intrinsic-test/src/common/gen_c.rs +++ b/crates/intrinsic-test/src/common/gen_c.rs @@ -25,6 +25,13 @@ pub fn generate_c_test_loop( passes: u32, ) -> std::io::Result<()> { let body_indentation = indentation.nested(); + // Successive arguments are offset increasingly from their value array start + let passes = passes + 1 + - intrinsic + .arguments + .iter() + .filter(|&arg| !arg.has_constraint()) + .count() as u32; writeln!( w, "{indentation}for (int i=0; i<{passes}; i++) {{\n\ diff --git a/crates/intrinsic-test/src/common/gen_rust.rs b/crates/intrinsic-test/src/common/gen_rust.rs index 82b97701bb..5a0bc7b4d4 100644 --- a/crates/intrinsic-test/src/common/gen_rust.rs +++ b/crates/intrinsic-test/src/common/gen_rust.rs @@ -214,6 +214,12 @@ pub fn generate_rust_test_loop( passes: u32, ) -> std::io::Result<()> { let intrinsic_name = &intrinsic.name; + let passes = passes + 1 + - intrinsic + .arguments + .iter() + .filter(|&arg| !arg.has_constraint()) + .count() as u32; // Each function (and each specialization) has its own type. Erase that type with a cast. let mut coerce = String::from("unsafe fn("); diff --git a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index f6ef7f17d7..7902381fa2 100644 --- a/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -13298,6 +13298,7 @@ intrinsics: doc: "Add pairwise." arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"] return_type: "{neon_type[0]}" + big_endian_inverse: false attr: - *neon-v7 - FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpadd]]}]]