Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 0 additions & 84 deletions crates/core_arch/src/arm_shared/neon/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33834,7 +33834,6 @@ pub fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
#[doc = "Add pairwise."]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u8)"]
#[inline(always)]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
Expand All @@ -33854,36 +33853,8 @@ pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
unsafe { transmute(vpadd_s8(transmute(a), transmute(b))) }
}
#[doc = "Add pairwise."]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u8)"]
#[inline(always)]
#[cfg(target_endian = "big")]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
#[cfg_attr(
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
assert_instr(addp)
)]
#[cfg_attr(
not(target_arch = "arm"),
stable(feature = "neon_intrinsics", since = "1.59.0")
)]
#[cfg_attr(
target_arch = "arm",
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
// Variant gated on `#[cfg(target_endian = "big")]` (see the attribute list above).
// Reverses the lane order of both operands, performs the pairwise add by delegating to
// `vpadd_s8`, and then applies the same lane reversal to the result.
pub fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
// Reverse all eight lanes of each operand (indices 7 down to 0).
let a: uint8x8_t = unsafe { simd_shuffle!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) };
let b: uint8x8_t = unsafe { simd_shuffle!(b, b, [7, 6, 5, 4, 3, 2, 1, 0]) };
unsafe {
// Reinterpret the operands for the `vpadd_s8` call (target types are inferred by
// `transmute`) and reinterpret its result back as `uint8x8_t`.
let ret_val: uint8x8_t = transmute(vpadd_s8(transmute(a), transmute(b)));
// Apply the same lane reversal to the result before returning it.
simd_shuffle!(ret_val, ret_val, [7, 6, 5, 4, 3, 2, 1, 0])
}
}
#[doc = "Add pairwise."]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u16)"]
#[inline(always)]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
Expand All @@ -33903,36 +33874,8 @@ pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
unsafe { transmute(vpadd_s16(transmute(a), transmute(b))) }
}
#[doc = "Add pairwise."]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u16)"]
#[inline(always)]
#[cfg(target_endian = "big")]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
#[cfg_attr(
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
assert_instr(addp)
)]
#[cfg_attr(
not(target_arch = "arm"),
stable(feature = "neon_intrinsics", since = "1.59.0")
)]
#[cfg_attr(
target_arch = "arm",
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
// Variant gated on `#[cfg(target_endian = "big")]` (see the attribute list above).
// Reverses the lane order of both operands, performs the pairwise add by delegating to
// `vpadd_s16`, and then applies the same lane reversal to the result.
pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
// Reverse all four lanes of each operand (indices 3 down to 0).
let a: uint16x4_t = unsafe { simd_shuffle!(a, a, [3, 2, 1, 0]) };
let b: uint16x4_t = unsafe { simd_shuffle!(b, b, [3, 2, 1, 0]) };
unsafe {
// Reinterpret the operands for the `vpadd_s16` call (target types are inferred by
// `transmute`) and reinterpret its result back as `uint16x4_t`.
let ret_val: uint16x4_t = transmute(vpadd_s16(transmute(a), transmute(b)));
// Apply the same lane reversal to the result before returning it.
simd_shuffle!(ret_val, ret_val, [3, 2, 1, 0])
}
}
#[doc = "Add pairwise."]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u32)"]
#[inline(always)]
#[cfg(target_endian = "little")]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
Expand All @@ -33951,33 +33894,6 @@ pub fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
// Thin wrapper: reinterprets both `uint32x2_t` operands with `transmute`, delegates the
// pairwise add to `vpadd_s32`, and reinterprets the returned vector as `uint32x2_t`.
pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
unsafe { transmute(vpadd_s32(transmute(a), transmute(b))) }
}
#[doc = "Add pairwise."]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpadd_u32)"]
#[inline(always)]
#[cfg(target_endian = "big")]
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
#[cfg_attr(
all(test, any(target_arch = "aarch64", target_arch = "arm64ec")),
assert_instr(addp)
)]
#[cfg_attr(
not(target_arch = "arm"),
stable(feature = "neon_intrinsics", since = "1.59.0")
)]
#[cfg_attr(
target_arch = "arm",
unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")
)]
// Variant gated on `#[cfg(target_endian = "big")]` (see the attribute list above).
// Swaps the two lanes of both operands, performs the pairwise add by delegating to
// `vpadd_s32`, and then applies the same lane swap to the result.
pub fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
// Swap the two lanes of each operand (indices 1, 0).
let a: uint32x2_t = unsafe { simd_shuffle!(a, a, [1, 0]) };
let b: uint32x2_t = unsafe { simd_shuffle!(b, b, [1, 0]) };
unsafe {
// Reinterpret the operands for the `vpadd_s32` call (target types are inferred by
// `transmute`) and reinterpret its result back as `uint32x2_t`.
let ret_val: uint32x2_t = transmute(vpadd_s32(transmute(a), transmute(b)));
// Apply the same lane swap to the result before returning it.
simd_shuffle!(ret_val, ret_val, [1, 0])
}
}
#[doc = "Signed Add and Accumulate Long Pairwise."]
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vpaddl_s8)"]
#[inline(always)]
Expand Down
3 changes: 1 addition & 2 deletions crates/intrinsic-test/src/arm/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType {
/// Gets a string containing the typename for this type in C format.
fn c_type(&self) -> String {
let prefix = self.kind.c_prefix();
let const_prefix = if self.constant { "const " } else { "" };

if let (Some(bit_len), simd_len, vec_len) = (self.bit_len, self.simd_len, self.vec_len) {
match (simd_len, vec_len) {
(None, None) => format!("{const_prefix}{prefix}{bit_len}_t"),
(None, None) => format!("{prefix}{bit_len}_t"),
(Some(simd), None) => format!("{prefix}{bit_len}x{simd}_t"),
(Some(simd), Some(vec)) => format!("{prefix}{bit_len}x{simd}x{vec}_t"),
(None, Some(_)) => todo!("{self:#?}"), // Likely an invalid case
Expand Down
13 changes: 7 additions & 6 deletions crates/intrinsic-test/src/common/argument.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ where
}

pub fn to_c_type(&self) -> String {
let prefix = if self.ty.constant { "const " } else { "" };
format!("{prefix}{}", self.ty.c_type())
self.ty.c_type()
Copy link
Copy Markdown
Contributor

@folkertdev folkertdev Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You've now removed the "const " prefix in both places (here and in c_type) — is that right?

View changes since the review

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, it should be handled by generate_c_constraint_blocks() now, and this avoids a warning about a duplicated "const const" appearing in the output.

}

pub fn generate_name(&self) -> String {
Expand Down Expand Up @@ -176,9 +175,10 @@ where
pub fn load_values_c(&self, indentation: Indentation) -> String {
self.iter()
.filter(|&arg| !arg.has_constraint())
.map(|arg| {
.enumerate()
.map(|(idx, arg)| {
format!(
"{indentation}{ty} {name} = cast<{ty}>({load}(&{name}_vals[i]));\n",
"{indentation}{ty} {name} = cast<{ty}>({load}(&{name}_vals[i+{idx}]));\n",
Copy link
Copy Markdown
Contributor

@folkertdev folkertdev Apr 17, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be clear, not using idx before was a bug, right?

View changes since the review

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doing it this way (ensuring that two inputs to the intrinsic aren't always equal) provides a bit of extra coverage and should slightly reduce runtime. It was just an oversight that we never did it this way originally.

ty = arg.to_c_type(),
name = arg.generate_name(),
load = if arg.is_simd() {
Expand All @@ -197,15 +197,16 @@ where
pub fn load_values_rust(&self, indentation: Indentation) -> String {
self.iter()
.filter(|&arg| !arg.has_constraint())
.map(|arg| {
.enumerate()
.map(|(idx, arg)| {
let load = if arg.is_simd() {
arg.ty.get_load_function(Language::Rust)
} else {
"*".to_string()
};
let typecast = if load.len() > 2 { "as _" } else { "" };
format!(
"{indentation}let {name} = {load}({vals_name}.as_ptr().offset(i){typecast});\n",
"{indentation}let {name} = {load}({vals_name}.as_ptr().offset(i+{idx}){typecast});\n",
name = arg.generate_name(),
vals_name = arg.rust_vals_array_name(),
)
Expand Down
2 changes: 2 additions & 0 deletions crates/intrinsic-test/src/common/compare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ pub fn compare_outputs(
.filter_map(|output| output.trim().split_once("\n"))
.collect::<HashMap<&str, &str>>();

assert!(!c_output_map.is_empty(), "No C intrinsic output found!");

let intrinsics = c_output_map
.keys()
.chain(rust_output_map.keys())
Expand Down
7 changes: 7 additions & 0 deletions crates/intrinsic-test/src/common/gen_c.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@ pub fn generate_c_test_loop<T: IntrinsicTypeDefinition + Sized>(
passes: u32,
) -> std::io::Result<()> {
let body_indentation = indentation.nested();
// Successive arguments are offset increasingly from their value array start
let passes = passes + 1
- intrinsic
.arguments
.iter()
.filter(|&arg| !arg.has_constraint())
.count() as u32;
writeln!(
w,
"{indentation}for (int i=0; i<{passes}; i++) {{\n\
Expand Down
6 changes: 6 additions & 0 deletions crates/intrinsic-test/src/common/gen_rust.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,12 @@ pub fn generate_rust_test_loop<T: IntrinsicTypeDefinition>(
passes: u32,
) -> std::io::Result<()> {
let intrinsic_name = &intrinsic.name;
let passes = passes + 1
- intrinsic
.arguments
.iter()
.filter(|&arg| !arg.has_constraint())
.count() as u32;

// Each function (and each specialization) has its own type. Erase that type with a cast.
let mut coerce = String::from("unsafe fn(");
Expand Down
1 change: 1 addition & 0 deletions crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13298,6 +13298,7 @@ intrinsics:
doc: "Add pairwise."
arguments: ["a: {neon_type[0]}", "b: {neon_type[0]}"]
return_type: "{neon_type[0]}"
big_endian_inverse: false
attr:
- *neon-v7
- FnCall: [cfg_attr, [*test-is-arm, {FnCall: [assert_instr, [vpadd]]}]]
Expand Down