-
Notifications
You must be signed in to change notification settings - Fork 305
s390x: use simd_shuffle! macro
#1965
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -896,31 +896,31 @@ mod sealed { | |
| #[cfg_attr(test, assert_instr(vrepb, IMM2 = 1))] | ||
| unsafe fn vrepb<const IMM2: u32>(a: vector_signed_char) -> vector_signed_char { | ||
| static_assert_uimm_bits!(IMM2, 4); | ||
| simd_shuffle(a, a, const { u32x16::from_array([IMM2; 16]) }) | ||
| simd_shuffle!(a, a, [IMM2; 16]) | ||
| } | ||
|
|
||
| #[inline] | ||
| #[target_feature(enable = "vector")] | ||
| #[cfg_attr(test, assert_instr(vreph, IMM2 = 1))] | ||
| unsafe fn vreph<const IMM2: u32>(a: vector_signed_short) -> vector_signed_short { | ||
| static_assert_uimm_bits!(IMM2, 3); | ||
| simd_shuffle(a, a, const { u32x8::from_array([IMM2; 8]) }) | ||
| simd_shuffle!(a, a, [IMM2; 8]) | ||
| } | ||
|
|
||
| #[inline] | ||
| #[target_feature(enable = "vector")] | ||
| #[cfg_attr(test, assert_instr(vrepf, IMM2 = 1))] | ||
| unsafe fn vrepf<const IMM2: u32>(a: vector_signed_int) -> vector_signed_int { | ||
| static_assert_uimm_bits!(IMM2, 2); | ||
| simd_shuffle(a, a, const { u32x4::from_array([IMM2; 4]) }) | ||
| simd_shuffle!(a, a, [IMM2; 4]) | ||
| } | ||
|
|
||
| #[inline] | ||
| #[target_feature(enable = "vector")] | ||
| #[cfg_attr(test, assert_instr(vrepg, IMM2 = 1))] | ||
| unsafe fn vrepg<const IMM2: u32>(a: vector_signed_long_long) -> vector_signed_long_long { | ||
| static_assert_uimm_bits!(IMM2, 1); | ||
| simd_shuffle(a, a, const { u32x2::from_array([IMM2; 2]) }) | ||
| simd_shuffle!(a, a, [IMM2; 2]) | ||
| } | ||
|
|
||
| macro_rules! impl_vec_splat { | ||
|
|
@@ -5307,11 +5307,13 @@ pub unsafe fn vec_search_string_until_zero_cc<T: sealed::VectorSearchString>( | |
| #[inline] | ||
| #[target_feature(enable = "vector-enhancements-1")] | ||
| #[unstable(feature = "stdarch_s390x", issue = "135681")] | ||
| // FIXME: this emits `vflls` where `vldeb` is expected | ||
| // #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vldeb))] | ||
| // NOTE: `vflls` and `vldeb` are equivalent; our disassembler prefers vflls. | ||
| #[cfg_attr( | ||
| all(test, target_feature = "vector-enhancements-1"), | ||
| assert_instr(vflls) | ||
| )] | ||
| pub unsafe fn vec_doublee(a: vector_float) -> vector_double { | ||
| let even = simd_shuffle::<_, _, f32x2>(a, a, const { u32x2::from_array([0, 2]) }); | ||
| simd_as(even) | ||
| simd_as::<f32x2, vector_double>(simd_shuffle!(a, a, [0, 2])) | ||
| } | ||
|
|
||
| /// Vector Convert from double to float (even elements) | ||
|
|
@@ -5322,11 +5324,7 @@ pub unsafe fn vec_doublee(a: vector_float) -> vector_double { | |
| // #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vledb))] | ||
| pub unsafe fn vec_floate(a: vector_double) -> vector_float { | ||
| let truncated: f32x2 = simd_as(a); | ||
| simd_shuffle( | ||
| truncated, | ||
| truncated, | ||
| const { u32x4::from_array([0, 0, 1, 1]) }, | ||
| ) | ||
| simd_shuffle!(truncated, truncated, [0, 0, 1, 1]) | ||
|
Comment on lines
-5325
to
+5327
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. And here, this is not all that important of course, but it's a bit of a wart. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, it's deliberately left unspecified what happens to the odd elements. This matches the behavior of the underlying VECTOR FP LOAD ROUNDED instruction, which also states: "The data in the odd elements of the first operand is unpredictable." You could mask it to some defined value, but at extra runtime cost of course.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Right, well, there is nothing we can do then. There is always inline assembly if someone really needs to emit the exact instruction. |
||
| } | ||
|
|
||
| /// Vector Convert from int to float | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@uweigand
Just checking, is that correct? This implementation produces
`vldeb` on godbolt, so I really think it's the disassembler that is picking `vflls` here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, the two are equivalent; in fact the binary machine code is identical, it's just different assembler mnemonics for the same instruction. Both
`vldeb v1, v2` and `vflls v1, v2` are simply extended mnemonics for the "base" instruction `vfll v1, v2, 0, 0` (see the z/Architecture Principles of Operation (PoP), chapter 24, under VECTOR FP LOAD LENGTHENED). It's a bit unfortunate to have two mnemonics for the same thing, but it's due to historical reasons (`vflls` is the more recent one, and probably should be preferred).