Skip to content

Conversation

RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Sep 9, 2025

Now that the XOP rotate intrinsics wrap the __builtin_elementwise_fshl/fshr builtins, marking them constexpr is pretty trivial.

Another step towards #153152 - I'll handle the AVX512 rotates next

Now that they wrap the __builtin_elementwise_fshl/fshr builtin intrinsics this is pretty trivial.

Another step towards llvm#153152 - I'll handle the AVX512 rotates next
@llvmbot llvmbot added the "clang" (Clang issues not falling into any other category), "backend:X86", and "clang:headers" (Headers provided by Clang, e.g. for intrinsics) labels Sep 9, 2025
@RKSimon RKSimon enabled auto-merge (squash) September 9, 2025 10:53
@llvmbot
Copy link
Member

llvmbot commented Sep 9, 2025

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-clang

Author: Simon Pilgrim (RKSimon)

Changes

Now that the XOP rotate intrinsics wrap the __builtin_elementwise_fshl/fshr builtins, marking them constexpr is pretty trivial.

Another step towards #153152 - I'll handle the AVX512 rotates next


Full diff: https://github.com/llvm/llvm-project/pull/157643.diff

2 Files Affected:

  • (modified) clang/lib/Headers/xopintrin.h (+4-4)
  • (modified) clang/test/CodeGen/X86/xop-builtins.c (+4)
diff --git a/clang/lib/Headers/xopintrin.h b/clang/lib/Headers/xopintrin.h
index 7015719659139..aba632f941152 100644
--- a/clang/lib/Headers/xopintrin.h
+++ b/clang/lib/Headers/xopintrin.h
@@ -208,25 +208,25 @@ _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
   return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_rot_epi8(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_elementwise_fshl((__v16qu)__A, (__v16qu)__A, (__v16qu)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_rot_epi16(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_elementwise_fshl((__v8hu)__A, (__v8hu)__A, (__v8hu)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_rot_epi32(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm_rot_epi64(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B);
diff --git a/clang/test/CodeGen/X86/xop-builtins.c b/clang/test/CodeGen/X86/xop-builtins.c
index 994fc7b3e136a..a3cff2c89da1f 100644
--- a/clang/test/CodeGen/X86/xop-builtins.c
+++ b/clang/test/CodeGen/X86/xop-builtins.c
@@ -215,24 +215,28 @@ __m128i test_mm_rot_epi8(__m128i a, __m128i b) {
   // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
   return _mm_rot_epi8(a, b);
 }
+TEST_CONSTEXPR(match_v16qi(_mm_rot_epi8((__m128i)(__v16qs){15, -14, -13, -12, 11, 10, 9, 8, 7, 6, 5, -4, 3, -2, 1, 0}, (__m128i)(__v16qs){0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15}), 15, -27, -4, -89, -80, 65, 36, 4, 7, 12, 65, -25, 48, -33, 4, 0));
 
 __m128i test_mm_rot_epi16(__m128i a, __m128i b) {
   // CHECK-LABEL: test_mm_rot_epi16
   // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
   return _mm_rot_epi16(a, b);
 }
+TEST_CONSTEXPR(match_v8hi(_mm_rot_epi16((__m128i)(__v8hi){7, 6, 5, -4, 3, -2, 1, 0}, (__m128i)(__v8hi){0, 1, -2, 3, -4, 5, -6, 7}), 7, 12, 16385, -25, 12288, -33, 1024, 0));
 
 __m128i test_mm_rot_epi32(__m128i a, __m128i b) {
   // CHECK-LABEL: test_mm_rot_epi32
   // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
   return _mm_rot_epi32(a, b);
 }
+TEST_CONSTEXPR(match_v4si(_mm_rot_epi32((__m128i)(__v4si){3, -2, 1, 0}, (__m128i)(__v4si){0, 1, -2, 3}), 3, -3, 1073741824, 0));
 
 __m128i test_mm_rot_epi64(__m128i a, __m128i b) {
   // CHECK-LABEL: test_mm_rot_epi64
   // CHECK: call {{.*}}<2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
   return _mm_rot_epi64(a, b);
 }
+TEST_CONSTEXPR(match_v2di(_mm_rot_epi64((__m128i)(__v2di){99, -55}, (__m128i)(__v2di){1, -2}), 198, 9223372036854775794LL));
 
 __m128i test_mm_roti_epi8(__m128i a) {
   // CHECK-LABEL: test_mm_roti_epi8

Copy link

github-actions bot commented Sep 9, 2025

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff origin/main HEAD --extensions h,c -- clang/lib/Headers/xopintrin.h clang/test/CodeGen/X86/xop-builtins.c

⚠️
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing origin/main to the base branch/commit you want to compare against.
⚠️

View the diff from clang-format here.
diff --git a/clang/lib/Headers/xopintrin.h b/clang/lib/Headers/xopintrin.h
index aba632f94..40915402f 100644
--- a/clang/lib/Headers/xopintrin.h
+++ b/clang/lib/Headers/xopintrin.h
@@ -209,26 +209,22 @@ _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_rot_epi8(__m128i __A, __m128i __B)
-{
+_mm_rot_epi8(__m128i __A, __m128i __B) {
   return (__m128i)__builtin_elementwise_fshl((__v16qu)__A, (__v16qu)__A, (__v16qu)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_rot_epi16(__m128i __A, __m128i __B)
-{
+_mm_rot_epi16(__m128i __A, __m128i __B) {
   return (__m128i)__builtin_elementwise_fshl((__v8hu)__A, (__v8hu)__A, (__v8hu)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_rot_epi32(__m128i __A, __m128i __B)
-{
+_mm_rot_epi32(__m128i __A, __m128i __B) {
   return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_rot_epi64(__m128i __A, __m128i __B)
-{
+_mm_rot_epi64(__m128i __A, __m128i __B) {
   return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B);
 }
 

@RKSimon RKSimon merged commit 3a76747 into llvm:main Sep 9, 2025
12 of 13 checks passed
@RKSimon RKSimon deleted the x86-xop-rotate-constexpr branch September 9, 2025 11:38
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants