-
Notifications
You must be signed in to change notification settings - Fork 15k
[X86] Allow XOP rotate intrinsics to be used in constexpr #157643
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Now that the XOP rotate intrinsics wrap the __builtin_elementwise_fshl/fshr builtin intrinsics, allowing them in constexpr is pretty trivial. Another step towards llvm#153152 - I'll handle the AVX512 rotates next.
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang Author: Simon Pilgrim (RKSimon) Changes: Now that they wrap the __builtin_elementwise_fshl/fshr builtin intrinsics this is pretty trivial. Another step towards #153152 - I'll handle the AVX512 rotates next. Full diff: https://github.com/llvm/llvm-project/pull/157643.diff 2 Files Affected:
diff --git a/clang/lib/Headers/xopintrin.h b/clang/lib/Headers/xopintrin.h
index 7015719659139..aba632f941152 100644
--- a/clang/lib/Headers/xopintrin.h
+++ b/clang/lib/Headers/xopintrin.h
@@ -208,25 +208,25 @@ _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_rot_epi8(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_elementwise_fshl((__v16qu)__A, (__v16qu)__A, (__v16qu)__B);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_rot_epi16(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_elementwise_fshl((__v8hu)__A, (__v8hu)__A, (__v8hu)__B);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_rot_epi32(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_rot_epi64(__m128i __A, __m128i __B)
{
return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B);
diff --git a/clang/test/CodeGen/X86/xop-builtins.c b/clang/test/CodeGen/X86/xop-builtins.c
index 994fc7b3e136a..a3cff2c89da1f 100644
--- a/clang/test/CodeGen/X86/xop-builtins.c
+++ b/clang/test/CodeGen/X86/xop-builtins.c
@@ -215,24 +215,28 @@ __m128i test_mm_rot_epi8(__m128i a, __m128i b) {
// CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
return _mm_rot_epi8(a, b);
}
+TEST_CONSTEXPR(match_v16qi(_mm_rot_epi8((__m128i)(__v16qs){15, -14, -13, -12, 11, 10, 9, 8, 7, 6, 5, -4, 3, -2, 1, 0}, (__m128i)(__v16qs){0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15}), 15, -27, -4, -89, -80, 65, 36, 4, 7, 12, 65, -25, 48, -33, 4, 0));
__m128i test_mm_rot_epi16(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_rot_epi16
// CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
return _mm_rot_epi16(a, b);
}
+TEST_CONSTEXPR(match_v8hi(_mm_rot_epi16((__m128i)(__v8hi){7, 6, 5, -4, 3, -2, 1, 0}, (__m128i)(__v8hi){0, 1, -2, 3, -4, 5, -6, 7}), 7, 12, 16385, -25, 12288, -33, 1024, 0));
__m128i test_mm_rot_epi32(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_rot_epi32
// CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
return _mm_rot_epi32(a, b);
}
+TEST_CONSTEXPR(match_v4si(_mm_rot_epi32((__m128i)(__v4si){3, -2, 1, 0}, (__m128i)(__v4si){0, 1, -2, 3}), 3, -3, 1073741824, 0));
__m128i test_mm_rot_epi64(__m128i a, __m128i b) {
// CHECK-LABEL: test_mm_rot_epi64
// CHECK: call {{.*}}<2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
return _mm_rot_epi64(a, b);
}
+TEST_CONSTEXPR(match_v2di(_mm_rot_epi64((__m128i)(__v2di){99, -55}, (__m128i)(__v2di){1, -2}), 198, 9223372036854775794LL));
__m128i test_mm_roti_epi8(__m128i a) {
// CHECK-LABEL: test_mm_roti_epi8
|
You can test this locally with the following command: git-clang-format --diff origin/main HEAD --extensions h,c -- clang/lib/Headers/xopintrin.h clang/test/CodeGen/X86/xop-builtins.c
View the diff from clang-format here.
diff --git a/clang/lib/Headers/xopintrin.h b/clang/lib/Headers/xopintrin.h
index aba632f94..40915402f 100644
--- a/clang/lib/Headers/xopintrin.h
+++ b/clang/lib/Headers/xopintrin.h
@@ -209,26 +209,22 @@ _mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
}
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_rot_epi8(__m128i __A, __m128i __B)
-{
+_mm_rot_epi8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_elementwise_fshl((__v16qu)__A, (__v16qu)__A, (__v16qu)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_rot_epi16(__m128i __A, __m128i __B)
-{
+_mm_rot_epi16(__m128i __A, __m128i __B) {
return (__m128i)__builtin_elementwise_fshl((__v8hu)__A, (__v8hu)__A, (__v8hu)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_rot_epi32(__m128i __A, __m128i __B)
-{
+_mm_rot_epi32(__m128i __A, __m128i __B) {
return (__m128i)__builtin_elementwise_fshl((__v4su)__A, (__v4su)__A, (__v4su)__B);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm_rot_epi64(__m128i __A, __m128i __B)
-{
+_mm_rot_epi64(__m128i __A, __m128i __B) {
return (__m128i)__builtin_elementwise_fshl((__v2du)__A, (__v2du)__A, (__v2du)__B);
}
|
Now that the XOP rotate intrinsics wrap the __builtin_elementwise_fshl/fshr builtin intrinsics, allowing them in constexpr is pretty trivial.
Another step towards #153152 - I'll handle the AVX512 rotates next.