Skip to content

Conversation

RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Sep 23, 2025

Merge the function attributes as they are all constexpr - the CVTPS2PH will stay as macros wrapping the builtin functions as we need to enforce compile time rounding mode constraints

Merge the function attributes as they are all constexpr - the CVTPS2PH will stay as macros wrapping the builtin functions as we need to enforce compile time rounding mode constraints
@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics labels Sep 23, 2025
@RKSimon RKSimon enabled auto-merge (squash) September 23, 2025 14:57
@llvmbot
Copy link
Member

llvmbot commented Sep 23, 2025

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-clang

Author: Simon Pilgrim (RKSimon)

Changes

Merge the function attributes as they are all constexpr - the CVTPS2PH will stay as macros wrapping the builtin functions as we need to enforce compile time rounding mode constraints


Full diff: https://github.com/llvm/llvm-project/pull/160317.diff

1 Files Affected:

  • (modified) clang/lib/Headers/f16cintrin.h (+15-14)
diff --git a/clang/lib/Headers/f16cintrin.h b/clang/lib/Headers/f16cintrin.h
index 83965334e2c9b..b6ca7088d3864 100644
--- a/clang/lib/Headers/f16cintrin.h
+++ b/clang/lib/Headers/f16cintrin.h
@@ -15,17 +15,20 @@
 #define __F16CINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS128 \
-  __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128)))
-#define __DEFAULT_FN_ATTRS256 \
-  __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256)))
-
 #if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
+                 __min_vector_width__(128))) constexpr
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
+                 __min_vector_width__(256))) constexpr
 #else
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
+                 __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("f16c"),           \
+                 __min_vector_width__(256)))
 #endif
 
 /* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
@@ -43,7 +46,7 @@
 /// \param __a
 ///    A 16-bit half-precision float value.
 /// \returns The converted 32-bit float value.
-static __inline float __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline float __DEFAULT_FN_ATTRS128
 _cvtsh_ss(unsigned short __a)
 {
   return (float)__builtin_bit_cast(__fp16, __a);
@@ -112,7 +115,7 @@ _cvtsh_ss(unsigned short __a)
 ///    A 128-bit vector containing 16-bit half-precision float values. The lower
 ///    64 bits are used in the conversion.
 /// \returns A 128-bit vector of [4 x float] containing converted float values.
-static __inline __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline __m128 __DEFAULT_FN_ATTRS128
 _mm_cvtph_ps(__m128i __a)
 {
   typedef __fp16 __v4fp16 __attribute__((__vector_size__(8)));
@@ -159,7 +162,7 @@ _mm_cvtph_ps(__m128i __a)
 ///    converted to 32-bit single-precision float values.
 /// \returns A vector of [8 x float] containing the converted 32-bit
 ///    single-precision float values.
-static __inline __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline __m256 __DEFAULT_FN_ATTRS256
 _mm256_cvtph_ps(__m128i __a)
 {
   typedef __fp16 __v8fp16 __attribute__((__vector_size__(16), __aligned__(16)));
@@ -169,7 +172,5 @@ _mm256_cvtph_ps(__m128i __a)
 
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
-#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
-#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
 
 #endif /* __F16CINTRIN_H */

Copy link

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff origin/main HEAD --extensions h -- clang/lib/Headers/f16cintrin.h

⚠️
The reproduction instructions above might return results for more than one PR
in a stack if you are using a stacked PR workflow. You can limit the results by
changing origin/main to the base branch/commit you want to compare against.
⚠️

View the diff from clang-format here.
diff --git a/clang/lib/Headers/f16cintrin.h b/clang/lib/Headers/f16cintrin.h
index b6ca7088d..2368c4189 100644
--- a/clang/lib/Headers/f16cintrin.h
+++ b/clang/lib/Headers/f16cintrin.h
@@ -46,9 +46,7 @@
 /// \param __a
 ///    A 16-bit half-precision float value.
 /// \returns The converted 32-bit float value.
-static __inline float __DEFAULT_FN_ATTRS128
-_cvtsh_ss(unsigned short __a)
-{
+static __inline float __DEFAULT_FN_ATTRS128 _cvtsh_ss(unsigned short __a) {
   return (float)__builtin_bit_cast(__fp16, __a);
 }
 
@@ -115,9 +113,7 @@ _cvtsh_ss(unsigned short __a)
 ///    A 128-bit vector containing 16-bit half-precision float values. The lower
 ///    64 bits are used in the conversion.
 /// \returns A 128-bit vector of [4 x float] containing converted float values.
-static __inline __m128 __DEFAULT_FN_ATTRS128
-_mm_cvtph_ps(__m128i __a)
-{
+static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_cvtph_ps(__m128i __a) {
   typedef __fp16 __v4fp16 __attribute__((__vector_size__(8)));
 
   __v4hi __v = __builtin_shufflevector((__v8hi)__a, (__v8hi)__a, 0, 1, 2, 3);
@@ -162,9 +158,7 @@ _mm_cvtph_ps(__m128i __a)
 ///    converted to 32-bit single-precision float values.
 /// \returns A vector of [8 x float] containing the converted 32-bit
 ///    single-precision float values.
-static __inline __m256 __DEFAULT_FN_ATTRS256
-_mm256_cvtph_ps(__m128i __a)
-{
+static __inline __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtph_ps(__m128i __a) {
   typedef __fp16 __v8fp16 __attribute__((__vector_size__(16), __aligned__(16)));
 
   return (__m256) __builtin_convertvector((__v8fp16)__a, __v8sf);

@RKSimon RKSimon merged commit 1ed6be6 into llvm:main Sep 23, 2025
11 of 12 checks passed
@RKSimon RKSimon deleted the f16c-all-constexpr-functions branch September 24, 2025 07:22
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants