New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libcxx] Use generic builtins for popcount, clz and ctz #86563
[libcxx] Use generic builtins for popcount, clz and ctz #86563
Conversation
@llvm/pr-subscribers-libcxx Author: Marc Auberer (marcauberer) ChangesFixes #86556 Use cc @nickdesaulniers Full diff: https://github.com/llvm/llvm-project/pull/86563.diff 6 Files Affected:
diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index 396cfc2c3f4064..28d1e03b4a381e 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -25,15 +25,15 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT {
- return __builtin_clz(__x);
+ return __builtin_clzg(__x);
}
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT {
- return __builtin_clzl(__x);
+ return __builtin_clzg(__x);
}
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT {
- return __builtin_clzll(__x);
+ return __builtin_clzg(__x);
}
#ifndef _LIBCPP_HAS_NO_INT128
@@ -47,8 +47,8 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x)
// - Any bits set:
// - The number of leading zeros of the input is the number of leading
// zeros in the high 64-bits.
- return ((__x >> 64) == 0) ? (64 + __builtin_clzll(static_cast<unsigned long long>(__x)))
- : __builtin_clzll(static_cast<unsigned long long>(__x >> 64));
+ return ((__x >> 64) == 0) ? (64 + __builtin_clzg(static_cast<unsigned long long>(__x)))
+ : __builtin_clzg(static_cast<unsigned long long>(__x >> 64));
}
#endif // _LIBCPP_HAS_NO_INT128
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index b6b3ac52ca4e47..dc05a88a6153d5 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -24,15 +24,15 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT {
- return __builtin_ctz(__x);
+ return __builtin_ctzg(__x);
}
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT {
- return __builtin_ctzl(__x);
+ return __builtin_ctzg(__x);
}
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT {
- return __builtin_ctzll(__x);
+ return __builtin_ctzg(__x);
}
template <class _Tp>
diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h
index b0319cef251894..42026554335261 100644
--- a/libcxx/include/__bit/popcount.h
+++ b/libcxx/include/__bit/popcount.h
@@ -24,15 +24,15 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned __x) _NOEXCEPT {
- return __builtin_popcount(__x);
+ return __builtin_popcountg(__x);
}
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long __x) _NOEXCEPT {
- return __builtin_popcountl(__x);
+ return __builtin_popcountg(__x);
}
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long long __x) _NOEXCEPT {
- return __builtin_popcountll(__x);
+ return __builtin_popcountg(__x);
}
#if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/src/include/ryu/d2s_intrinsics.h b/libcxx/src/include/ryu/d2s_intrinsics.h
index be50361fb3b334..afe64649a0be1c 100644
--- a/libcxx/src/include/ryu/d2s_intrinsics.h
+++ b/libcxx/src/include/ryu/d2s_intrinsics.h
@@ -249,7 +249,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) {
_LIBCPP_ASSERT_INTERNAL(__value != 0, "");
_LIBCPP_ASSERT_INTERNAL(__p < 64, "");
- // __builtin_ctzll doesn't appear to be faster here.
+ // __builtin_ctzll/__builtin_ctzg doesn't appear to be faster here.
return (__value & ((1ull << __p) - 1)) == 0;
}
diff --git a/libcxx/src/include/ryu/ryu.h b/libcxx/src/include/ryu/ryu.h
index 7b19ecfec5915a..67fb0392f1e205 100644
--- a/libcxx/src/include/ryu/ryu.h
+++ b/libcxx/src/include/ryu/ryu.h
@@ -72,7 +72,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward64(unsigned long* __in
if (__mask == 0) {
return false;
}
- *__index = __builtin_ctzll(__mask);
+ *__index = __builtin_ctzg(__mask);
return true;
}
@@ -80,7 +80,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward(unsigned long* __inde
if (__mask == 0) {
return false;
}
- *__index = __builtin_ctz(__mask);
+ *__index = __builtin_ctzg(__mask);
return true;
}
#endif // !_MSC_VER
diff --git a/libcxx/src/ryu/f2s.cpp b/libcxx/src/ryu/f2s.cpp
index f42fbd68c91d2d..e7b5d39669f990 100644
--- a/libcxx/src/ryu/f2s.cpp
+++ b/libcxx/src/ryu/f2s.cpp
@@ -107,7 +107,7 @@ inline constexpr uint64_t __FLOAT_POW5_SPLIT[47] = {
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint32_t __value, const uint32_t __p) {
_LIBCPP_ASSERT_INTERNAL(__value != 0, "");
_LIBCPP_ASSERT_INTERNAL(__p < 32, "");
- // __builtin_ctz doesn't appear to be faster here.
+ // __builtin_ctz/__builtin_ctzg doesn't appear to be faster here.
return (__value & ((1u << __p) - 1)) == 0;
}
|
✅ With the latest revision this PR passed the Python code formatter. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
Right, so these builtins are nascent. Only unreleased versions of clang and gcc support them (top of tree). So you'll need to use This wont be as simple as a global find+replace, unfortunately. |
5c05092
to
7fdc51b
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider that if the compiler has support for that builtin, __countl_zero
itself could be implemented almost entirely in terms of a call to __builtin_clzg
.
Same for __countr_zero
/ __builtin_ctzg
, and popcount
/ __builtin_popcountg
.
7fdc51b
to
9d22aef
Compare
What benefit does this change have? |
I suspect it would be possible to omit most of the content of libcxx/include/__bit/countl.h, libcxx/include/__bit/countr.h, and libcxx/include/__bit/popcount.h when this builtin is defined. The current PR is perhaps too granular in its use of __has_builtin checks. For example, take the That would allow the preprocessor/lexer and Sema to do less work when these builtins exist. Is that worth the change here? I'll leave that to libc++ maintainers to make a call. |
fc3821c
to
89eaec3
Compare
13a6c33
to
7a25d00
Compare
The build failure should be fixed with #86577, so let's wait for it to land. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for your patch. I like to only use the new builtin where it carries its weight.
Please explain in the commit message why this change is useful.
61c90b8
to
f4f200b
Compare
f4f200b
to
556a84d
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've one minor comment, other than that it LGTM.
There is one issue; our CI is not testing with a Clang version that has #86577. I will look at that and let you know when the CI is ready to really test this patch. Hence the "Request changes" status.
556a84d
to
5285bc1
Compare
5285bc1
to
16a8c1d
Compare
771b2b1
to
e2ca263
Compare
I'm working on patches that enable using clang 19 in the CI. This might take some time. These patches need to be reviewed too. Sorry if that was not clear from my previous message. (Normally in libc++ we try to switch faster to the latest Clang, but the change in LLVM versioning did give some issues that took a while to resolve.) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've updated the CI and it now runs Ubuntu clang version 19.0.0 (++20240409031520+c8917048e3aa-1~exp1~20240409151624.1609)
. Can you rebase the patch to trigger the CI?
LGTM when the CI passes.
Use __builtin_popcountg instead of __buildin_popcount{l|ll} Use __builtin_clzg instead of __buildin_clz{l|ll} Use __builtin_ctzg instead of __builtin_ctz{l|ll}
Co-authored-by: Nick Desaulniers <nickdesaulniers@users.noreply.github.com>
e2ca263
to
d3c2847
Compare
Fixes #86556
Use
__builtin_popcountg
instead of__buildin_popcount{l|ll}
Use
__builtin_clzg instead
of__buildin_clz{l|ll}
Use
__builtin_ctzg instead
of__builtin_ctz{l|ll}
The generic variant of the builtins can be used to simplify some logic with >= Clang 19 or >= GCC 14, where these generic variants are available. As for backwards compatibility reasons, we can't completely remove the old logic. Therefore, I left ToDo comments to address this, as soon as support for pre Clang 19 as well as pre GCC 14 is dropped.