-
Notifications
You must be signed in to change notification settings - Fork 10.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libcxx] Align __recommend() + 1 by __endian_factor
#90292
Conversation
|
@llvm/pr-subscribers-libcxx Author: Vitaly Buka (vitalybuka) Changes
Full diff: https://github.com/llvm/llvm-project/pull/90292.diff 1 Files Affected:
diff --git a/libcxx/include/string b/libcxx/include/string
index 883bc1d7e5dc9f..ff9c7b81375c9c 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -868,7 +868,7 @@ private:
__r_.first() = __rep();
__set_short_size(__size);
} else {
- auto __capacity = __recommend(__size) + 1;
+ auto __capacity = __align_it<__endian_factor>(__recommend(__size) + 1);
auto __allocation = __alloc_traits::allocate(__alloc(), __capacity);
__begin_lifetime(__allocation, __capacity);
__set_long_cap(__capacity);
@@ -1876,6 +1876,7 @@ private:
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __set_long_cap(size_type __s) _NOEXCEPT {
__r_.first().__l.__cap_ = __s / __endian_factor;
__r_.first().__l.__is_long_ = true;
+ _LIBCPP_ASSERT_INTERNAL(__s == __get_long_cap(), "Size must be __endian_factor aligned.");
}
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 size_type __get_long_cap() const _NOEXCEPT {
@@ -2201,7 +2202,7 @@ basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_ty
__set_short_size(__sz);
__p = __get_short_pointer();
} else {
- auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__reserve) + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__recommend(__reserve) + 1));
__p = __allocation.ptr;
__begin_lifetime(__p, __allocation.count);
__set_long_pointer(__p);
@@ -2225,7 +2226,7 @@ basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_ty
__set_short_size(__sz);
__p = __get_short_pointer();
} else {
- auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__sz) + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__recommend(__sz) + 1));
__p = __allocation.ptr;
__begin_lifetime(__p, __allocation.count);
__set_long_pointer(__p);
@@ -2250,7 +2251,7 @@ basic_string<_CharT, _Traits, _Allocator>::__init_copy_ctor_external(const value
} else {
if (__sz > max_size())
__throw_length_error();
- auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__sz) + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__recommend(__sz) + 1));
__p = __allocation.ptr;
__begin_lifetime(__p, __allocation.count);
__set_long_pointer(__p);
@@ -2273,7 +2274,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void basic_string<_CharT, _Traits, _Allocator>::__
__set_short_size(__n);
__p = __get_short_pointer();
} else {
- auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__n) + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__recommend(__n) + 1));
__p = __allocation.ptr;
__begin_lifetime(__p, __allocation.count);
__set_long_pointer(__p);
@@ -2338,7 +2339,7 @@ basic_string<_CharT, _Traits, _Allocator>::__init_with_size(_InputIterator __fir
__p = __get_short_pointer();
} else {
- auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__sz) + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__recommend(__sz) + 1));
__p = __allocation.ptr;
__begin_lifetime(__p, __allocation.count);
__set_long_pointer(__p);
@@ -2378,7 +2379,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX20 void basic_string<_CharT, _Traits, _Allocator>::__
size_type __cap =
__old_cap < __ms / 2 - __alignment ? __recommend(std::max(__old_cap + __delta_cap, 2 * __old_cap)) : __ms - 1;
__annotate_delete();
- auto __allocation = std::__allocate_at_least(__alloc(), __cap + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__cap + 1));
pointer __p = __allocation.ptr;
__begin_lifetime(__p, __allocation.count);
if (__n_copy != 0)
@@ -2421,7 +2422,7 @@ void _LIBCPP_CONSTEXPR_SINCE_CXX20
size_type __cap =
__old_cap < __ms / 2 - __alignment ? __recommend(std::max(__old_cap + __delta_cap, 2 * __old_cap)) : __ms - 1;
__annotate_delete();
- auto __allocation = std::__allocate_at_least(__alloc(), __cap + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__cap + 1));
pointer __p = __allocation.ptr;
__begin_lifetime(__p, __allocation.count);
if (__n_copy != 0)
@@ -3254,14 +3255,14 @@ basic_string<_CharT, _Traits, _Allocator>::__shrink_or_extend(size_type __target
__p = __get_long_pointer();
} else {
if (__target_capacity > __cap) {
- auto __allocation = std::__allocate_at_least(__alloc(), __target_capacity + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__target_capacity + 1));
__new_data = __allocation.ptr;
__target_capacity = __allocation.count - 1;
} else {
#ifndef _LIBCPP_HAS_NO_EXCEPTIONS
try {
#endif // _LIBCPP_HAS_NO_EXCEPTIONS
- auto __allocation = std::__allocate_at_least(__alloc(), __target_capacity + 1);
+ auto __allocation = std::__allocate_at_least(__alloc(), __align_it<__endian_factor>(__target_capacity + 1));
__new_data = __allocation.ptr;
__target_capacity = __allocation.count - 1;
#ifndef _LIBCPP_HAS_NO_EXCEPTIONS
|
__set_long_cap and __get_long_cap
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you add some more info here? I don't see what bug this fixes, and I don't understand why we can't drop a byte when setting the long cap.
|
We pass the capacity directly to deallocate, e.g.: and for sized-delete, we are required to pass the actual size that was passed to allocate, not a rounded amount. |
|
AFAICT we always return |
The assert inserted there confirms that it's not true in some cases, on check-cxx, even without sanitizers. |
Ok, then what am I missing? |
|
BTW it's expected that there might be a byte dropped if the allocator returns more than what we asked for. That's completely legal for us to do. (But I don't think we test that for |
Sorry, maybe we are discussing different things.
|
|
What I'm saying is that |
Hm, I would expect allocator should still accept the same size for new/delete, even if it returns more. Otherwise using those operators will be very annoying. |
Yes, it does. With |
So, I am not sure then what are you asking? |
|
I'm asking where we get the mismatch. My current mental model is that we always allocate an even number of elements (due to |
Got it, will reproduce with details soon.
Sorry, it's just how I understood the code, will be happy to improve. |
libcxx/include/string
Outdated
| @@ -1876,6 +1876,7 @@ private: | |||
| _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __set_long_cap(size_type __s) _NOEXCEPT { | |||
| __r_.first().__l.__cap_ = __s / __endian_factor; | |||
| __r_.first().__l.__is_long_ = true; | |||
| _LIBCPP_ASSERT_INTERNAL(__s == __get_long_cap(), "Size must be __endian_factor aligned."); | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@philnik777
Note, this assert is enough to reproduce the issue on LIT_FILTER=string ninja check-cxx, in case it would be easy for you to debug yourself.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Or maybe not...
I'll be back with details soon.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see:
https://github.com/llvm/llvm-project/actions/runs/8899718744/job/24441331959?pr=90645
Also my local run if we apply only #90645
gdb runtimes/runtimes-bins/test/std/strings/basic.string/string.modifiers/string_replace/Output/size_size_pointer_size.pass.cpp.dir/t.tmp.exe
#0 __pthread_kill_implementation (threadid=<optimized out>, signo=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
#1 0x00007ffff7b691cf in __pthread_kill_internal (signo=<optimized out>, threadid=<optimized out>) at ./nptl/pthread_kill.c:89
#2 __GI___pthread_kill (threadid=<optimized out>, signo=<optimized out>) at ./nptl/pthread_kill.c:89
#3 0x00007ffff7cb3e90 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
#4 0x00007ffff7b054b2 in __GI_abort () at ./stdlib/abort.c:79
#5 0x00007ffff7f44bce in std::__1::__libcpp_verbose_abort (format=0x7ffff7e9c1f8 "%s")
at /usr/local/google/home/vitalybuka/src/llvm.git/llvm-project/libcxx/src/verbose_abort.cpp:74
#6 0x00007ffff7f2da14 in std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__set_long_cap[abi:de190000](unsigned long) (this=0x7fffffffc7b0, __s=25) at include/c++/v1/string:1879
#7 0x00007ffff7f3ab96 in std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__init (this=0x7fffffffc7b0,
__s=0x5555555e638d "1234567890123456789bcde", __sz=23) at include/c++/v1/string:2233
#8 0x0000555555584334 in std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::basic_string[abi:de190000]<0>
()
#9 0x0000555555557441 in bool test0<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >() ()
#10 0x00005555555552d9 in void test<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> > >() ()
#11 0x00005555555552bb in main ()
- __init(const value_type* __s, size_type __sz = 23)
- __recommend(23) returns 24, because 23 is
__min_cap - then as in the description we allocate 25, but store
25 / 2 - and next time we delete 24
|
I've looked into this a bit more and it turns out that we have two unrelated bugs in I think diff --git a/libcxx/include/string b/libcxx/include/string
index 883bc1d7e5dc..13af92fbce3c 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -1959,10 +1959,10 @@ private:
if (__s < __min_cap) {
return static_cast<size_type>(__min_cap) - 1;
}
- const size_type __boundary = sizeof(value_type) < __alignment ? __alignment / sizeof(value_type) : 1;
+ const size_type __boundary = sizeof(value_type) < __alignment ? __alignment / sizeof(value_type) : __endian_factor;
size_type __guess = __align_it<__boundary>(__s + 1) - 1;
if (__guess == __min_cap)
- ++__guess;
+ __guess += 2;
return __guess;
}should fix the issues you're seeing. |
My last reply written before reading this, but looks like they are consistent. |
libcxx/include/string
Outdated
| size_type __guess = __align_it<__boundary>(__s + 1) - 1; | ||
| if (__guess == __min_cap) | ||
| ++__guess; | ||
| __guess += 2; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
works with ASSERT, and likely will work with asan
Should __guess += 2 be replaced with __guess += __endian_factor if we want to keep __endian_factor alignment?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
works with ASSERT, and likely will work with asan Should
__guess += 2be replaced with__guess += __endian_factorif we want to keep __endian_factor alignment?
Assuming __endian_factor unlikely will be >2 in future, 2 will work, but __endian_factor is better for reader?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I think __endian_factor would be the right call. Maybe we should rename it into something like __min_capacity_alignment, since that's what it basically is? Just a though, not a request for this patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To unblock landing #83774 I'm happy with this patch if you remove the _LIBCPP_ASSERT_INTERNAL (since it may not actually be correct) and change __guess += 2 to __guess += __endian_factor.
|
@alk Sorry, I am failing to see how this is a regression introduced in 18. Can you share a godbolt link, commit or something else? |
|
"How" is up to you guys. I know that simple test posted above works on libc++ 17 and fails on libc++ 18. Shouldn't it be enough? Here is link again: https://paste.debian.net/hidden/8edbffc1/ Some kind of crash will happen on any C++ memory allocator that cares to verify sizes. gperftools on master does in debug builds. "Abseil" tcmalloc from github.com/google/tcmalloc has been doing this for ages. So you should be able to verify regression in multiple ways. At FreeBSD ticket Dimitry Andric claims that he found the commit(s) (kudos and much thanks for this; @DimitryAndric). Here: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=279560#c10 But it isn't super trivial to me, so I'll leave it to experts. "how" might be worth mini-retrospective, especially given that I know for sure that Google does sized delete internally on 100% internal workloads. |
|
As far as I could determine, this particular behavior of std::string regressed with 04ce0ba ("Unconditionally lower std::string's alignment requirement from 16 to 8"), which also got merged to 18.x, but it could be that it does not manifest itself on all platforms. We did observe the issue on FreeBSD, which uses libc++ by default, and @alk observed it on Debian Linux. |
|
@alk Ah, I managed to make your reproducer work. I was using incorrect flags, it needs optimizations enabled to trigger. This is a slightly simplified version that shows the issue on Clang 18 but not on 17 and 16: https://godbolt.org/z/nYYvG74d7 @DimitryAndric Thanks a lot for the information. So then this is indeed a regression in LLVM 18. @tstellar Since this is happening in |
|
There are no more 18.1.x releases planned, but we can do another one if the bug is severe enough. Can someone summarize for me what kinds of use cases are impacted? |
|
Basically, folks passing the @alk @ckennelly There seems to be a claim that Google's using this flag at large scale internally. If it's possible to detail the impact of this regression a bit more, it might help to give at least one "real world" point of view on this. |
Is there a reason why gcc has this on by default, but clang doesn't? |
|
I'm unable to say whether or not this is a problem for Google's codebase, since @alk speculated upthread that it might be string ABI dependent. I'll see if I can loop some folks in who would know. Either way, sized deallocation is an important performance optimization in modern memory allocators to avoid a costly pointer-to-size lookup (in TCMalloc's case, a 2/3-level radix tree walk). Passing the wrong size is UB and diagnosed by ASan as such, and in practice means we might place the deallocated object on the wrong freelist. |
Basically, we had trouble with rolling it out. It's a wide-reaching change to deploy in the wild because it's effectively an ABI change (call sites that used to call |
|
OK, if we backport this fix, is there someone willing to do some extra testing on the release branch? I would like to get some kind of confirmation that the backport fixes the issues and that it doesn't introduce another regression? |
|
@vitalybuka found this with sanitizers (and |
Sure, send me PR, or a branch with a fix. |
Google has indeed been using -fsized-deallocation with tcmalloc (which uses the deallocation size information) internally for a long time. However, we also use the libc++ unstable abi, which enables the "alternate" string layout. In that version, the "endian_factor" constant is 1 on little endian machines, rather than 2, so this bug doesn't appear. |
|
/cherry-pick d129ea8 |
|
Failed to cherry-pick: d129ea8 https://github.com/llvm/llvm-project/actions/runs/9485332934 Please manually backport the fix and push it to your github fork. Once this is done, please create a pull request |
This is detected by asan after llvm#83774 Allocation size will be divided by `__endian_factor` before storing. If it's not aligned, we will not be able to recover allocation size to pass into `__alloc_traits::deallocate`. we have code like this ``` auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__sz) + 1); __p = __allocation.ptr; __set_long_cap(__allocation.count); void __set_long_cap(size_type __s) _NOEXCEPT { __r_.first().__l.__cap_ = __s / __endian_factor; __r_.first().__l.__is_long_ = true; } size_type __get_long_cap() const _NOEXCEPT { return __r_.first().__l.__cap_ * __endian_factor; } inline ~basic_string() { __annotate_delete(); if (__is_long()) __alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap()); } ``` 1. __recommend() -> even size 2. `std::__allocate_at_least(__alloc(), __recommend(__sz) + 1)` - > not even size 3. ` __set_long_cap() `- > lose one bit of size for __endian_factor == 2 (see `/ __endian_factor`) 4. `__alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap())` -> uses even size (see `__get_long_cap`) (cherry picked from commit d129ea8)
|
/pull-request #95264 |
This is detected by asan after #83774 Allocation size will be divided by `__endian_factor` before storing. If it's not aligned, we will not be able to recover allocation size to pass into `__alloc_traits::deallocate`. we have code like this ``` auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__sz) + 1); __p = __allocation.ptr; __set_long_cap(__allocation.count); void __set_long_cap(size_type __s) _NOEXCEPT { __r_.first().__l.__cap_ = __s / __endian_factor; __r_.first().__l.__is_long_ = true; } size_type __get_long_cap() const _NOEXCEPT { return __r_.first().__l.__cap_ * __endian_factor; } inline ~basic_string() { __annotate_delete(); if (__is_long()) __alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap()); } ``` 1. __recommend() -> even size 2. `std::__allocate_at_least(__alloc(), __recommend(__sz) + 1)` - > not even size 3. ` __set_long_cap() `- > lose one bit of size for __endian_factor == 2 (see `/ __endian_factor`) 4. `__alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap())` -> uses even size (see `__get_long_cap`) (cherry picked from commit d129ea8)
Clang 19 finally enabled it by default (as of 130e93c). IMO this makes it even more important to be able to provide a smooth upgrade path (i.e. people being able to test |
This is detected by asan after llvm#83774 Allocation size will be divided by `__endian_factor` before storing. If it's not aligned, we will not be able to recover allocation size to pass into `__alloc_traits::deallocate`. we have code like this ``` auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__sz) + 1); __p = __allocation.ptr; __set_long_cap(__allocation.count); void __set_long_cap(size_type __s) _NOEXCEPT { __r_.first().__l.__cap_ = __s / __endian_factor; __r_.first().__l.__is_long_ = true; } size_type __get_long_cap() const _NOEXCEPT { return __r_.first().__l.__cap_ * __endian_factor; } inline ~basic_string() { __annotate_delete(); if (__is_long()) __alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap()); } ``` 1. __recommend() -> even size 2. `std::__allocate_at_least(__alloc(), __recommend(__sz) + 1)` - > not even size 3. ` __set_long_cap() `- > lose one bit of size for __endian_factor == 2 (see `/ __endian_factor`) 4. `__alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap())` -> uses even size (see `__get_long_cap`) (cherry picked from commit d129ea8)
This is detected by asan after llvm#83774 Allocation size will be divided by `__endian_factor` before storing. If it's not aligned, we will not be able to recover allocation size to pass into `__alloc_traits::deallocate`. we have code like this ``` auto __allocation = std::__allocate_at_least(__alloc(), __recommend(__sz) + 1); __p = __allocation.ptr; __set_long_cap(__allocation.count); void __set_long_cap(size_type __s) _NOEXCEPT { __r_.first().__l.__cap_ = __s / __endian_factor; __r_.first().__l.__is_long_ = true; } size_type __get_long_cap() const _NOEXCEPT { return __r_.first().__l.__cap_ * __endian_factor; } inline ~basic_string() { __annotate_delete(); if (__is_long()) __alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap()); } ``` 1. __recommend() -> even size 2. `std::__allocate_at_least(__alloc(), __recommend(__sz) + 1)` - > not even size 3. ` __set_long_cap() `- > lose one bit of size for __endian_factor == 2 (see `/ __endian_factor`) 4. `__alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap())` -> uses even size (see `__get_long_cap`) (cherry picked from commit d129ea8)
This is detected by asan after #83774
Allocation size will be divided by
__endian_factorbefore storing. If it's not aligned,we will not be able to recover allocation size to pass into
__alloc_traits::deallocate.we have code like this
std::__allocate_at_least(__alloc(), __recommend(__sz) + 1)- > not even size__set_long_cap()- > lose one bit of size for __endian_factor == 2 (see/ __endian_factor)__alloc_traits::deallocate(__alloc(), __get_long_pointer(), __get_long_cap())-> uses even size (see__get_long_cap)