Skip to content

Commit 478e45f

Browse files
authored
[libc++] Improve performance of std::atomic_flag on Windows (#163524)
On Windows 8 and above, the WaitOnAddress, WakeByAddressSingle and WakeByAddressAll functions allow an efficient implementation of the C++20 wait and notify features of std::atomic_flag. libc++ has never used these Windows functions, so these features perform very poorly on Windows platforms: they are implemented with a spin loop with backoff rather than with any OS thread signalling. This change uses these OS functions where they are available, and falls back to the original implementation on Windows versions prior to 8. Relevant API docs from Microsoft: https://learn.microsoft.com/en-us/windows/win32/api/synchapi/nf-synchapi-waitonaddress https://learn.microsoft.com/en-us/windows/win32/api/synchapi/nf-synchapi-wakebyaddresssingle https://learn.microsoft.com/en-us/windows/win32/api/synchapi/nf-synchapi-wakebyaddressall Fixes #127221
1 parent 2ac9e59 commit 478e45f

File tree

1 file changed

+69
-0
lines changed

1 file changed

+69
-0
lines changed

libcxx/src/atomic.cpp

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@
4141
// OpenBSD has no indirect syscalls
4242
# define _LIBCPP_FUTEX(...) futex(__VA_ARGS__)
4343

44+
#elif defined(_WIN32)
45+
46+
# include <memory>
47+
# include <windows.h>
48+
4449
#else // <- Add other operating systems here
4550

4651
// Baseline needs no new headers
@@ -101,6 +106,70 @@ static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const vo
101106
_umtx_op(const_cast<__cxx_atomic_contention_t*>(__ptr), UMTX_OP_WAKE, __notify_one ? 1 : INT_MAX, nullptr, nullptr);
102107
}
103108

109+
#elif defined(_WIN32)
110+
111+
static void* win32_get_synch_api_function(const char* function_name) {
112+
// Attempt to load the API set. Note that as per the Microsoft STL implementation, we assume this API is already
113+
// loaded and accessible. While this isn't explicitly guaranteed by publicly available Win32 API documentation, it is
114+
// true in practice, and may be guaranteed by internal documentation not released publicly. In any case the fact that
115+
// the Microsoft STL made this assumption is reasonable basis to say that we can too. The alternative to this would be
116+
// to use LoadLibrary, but then leak the module handle. We can't call FreeLibrary, as this would have to be triggered
117+
// by a global static destructor, which would hang off DllMain, and calling FreeLibrary from DllMain is explicitly
118+
// mentioned as not being allowed:
119+
// https://learn.microsoft.com/en-us/windows/win32/dlls/dllmain
120+
// Given the range of bad options here, we have chosen to mirror what Microsoft did, as it seems fair to assume that
121+
// Microsoft will guarantee compatibility for us, as we are exposed to the same conditions as all existing Windows
122+
// apps using the Microsoft STL VS2015/2017/2019/2022 runtimes, where Windows 7 support has not been excluded at
123+
// compile time.
124+
static auto module_handle = GetModuleHandleW(L"api-ms-win-core-synch-l1-2-0.dll");
125+
if (module_handle == nullptr) {
126+
return nullptr;
127+
}
128+
129+
// Attempt to locate the function in the API and return the result to the caller. Note that the NULL return from this
130+
// method is documented as being interchangeable with nullptr.
131+
// https://devblogs.microsoft.com/oldnewthing/20180307-00/?p=98175
132+
return reinterpret_cast<void*>(GetProcAddress(module_handle, function_name));
133+
}
134+
135+
static void
136+
__libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr, __cxx_contention_t __val) {
137+
// WaitOnAddress was added in Windows 8 (build 9200)
138+
static auto wait_on_address = reinterpret_cast<BOOL(WINAPI*)(volatile void*, PVOID, SIZE_T, DWORD)>(
139+
win32_get_synch_api_function("WaitOnAddress"));
140+
if (wait_on_address != nullptr) {
141+
wait_on_address(const_cast<__cxx_atomic_contention_t*>(__ptr), &__val, sizeof(__val), INFINITE);
142+
} else {
143+
__libcpp_thread_poll_with_backoff(
144+
[=]() -> bool { return !__cxx_nonatomic_compare_equal(__cxx_atomic_load(__ptr, memory_order_relaxed), __val); },
145+
__libcpp_timed_backoff_policy());
146+
}
147+
}
148+
149+
// Wakes waiters on the contention word at `__ptr`: one waiter via WakeByAddressSingle when `__notify_one` is true,
// otherwise all waiters via WakeByAddressAll.
//
// Both functions were added in Windows 8 (build 9200). If the required one cannot be resolved, this function does
// nothing: the fallback wait implementation just polls, so there is nothing to wake.
static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const volatile* __ptr, bool __notify_one) {
  using wake_by_address_fn = void(WINAPI*)(PVOID);

  __cxx_atomic_contention_t* const address = const_cast<__cxx_atomic_contention_t*>(__ptr);

  if (!__notify_one) {
    // Resolved lazily, the first time a notify-all request comes through.
    static auto wake_all = reinterpret_cast<wake_by_address_fn>(win32_get_synch_api_function("WakeByAddressAll"));
    if (wake_all != nullptr)
      wake_all(address);
    return;
  }

  // Resolved lazily, the first time a notify-one request comes through.
  static auto wake_single =
      reinterpret_cast<wake_by_address_fn>(win32_get_synch_api_function("WakeByAddressSingle"));
  if (wake_single != nullptr)
    wake_single(address);
}
172+
104173
#else // <- Add other operating systems here
105174

106175
// Baseline is just a timed backoff

0 commit comments

Comments
 (0)