From 3186d7eb1194092301e4d23827e8643383c03486 Mon Sep 17 00:00:00 2001 From: Ohad Ravid Date: Sat, 1 Nov 2025 08:15:13 -0700 Subject: [PATCH 1/3] Added Windows bindings for FLS-related functions --- library/std/src/sys/pal/windows/c/bindings.txt | 6 ++++++ library/std/src/sys/pal/windows/c/windows_sys.rs | 7 +++++++ 2 files changed, 13 insertions(+) diff --git a/library/std/src/sys/pal/windows/c/bindings.txt b/library/std/src/sys/pal/windows/c/bindings.txt index 9009aa09f48ed..20b0698c66451 100644 --- a/library/std/src/sys/pal/windows/c/bindings.txt +++ b/library/std/src/sys/pal/windows/c/bindings.txt @@ -2130,6 +2130,11 @@ FindExSearchNameMatch FindFirstFileExW FindNextFileW FIONBIO +FLS_OUT_OF_INDEXES +FlsAlloc +FlsFree +FlsGetValue +FlsSetValue FlushFileBuffers FORMAT_MESSAGE_ALLOCATE_BUFFER FORMAT_MESSAGE_ARGUMENT_ARRAY @@ -2319,6 +2324,7 @@ OPEN_ALWAYS OPEN_EXISTING OpenProcessToken OVERLAPPED +PFLS_CALLBACK_FUNCTION PIPE_ACCEPT_REMOTE_CLIENTS PIPE_ACCESS_DUPLEX PIPE_ACCESS_INBOUND diff --git a/library/std/src/sys/pal/windows/c/windows_sys.rs b/library/std/src/sys/pal/windows/c/windows_sys.rs index 98f277b33780c..47c12413c339a 100644 --- a/library/std/src/sys/pal/windows/c/windows_sys.rs +++ b/library/std/src/sys/pal/windows/c/windows_sys.rs @@ -27,6 +27,10 @@ windows_targets::link!("kernel32.dll" "system" fn ExitProcess(uexitcode : u32) - windows_targets::link!("kernel32.dll" "system" fn FindClose(hfindfile : HANDLE) -> BOOL); windows_targets::link!("kernel32.dll" "system" fn FindFirstFileExW(lpfilename : PCWSTR, finfolevelid : FINDEX_INFO_LEVELS, lpfindfiledata : *mut core::ffi::c_void, fsearchop : FINDEX_SEARCH_OPS, lpsearchfilter : *const core::ffi::c_void, dwadditionalflags : FIND_FIRST_EX_FLAGS) -> HANDLE); windows_targets::link!("kernel32.dll" "system" fn FindNextFileW(hfindfile : HANDLE, lpfindfiledata : *mut WIN32_FIND_DATAW) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn FlsAlloc(lpcallback : PFLS_CALLBACK_FUNCTION) -> u32); 
+windows_targets::link!("kernel32.dll" "system" fn FlsFree(dwflsindex : u32) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn FlsGetValue(dwflsindex : u32) -> *mut core::ffi::c_void); +windows_targets::link!("kernel32.dll" "system" fn FlsSetValue(dwflsindex : u32, lpflsdata : *const core::ffi::c_void) -> BOOL); windows_targets::link!("kernel32.dll" "system" fn FlushFileBuffers(hfile : HANDLE) -> BOOL); windows_targets::link!("kernel32.dll" "system" fn FormatMessageW(dwflags : FORMAT_MESSAGE_OPTIONS, lpsource : *const core::ffi::c_void, dwmessageid : u32, dwlanguageid : u32, lpbuffer : PWSTR, nsize : u32, arguments : *const *const i8) -> u32); windows_targets::link!("kernel32.dll" "system" fn FreeEnvironmentStringsW(penv : PCWSTR) -> BOOL); @@ -2668,6 +2672,7 @@ impl Default for FLOATING_SAVE_AREA { unsafe { core::mem::zeroed() } } } +pub const FLS_OUT_OF_INDEXES: u32 = 4294967295u32; pub const FORMAT_MESSAGE_ALLOCATE_BUFFER: FORMAT_MESSAGE_OPTIONS = 256u32; pub const FORMAT_MESSAGE_ARGUMENT_ARRAY: FORMAT_MESSAGE_OPTIONS = 8192u32; pub const FORMAT_MESSAGE_FROM_HMODULE: FORMAT_MESSAGE_OPTIONS = 2048u32; @@ -3038,6 +3043,8 @@ pub struct OVERLAPPED_0_0 { } pub type PCSTR = *const u8; pub type PCWSTR = *const u16; +pub type PFLS_CALLBACK_FUNCTION = + Option<unsafe extern "system" fn(lpflsdata: *const core::ffi::c_void)>; pub type PIO_APC_ROUTINE = Option< unsafe extern "system" fn( apccontext: *mut core::ffi::c_void, From da86a0e6b1f9105be201756eb24a4940bdc5b122 Mon Sep 17 00:00:00 2001 From: Ohad Ravid Date: Sat, 1 Nov 2025 08:22:22 -0700 Subject: [PATCH 2/3] Replace `thread_local::guard` on Windows with an FLS-based impl --- .../std/src/sys/thread_local/guard/windows.rs | 124 ++++++------------ 1 file changed, 40 insertions(+), 84 deletions(-) diff --git a/library/std/src/sys/thread_local/guard/windows.rs b/library/std/src/sys/thread_local/guard/windows.rs index f747129465d6d..261f6089113ec 100644 --- a/library/std/src/sys/thread_local/guard/windows.rs +++ b/library/std/src/sys/thread_local/guard/windows.rs @@ -1,103 
+1,59 @@ //! Support for Windows TLS destructors. //! -//! Unfortunately, Windows does not provide a nice API to provide a destructor -//! for a TLS variable. Thus, the solution here ended up being a little more -//! obscure, but fear not, the internet has informed me [1][2] that this solution -//! is not unique (no way I could have thought of it as well!). The key idea is -//! to insert some hook somewhere to run arbitrary code on thread termination. -//! With this in place we'll be able to run anything we like, including all -//! TLS destructors! +//! Windows has an API to provide a destructor for a FLS (fiber local storage) variable, +//! which behaves similarly to a TLS variable for our purpose [1]. //! -//! In order to realize this, all TLS destructors are tracked by *us*, not the -//! Windows runtime. This means that we have a global list of destructors for +//! All TLS destructors are tracked by *us*, not the Windows runtime. +//! This means that we have a global list of destructors for //! each TLS key or variable that we know about. //! -//! # What's up with CRT$XLB? -//! -//! For anything about TLS destructors to work on Windows, we have to be able -//! to run *something* when a thread exits. To do so, we place a very special -//! static in a very special location. If this is encoded in just the right -//! way, the kernel's loader is apparently nice enough to run some function -//! of ours whenever a thread exits! How nice of the kernel! -//! -//! Lots of detailed information can be found in source [1] above, but the -//! gist of it is that this is leveraging a feature of Microsoft's PE format -//! (executable format) which is not actually used by any compilers today. -//! This apparently translates to any callbacks in the ".CRT$XLB" section -//! being run on certain events. -//! -//! So after all that, we use the compiler's `#[link_section]` feature to place -//! a callback pointer into the magic section so it ends up being called. -//! -//! 
# What's up with this callback? -//! -//! The callback specified receives a number of parameters from... someone! -//! (the kernel? the runtime? I'm not quite sure!) There are a few events that -//! this gets invoked for, but we're currently only interested on when a -//! thread or a process "detaches" (exits). The process part happens for the -//! last thread and the thread part happens for any normal thread. -//! -//! # The article mentions weird stuff about "/INCLUDE"? -//! -//! It sure does! Specifically we're talking about this quote: -//! -//! ```quote -//! The Microsoft run-time library facilitates this process by defining a -//! memory image of the TLS Directory and giving it the special name -//! “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The -//! linker looks for this memory image and uses the data there to create the -//! TLS Directory. Other compilers that support TLS and work with the -//! Microsoft linker must use this same technique. -//! ``` -//! -//! Basically what this means is that if we want support for our TLS -//! destructors/our hook being called then we need to make sure the linker does -//! not omit this symbol. Otherwise it will omit it and our callback won't be -//! wired up. -//! -//! We don't actually use the `/INCLUDE` linker flag here like the article -//! mentions because the Rust compiler doesn't propagate linker flags, but -//! instead we use a shim function which performs a volatile 1-byte load from -//! the address of the _tls_used symbol to ensure it sticks around. -//! -//! [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way -//! [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42 +//! 
[1]: https://devblogs.microsoft.com/oldnewthing/20191011-00/?p=102989 use core::ffi::c_void; +use crate::cell::Cell; use crate::ptr; use crate::sys::c; -unsafe extern "C" { - #[link_name = "_tls_used"] - static TLS_USED: u8; +pub type Key = u32; + +unsafe fn create(dtor: c::PFLS_CALLBACK_FUNCTION) -> Key { + let key_result = unsafe { c::FlsAlloc(dtor) }; + + if key_result == c::FLS_OUT_OF_INDEXES { + rtabort!("out of FLS keys"); + } + + key_result } -pub fn enable() { - // When destructors are used, we need to add a reference to the _tls_used - // symbol provided by the CRT, otherwise the TLS support code will get - // GC'd by the linker and our callback won't be called. - unsafe { ptr::from_ref(&TLS_USED).read_volatile() }; - // We also need to reference CALLBACK to make sure it does not get GC'd - // by the compiler/LLVM. The callback will end up inside the TLS - // callback array pointed to by _TLS_USED through linker shenanigans, - // but as far as the compiler is concerned, it looks like the data is - // unused, so we need this hack to prevent it from disappearing. 
- unsafe { ptr::from_ref(&CALLBACK).read_volatile() }; + +unsafe fn set(key: Key, ptr: *const c_void) { + let result = unsafe { c::FlsSetValue(key, ptr) }; + + if result == c::FALSE { + rtabort!("failed to set FLS value"); + } } -#[unsafe(link_section = ".CRT$XLB")] -#[cfg_attr(miri, used)] // Miri only considers explicitly `#[used]` statics for `lookup_link_section` -pub static CALLBACK: unsafe extern "system" fn(*mut c_void, u32, *mut c_void) = tls_callback; +pub fn enable() { + #[thread_local] + static REGISTERED: Cell<bool> = Cell::new(false); -unsafe extern "system" fn tls_callback(_h: *mut c_void, dw_reason: u32, _pv: *mut c_void) { - if dw_reason == c::DLL_THREAD_DETACH || dw_reason == c::DLL_PROCESS_DETACH { + if !REGISTERED.replace(true) { unsafe { - #[cfg(target_thread_local)] - super::super::destructors::run(); - #[cfg(not(target_thread_local))] - super::super::key::run_dtors(); + let key = create(Some(cleanup)); + set(key, ptr::dangling()); + }; + } +} - crate::rt::thread_cleanup(); - } +unsafe extern "system" fn cleanup(_ptr: *const c_void) { + unsafe { + #[cfg(target_thread_local)] + super::super::destructors::run(); + #[cfg(not(target_thread_local))] + super::super::key::run_dtors(); } + + crate::rt::thread_cleanup(); } From 6502684f7d67f29d126320914d22c2ed29363a0b Mon Sep 17 00:00:00 2001 From: Ohad Ravid Date: Sat, 8 Nov 2025 10:25:15 -0800 Subject: [PATCH 3/3] Use a static atomic to store a common key --- .../std/src/sys/thread_local/guard/windows.rs | 27 ++++++++++++++++--- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/library/std/src/sys/thread_local/guard/windows.rs b/library/std/src/sys/thread_local/guard/windows.rs index 261f6089113ec..1421e501d4f09 100644 --- a/library/std/src/sys/thread_local/guard/windows.rs +++ b/library/std/src/sys/thread_local/guard/windows.rs @@ -10,10 +10,11 @@ //! 
[1]: https://devblogs.microsoft.com/oldnewthing/20191011-00/?p=102989 use core::ffi::c_void; +use core::sync::atomic::{AtomicU32, Ordering}; use crate::cell::Cell; use crate::ptr; -use crate::sys::c; +use crate::sys::c::{self, FLS_OUT_OF_INDEXES}; pub type Key = u32; @@ -35,15 +36,33 @@ unsafe fn set(key: Key, ptr: *const c_void) { } } +static KEY: AtomicU32 = AtomicU32::new(FLS_OUT_OF_INDEXES); + pub fn enable() { #[thread_local] static REGISTERED: Cell<bool> = Cell::new(false); if !REGISTERED.replace(true) { - unsafe { - let key = create(Some(cleanup)); - set(key, ptr::dangling()); + let current_key = KEY.load(Ordering::Acquire); + + // If we already allocated a key, we only need to set it to a non-null value so that the dtor is run. + let key = if current_key != FLS_OUT_OF_INDEXES { + current_key + } else { + // Otherwise, we try to allocate a key. + let new_key = unsafe { create(Some(cleanup)) }; + + // Now we need to set this key to be used by everyone else. + // If we won the race, our key is the right one and we can set it to non-null value. + // If we lost, we'll use the winning key. + // Note: we are not freeing our losing key since according to the docs + // > It is expected that DLLs call [the FlsFree] function (if at all) only during DLL_PROCESS_DETACH. + match KEY.compare_exchange(current_key, new_key, Ordering::Release, Ordering::Acquire) { + Ok(_) => new_key, + Err(other_key) => other_key, + } }; + unsafe { set(key, ptr::without_provenance(1)) }; } }