From cdccb2256509b0ed348fbecc5407932b223eaa44 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Mon, 18 Mar 2024 11:47:59 +0000 Subject: [PATCH 1/2] fixup(#7160 / tokio_epoll_uring_ext): double-panic caused by info! in thread-local's drop() Manual testing of the changes in #7160 revealed that, if the thread-local destructor ever runs (it apparently doesn't in our test suite runs, otherwise #7160 would not have auto-merged), we can encounter an `abort()` due to a double-panic in the tracing code. This github comment here contains the stack trace: https://github.com/neondatabase/neon/pull/7160#issuecomment-2003778176 This PR reverts #7160 and uses a atomic counter to identify the thread-local in log messages, instead of the memory address of the thread local, which may be re-used. --- .../io_engine/tokio_epoll_uring_ext.rs | 40 +++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs b/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs index 9b2efef5d420..a1efdb60aef0 100644 --- a/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs +++ b/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs @@ -5,8 +5,8 @@ //! on older kernels, such as some (but not all) older kernels in the Linux 5.10 series. //! See for more details. -use std::sync::atomic::AtomicU32; -use std::sync::{Arc, Weak}; +use std::sync::atomic::{AtomicU32, AtomicU64}; +use std::sync::Arc; use tokio_util::sync::CancellationToken; use tracing::{error, info, info_span, warn, Instrument}; @@ -24,38 +24,31 @@ struct ThreadLocalState(Arc); struct ThreadLocalStateInner { cell: tokio::sync::OnceCell, launch_attempts: AtomicU32, - weak_self: Weak, + /// populated lazily through fetch_add from [`THREAD_LOCAL_STATE_ID`] + thread_local_state_id: once_cell::sync::OnceCell, } impl ThreadLocalState { pub fn new() -> Self { - Self(Arc::new_cyclic(|weak| ThreadLocalStateInner { + Self(Arc::new(ThreadLocalStateInner { cell: tokio::sync::OnceCell::default(), launch_attempts: AtomicU32::new(0), - weak_self: Weak::clone(weak), + // NB: don't initialize from THREAD_LOCAL_STATE_ID here, + // there is apparently no guarantee that the thread-local initializer + // runs on the thread for which it's intended. + thread_local_state_id: once_cell::sync::OnceCell::new(), })) } -} -impl ThreadLocalStateInner { pub fn make_id_string(&self) -> String { - format!("0x{:p}", self.weak_self.as_ptr()) - } -} - -impl Drop for ThreadLocalStateInner { - fn drop(&mut self) { - info!(parent: None, id=%self.make_id_string(), "tokio_epoll_uring_ext: thread-local state is being dropped and id might be re-used in the future"); + match self.0.thread_local_state_id.get() { + Some(id) => format!("{id}"), + None => "???".to_string(), + } } } -impl std::ops::Deref for ThreadLocalState { - type Target = ThreadLocalStateInner; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} +static THREAD_LOCAL_STATE_ID: AtomicU64 = AtomicU64::new(0); thread_local! { static THREAD_LOCAL: ThreadLocalState = ThreadLocalState::new(); @@ -70,6 +63,11 @@ pub async fn thread_local_system() -> Handle { let get_or_init_res = inner .cell .get_or_try_init(|| async { + // so that `make_id_string()` below works + inner.thread_local_state_id.get_or_init(|| { + THREAD_LOCAL_STATE_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed) + }); + let attempt_no = inner .launch_attempts .fetch_add(1, std::sync::atomic::Ordering::Relaxed); From 01bf967f3587477bde88bc8d1af933639d3534c3 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Mon, 18 Mar 2024 14:36:13 +0000 Subject: [PATCH 2/2] avoid OnceCell --- .../io_engine/tokio_epoll_uring_ext.rs | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs b/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs index a1efdb60aef0..6ea19d6b2d5f 100644 --- a/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs +++ b/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs @@ -5,7 +5,7 @@ //! on older kernels, such as some (but not all) older kernels in the Linux 5.10 series. //! See for more details. -use std::sync::atomic::{AtomicU32, AtomicU64}; +use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use std::sync::Arc; use tokio_util::sync::CancellationToken; @@ -24,8 +24,8 @@ struct ThreadLocalState(Arc); struct ThreadLocalStateInner { cell: tokio::sync::OnceCell, launch_attempts: AtomicU32, - /// populated lazily through fetch_add from [`THREAD_LOCAL_STATE_ID`] - thread_local_state_id: once_cell::sync::OnceCell, + /// populated through fetch_add from [`THREAD_LOCAL_STATE_ID`] + thread_local_state_id: u64, } impl ThreadLocalState { @@ -33,18 +33,12 @@ impl ThreadLocalState { Self(Arc::new(ThreadLocalStateInner { cell: tokio::sync::OnceCell::default(), launch_attempts: AtomicU32::new(0), - // NB: don't initialize from THREAD_LOCAL_STATE_ID here, - // there is apparently no guarantee that the thread-local initializer - // runs on the thread for which it's intended. - thread_local_state_id: once_cell::sync::OnceCell::new(), + thread_local_state_id: THREAD_LOCAL_STATE_ID.fetch_add(1, Ordering::Relaxed), })) } pub fn make_id_string(&self) -> String { - match self.0.thread_local_state_id.get() { - Some(id) => format!("{id}"), - None => "???".to_string(), - } + format!("{}", self.0.thread_local_state_id) } } @@ -63,11 +57,6 @@ pub async fn thread_local_system() -> Handle { let get_or_init_res = inner .cell .get_or_try_init(|| async { - // so that `make_id_string()` below works - inner.thread_local_state_id.get_or_init(|| { - THREAD_LOCAL_STATE_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed) - }); - let attempt_no = inner .launch_attempts .fetch_add(1, std::sync::atomic::Ordering::Relaxed);