From d6c7a2641971ef359b302dcb18a21e984c9d06d5 Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Fri, 29 Mar 2024 18:34:08 +0800 Subject: [PATCH] utils/logalloc: do not allocate memory in reclaim_timer::report() before this change, `reclaim_timer::report()` calls ```c++ fmt::format(", at {}", current_backtrace()) ``` which allocates a `std::string` on heap, so it can fail and throw. in that case, `std::terminate()` is called. but at that moment, the reason why `reclaim_timer::report()` gets called is that we fail to reclaim memory for the caller. so we are more likely to run into this issue. anyway, we should not allocate memory in this path. in this change, a dedicated printer is created so that we don't format to a temporary `std::string`, and instead write directly to the buffer of logger. this avoids the memory allocation. Fixes #18099 Signed-off-by: Kefu Chai Closes scylladb/scylladb#18100 (cherry picked from commit fcf7ca5675946caaf2aace267136ae5de37b7f6b) --- utils/logalloc.cc | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/utils/logalloc.cc b/utils/logalloc.cc index 229dddd4df26..2630650ef809 100644 --- a/utils/logalloc.cc +++ b/utils/logalloc.cc @@ -204,6 +204,37 @@ migrate_fn_type::unregister_migrator(uint32_t index) { static_migrators().remove(index); } + +namespace { + +// for printing extra message in reclaim_timer::report() when stall is detected. +// +// this helper struct is deliberately introduced to ensure no dynamic +// allocations in reclaim_timer::report(), which is involved in handling OOMs. +struct extra_msg_when_stall_detected { + bool stall_detected; + saved_backtrace backtrace; + extra_msg_when_stall_detected(bool detected, saved_backtrace&& backtrace) + : stall_detected{detected} + , backtrace{std::move(backtrace)} + {} +}; + +} + +template <> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } + auto format(const extra_msg_when_stall_detected& msg, fmt::format_context& ctx) const { + if (msg.stall_detected) { + return fmt::format_to(ctx.out(), ", at {}", msg.backtrace); + } else { + return ctx.out(); + } + } +}; + + namespace logalloc { // LSA-specific bad_alloc variant which allows adding additional information on @@ -1511,7 +1542,8 @@ void reclaim_timer::report() const noexcept { auto time_level = _stall_detected ? log_level::warn : log_level::debug; auto info_level = _stall_detected ? log_level::info : log_level::debug; auto MiB = 1024*1024; - auto msg_extra = _stall_detected ? fmt::format(", at {}", current_backtrace()) : ""; + auto msg_extra = extra_msg_when_stall_detected(_stall_detected, + _stall_detected ? current_backtrace() : saved_backtrace{}); timing_logger.log(time_level, "{} took {} us, trying to release {:.3f} MiB {}preemptibly, reserve: {{goal: {}, max: {}}}{}", _name, (_duration + 500ns) / 1us, (float)_memory_to_release / MiB, _preemptible ? "" : "non-",