From 8a67173f753776f4818cb099cc33aa0c15827915 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Tue, 28 Oct 2025 14:39:20 -0700 Subject: [PATCH 1/2] [libunwind] Fix execution flow imbalance when using C++ Exceptions This patch should fix tracing support when using C++ Exceptions. When unwinding C++ Exceptions, libunwind would have a first searching phase to detect its caller landing pad using the personality function, and a second phase that returns to the landing pad and restore the register context by also updating the link register to point to the landing pad address found inthe first phase. Since it changes the link register value and returns all the frames that have been called in libunwind, it causes an imbalance in the execution flow which breaks Apple Processor Trace analysis and affects its clients, like Instruments.app. This patch addresses the issue by generating the right amount of `ret` instructions for every function called in libcxx & libunwind to rebalance the execution flow, right before returning to the catch block. rdar://131181678 rdar://131622012 Signed-off-by: Med Ismail Bennani --- libunwind/src/Registers.hpp | 12 ++++- libunwind/src/UnwindCursor.hpp | 73 +++++++++++++++++++++++++- libunwind/src/UnwindLevel1.c | 24 +++++---- libunwind/src/UnwindRegistersRestore.S | 15 +++++- libunwind/src/assembly.h | 4 ++ libunwind/src/config.h | 3 ++ libunwind/src/libunwind.cpp | 22 +++++++- libunwind/src/libunwind_ext.h | 9 ++++ 8 files changed, 146 insertions(+), 16 deletions(-) diff --git a/libunwind/src/Registers.hpp b/libunwind/src/Registers.hpp index 5a5b57835379a..9b96e94a8ff32 100644 --- a/libunwind/src/Registers.hpp +++ b/libunwind/src/Registers.hpp @@ -1827,7 +1827,8 @@ inline const char *Registers_ppc64::getRegisterName(int regNum) { /// Registers_arm64 holds the register state of a thread in a 64-bit arm /// process. class _LIBUNWIND_HIDDEN Registers_arm64; -extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *); +extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *, + unsigned walkedFrames); #if defined(_LIBUNWIND_USE_GCS) extern "C" void *__libunwind_shstk_get_jump_target() { @@ -1855,7 +1856,14 @@ class _LIBUNWIND_HIDDEN Registers_arm64 { v128 getVectorRegister(int num) const; void setVectorRegister(int num, v128 value); static const char *getRegisterName(int num); - void jumpto() { __libunwind_Registers_arm64_jumpto(this); } +#ifdef _LIBUNWIND_TRACE_RET_INJECT + __attribute__((noinline, disable_tail_calls)) void + returnto(unsigned walkedFrames) { + __libunwind_Registers_arm64_jumpto(this, walkedFrames); + } +#else + void jumpto() { __libunwind_Registers_arm64_jumpto(this, 0); } +#endif static constexpr int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_ARM64; } diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index 7ec5f9e91578a..b4123b6bb75ad 100644 --- a/libunwind/src/UnwindCursor.hpp +++ b/libunwind/src/UnwindCursor.hpp @@ -472,7 +472,13 @@ class _LIBUNWIND_HIDDEN AbstractUnwindCursor { virtual void getInfo(unw_proc_info_t *) { _LIBUNWIND_ABORT("getInfo not implemented"); } - virtual void jumpto() { _LIBUNWIND_ABORT("jumpto not implemented"); } +#ifdef _LIBUNWIND_TRACE_RET_INJECT + __attribute__((noinline, disable_tail_calls)) +#endif + virtual void + jumpto() { + _LIBUNWIND_ABORT("jumpto not implemented"); + } virtual bool isSignalFrame() { _LIBUNWIND_ABORT("isSignalFrame not implemented"); } @@ -489,6 +495,12 @@ class _LIBUNWIND_HIDDEN AbstractUnwindCursor { virtual void saveVFPAsX() { _LIBUNWIND_ABORT("saveVFPAsX not implemented"); } #endif +#ifdef _LIBUNWIND_TRACE_RET_INJECT + virtual void setWalkedFrames(unsigned) { + _LIBUNWIND_ABORT("setWalkedFrames not implemented"); + } +#endif + #ifdef _AIX virtual uintptr_t getDataRelBase() { _LIBUNWIND_ABORT("getDataRelBase not implemented"); @@ -965,7 +977,11 @@ class UnwindCursor : public AbstractUnwindCursor{ virtual void setFloatReg(int, unw_fpreg_t); virtual int step(bool stage2 = false); virtual void getInfo(unw_proc_info_t *); - virtual void jumpto(); +#ifdef _LIBUNWIND_TRACE_RET_INJECT + __attribute__((noinline, disable_tail_calls)) +#endif + virtual void + jumpto(); virtual bool isSignalFrame(); virtual bool getFunctionName(char *buf, size_t len, unw_word_t *off); virtual void setInfoBasedOnIPRegister(bool isReturnAddress = false); @@ -974,6 +990,10 @@ class UnwindCursor : public AbstractUnwindCursor{ virtual void saveVFPAsX(); #endif +#ifdef _LIBUNWIND_TRACE_RET_INJECT + virtual void setWalkedFrames(unsigned); +#endif + #ifdef _AIX virtual uintptr_t getDataRelBase(); #endif @@ -1356,6 +1376,9 @@ class UnwindCursor : public AbstractUnwindCursor{ defined(_LIBUNWIND_TARGET_HAIKU) bool _isSigReturn = false; #endif +#ifdef _LIBUNWIND_TRACE_RET_INJECT + uint32_t _walkedFrames; +#endif }; @@ -1410,7 +1433,46 @@ void UnwindCursor::setFloatReg(int regNum, unw_fpreg_t value) { } template void UnwindCursor::jumpto() { +#ifdef _LIBUNWIND_TRACE_RET_INJECT + /* + + The value of `_walkedFrames` is computed in `unwind_phase2` and represents the + number of frames walked starting `unwind_phase2` to get to the landing pad. + + ``` + // uc is initialized by __unw_getcontext in the parent frame. + // The first stack frame walked is unwind_phase2. + unsigned framesWalked = 1; + ``` + + To that, we need to add the number of function calls in libunwind between + `unwind_phase2` & `__libunwind_Registers_arm64_jumpto` which performs the long + jump, to rebalance the execution flow. + + ``` + frame #0: libunwind.1.dylib`__libunwind_Registers_arm64_jumpto at UnwindRegistersRestore.S:646 + frame #1: libunwind.1.dylib`libunwind::Registers_arm64::returnto at Registers.hpp:2291:3 + frame #2: libunwind.1.dylib`libunwind::UnwindCursor::jumpto at UnwindCursor.hpp:1474:14 + frame #3: libunwind.1.dylib`__unw_resume at libunwind.cpp:375:7 + frame #4: libunwind.1.dylib`__unw_resume_with_frames_walked at libunwind.cpp:363:10 + frame #5: libunwind.1.dylib`unwind_phase2 at UnwindLevel1.c:328:9 + frame #6: libunwind.1.dylib`_Unwind_RaiseException at UnwindLevel1.c:480:10 + frame #7: libc++abi.dylib`__cxa_throw at cxa_exception.cpp:295:5 + ... + ``` + + If we look at the backtrace from `__libunwind_Registers_arm64_jumpto`, we see + there are 5 frames on the stack to reach `unwind_phase2`. However, only 4 of + them will never return, since `__libunwind_Registers_arm64_jumpto` returns + back to the landing pad, so we need to subtract 1 to the number of + `_EXTRA_LIBUNWIND_FRAMES_WALKED`. + */ + + static constexpr size_t _EXTRA_LIBUNWIND_FRAMES_WALKED = 5 - 1; + _registers.returnto(_walkedFrames + _EXTRA_LIBUNWIND_FRAMES_WALKED); +#else _registers.jumpto(); +#endif } #ifdef __arm__ @@ -1419,6 +1481,13 @@ template void UnwindCursor::saveVFPAsX() { } #endif +#ifdef _LIBUNWIND_TRACE_RET_INJECT +template +void UnwindCursor::setWalkedFrames(unsigned walkedFrames) { + _walkedFrames = walkedFrames; +} +#endif + #ifdef _AIX template uintptr_t UnwindCursor::getDataRelBase() { diff --git a/libunwind/src/UnwindLevel1.c b/libunwind/src/UnwindLevel1.c index b0cd60dfb9141..799da44a550c4 100644 --- a/libunwind/src/UnwindLevel1.c +++ b/libunwind/src/UnwindLevel1.c @@ -48,16 +48,15 @@ // avoided when invoking the `jumpto()` function. To do this, we use inline // assemblies to "goto" the `jumpto()` for these architectures. #if !defined(_LIBUNWIND_USE_CET) && !defined(_LIBUNWIND_USE_GCS) -#define __unw_phase2_resume(cursor, fn) \ +#define __unw_phase2_resume(cursor, payload) \ do { \ - (void)fn; \ - __unw_resume((cursor)); \ + __unw_resume_with_frames_walked((cursor), (payload)); \ } while (0) #elif defined(_LIBUNWIND_TARGET_I386) #define __shstk_step_size (4) -#define __unw_phase2_resume(cursor, fn) \ +#define __unw_phase2_resume(cursor, payload) \ do { \ - _LIBUNWIND_POP_SHSTK_SSP((fn)); \ + _LIBUNWIND_POP_SHSTK_SSP((payload)); \ void *shstkRegContext = __libunwind_shstk_get_registers((cursor)); \ void *shstkJumpAddress = __libunwind_shstk_get_jump_target(); \ __asm__ volatile("push %%edi\n\t" \ @@ -67,9 +66,9 @@ } while (0) #elif defined(_LIBUNWIND_TARGET_X86_64) #define __shstk_step_size (8) -#define __unw_phase2_resume(cursor, fn) \ +#define __unw_phase2_resume(cursor, payload) \ do { \ - _LIBUNWIND_POP_SHSTK_SSP((fn)); \ + _LIBUNWIND_POP_SHSTK_SSP((payload)); \ void *shstkRegContext = __libunwind_shstk_get_registers((cursor)); \ void *shstkJumpAddress = __libunwind_shstk_get_jump_target(); \ __asm__ volatile("jmpq *%%rdx\n\t" ::"D"(shstkRegContext), \ @@ -77,16 +76,17 @@ } while (0) #elif defined(_LIBUNWIND_TARGET_AARCH64) #define __shstk_step_size (8) -#define __unw_phase2_resume(cursor, fn) \ +#define __unw_phase2_resume(cursor, payload) \ do { \ - _LIBUNWIND_POP_SHSTK_SSP((fn)); \ + _LIBUNWIND_POP_SHSTK_SSP((payload)); \ void *shstkRegContext = __libunwind_shstk_get_registers((cursor)); \ void *shstkJumpAddress = __libunwind_shstk_get_jump_target(); \ __asm__ volatile("mov x0, %0\n\t" \ + "mov x1, wzr\n\t" \ "br %1\n\t" \ : \ : "r"(shstkRegContext), "r"(shstkJumpAddress) \ - : "x0"); \ + : "x0", "x1"); \ } while (0) #endif @@ -205,6 +205,8 @@ extern int __unw_step_stage2(unw_cursor_t *); #if defined(_LIBUNWIND_USE_GCS) // Enable the GCS target feature to permit gcspop instructions to be used. __attribute__((target("+gcs"))) +#elif defined(_LIBUNWIND_TRACE_RET_INJECT) +__attribute__((noinline, disable_tail_calls)) #endif static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, @@ -349,6 +351,8 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, #if defined(_LIBUNWIND_USE_GCS) // Enable the GCS target feature to permit gcspop instructions to be used. __attribute__((target("+gcs"))) +#elif defined(_LIBUNWIND_TRACE_RET_INJECT) +__attribute__((noinline, disable_tail_calls)) #endif static _Unwind_Reason_Code unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor, diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S index 198735fa800a9..18005bc322beb 100644 --- a/libunwind/src/UnwindRegistersRestore.S +++ b/libunwind/src/UnwindRegistersRestore.S @@ -643,13 +643,26 @@ Lnovec: #endif // -// extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *); +// extern "C" void __libunwind_Registers_arm64_jumpto(Registers_arm64 *, unsigned); // // On entry: // thread_state pointer is in x0 +// walked_frames counter is in x1 // .p2align 2 DEFINE_LIBUNWIND_FUNCTION(__libunwind_Registers_arm64_jumpto) + + #if defined(_LIBUNWIND_TRACE_RET_INJECT) + cbz w1, 1f + 0: + subs w1, w1, #1 + adr x16, #8 + ret x16 + + b.ne 0b + 1: + #endif + // skip restore of x0,x1 for now ldp x2, x3, [x0, #0x010] ldp x4, x5, [x0, #0x020] diff --git a/libunwind/src/assembly.h b/libunwind/src/assembly.h index f0fcd006f2073..84c9d526f1d75 100644 --- a/libunwind/src/assembly.h +++ b/libunwind/src/assembly.h @@ -132,6 +132,10 @@ #if defined(__APPLE__) +#if defined(__aarch64__) || defined(__arm64__) || defined(__arm64e__) +#define _LIBUNWIND_TRACE_RET_INJECT 1 +#endif + #define SYMBOL_IS_FUNC(name) #define HIDDEN_SYMBOL(name) .private_extern name #if defined(_LIBUNWIND_HIDE_SYMBOLS) diff --git a/libunwind/src/config.h b/libunwind/src/config.h index deb5a4d4d73d4..230001d5dfd85 100644 --- a/libunwind/src/config.h +++ b/libunwind/src/config.h @@ -28,6 +28,9 @@ #define _LIBUNWIND_SUPPORT_COMPACT_UNWIND 1 #define _LIBUNWIND_SUPPORT_DWARF_UNWIND 1 #endif + #if defined(__aarch64__) || defined(__arm64__) || defined(__arm64e__) + #define _LIBUNWIND_TRACE_RET_INJECT 1 + #endif #elif defined(_WIN32) #ifdef __SEH__ #define _LIBUNWIND_SUPPORT_SEH_UNWIND 1 diff --git a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp index 951d87db868bc..3a94b6cf0cc5c 100644 --- a/libunwind/src/libunwind.cpp +++ b/libunwind/src/libunwind.cpp @@ -247,7 +247,27 @@ _LIBUNWIND_HIDDEN int __unw_get_proc_info(unw_cursor_t *cursor, } _LIBUNWIND_WEAK_ALIAS(__unw_get_proc_info, unw_get_proc_info) -/// Resume execution at cursor position (aka longjump). +/// Rebalance the execution flow by injecting the right amount of `ret` +/// instruction relatively to the amount of `walkedFrames` then resume execution +/// at cursor position (aka longjump). +_LIBUNWIND_HIDDEN int __unw_resume_with_frames_walked(unw_cursor_t *cursor, + unsigned walkedFrames) { + _LIBUNWIND_TRACE_API("__unw_resume(cursor=%p, walkedFrames=%u)", + static_cast(cursor), walkedFrames); +#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) + // Inform the ASan runtime that now might be a good time to clean stuff up. + __asan_handle_no_return(); +#endif +#ifdef _LIBUNWIND_TRACE_RET_INJECT + AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; + co->setWalkedFrames(walkedFrames); +#endif + return __unw_resume(cursor); +} +_LIBUNWIND_WEAK_ALIAS(__unw_resume_with_frames_walked, + unw_resume_with_frames_walked) + +/// Legacy function. Resume execution at cursor position (aka longjump). _LIBUNWIND_HIDDEN int __unw_resume(unw_cursor_t *cursor) { _LIBUNWIND_TRACE_API("__unw_resume(cursor=%p)", static_cast(cursor)); #if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) diff --git a/libunwind/src/libunwind_ext.h b/libunwind/src/libunwind_ext.h index 28db43a4f6eef..baca58a8c6d72 100644 --- a/libunwind/src/libunwind_ext.h +++ b/libunwind/src/libunwind_ext.h @@ -30,6 +30,15 @@ extern int __unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *); extern int __unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *); extern int __unw_set_reg(unw_cursor_t *, unw_regnum_t, unw_word_t); extern int __unw_set_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t); +#ifdef _LIBUNWIND_TRACE_RET_INJECT +__attribute__((noinline, disable_tail_calls)) +#endif +extern int +__unw_resume_with_frames_walked(unw_cursor_t *, unsigned); +// Legacy function. Do not use. +#ifdef _LIBUNWIND_TRACE_RET_INJECT +__attribute__((noinline, disable_tail_calls)) +#endif extern int __unw_resume(unw_cursor_t *); #ifdef __arm__ From 3d541a242e611ac16baac54506270990deaf2a63 Mon Sep 17 00:00:00 2001 From: Med Ismail Bennani Date: Tue, 28 Oct 2025 14:40:14 -0700 Subject: [PATCH 2/2] [libunwind] Address formatting issues Signed-off-by: Med Ismail Bennani --- libunwind/src/Registers.hpp | 6 ++++-- libunwind/src/UnwindCursor.hpp | 10 ++++++---- libunwind/src/libunwind_ext.h | 3 +-- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/libunwind/src/Registers.hpp b/libunwind/src/Registers.hpp index 9b96e94a8ff32..6580788b620a8 100644 --- a/libunwind/src/Registers.hpp +++ b/libunwind/src/Registers.hpp @@ -1857,10 +1857,12 @@ class _LIBUNWIND_HIDDEN Registers_arm64 { void setVectorRegister(int num, v128 value); static const char *getRegisterName(int num); #ifdef _LIBUNWIND_TRACE_RET_INJECT - __attribute__((noinline, disable_tail_calls)) void - returnto(unsigned walkedFrames) { + // clang-format off + __attribute__((noinline, disable_tail_calls)) + void returnto(unsigned walkedFrames) { __libunwind_Registers_arm64_jumpto(this, walkedFrames); } + // clang-format on #else void jumpto() { __libunwind_Registers_arm64_jumpto(this, 0); } #endif diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index b4123b6bb75ad..644bd5f685e8d 100644 --- a/libunwind/src/UnwindCursor.hpp +++ b/libunwind/src/UnwindCursor.hpp @@ -475,8 +475,7 @@ class _LIBUNWIND_HIDDEN AbstractUnwindCursor { #ifdef _LIBUNWIND_TRACE_RET_INJECT __attribute__((noinline, disable_tail_calls)) #endif - virtual void - jumpto() { + virtual void jumpto() { _LIBUNWIND_ABORT("jumpto not implemented"); } virtual bool isSignalFrame() { @@ -980,8 +979,9 @@ class UnwindCursor : public AbstractUnwindCursor{ #ifdef _LIBUNWIND_TRACE_RET_INJECT __attribute__((noinline, disable_tail_calls)) #endif - virtual void - jumpto(); + // clang-format off + virtual void jumpto(); + // clang-format on virtual bool isSignalFrame(); virtual bool getFunctionName(char *buf, size_t len, unw_word_t *off); virtual void setInfoBasedOnIPRegister(bool isReturnAddress = false); @@ -1434,6 +1434,7 @@ void UnwindCursor::setFloatReg(int regNum, unw_fpreg_t value) { template void UnwindCursor::jumpto() { #ifdef _LIBUNWIND_TRACE_RET_INJECT + // clang-format off /* The value of `_walkedFrames` is computed in `unwind_phase2` and represents the @@ -1467,6 +1468,7 @@ template void UnwindCursor::jumpto() { back to the landing pad, so we need to subtract 1 to the number of `_EXTRA_LIBUNWIND_FRAMES_WALKED`. */ + // clang-format on static constexpr size_t _EXTRA_LIBUNWIND_FRAMES_WALKED = 5 - 1; _registers.returnto(_walkedFrames + _EXTRA_LIBUNWIND_FRAMES_WALKED); diff --git a/libunwind/src/libunwind_ext.h b/libunwind/src/libunwind_ext.h index baca58a8c6d72..b4f8f57274d31 100644 --- a/libunwind/src/libunwind_ext.h +++ b/libunwind/src/libunwind_ext.h @@ -33,8 +33,7 @@ extern int __unw_set_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t); #ifdef _LIBUNWIND_TRACE_RET_INJECT __attribute__((noinline, disable_tail_calls)) #endif -extern int -__unw_resume_with_frames_walked(unw_cursor_t *, unsigned); +extern int __unw_resume_with_frames_walked(unw_cursor_t *, unsigned); // Legacy function. Do not use. #ifdef _LIBUNWIND_TRACE_RET_INJECT __attribute__((noinline, disable_tail_calls))