Skip to content

Conversation

@cofibrant
Copy link
Contributor

@cofibrant cofibrant commented Nov 11, 2025

At present, the shrink wrapping pass misses opportunities to shrink wrap in the presence of machine basic blocks which exit the function without returning. Such cases arise from C++ functions like the following:

int foo(int err, void* ptr) {
    if (err == -1) {
         if (ptr == nullptr) {
             throw MyException("Received `nullptr`!", __FILE__, __LINE__);
         }
         
         handle(ptr);
    }
    
    return STATUS_OK;
}

In particular, assuming MyException's constructor is not marked noexcept, the above code will generate a trivial EH landing pad calling __cxa_free_exception() and rethrowing the unhandled internal exception, exiting the function without returning. As such, the shrink wrapping pass refuses to touch the above function, spilling to the stack on every call, even though no CSRs are clobbered on the hot path. This patch tweaks the shrink wrapping logic to enable the pass to fire in this and similar cases.

@llvmbot
Copy link
Member

llvmbot commented Nov 11, 2025

@llvm/pr-subscribers-backend-aarch64

Author: Nathan Corbyn (cofibrant)

Changes

At present, the shrink wrapping pass misses opportunities to shrink wrap in the presence of machine basic blocks which exit the function without returning. Such cases arise from C++ functions like the following:

int foo(int err, void* ptr) {
    if (err == -1) {
         if (ptr == nullptr) {
             throw MyException("Received `nullptr`!", __FILE__, __LINE__);
         }
         
         handle(ptr);
    }
    
    return STATUS_OK;
}

In particular, assuming MyException's constructor is not marked noexcept, the above code will generate a trivial EH landing pad calling __cxa_free_exception() and rethrowing the unhandled internal exception, exiting the function without returning. As such, at present, the shrink wrapping refuses to touch the above function, spilling to the stack on every call, even though no CSRs are touched on the hot path. This patch tweaks the shrink wrapping logic to enable the pass to fire in this and similar cases.


Full diff: https://github.com/llvm/llvm-project/pull/167548.diff

3 Files Affected:

  • (modified) llvm/include/llvm/CodeGen/MachineBasicBlock.h (+7)
  • (modified) llvm/lib/CodeGen/ShrinkWrap.cpp (+5-7)
  • (added) llvm/test/CodeGen/AArch64/shrinkwrap-no-return.ll (+76)
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index fcf7bab09fcff..a56cf56c81cd9 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -988,6 +988,13 @@ class MachineBasicBlock
     return !empty() && back().isEHScopeReturn();
   }
 
+  /// Convenience function that returns true if the block exits the function
+  /// without returning.
+  bool isNoReturnBlock() const {
+    return !empty() && succ_empty() && !back().isReturn() &&
+           !back().isIndirectBranch();
+  }
+
   /// Split a basic block into 2 pieces at \p SplitPoint. A new block will be
   /// inserted after this block, and all instructions after \p SplitInst moved
   /// to it (\p SplitInst will be in the original block). If \p LIS is provided,
diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
index 83581052560cb..c2221cacc5bd5 100644
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -697,14 +697,12 @@ void ShrinkWrapImpl::updateSaveRestorePoints(MachineBasicBlock &MBB,
 
   if (!Restore)
     Restore = &MBB;
-  else if (MPDT->getNode(&MBB)) // If the block is not in the post dom tree, it
-                                // means the block never returns. If that's the
-                                // case, we don't want to call
-                                // `findNearestCommonDominator`, which will
-                                // return `Restore`.
+  else if (MBB.isNoReturnBlock()) {
+    // MBB exits the function without returning, so we don't need an epilogue
+    // here. This is common for things like cleanup landing pads etc. In these
+    // cases, we can skip updating `Restore`.
+  } else
     Restore = MPDT->findNearestCommonDominator(Restore, &MBB);
-  else
-    Restore = nullptr; // Abort, we can't find a restore point in this case.
 
   // Make sure we would be able to insert the restore code before the
   // terminator.
diff --git a/llvm/test/CodeGen/AArch64/shrinkwrap-no-return.ll b/llvm/test/CodeGen/AArch64/shrinkwrap-no-return.ll
new file mode 100644
index 0000000000000..55e8b539e0a35
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/shrinkwrap-no-return.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=aarch64 -o - %s | FileCheck %s
+
+@exception = external hidden constant { ptr, ptr, ptr }
+
+define noundef i32 @early_exit_or_throw(i32 %in) personality ptr @__gxx_personality_v0 {
+; CHECK-LABEL: early_exit_or_throw:
+; CHECK:       .Lfunc_begin0:
+; CHECK-NEXT:    .cfi_startproc
+; CHECK-NEXT:    .cfi_personality 156, DW.ref.__gxx_personality_v0
+; CHECK-NEXT:    .cfi_lsda 28, .Lexception0
+; CHECK-NEXT:  // %bb.0: // %entry
+; CHECK-NEXT:    cmp w0, #1
+; CHECK-NEXT:    b.eq .LBB0_2
+; CHECK-NEXT:  // %bb.1: // %exit
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_2: // %setup
+; CHECK-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    mov w0, #32 // =0x20
+; CHECK-NEXT:    bl __cxa_allocate_exception
+; CHECK-NEXT:  .Ltmp0: // EH_LABEL
+; CHECK-NEXT:    bl construct_exception
+; CHECK-NEXT:  .Ltmp1: // EH_LABEL
+; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT:  // %bb.3: // %throw
+; CHECK-NEXT:    adrp x2, :got:destruct_exception
+; CHECK-NEXT:    adrp x1, exception
+; CHECK-NEXT:    add x1, x1, :lo12:exception
+; CHECK-NEXT:    ldr x2, [x2, :got_lo12:destruct_exception]
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    bl __cxa_throw
+; CHECK-NEXT:  .LBB0_4: // %teardown
+; CHECK-NEXT:  .Ltmp2: // EH_LABEL
+; CHECK-NEXT:    mov x20, x0
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    bl __cxa_free_exception
+; CHECK-NEXT:    mov x0, x20
+; CHECK-NEXT:    bl _Unwind_Resume
+entry:
+  %cmp = icmp eq i32 %in, 1
+  br i1 %cmp, label %setup, label %exit
+
+setup:
+  %exception = tail call ptr @__cxa_allocate_exception(i64 32) nounwind
+  %call = invoke noundef ptr @construct_exception(ptr noundef nonnull %exception) to label %throw unwind label %teardown
+
+throw:
+  tail call void @__cxa_throw(ptr nonnull %exception, ptr nonnull @exception, ptr nonnull @destruct_exception) noreturn
+  unreachable
+
+teardown:
+  %caught = landingpad { ptr, i32 } cleanup
+  tail call void @__cxa_free_exception(ptr nonnull %exception) nounwind
+  resume { ptr, i32 } %caught
+
+exit:
+  ret i32 0
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare ptr @__cxa_allocate_exception(i64) local_unnamed_addr
+declare void @__cxa_free_exception(ptr) local_unnamed_addr
+declare void @__cxa_throw(ptr, ptr, ptr) local_unnamed_addr cold noreturn
+
+declare noundef ptr @construct_exception(ptr noundef nonnull returned) unnamed_addr
+declare noundef ptr @destruct_exception(ptr noundef nonnull returned) unnamed_addr mustprogress nounwind ssp uwtable(sync)
+
+declare void @noreturn() noreturn

@cofibrant
Copy link
Contributor Author

I'm sure that I'm missing some good ideas for test cases, but wasn't sure what to add!

@cofibrant
Copy link
Contributor Author

Ping

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants