From a966768f633ae2590551ead7d758c747b8efd9af Mon Sep 17 00:00:00 2001 From: Zhaoxuan Jiang Date: Mon, 8 Dec 2025 19:20:10 +0800 Subject: [PATCH] [AArch64] Fix missing register definitions in homogeneous epilog lowering The lowering for HOM_Epilog did not transfer explicit register defs from the pseudo-instruction to the generated helper calls. MachineVerifier would complain if a following tail call uses one of the restored CSRs. This scenario occurs in code generated by the Swift compiler, where X20 is used to pass swiftself. This patch fixes the issue by adding the missing defs back to the helper call as implicit defs. --- .../AArch64LowerHomogeneousPrologEpilog.cpp | 27 +++++++++++------- ...64-homogeneous-prolog-epilog-tail-call.mir | 28 +++++++++++++++++++ 2 files changed, 45 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-tail-call.mir diff --git a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp index 03dd1cd702d17..d69f12e7c0a7c 100644 --- a/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp +++ b/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp @@ -483,16 +483,17 @@ bool AArch64LowerHomogeneousPE::lowerEpilog( assert(MI.getOpcode() == AArch64::HOM_Epilog); auto Return = NextMBBI; + MachineInstr *HelperCall = nullptr; if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) { // When MBB ends with a return, emit a tail-call to the epilog helper auto *EpilogTailHelper = getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail); - BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi)) - .addGlobalAddress(EpilogTailHelper) - .addImm(0) - .setMIFlag(MachineInstr::FrameDestroy) - .copyImplicitOps(MI) - .copyImplicitOps(*Return); + HelperCall = BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi)) + .addGlobalAddress(EpilogTailHelper) + .addImm(0) + .setMIFlag(MachineInstr::FrameDestroy) + .copyImplicitOps(MI) + .copyImplicitOps(*Return); NextMBBI = std::next(Return); Return->removeFromParent(); } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs, @@ -500,10 +501,10 @@ bool AArch64LowerHomogeneousPE::lowerEpilog( // The default epilog helper case. auto *EpilogHelper = getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog); - BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) - .addGlobalAddress(EpilogHelper) - .setMIFlag(MachineInstr::FrameDestroy) - .copyImplicitOps(MI); + HelperCall = BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) + .addGlobalAddress(EpilogHelper) + .setMIFlag(MachineInstr::FrameDestroy) + .copyImplicitOps(MI); } else { // Fall back to no-helper. for (int I = 0; I < Size - 2; I += 2) @@ -512,6 +513,12 @@ bool AArch64LowerHomogeneousPE::lowerEpilog( emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true); } + // Make sure all explicit definitions are preserved in the helper call; + // implicit ones are already handled by copyImplicitOps. + if (HelperCall) + for (auto &Def : MBBI->defs()) + HelperCall->addRegisterDefined(Def.getReg(), + MF.getRegInfo().getTargetRegisterInfo()); MBBI->removeFromParent(); return true; } diff --git a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-tail-call.mir b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-tail-call.mir new file mode 100644 index 0000000000000..8a09df4693118 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-tail-call.mir @@ -0,0 +1,28 @@ +# RUN: llc -verify-machineinstrs -mtriple=arm64-applie-ios7.0 -start-before=aarch64-lower-homogeneous-prolog-epilog -homogeneous-prolog-epilog %s +# +# This test ensures defined registers are preserved after lowering homogeneous +# epilog into helper calls. Without the fix, the verifier would complain about +# X20 being used by use_x20 without being defined. +--- | + define void @foo() { + entry: + ret void + } + declare void @use_x20() +... +--- +name: foo +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$x20' } +body: | + bb.0: + liveins: $x0, $x20, $lr, $x19, $x20 + frame-setup HOM_Prolog $lr, $fp, $x19, $x20, 16 + $sp = frame-setup SUBXri $sp, 32, 0 + bb.1: + $sp = frame-destroy ADDXri $sp, 32, 0 + $lr, $fp, $x19, $x20 = frame-destroy HOM_Epilog + TCRETURNdi @use_x20, 0, csr_aarch64_aapcs, implicit $sp, implicit $x20