diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 0e9adde564b3e..c013bbe9926fe 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -239,11 +239,6 @@ static cl::opt EnableRedZone("aarch64-redzone", cl::desc("enable use of redzone on AArch64"), cl::init(false), cl::Hidden); -static cl::opt - ReverseCSRRestoreSeq("reverse-csr-restore-seq", - cl::desc("reverse the CSR restore sequence"), - cl::init(false), cl::Hidden); - static cl::opt StackTaggingMergeSetTag( "stack-tagging-merge-settag", cl::desc("merge settag instruction in function epilog"), cl::init(true), @@ -307,8 +302,6 @@ bool AArch64FrameLowering::homogeneousPrologEpilog( return false; if (!EnableHomogeneousPrologEpilog) return false; - if (ReverseCSRRestoreSeq) - return false; if (EnableRedZone) return false; @@ -3111,7 +3104,27 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, hasFP(MF)); - auto EmitMI = [&](const RegPairInfo &RPI) -> MachineBasicBlock::iterator { + if (homogeneousPrologEpilog(MF, &MBB)) { + auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog)) + .setMIFlag(MachineInstr::FrameDestroy); + for (auto &RPI : RegPairs) { + MIB.addReg(RPI.Reg1, RegState::Define); + MIB.addReg(RPI.Reg2, RegState::Define); + } + return true; + } + + // For performance reasons restore SVE register in increasing order + auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; }; + auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR); + auto PPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsPPR); + std::reverse(PPRBegin, PPREnd.base()); + auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; }; + auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR); + auto ZPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsZPR); + std::reverse(ZPRBegin, ZPREnd.base()); + + for (const RegPairInfo &RPI : RegPairs) { unsigned Reg1 = RPI.Reg1; unsigned Reg2 = RPI.Reg2; @@ -3185,43 +3198,6 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( MachineMemOperand::MOLoad, Size, Alignment)); if (NeedsWinCFI) InsertSEH(MIB, TII, MachineInstr::FrameDestroy); - - return MIB->getIterator(); - }; - - if (homogeneousPrologEpilog(MF, &MBB)) { - auto MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::HOM_Epilog)) - .setMIFlag(MachineInstr::FrameDestroy); - for (auto &RPI : RegPairs) { - MIB.addReg(RPI.Reg1, RegState::Define); - MIB.addReg(RPI.Reg2, RegState::Define); - } - return true; - } - - // For performance reasons restore SVE register in increasing order - auto IsPPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::PPR; }; - auto PPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsPPR); - auto PPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsPPR); - std::reverse(PPRBegin, PPREnd.base()); - auto IsZPR = [](const RegPairInfo &c) { return c.Type == RegPairInfo::ZPR; }; - auto ZPRBegin = std::find_if(RegPairs.begin(), RegPairs.end(), IsZPR); - auto ZPREnd = std::find_if(RegPairs.rbegin(), RegPairs.rend(), IsZPR); - std::reverse(ZPRBegin, ZPREnd.base()); - - if (ReverseCSRRestoreSeq) { - MachineBasicBlock::iterator First = MBB.end(); - for (const RegPairInfo &RPI : reverse(RegPairs)) { - MachineBasicBlock::iterator It = EmitMI(RPI); - if (First == MBB.end()) - First = It; - } - if (First != MBB.end()) - MBB.splice(MBBI, &MBB, First); - } else { - for (const RegPairInfo &RPI : RegPairs) { - (void)EmitMI(RPI); - } } return true; diff --git a/llvm/test/CodeGen/AArch64/reverse-csr-restore-seq.mir b/llvm/test/CodeGen/AArch64/reverse-csr-restore-seq.mir deleted file mode 100644 index de4baec50e0c6..0000000000000 --- a/llvm/test/CodeGen/AArch64/reverse-csr-restore-seq.mir +++ /dev/null @@ -1,101 +0,0 @@ -# RUN: llc -run-pass=prologepilog -reverse-csr-restore-seq -o - -mtriple=aarch64-- %s | FileCheck %s --check-prefixes=CHECK -# RUN: llc -start-before=prologepilog -stop-after=aarch64-ldst-opt -reverse-csr-restore-seq -o - -mtriple=aarch64-- %s | FileCheck %s --check-prefixes=CHECK -# ---- | - - define void @foo() nounwind { entry: unreachable } - - define void @bar() nounwind { entry: unreachable } - - define void @baz() nounwind { entry: unreachable } - -... ---- -name: foo -# CHECK-LABEL: name: foo -tracksRegLiveness: true -body: | - bb.0: - $x19 = IMPLICIT_DEF - $x20 = IMPLICIT_DEF - $x21 = IMPLICIT_DEF - $x22 = IMPLICIT_DEF - $x23 = IMPLICIT_DEF - $x24 = IMPLICIT_DEF - $x25 = IMPLICIT_DEF - $x26 = IMPLICIT_DEF - - ; The local stack size is 0, so the last ldp in the sequence will also - ; restore the stack. - ; CHECK: $x24, $x23 = frame-destroy LDPXi $sp, 2 - ; CHECK-NEXT: $x22, $x21 = frame-destroy LDPXi $sp, 4 - ; CHECK-NEXT: $x20, $x19 = frame-destroy LDPXi $sp, 6 - - ; The ldp and the stack increment get merged even before - ; the load-store optimizer. - ; CHECK-NEXT: early-clobber $sp, $x26, $x25 = frame-destroy LDPXpost $sp, 8 - - RET_ReallyLR -... ---- -name: bar -# CHECK-LABEL: name: bar -tracksRegLiveness: true -stack: - - { id : 0, size: 8, alignment: 4, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - local-offset: -4, debug-info-variable: '', debug-info-expression: '', - debug-info-location: '' } - -body: | - bb.0: - $x19 = IMPLICIT_DEF - $x20 = IMPLICIT_DEF - $x21 = IMPLICIT_DEF - $x22 = IMPLICIT_DEF - $x23 = IMPLICIT_DEF - $x24 = IMPLICIT_DEF - $x25 = IMPLICIT_DEF - $x26 = IMPLICIT_DEF - - ; The local stack size is not 0, and we can combine the CSR stack size with - ; the local stack size. This results in rewriting the offsets for all the - ; save/restores and forbids us to merge the stack adjustment and the last pop. - ; In this case, there is no point of moving the first CSR pair at the end. - ; We do it anyway, as it's a small price to pay for the resulting - ; simplification in the epilogue emission code. - ; CHECK: $x24, $x23 = frame-destroy LDPXi $sp, 4 - ; CHECK-NEXT: $x22, $x21 = frame-destroy LDPXi $sp, 6 - ; CHECK-NEXT: $x20, $x19 = frame-destroy LDPXi $sp, 8 - ; CHECK-NEXT: $x26, $x25 = frame-destroy LDPXi $sp, 2 - ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 80, 0 - RET_ReallyLR -... ---- -# Check that the load from the offset 0 is moved at the end even when hasFP is -# false. -name: baz -# CHECK-LABEL: name: baz -alignment: 4 -tracksRegLiveness: true -frameInfo: - adjustsStack: true - hasCalls: true -body: | - bb.0: - successors: %bb.1 - - $x0 = IMPLICIT_DEF - $x20 = IMPLICIT_DEF - $x21 = IMPLICIT_DEF - - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp, implicit-def $x0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - B %bb.1 - - bb.1: - ; CHECK: $x21, $x20 = frame-destroy LDPXi $sp, 2 - ; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 32 - RET_ReallyLR -...