Skip to content

Commit 8f7f2c4

Browse files
committed
Revert "[AArch64] Homogeneous Prolog and Epilog Size Optimization"
This reverts commit 0426be3. Reverting due to some expensive-checks failures in tests.
1 parent ab1d977 commit 8f7f2c4

11 files changed

+9
-1001
lines changed

llvm/lib/Target/AArch64/AArch64.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ FunctionPass *createAArch64SLSHardeningPass();
4242
FunctionPass *createAArch64IndirectThunks();
4343
FunctionPass *createAArch64SpeculationHardeningPass();
4444
FunctionPass *createAArch64LoadStoreOptimizationPass();
45-
ModulePass *createAArch64LowerHomogeneousPrologEpilogPass();
4645
FunctionPass *createAArch64SIMDInstrOptPass();
4746
ModulePass *createAArch64PromoteConstantPass();
4847
FunctionPass *createAArch64ConditionOptimizerPass();
@@ -80,7 +79,6 @@ void initializeAArch64ExpandPseudoPass(PassRegistry&);
8079
void initializeAArch64SLSHardeningPass(PassRegistry&);
8180
void initializeAArch64SpeculationHardeningPass(PassRegistry&);
8281
void initializeAArch64LoadStoreOptPass(PassRegistry&);
83-
void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
8482
void initializeAArch64SIMDInstrOptPass(PassRegistry&);
8583
void initializeAArch64PreLegalizerCombinerPass(PassRegistry&);
8684
void initializeAArch64PostLegalizerCombinerPass(PassRegistry &);

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 9 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -179,11 +179,6 @@ static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
179179
cl::desc("sort stack allocations"),
180180
cl::init(true), cl::Hidden);
181181

182-
cl::opt<bool> EnableHomogeneousPrologEpilog(
183-
"homogeneous-prolog-epilog", cl::init(false), cl::ZeroOrMore, cl::Hidden,
184-
cl::desc("Emit homogeneous prologue and epilogue for the size "
185-
"optimization (default = off)"));
186-
187182
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
188183

189184
/// Returns the argument pop size.
@@ -218,47 +213,6 @@ static uint64_t getArgumentPopSize(MachineFunction &MF,
218213
return ArgumentPopSize;
219214
}
220215

221-
static bool produceCompactUnwindFrame(MachineFunction &MF);
222-
static bool needsWinCFI(const MachineFunction &MF);
223-
static StackOffset getSVEStackSize(const MachineFunction &MF);
224-
225-
/// Returns true if a homogeneous prolog or epilog code can be emitted
226-
/// for the size optimization. If possible, a frame helper call is injected.
227-
/// When Exit block is given, this check is for epilog.
228-
bool AArch64FrameLowering::homogeneousPrologEpilog(
229-
MachineFunction &MF, MachineBasicBlock *Exit) const {
230-
if (!MF.getFunction().hasMinSize())
231-
return false;
232-
if (!EnableHomogeneousPrologEpilog)
233-
return false;
234-
if (ReverseCSRRestoreSeq)
235-
return false;
236-
if (EnableRedZone)
237-
return false;
238-
239-
// TODO: Windows is not supported yet.
240-
if (needsWinCFI(MF))
241-
return false;
242-
// TODO: SVE is not supported yet.
243-
if (getSVEStackSize(MF))
244-
return false;
245-
246-
// Bail on stack adjustment needed on return for simplicity.
247-
const MachineFrameInfo &MFI = MF.getFrameInfo();
248-
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
249-
if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))
250-
return false;
251-
if (Exit && getArgumentPopSize(MF, *Exit))
252-
return false;
253-
254-
return true;
255-
}
256-
257-
/// Returns true if CSRs should be paired.
258-
bool AArch64FrameLowering::producePairRegisters(MachineFunction &MF) const {
259-
return produceCompactUnwindFrame(MF) || homogeneousPrologEpilog(MF);
260-
}
261-
262216
/// This is the biggest offset to the stack pointer we can encode in aarch64
263217
/// instructions (without using a separate calculation and a temp register).
264218
/// Note that the exception here are vector stores/loads which cannot encode any
@@ -651,8 +605,6 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
651605
const MachineFrameInfo &MFI = MF.getFrameInfo();
652606
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
653607
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
654-
if (homogeneousPrologEpilog(MF))
655-
return false;
656608

657609
if (AFI->getLocalStackSize() == 0)
658610
return false;
@@ -1196,16 +1148,12 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
11961148
// All of the remaining stack allocations are for locals.
11971149
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
11981150
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
1199-
bool HomPrologEpilog = homogeneousPrologEpilog(MF);
12001151
if (CombineSPBump) {
12011152
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
12021153
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
12031154
StackOffset::getFixed(-NumBytes), TII,
12041155
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
12051156
NumBytes = 0;
1206-
} else if (HomPrologEpilog) {
1207-
// Stack has already been adjusted.
1208-
NumBytes -= PrologueSaveSize;
12091157
} else if (PrologueSaveSize != 0) {
12101158
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
12111159
MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI);
@@ -1233,20 +1181,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
12331181
if (CombineSPBump)
12341182
FPOffset += AFI->getLocalStackSize();
12351183

1236-
if (HomPrologEpilog) {
1237-
auto Prolog = MBBI;
1238-
--Prolog;
1239-
assert(Prolog->getOpcode() == AArch64::HOM_Prolog);
1240-
Prolog->addOperand(MachineOperand::CreateImm(FPOffset));
1241-
} else {
1242-
// Issue sub fp, sp, FPOffset or
1243-
// mov fp,sp when FPOffset is zero.
1244-
// Note: All stores of callee-saved registers are marked as "FrameSetup".
1245-
// This code marks the instruction(s) that set the FP also.
1246-
emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
1247-
StackOffset::getFixed(FPOffset), TII,
1248-
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
1249-
}
1184+
// Issue sub fp, sp, FPOffset or
1185+
// mov fp,sp when FPOffset is zero.
1186+
// Note: All stores of callee-saved registers are marked as "FrameSetup".
1187+
// This code marks the instruction(s) that set the FP also.
1188+
emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
1189+
StackOffset::getFixed(FPOffset), TII,
1190+
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI);
12501191
}
12511192

12521193
if (windowsRequiresStackProbe(MF, NumBytes)) {
@@ -1674,25 +1615,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
16741615
// function.
16751616
if (MF.hasEHFunclets())
16761617
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
1677-
if (homogeneousPrologEpilog(MF, &MBB)) {
1678-
assert(!NeedsWinCFI);
1679-
auto LastPopI = MBB.getFirstTerminator();
1680-
if (LastPopI != MBB.begin()) {
1681-
auto HomogeneousEpilog = std::prev(LastPopI);
1682-
if (HomogeneousEpilog->getOpcode() == AArch64::HOM_Epilog)
1683-
LastPopI = HomogeneousEpilog;
1684-
}
1685-
1686-
// Adjust local stack
1687-
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
1688-
StackOffset::getFixed(-AFI->getLocalStackSize()), TII,
1689-
MachineInstr::FrameDestroy, false, NeedsWinCFI);
1690-
1691-
// SP has been already adjusted while restoring callee save regs.
1692-
// We have already bailed out of the case that adjusts SP for arguments.
1693-
assert(AfterCSRPopSize == 0);
1694-
return;
1695-
}
16961618
bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
16971619
// Assume we can't combine the last pop with the sp restore.
16981620

@@ -2411,22 +2333,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
24112333
MBB.addLiveIn(AArch64::X18);
24122334
}
24132335

2414-
if (homogeneousPrologEpilog(MF)) {
2415-
auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Prolog))
2416-
.setMIFlag(MachineInstr::FrameSetup);
2417-
2418-
for (auto &RPI : RegPairs) {
2419-
MIB.addReg(RPI.Reg1);
2420-
MIB.addReg(RPI.Reg2);
2421-
2422-
// Update register live in.
2423-
if (!MRI.isReserved(RPI.Reg1))
2424-
MBB.addLiveIn(RPI.Reg1);
2425-
if (!MRI.isReserved(RPI.Reg2))
2426-
MBB.addLiveIn(RPI.Reg2);
2427-
}
2428-
return true;
2429-
}
24302336
for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
24312337
++RPII) {
24322338
RegPairInfo RPI = *RPII;
@@ -2622,14 +2528,6 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
26222528
for (const RegPairInfo &RPI : reverse(RegPairs))
26232529
if (!RPI.isScalable())
26242530
EmitMI(RPI);
2625-
} else if (homogeneousPrologEpilog(MF, &MBB)) {
2626-
auto MIB = BuildMI(MBB, MI, DL, TII.get(AArch64::HOM_Epilog))
2627-
.setMIFlag(MachineInstr::FrameDestroy);
2628-
for (auto &RPI : RegPairs) {
2629-
MIB.addReg(RPI.Reg1, RegState::Define);
2630-
MIB.addReg(RPI.Reg2, RegState::Define);
2631-
}
2632-
return true;
26332531
} else
26342532
for (const RegPairInfo &RPI : RegPairs)
26352533
if (!RPI.isScalable())
@@ -2699,7 +2597,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
26992597
// MachO's compact unwind format relies on all registers being stored in
27002598
// pairs.
27012599
// FIXME: the usual format is actually better if unwinding isn't needed.
2702-
if (producePairRegisters(MF) && PairedReg != AArch64::NoRegister &&
2600+
if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister &&
27032601
!SavedRegs.test(PairedReg)) {
27042602
SavedRegs.set(PairedReg);
27052603
if (AArch64::GPR64RegClass.contains(PairedReg) &&
@@ -2778,7 +2676,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
27782676
// MachO's compact unwind format relies on all registers being stored in
27792677
// pairs, so if we need to spill one extra for BigStack, then we need to
27802678
// store the pair.
2781-
if (producePairRegisters(MF))
2679+
if (produceCompactUnwindFrame(MF))
27822680
SavedRegs.set(UnspilledCSGPRPaired);
27832681
ExtraCSSpill = UnspilledCSGPR;
27842682
}

llvm/lib/Target/AArch64/AArch64FrameLowering.h

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -124,16 +124,6 @@ class AArch64FrameLowering : public TargetFrameLowering {
124124
SmallVectorImpl<int> &ObjectsToAllocate) const override;
125125

126126
private:
127-
/// Returns true if a homogeneous prolog or epilog code can be emitted
128-
/// for the size optimization. If so, HOM_Prolog/HOM_Epilog pseudo
129-
/// instructions are emitted in place. When Exit block is given, this check is
130-
/// for epilog.
131-
bool homogeneousPrologEpilog(MachineFunction &MF,
132-
MachineBasicBlock *Exit = nullptr) const;
133-
134-
/// Returns true if CSRs should be paired.
135-
bool producePairRegisters(MachineFunction &MF) const;
136-
137127
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
138128
uint64_t StackBumpBytes) const;
139129

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3896,14 +3896,6 @@ let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
38963896
Sched<[]>;
38973897
}
38983898

3899-
// Pseudo instructions for homogeneous prolog/epilog
3900-
let isPseudo = 1 in {
3901-
// Save CSRs in order, {FPOffset}
3902-
def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
3903-
// Restore CSRs in order
3904-
def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
3905-
}
3906-
39073899
//===----------------------------------------------------------------------===//
39083900
// Floating point immediate move.
39093901
//===----------------------------------------------------------------------===//

0 commit comments

Comments
 (0)