Skip to content

Commit

Permalink
Reapply "[CodeGen] Add new pass for late cleanup of redundant definit…
Browse files Browse the repository at this point in the history
…ions."

This reverts commit 122efef.

- Patch fixed to not reuse definitions from predecessors in EH landing pads.
- Late review suggestions (by MaskRay) have been addressed.
- M68k/pipeline.ll test updated.
- Init captures added in processBlock() to avoid capturing structured bindings.
- RISCV has this disabled for now.

Original commit message:

A new pass MachineLateInstrsCleanup is added to be run after PEI.

This is a simple pass that removes redundant and identical instructions
whenever found by scanning the MF once while keeping track of register
definitions in a map. These instructions are typically immediate loads
resulting from rematerialization, and address loads emitted by target in
eliminateFrameInde().

This is enabled by default, but a target could easily disable it by means of
'disablePass(&MachineLateInstrsCleanupID);'.

This late cleanup is naturally not "optimal" in removing instructions as it
is done by looking at phys-regs, but still quite effective. It would be
desirable to improve other parts of CodeGen and avoid these redundant
instructions in the first place, but there are no ideas for this yet.

Differential Revision: https://reviews.llvm.org/D123394

Reviewed By: RKSimon, foad, craig.topper, arsenm, asb
  • Loading branch information
JonPsson committed Dec 5, 2022
1 parent 962863d commit 5ecd363
Show file tree
Hide file tree
Showing 59 changed files with 894 additions and 1,257 deletions.
3 changes: 3 additions & 0 deletions llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1131,6 +1131,9 @@ void CodeGenPassBuilder<Derived>::addMachineLateOptimization(
if (!TM.requiresStructuredCFG())
addPass(TailDuplicatePass());

// Cleanup of redundant (identical) address/immediate loads.
addPass(MachineLateInstrsCleanupPass());

// Copy propagation.
addPass(MachineCopyPropagationPass());
}
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/CodeGen/MachinePassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ DUMMY_MACHINE_FUNCTION_PASS("implicit-null-checks", ImplicitNullChecksPass, ())
DUMMY_MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass, ())
DUMMY_MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass, ())
DUMMY_MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass, ())
DUMMY_MACHINE_FUNCTION_PASS("machine-latecleanup", MachineLateInstrsCleanupPass, ())
DUMMY_MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass, ())
DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass, ())
DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass, ())
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,10 @@ namespace llvm {

MachineFunctionPass *createMachineCopyPropagationPass(bool UseCopyInstr);

/// MachineLateInstrsCleanup - This pass removes redundant identical
/// instructions after register allocation and rematerialization.
extern char &MachineLateInstrsCleanupID;

/// PeepholeOptimizer - This pass performs peephole optimizations -
/// like extension and comparison eliminations.
extern char &PeepholeOptimizerID;
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/InitializePasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,7 @@ void initializeMachineDominanceFrontierPass(PassRegistry&);
void initializeMachineDominatorTreePass(PassRegistry&);
void initializeMachineFunctionPrinterPassPass(PassRegistry&);
void initializeMachineFunctionSplitterPass(PassRegistry &);
void initializeMachineLateInstrsCleanupPass(PassRegistry&);
void initializeMachineLICMPass(PassRegistry&);
void initializeMachineLoopInfoPass(PassRegistry&);
void initializeMachineModuleInfoWrapperPassPass(PassRegistry &);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ add_llvm_component_library(LLVMCodeGen
MachineFunctionSplitter.cpp
MachineInstrBundle.cpp
MachineInstr.cpp
MachineLateInstrsCleanup.cpp
MachineLICM.cpp
MachineLoopInfo.cpp
MachineLoopUtils.cpp
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/CodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineCycleInfoWrapperPassPass(Registry);
initializeMachineDominatorTreePass(Registry);
initializeMachineFunctionPrinterPassPass(Registry);
initializeMachineLateInstrsCleanupPass(Registry);
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
initializeMachineModuleInfoWrapperPassPass(Registry);
Expand Down
239 changes: 239 additions & 0 deletions llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
//==--- MachineLateInstrsCleanup.cpp - Late Instructions Cleanup Pass -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This simple pass removes any identical and redundant immediate or address
// loads to the same register. The immediate loads removed can originally be
// the result of rematerialization, while the addresses are redundant frame
// addressing anchor points created during Frame Indices elimination.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "machine-latecleanup"

STATISTIC(NumRemoved, "Number of redundant instructions removed.");

namespace {

class MachineLateInstrsCleanup : public MachineFunctionPass {
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;

// Data structures to map regs to their definitions per MBB.
using Reg2DefMap = std::map<Register, MachineInstr*>;
std::vector<Reg2DefMap> RegDefs;

// Walk through the instructions in MBB and remove any redundant
// instructions.
bool processBlock(MachineBasicBlock *MBB);

public:
static char ID; // Pass identification, replacement for typeid

MachineLateInstrsCleanup() : MachineFunctionPass(ID) {
initializeMachineLateInstrsCleanupPass(*PassRegistry::getPassRegistry());
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}

bool runOnMachineFunction(MachineFunction &MF) override;

MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
MachineFunctionProperties::Property::NoVRegs);
}
};

} // end anonymous namespace

char MachineLateInstrsCleanup::ID = 0;

char &llvm::MachineLateInstrsCleanupID = MachineLateInstrsCleanup::ID;

INITIALIZE_PASS(MachineLateInstrsCleanup, DEBUG_TYPE,
"Machine Late Instructions Cleanup Pass", false, false)

bool MachineLateInstrsCleanup::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;

TRI = MF.getSubtarget().getRegisterInfo();
TII = MF.getSubtarget().getInstrInfo();

RegDefs.clear();
RegDefs.resize(MF.getNumBlockIDs());

// Visit all MBBs in an order that maximises the reuse from predecessors.
bool Changed = false;
ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
for (MachineBasicBlock *MBB : RPOT)
Changed |= processBlock(MBB);

return Changed;
}

// Clear any previous kill flag on Reg found before I in MBB. Walk backwards
// in MBB and if needed continue in predecessors until a use/def of Reg is
// encountered. This seems to be faster in practice than tracking kill flags
// in a map.
static void clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
BitVector &VisitedPreds,
const TargetRegisterInfo *TRI) {
VisitedPreds.set(MBB->getNumber());
while (I != MBB->begin()) {
--I;
bool Found = false;
for (auto &MO : I->operands())
if (MO.isReg() && TRI->regsOverlap(MO.getReg(), Reg)) {
if (MO.isDef())
return;
if (MO.readsReg()) {
MO.setIsKill(false);
Found = true; // Keep going for an implicit kill of the super-reg.
}
}
if (Found)
return;
}

// If an earlier def is not in MBB, continue in predecessors.
if (!MBB->isLiveIn(Reg))
MBB->addLiveIn(Reg);
assert(!MBB->pred_empty() && "Predecessor def not found!");
for (MachineBasicBlock *Pred : MBB->predecessors())
if (!VisitedPreds.test(Pred->getNumber()))
clearKillsForDef(Reg, Pred, Pred->end(), VisitedPreds, TRI);
}

static void removeRedundantDef(MachineInstr *MI,
const TargetRegisterInfo *TRI) {
Register Reg = MI->getOperand(0).getReg();
BitVector VisitedPreds(MI->getMF()->getNumBlockIDs());
clearKillsForDef(Reg, MI->getParent(), MI->getIterator(), VisitedPreds, TRI);
MI->eraseFromParent();
++NumRemoved;
}

// Return true if MI is a potential candidate for reuse/removal and if so
// also the register it defines in DefedReg. A candidate is a simple
// instruction that does not touch memory, has only one register definition
// and the only reg it may use is FrameReg. Typically this is an immediate
// load or a load-address instruction.
static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
Register FrameReg) {
DefedReg = MCRegister::NoRegister;
bool SawStore = true;
if (!MI->isSafeToMove(nullptr, SawStore) || MI->isImplicitDef() ||
MI->isInlineAsm())
return false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg()) {
if (MO.isDef()) {
if (i == 0 && !MO.isImplicit() && !MO.isDead())
DefedReg = MO.getReg();
else
return false;
} else if (MO.getReg() && MO.getReg() != FrameReg)
return false;
} else if (!(MO.isImm() || MO.isCImm() || MO.isFPImm() || MO.isCPI() ||
MO.isGlobal() || MO.isSymbol()))
return false;
}
return DefedReg.isValid();
}

bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
bool Changed = false;
Reg2DefMap &MBBDefs = RegDefs[MBB->getNumber()];

// Find reusable definitions in the predecessor(s).
if (!MBB->pred_empty() && !MBB->isEHPad()) {
MachineBasicBlock *FirstPred = *MBB->pred_begin();
for (auto [Reg, DefMI] : RegDefs[FirstPred->getNumber()])
if (llvm::all_of(
drop_begin(MBB->predecessors()),
[&, &Reg = Reg, &DefMI = DefMI](const MachineBasicBlock *Pred) {
auto PredDefI = RegDefs[Pred->getNumber()].find(Reg);
return PredDefI != RegDefs[Pred->getNumber()].end() &&
DefMI->isIdenticalTo(*PredDefI->second);
})) {
MBBDefs[Reg] = DefMI;
LLVM_DEBUG(dbgs() << "Reusable instruction from pred(s): in "
<< printMBBReference(*MBB) << ": " << *DefMI;);
}
}

// Process MBB.
MachineFunction *MF = MBB->getParent();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
Register FrameReg = TRI->getFrameRegister(*MF);
for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
// If FrameReg is modified, no previous load-address instructions (using
// it) are valid.
if (MI.modifiesRegister(FrameReg, TRI)) {
MBBDefs.clear();
continue;
}

Register DefedReg;
bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg);

// Check for an earlier identical and reusable instruction.
if (IsCandidate) {
auto DefI = MBBDefs.find(DefedReg);
if (DefI != MBBDefs.end() && MI.isIdenticalTo(*DefI->second)) {
LLVM_DEBUG(dbgs() << "Removing redundant instruction in "
<< printMBBReference(*MBB) << ": " << MI;);
removeRedundantDef(&MI, TRI);
Changed = true;
continue;
}
}

// Clear any entries in map that MI clobbers.
for (auto DefI = MBBDefs.begin(); DefI != MBBDefs.end();) {
Register Reg = DefI->first;
if (MI.modifiesRegister(Reg, TRI))
DefI = MBBDefs.erase(DefI);
else
++DefI;
}

// Record this MI for potential later reuse.
if (IsCandidate) {
LLVM_DEBUG(dbgs() << "Found interesting instruction in "
<< printMBBReference(*MBB) << ": " << MI;);
MBBDefs[DefedReg] = &MI;
}
}

return Changed;
}
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/TargetPassConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1523,6 +1523,9 @@ void TargetPassConfig::addOptimizedRegAlloc() {

/// Add passes that optimize machine instructions after register allocation.
void TargetPassConfig::addMachineLateOptimization() {
// Cleanup of redundant immediate/address loads.
addPass(&MachineLateInstrsCleanupID);

// Branch folding must be run after regalloc and prolog/epilog insertion.
addPass(&BranchFolderPassID);

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ void NVPTXPassConfig::addIRPasses() {
// of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
// NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
disablePass(&PrologEpilogCodeInserterID);
disablePass(&MachineLateInstrsCleanupID);
disablePass(&MachineCopyPropagationID);
disablePass(&TailDuplicateID);
disablePass(&StackMapLivenessID);
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,10 @@ void RISCVPassConfig::addPreRegAlloc() {
void RISCVPassConfig::addPostRegAlloc() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
addPass(createRISCVRedundantCopyEliminationPass());

// Temporarily disabled until post-RA pseudo expansion problem is fixed,
// see D123394 and D139169.
disablePass(&MachineLateInstrsCleanupID);
}

yaml::MachineFunctionInfo *
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,7 @@ void WebAssemblyPassConfig::addPostRegAlloc() {
// them.

// These functions all require the NoVRegs property.
disablePass(&MachineLateInstrsCleanupID);
disablePass(&MachineCopyPropagationID);
disablePass(&PostRAMachineSinkingID);
disablePass(&PostRASchedulerID);
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AArch64/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Shrink Wrapping analysis
; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
; CHECK-NEXT: Machine Late Instructions Cleanup Pass
; CHECK-NEXT: Control Flow Optimizer
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Tail Duplication
Expand Down
7 changes: 0 additions & 7 deletions llvm/test/CodeGen/AArch64/stack-guard-remat-bitcast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,8 @@ define i32 @test_stack_guard_remat2() ssp {
; CHECK-NEXT: Lloh5:
; CHECK-NEXT: ldr x9, [x9]
; CHECK-NEXT: str x8, [sp]
; CHECK-NEXT: Lloh6:
; CHECK-NEXT: adrp x8, ___stack_chk_guard@GOTPAGE
; CHECK-NEXT: stur x9, [x29, #-8]
; CHECK-NEXT: Lloh7:
; CHECK-NEXT: ldr x8, [x8, ___stack_chk_guard@GOTPAGEOFF]
; CHECK-NEXT: ldur x9, [x29, #-8]
; CHECK-NEXT: Lloh8:
; CHECK-NEXT: ldr x8, [x8]
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: b.ne LBB0_2
; CHECK-NEXT: ; %bb.1: ; %entry
Expand All @@ -46,7 +40,6 @@ define i32 @test_stack_guard_remat2() ssp {
; CHECK-NEXT: ret
; CHECK-NEXT: LBB0_2: ; %entry
; CHECK-NEXT: bl ___stack_chk_fail
; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh6, Lloh7, Lloh8
; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh1, Lloh3, Lloh5
; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh0, Lloh2, Lloh4
entry:
Expand Down
13 changes: 5 additions & 8 deletions llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
Original file line number Diff line number Diff line change
Expand Up @@ -59,26 +59,23 @@ define float @foo2(double* %x0, double* %x1) nounwind {
; CHECK-NEXT: addvl sp, sp, #-4
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ld4d { z1.d - z4.d }, p0/z, [x0]
; CHECK-NEXT: ld4d { z16.d - z19.d }, p0/z, [x1]
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: fmov s0, #1.00000000
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: mov w1, #1
; CHECK-NEXT: mov w2, #2
; CHECK-NEXT: st1d { z16.d }, p0, [x9]
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: mov w3, #3
; CHECK-NEXT: mov w4, #4
; CHECK-NEXT: mov w5, #5
; CHECK-NEXT: mov w6, #6
; CHECK-NEXT: st1d { z17.d }, p0, [x9, #1, mul vl]
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: mov w7, #7
; CHECK-NEXT: st1d { z18.d }, p0, [x9, #2, mul vl]
; CHECK-NEXT: add x9, sp, #16
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: st1d { z16.d }, p0, [x9]
; CHECK-NEXT: st1d { z17.d }, p0, [x9, #1, mul vl]
; CHECK-NEXT: st1d { z18.d }, p0, [x9, #2, mul vl]
; CHECK-NEXT: st1d { z19.d }, p0, [x9, #3, mul vl]
; CHECK-NEXT: str x8, [sp]
; CHECK-NEXT: bl callee2
Expand Down
Loading

0 comments on commit 5ecd363

Please sign in to comment.