[GVN] Improve PRE on load instructions
This patch implements the enhancement proposed by
#59312.

Suppose we have the following code:

   v0 = load %addr
   br %LoadBB

LoadBB:
   v1 = load %addr
   ...

PredBB:
   ...
   br %cond, label %LoadBB, label %SuccBB

SuccBB:
   v2 = load %addr
   ...

Instruction v1 in LoadBB is partially redundant, and the edge (PredBB, LoadBB)
is a critical edge. SuccBB is another successor of PredBB; it contains another
load v2 that is identical to v1. Current GVN splits the critical edge
(PredBB, LoadBB) and inserts a new load in it. A better approach is to move the
load of v2 into PredBB, so that v1 can be changed to a PHI instruction.

If there are two or more similar predecessors, as in the test case in the bug
entry, current GVN simply gives up because it would otherwise need to split
multiple critical edges. But we can instead move all such loads from the
successor blocks into their predecessors.
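
For illustration only (this sketch is not part of the patch; the block name
%entry for the first block is a placeholder), the intended result is roughly:

   v0 = load %addr
   br %LoadBB

LoadBB:
   v1 = phi [v0, %entry], [v2, %PredBB]
   ...

PredBB:
   ...
   v2 = load %addr        ; hoisted here from SuccBB
   br %cond, label %LoadBB, label %SuccBB

SuccBB:
   ; former users of the load in SuccBB now use the hoisted v2
   ...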

Differential Revision: https://reviews.llvm.org/D141712
weiguozhi committed Jun 6, 2023
1 parent 266ffd7 commit 84bcfa0
Showing 10 changed files with 568 additions and 84 deletions.
9 changes: 8 additions & 1 deletion llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -329,6 +329,11 @@ class GVNPass : public PassInfoMixin<GVNPass> {
AvailValInBlkVect &ValuesPerBlock,
UnavailBlkVect &UnavailableBlocks);

/// Given a critical edge from Pred to LoadBB, find a load instruction
/// which is identical to Load from another successor of Pred.
LoadInst *findLoadToHoistIntoPred(BasicBlock *Pred, BasicBlock *LoadBB,
LoadInst *Load);

bool PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
UnavailBlkVect &UnavailableBlocks);

@@ -342,7 +347,8 @@ class GVNPass : public PassInfoMixin<GVNPass> {
/// AvailableLoads (connected by Phis if needed).
void eliminatePartiallyRedundantLoad(
LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
MapVector<BasicBlock *, Value *> &AvailableLoads);
MapVector<BasicBlock *, Value *> &AvailableLoads,
MapVector<BasicBlock *, LoadInst *> *CriticalEdgePredAndLoad);

// Other helper routines
bool processInstruction(Instruction *I);
@@ -355,6 +361,7 @@ class GVNPass : public PassInfoMixin<GVNPass> {
BasicBlock *Curr, unsigned int ValNo);
Value *findLeader(const BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
void removeInstruction(Instruction *I);
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
161 changes: 135 additions & 26 deletions llvm/lib/Transforms/Scalar/GVN.cpp
@@ -94,6 +94,8 @@ STATISTIC(NumGVNSimpl, "Number of instructions simplified");
STATISTIC(NumGVNEqProp, "Number of equalities propagated");
STATISTIC(NumPRELoad, "Number of loads PRE'd");
STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd");
STATISTIC(NumPRELoadMoved2CEPred,
"Number of loads moved to predecessor of a critical edge in PRE");

STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
"Number of blocks speculated as available in "
@@ -127,6 +129,11 @@ static cl::opt<uint32_t> MaxNumVisitedInsts(
cl::desc("Max number of visited instructions when trying to find "
"dominating value of select dependency (default = 100)"));

static cl::opt<uint32_t> MaxNumInsnsPerBlock(
"gvn-max-num-insns", cl::Hidden, cl::init(100),
cl::desc("Max number of instructions to scan in each basic block in GVN "
"(default = 100)"));

struct llvm::GVNPass::Expression {
uint32_t opcode;
bool commutative = false;
@@ -936,6 +943,18 @@ static bool IsValueFullyAvailableInBlock(
return !UnavailableBB;
}

/// If the specified OldValue exists in ValuesPerBlock, replace its value with
/// NewValue.
static void replaceValuesPerBlockEntry(
SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, Value *OldValue,
Value *NewValue) {
for (AvailableValueInBlock &V : ValuesPerBlock) {
if ((V.AV.isSimpleValue() && V.AV.getSimpleValue() == OldValue) ||
(V.AV.isCoercedLoadValue() && V.AV.getCoercedLoadValue() == OldValue))
V = AvailableValueInBlock::get(V.BB, NewValue);
}
}

/// Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate Load. This returns the value
/// that should be used at Load's definition site.
@@ -1329,9 +1348,67 @@ void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
"post condition violation");
}

/// Given the following code, v1 is partially available on some edges, but not
/// available on the edge from PredBB. This function checks whether there is an
/// identical load in the other successor of PredBB.
///
/// v0 = load %addr
/// br %LoadBB
///
/// LoadBB:
/// v1 = load %addr
/// ...
///
/// PredBB:
/// ...
/// br %cond, label %LoadBB, label %SuccBB
///
/// SuccBB:
/// v2 = load %addr
/// ...
///
LoadInst *GVNPass::findLoadToHoistIntoPred(BasicBlock *Pred, BasicBlock *LoadBB,
LoadInst *Load) {
// For simplicity we only handle the case where Pred has 2 successors.
auto *Term = Pred->getTerminator();
if (Term->getNumSuccessors() != 2 || Term->isExceptionalTerminator())
return nullptr;
auto *SuccBB = Term->getSuccessor(0);
if (SuccBB == LoadBB)
SuccBB = Term->getSuccessor(1);
if (!SuccBB->getSinglePredecessor())
return nullptr;

unsigned int NumInsts = MaxNumInsnsPerBlock;
for (Instruction &Inst : *SuccBB) {
if (Inst.isDebugOrPseudoInst())
continue;
if (--NumInsts == 0)
return nullptr;

if (!Inst.isIdenticalTo(Load))
continue;

MemDepResult Dep = MD->getDependency(&Inst);
// If an identical load doesn't depend on any local instructions, it can
// be safely moved to PredBB.
// Also check for the implicit control flow instructions. See the comments
// in PerformLoadPRE for details.
if (Dep.isNonLocal() && !ICF->isDominatedByICFIFromSameBlock(&Inst))
return cast<LoadInst>(&Inst);

// Otherwise something in the same BB clobbers the memory; we can't move
// this load or later loads to PredBB.
return nullptr;
}

return nullptr;
}

void GVNPass::eliminatePartiallyRedundantLoad(
LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
MapVector<BasicBlock *, Value *> &AvailableLoads) {
MapVector<BasicBlock *, Value *> &AvailableLoads,
MapVector<BasicBlock *, LoadInst *> *CriticalEdgePredAndLoad) {
for (const auto &AvailableLoad : AvailableLoads) {
BasicBlock *UnavailableBlock = AvailableLoad.first;
Value *LoadPtr = AvailableLoad.second;
@@ -1385,6 +1462,23 @@ void GVNPass::eliminatePartiallyRedundantLoad(
AvailableValueInBlock::get(UnavailableBlock, NewLoad));
MD->invalidateCachedPointerInfo(LoadPtr);
LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');

// For each PredBB in CriticalEdgePredAndLoad we need to replace the uses of
// the old load instruction with the newly created load instruction.
if (CriticalEdgePredAndLoad) {
auto I = CriticalEdgePredAndLoad->find(UnavailableBlock);
if (I != CriticalEdgePredAndLoad->end()) {
++NumPRELoadMoved2CEPred;
ICF->insertInstructionTo(NewLoad, UnavailableBlock);
LoadInst *OldLoad = I->second;
combineMetadataForCSE(NewLoad, OldLoad, false);
OldLoad->replaceAllUsesWith(NewLoad);
replaceValuesPerBlockEntry(ValuesPerBlock, OldLoad, NewLoad);
if (uint32_t ValNo = VN.lookup(OldLoad, false))
removeFromLeaderTable(ValNo, OldLoad, OldLoad->getParent());
removeInstruction(OldLoad);
}
}
}

// Perform PHI construction.
@@ -1472,7 +1566,12 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
for (BasicBlock *UnavailableBB : UnavailableBlocks)
FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;

SmallVector<BasicBlock *, 4> CriticalEdgePred;
// The edge from Pred to LoadBB is a critical edge and will be split.
SmallVector<BasicBlock *, 4> CriticalEdgePredSplit;
// The edge from Pred to LoadBB is a critical edge, and another successor of
// Pred contains a load that can be moved to Pred. This data structure maps
// such a Pred to the movable load.
MapVector<BasicBlock *, LoadInst *> CriticalEdgePredAndLoad;
for (BasicBlock *Pred : predecessors(LoadBB)) {
// If any predecessor block is an EH pad that does not allow non-PHI
// instructions before the terminator, we can't PRE the load.
@@ -1512,47 +1611,59 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
return false;
}

CriticalEdgePred.push_back(Pred);
if (LoadInst *LI = findLoadToHoistIntoPred(Pred, LoadBB, Load))
CriticalEdgePredAndLoad[Pred] = LI;
else
CriticalEdgePredSplit.push_back(Pred);
} else {
// Only add the predecessors that will not be split for now.
PredLoads[Pred] = nullptr;
}
}

// Decide whether PRE is profitable for this load.
unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size();
unsigned NumInsertPreds = PredLoads.size() + CriticalEdgePredSplit.size();
unsigned NumUnavailablePreds = NumInsertPreds +
CriticalEdgePredAndLoad.size();
assert(NumUnavailablePreds != 0 &&
"Fully available value should already be eliminated!");
(void)NumUnavailablePreds;

// If this load is unavailable in multiple predecessors, reject it.
// If we need to insert a new load in multiple predecessors, reject it.
// FIXME: If we could restructure the CFG, we could make a common pred with
// all the preds that don't have an available Load and insert a new load into
// that one block.
if (NumUnavailablePreds != 1)
if (NumInsertPreds > 1)
return false;

// Now we know where we will insert the load. We must ensure that it is safe
// to speculatively execute the load at those points.
if (MustEnsureSafetyOfSpeculativeExecution) {
if (CriticalEdgePred.size())
if (CriticalEdgePredSplit.size())
if (!isSafeToSpeculativelyExecute(Load, LoadBB->getFirstNonPHI(), AC, DT))
return false;
for (auto &PL : PredLoads)
if (!isSafeToSpeculativelyExecute(Load, PL.first->getTerminator(), AC,
DT))
return false;
for (auto &CEP : CriticalEdgePredAndLoad)
if (!isSafeToSpeculativelyExecute(Load, CEP.first->getTerminator(), AC,
DT))
return false;
}

// Split critical edges, and update the unavailable predecessors accordingly.
for (BasicBlock *OrigPred : CriticalEdgePred) {
for (BasicBlock *OrigPred : CriticalEdgePredSplit) {
BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
PredLoads[NewPred] = nullptr;
LLVM_DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
<< LoadBB->getName() << '\n');
}

for (auto &CEP : CriticalEdgePredAndLoad)
PredLoads[CEP.first] = nullptr;

// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
const DataLayout &DL = Load->getModule()->getDataLayout();
@@ -1609,7 +1720,7 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
}
// HINT: Don't revert the edge-splitting as following transformation may
// also need to split these critical edges.
return !CriticalEdgePred.empty();
return !CriticalEdgePredSplit.empty();
}

// Okay, we can eliminate this load by inserting a reload in the predecessor
@@ -1634,7 +1745,8 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
VN.lookupOrAdd(I);
}

eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, PredLoads);
eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, PredLoads,
&CriticalEdgePredAndLoad);
++NumPRELoad;
return true;
}
@@ -1713,7 +1825,8 @@ bool GVNPass::performLoopLoadPRE(LoadInst *Load,
AvailableLoads[Preheader] = LoadPtr;

LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOOP LOAD: " << *Load << '\n');
eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads);
eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads,
/*CriticalEdgePredAndLoad*/ nullptr);
++NumPRELoopLoad;
return true;
}
@@ -2693,12 +2806,7 @@ bool GVNPass::processBlock(BasicBlock *BB) {
LLVM_DEBUG(dbgs() << "GVN removed: " << *I << '\n');
salvageKnowledge(I, AC);
salvageDebugInfo(*I);
if (MD) MD->removeInstruction(I);
if (MSSAU)
MSSAU->removeMemoryAccess(I);
LLVM_DEBUG(verifyRemoved(I));
ICF->removeInstruction(I);
I->eraseFromParent();
removeInstruction(I);
}
InstrsToErase.clear();

@@ -2915,15 +3023,7 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) {
removeFromLeaderTable(ValNo, CurInst, CurrentBlock);

LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
if (MD)
MD->removeInstruction(CurInst);
if (MSSAU)
MSSAU->removeMemoryAccess(CurInst);
LLVM_DEBUG(verifyRemoved(CurInst));
// FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes
// some assertion failures.
ICF->removeInstruction(CurInst);
CurInst->eraseFromParent();
removeInstruction(CurInst);
++NumGVNInstr;

return true;
@@ -3019,6 +3119,15 @@ void GVNPass::cleanupGlobalSets() {
InvalidBlockRPONumbers = true;
}

void GVNPass::removeInstruction(Instruction *I) {
if (MD) MD->removeInstruction(I);
if (MSSAU)
MSSAU->removeMemoryAccess(I);
LLVM_DEBUG(verifyRemoved(I));
ICF->removeInstruction(I);
I->eraseFromParent();
}

/// Verify that the specified instruction does not occur in our
/// internal data structures.
void GVNPass::verifyRemoved(const Instruction *Inst) const {
@@ -57,7 +57,7 @@ bb15:
; CHECK-NEXT: br label %bb15

; CHECK-LABEL: bb15:
; CHECK: %tmp17 = phi i8 [ %tmp8, %bb15split ], [ %tmp17.pre, %bb1.bb15_crit_edge ]
; CHECK: %tmp17 = phi i8 [ %tmp12.pre3, %bb15split ], [ %tmp17.pre, %bb1.bb15_crit_edge ]

bb19: ; preds = %bb15
ret i1 %tmp18
13 changes: 6 additions & 7 deletions llvm/test/Transforms/GVN/PRE/2017-06-28-pre-load-dbgloc.ll
@@ -1,6 +1,6 @@
; This test checks if debug loc is propagated to load/store created by GVN/Instcombine.
; RUN: opt < %s -passes=gvn -S | FileCheck %s --check-prefixes=ALL,GVN
; RUN: opt < %s -passes=gvn,instcombine -S | FileCheck %s --check-prefixes=ALL,INSTCOMBINE
; RUN: opt < %s -passes=gvn -S | FileCheck %s --check-prefixes=ALL
; RUN: opt < %s -passes=gvn,instcombine -S | FileCheck %s --check-prefixes=ALL

; struct node {
; int *v;
@@ -35,10 +35,9 @@ define i32 @test(ptr readonly %desc) local_unnamed_addr #0 !dbg !4 {
entry:
%tobool = icmp eq ptr %desc, null
br i1 %tobool, label %cond.end, label %cond.false, !dbg !9
; ALL: br i1 %tobool, label %entry.cond.end_crit_edge, label %cond.false, !dbg [[LOC_15_6:![0-9]+]]
; ALL: entry.cond.end_crit_edge:
; GVN: %.pre = load ptr, ptr null, align 8, !dbg [[LOC_16_13:![0-9]+]]
; INSTCOMBINE:store ptr poison, ptr null, align 4294967296, !dbg [[LOC_16_13:![0-9]+]]
; ALL: %.pre = load ptr, ptr %desc, align 8, !dbg [[LOC_16_13:![0-9]+]]
; ALL: br i1 %tobool, label %cond.end, label %cond.false, !dbg [[LOC_15_6:![0-9]+]]
; ALL: cond.false:

cond.false:
%0 = load ptr, ptr %desc, align 8, !dbg !11
@@ -72,5 +71,5 @@ declare i32 @bar(ptr, ptr) local_unnamed_addr #1
!11 = !DILocation(line: 15, column: 34, scope: !4)

;ALL: [[SCOPE:![0-9]+]] = distinct !DISubprogram(name: "test",{{.*}}
;ALL: [[LOC_15_6]] = !DILocation(line: 15, column: 6, scope: [[SCOPE]])
;ALL: [[LOC_16_13]] = !DILocation(line: 16, column: 13, scope: [[SCOPE]])
;ALL: [[LOC_15_6]] = !DILocation(line: 15, column: 6, scope: [[SCOPE]])
@@ -35,18 +35,17 @@ define i32 @test_no_null_opt(ptr readonly %desc) local_unnamed_addr #0 !dbg !4 {
entry:
%tobool = icmp eq ptr %desc, null
br i1 %tobool, label %cond.end, label %cond.false, !dbg !9
; ALL: br i1 %tobool, label %entry.cond.end_crit_edge, label %cond.false, !dbg [[LOC_15_6:![0-9]+]]
; ALL: entry.cond.end_crit_edge:
; ALL: load ptr, ptr null, align {{[0-9]+}}, !dbg [[LOC_16_13:![0-9]+]]
; ALL: %.pre = load ptr, ptr %desc, align 8, !dbg [[LOC_16_13:![0-9]+]]
; ALL: br i1 %tobool, label %cond.end, label %cond.false, !dbg [[LOC_15_6:![0-9]+]]
; ALL: cond.false:

cond.false:
%0 = load ptr, ptr %desc, align 8, !dbg !11
%1 = load ptr, ptr %0, align 8
br label %cond.end, !dbg !9

cond.end:
; ALL: phi ptr [ %0, %cond.false ], [ %.pre, %entry.cond.end_crit_edge ]
; ALL: phi ptr [ %1, %cond.false ], [ null, %entry.cond.end_crit_edge ]
; ALL: phi ptr [ %0, %cond.false ], [ null, %entry ]

%2 = phi ptr [ %1, %cond.false ], [ null, %entry ], !dbg !9
%3 = load ptr, ptr %desc, align 8, !dbg !10
@@ -75,5 +74,5 @@ declare i32 @bar(ptr, ptr) local_unnamed_addr #1
!11 = !DILocation(line: 15, column: 34, scope: !4)

;ALL: [[SCOPE:![0-9]+]] = distinct !DISubprogram(name: "test_no_null_opt",{{.*}}
;ALL: [[LOC_15_6]] = !DILocation(line: 15, column: 6, scope: [[SCOPE]])
;ALL: [[LOC_16_13]] = !DILocation(line: 16, column: 13, scope: [[SCOPE]])
;ALL: [[LOC_15_6]] = !DILocation(line: 15, column: 6, scope: [[SCOPE]])
