Skip to content

Commit

Permalink
[CGP] Merge empty case blocks if no extra moves are added.
Browse files Browse the repository at this point in the history
Summary:
Currently we skip merging when extra moves may be added in the header of switch instead of the case block, if the case block is used as an incoming
block of a PHI. If all the incoming values of the PHIs are non-constants and the destination block is dominated by the switch block then extra moves are likely not added by ISel, so there is no need to skip merging in this case.

Reviewers: efriedma, junbuml, davidxl, hfinkel, qcolombet

Reviewed By: efriedma

Subscribers: dberlin, kuhar, mcrosier, llvm-commits

Differential Revision: https://reviews.llvm.org/D37343

llvm-svn: 316711
  • Loading branch information
Balaram Makam committed Oct 26, 2017
1 parent bfa8432 commit cddf3c5
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 13 deletions.
47 changes: 36 additions & 11 deletions llvm/lib/CodeGen/CodeGenPrepare.cpp
Expand Up @@ -212,6 +212,7 @@ class TypePromotionTransaction;
const TargetTransformInfo *TTI = nullptr;
const TargetLibraryInfo *TLInfo;
const LoopInfo *LI;
DominatorTree *DT;
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;

Expand Down Expand Up @@ -262,6 +263,7 @@ class TypePromotionTransaction;

void getAnalysisUsage(AnalysisUsage &AU) const override {
// FIXME: When we can selectively preserve passes, preserve the domtree.
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
Expand Down Expand Up @@ -343,6 +345,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

OptSize = F.optForSize();

if (ProfileGuidedSectionPrefix) {
Expand Down Expand Up @@ -755,6 +759,11 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
return true;

SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
SmallVector<PHINode *, 16> PNs;

for (auto DestBBI = DestBB->begin();
auto *DestPN = dyn_cast<PHINode>(&*DestBBI); ++DestBBI)
PNs.push_back(DestPN);

// Find all other incoming blocks from which incoming values of all PHIs in
// DestBB are the same as the ones from BB.
Expand All @@ -764,16 +773,10 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
if (DestBBPred == BB)
continue;

bool HasAllSameValue = true;
BasicBlock::const_iterator DestBBI = DestBB->begin();
while (const PHINode *DestPN = dyn_cast<PHINode>(DestBBI++)) {
if (DestPN->getIncomingValueForBlock(BB) !=
DestPN->getIncomingValueForBlock(DestBBPred)) {
HasAllSameValue = false;
break;
}
}
if (HasAllSameValue)
if (llvm::all_of(PNs, [&](PHINode *PN) {
return (PN->getIncomingValueForBlock(BB) ==
PN->getIncomingValueForBlock(DestBBPred));
}))
SameIncomingValueBBs.insert(DestBBPred);
}

Expand All @@ -783,6 +786,14 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
if (SameIncomingValueBBs.count(Pred))
return true;

// Check to see if none of the phis have constant incoming values for BB and
// Pred dominates DestBB, in such case extra COPYs are likely not added, so
// there is no reason to skip merging.
if (DT->dominates(Pred, DestBB) && llvm::none_of(PNs, [BB](PHINode *PN) {
return isa<Constant>(PN->getIncomingValueForBlock(BB));
}))
return true;

if (!BFI) {
Function &F = *BB->getParent();
LoopInfo LI{DominatorTree(F)};
Expand Down Expand Up @@ -885,7 +896,7 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
MergeBasicBlockIntoOnlyPred(DestBB, nullptr);
MergeBasicBlockIntoOnlyPred(DestBB, DT);

if (isEntry && BB != &BB->getParent()->getEntryBlock())
BB->moveBefore(&BB->getParent()->getEntryBlock());
Expand Down Expand Up @@ -926,7 +937,21 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {

// The PHIs are now updated, change everything that refers to BB to use
// DestBB and remove BB.
SmallVector<DominatorTree::UpdateType, 3> Updates;
for (auto *PredBB : predecessors(BB)) {
if (PredBB == BB)
continue;
DominatorTree::UpdateType UT = {DominatorTree::Delete, PredBB, BB};
if (!is_contained(Updates, UT)) {
Updates.push_back(UT);
if (PredBB != DestBB)
Updates.push_back({DominatorTree::Insert, PredBB, DestBB});
}
}
BB->replaceAllUsesWith(DestBB);
DT->applyUpdates(Updates);
BB->getTerminator()->eraseFromParent();
DT->deleteEdge(BB, DestBB);
BB->eraseFromParent();
++NumBlocksElim;

Expand Down
100 changes: 98 additions & 2 deletions llvm/test/Transforms/CodeGenPrepare/skip-merging-case-block.ll
Expand Up @@ -143,11 +143,107 @@ declare void @calldefault(...) local_unnamed_addr
!1 = !{!"branch_weights", i32 1 , i32 5, i32 1,i32 1, i32 1}
!2 = !{!"branch_weights", i32 1 , i32 4, i32 1,i32 1, i32 1}

; while.cond does not dominate return, expect to skip merging empty block
; return.loopexit into return.
@b = external global i32, align 4
@a = external global i32*, align 8

define void @f_switch4(i32 %i) local_unnamed_addr personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
; CHECK-LABEL: @f_switch4
entry:
%0 = load i32, i32* @b, align 4
%cond = icmp eq i32 %0, 6
br i1 %cond, label %return, label %if.end

if.end: ; preds = %entry
%add = add i32 %i, 2
%1 = load i32*, i32** @a, align 8
%magicptr = ptrtoint i32* %1 to i32
br label %while.cond

; CHECK-LABEL: while.cond:
; CHECK: i32 0, label %return.loopexit
; CHECK: i32 47, label %return.loopexit
while.cond: ; preds = %while.cond.backedge, %if.end
switch i32 %magicptr, label %while.cond.if.end10_crit_edge [
i32 0, label %return.loopexit
i32 47, label %return.loopexit
]

while.cond.if.end10_crit_edge: ; preds = %while.cond
br label %while.cond.backedge

while.cond.backedge: ; preds = %while.cond.if.end10_crit_edge, %if.then9
br label %while.cond

return.loopexit: ; preds = %while.cond
br label %return

; CHECK_LABEL: return:
; CHECK: %{{.*}} = phi i32 [ 0, %entry ], [ %add, %return.loopexit ]
return: ; preds = %return.loopexit, %entry
%retval.4 = phi i32 [ 0, %entry ], [ %add, %return.loopexit ]
ret void
}
declare i32 @__gxx_personality_v0(...)

; Expect to merge empty block while.cond2.loopexit into while.cond2
define i32 @f_switch5(i32 %i) local_unnamed_addr {
; CHECK-LABEL: @f_switch5
entry:
%0 = load i32, i32* @b, align 4
%cond = icmp eq i32 %0, 6
br i1 %cond, label %while.cond.preheader, label %sw.epilog

while.cond.preheader: ; preds = %entry
%1 = load i32*, i32** @a, align 8
%magicptr = ptrtoint i32* %1 to i64
%arrayidx = getelementptr inbounds i32, i32* %1, i64 1
br label %while.cond

; CHECK-LABEL: while.cond:
; CHECK: i64 32, label %while.cond2
; CHECK: i64 0, label %while.cond2
while.cond: ; preds = %land.rhs, %while.cond.preheader
switch i64 %magicptr, label %land.rhs [
i64 32, label %while.cond2.loopexit
i64 0, label %while.cond2.loopexit
]

land.rhs: ; preds = %while.cond
%2 = load i32, i32* %arrayidx, align 4
%tobool1 = icmp eq i32 %2, 0
br i1 %tobool1, label %while.cond2thread-pre-split.loopexit, label %while.cond

while.cond2thread-pre-split.loopexit: ; preds = %land.rhs
br label %while.cond2thread-pre-split

while.cond2thread-pre-split: ; preds = %while.body4, %while.cond2thread-pre-split.loopexit
%.pr = phi i32* [ %.pr.pre, %while.body4 ], [ %1, %while.cond2thread-pre-split.loopexit ]
br label %while.cond2

while.cond2.loopexit: ; preds = %while.cond, %while.cond
br label %while.cond2

; CHECK-LABEL: while.cond2:
; CHECK: %{{.*}} = phi i32* [ %.pr.pre, %while.body4 ], [ %1, %land.rhs ], [ %1, %while.cond ], [ %1, %while.cond ]
while.cond2: ; preds = %while.cond2.loopexit, %while.cond2thread-pre-split
%3 = phi i32* [ %.pr, %while.cond2thread-pre-split ], [ %1, %while.cond2.loopexit ]
%tobool3 = icmp eq i32* %3, null
br i1 %tobool3, label %sw.epilog, label %while.body4

while.body4: ; preds = %while.cond2
tail call void bitcast (void (...)* @fn2 to void ()*)()
%.pr.pre = load i32*, i32** @a, align 8
br label %while.cond2thread-pre-split

sw.epilog: ; preds = %while.cond2, %entry
ret i32 undef
}


; This test that BFI/BPI is created without any assertion in isMergingEmptyBlockProfitable()
; in the case where empty blocks are removed before creating BFI/BPI.
@b = common global i32 0, align 4
@a = common global i32* null, align 8
define i32 @should_not_assert(i32 %i) local_unnamed_addr {
entry:
%0 = load i32, i32* @b, align 4
Expand Down

0 comments on commit cddf3c5

Please sign in to comment.