Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7361,6 +7361,95 @@ static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
return PredPred;
}

/// Collapse two levels of conditional branching into a single branch when both
/// inner branches test the same condition with swapped destinations:
///   bb0:
///     br i1 %cond1, label %bb1, label %bb2
///   bb1:
///     br i1 %cond2, label %bb3, label %bb4
///   bb2:
///     br i1 %cond2, label %bb4, label %bb3
///   bb3:
///     ...
///   bb4:
///     ...
/// becomes
///   bb0:
///     %cond = xor i1 %cond1, %cond2
///     br i1 %cond, label %bb4, label %bb3
///   bb3:
///     ...
///   bb4:
///     ...
/// NOTE: %cond2 always dominates the terminator of bb0.
///
/// \param BI  The conditional branch terminating bb0.
/// \param DTU Optional dominator tree updater; when non-null it is informed of
///            the CFG edges that were removed and added.
/// \returns true if the branch was rewritten, false if the pattern did not
///          match (in which case nothing is modified).
static bool mergeNestedCondBranch(BranchInst *BI, DomTreeUpdater *DTU) {
  BasicBlock *Head = BI->getParent();
  BasicBlock *OnTrue = BI->getSuccessor(0);
  BasicBlock *OnFalse = BI->getSuccessor(1);

  // Accept a successor only if it consists of nothing but a conditional
  // branch, forms no cycle with itself or with Head, and neither of its
  // destinations begins with a PHI node. The branch is handed back through
  // InnerBI.
  auto MatchBranchOnlyBlock = [Head](BasicBlock *Mid,
                                     BranchInst *&InnerBI) -> bool {
    if (Mid == Head)
      return false;
    auto *Term = Mid->getTerminator();
    if (&Mid->front() != Term)
      return false;
    InnerBI = dyn_cast<BranchInst>(Term);
    if (!InnerBI)
      return false;
    if (!InnerBI->isConditional())
      return false;
    BasicBlock *Dest0 = InnerBI->getSuccessor(0);
    BasicBlock *Dest1 = InnerBI->getSuccessor(1);
    if (Dest0 == Mid || Dest1 == Mid || Dest0 == Head || Dest1 == Head)
      return false;
    if (isa<PHINode>(Dest0->front()) || isa<PHINode>(Dest1->front()))
      return false;
    return true;
  };

  BranchInst *TrueSideBI = nullptr;
  BranchInst *FalseSideBI = nullptr;
  if (!MatchBranchOnlyBlock(OnTrue, TrueSideBI))
    return false;
  if (!MatchBranchOnlyBlock(OnFalse, FalseSideBI))
    return false;

  // Both inner branches must test the same value ...
  if (TrueSideBI->getCondition() != FalseSideBI->getCondition())
    return false;
  // ... and send it to the same pair of blocks in opposite order.
  if (TrueSideBI->getSuccessor(0) != FalseSideBI->getSuccessor(1))
    return false;
  if (TrueSideBI->getSuccessor(1) != FalseSideBI->getSuccessor(0))
    return false;

  BasicBlock *DestWhenEqual = TrueSideBI->getSuccessor(0);   // bb3
  BasicBlock *DestWhenDiffer = TrueSideBI->getSuccessor(1);  // bb4

  // Emit the xor immediately before the outer branch and branch on it.
  IRBuilder<> Builder(BI);
  auto *XorCond =
      Builder.CreateXor(BI->getCondition(), TrueSideBI->getCondition());
  BI->setCondition(XorCond);

  // Retarget the outer branch straight at the final destinations.
  OnTrue->removePredecessor(Head);
  BI->setSuccessor(0, DestWhenDiffer);
  OnFalse->removePredecessor(Head);
  BI->setSuccessor(1, DestWhenEqual);

  if (DTU)
    DTU->applyUpdates({{DominatorTree::Delete, Head, OnTrue},
                       {DominatorTree::Insert, Head, DestWhenDiffer},
                       {DominatorTree::Delete, Head, OnFalse},
                       {DominatorTree::Insert, Head, DestWhenEqual}});

  // Read the weights of a branch; when it carries none, substitute 1:1.
  // Returns whether real weights were found.
  auto ReadWeightsOrUnit = [](BranchInst *Br, uint64_t &TrueW,
                              uint64_t &FalseW) -> bool {
    if (extractBranchWeights(*Br, TrueW, FalseW))
      return true;
    TrueW = FalseW = 1;
    return false;
  };

  uint64_t HeadTW, HeadFW;
  uint64_t TrueSideTW, TrueSideFW;
  uint64_t FalseSideTW, FalseSideFW;
  const bool HeadHasW = ReadWeightsOrUnit(BI, HeadTW, HeadFW);
  const bool TrueSideHasW = ReadWeightsOrUnit(TrueSideBI, TrueSideTW, TrueSideFW);
  const bool FalseSideHasW =
      ReadWeightsOrUnit(FalseSideBI, FalseSideTW, FalseSideFW);

  // Only attach weights to the merged branch if at least one of the three
  // original branches had them.
  if (HeadHasW || TrueSideHasW || FalseSideHasW) {
    // The new true edge (bb4) was reached via bb1's false edge or bb2's true
    // edge; the new false edge (bb3) via bb1's true edge or bb2's false edge.
    uint64_t Weights[2] = {HeadTW * TrueSideFW + HeadFW * FalseSideTW,
                           HeadTW * TrueSideTW + HeadFW * FalseSideFW};
    FitWeights(Weights);
    setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false);
  }
  return true;
}

bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
assert(
!isa<ConstantInt>(BI->getCondition()) &&
Expand Down Expand Up @@ -7468,6 +7557,10 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
return requestResimplify();

// Look for nested conditional branches.
if (mergeNestedCondBranch(BI, DTU))
return requestResimplify();

return false;
}

Expand Down
50 changes: 34 additions & 16 deletions llvm/test/CodeGen/ARM/and-cmp0-sink.ll
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ exit:
}

; Test with a mask that can be encoded with T32 instruction set, but not with A32.
define i32 @f0(i1 %c0, i32 %v) {
define i32 @f0(i1 %c0, i32 %v, ptr %p) {
; V7M-LABEL: f0:
; V7M: @ %bb.0: @ %E
; V7M-NEXT: lsls r0, r0, #31
Expand All @@ -198,7 +198,9 @@ define i32 @f0(i1 %c0, i32 %v) {
; V7M-NEXT: bxeq lr
; V7M-NEXT: b .LBB1_3
; V7M-NEXT: .LBB1_2: @ %B
; V7M-NEXT: movs r0, #1
; V7M-NEXT: tst.w r1, #16843009
; V7M-NEXT: str r0, [r2]
; V7M-NEXT: itt ne
; V7M-NEXT: movne r0, #0
; V7M-NEXT: bxne lr
Expand All @@ -208,10 +210,10 @@ define i32 @f0(i1 %c0, i32 %v) {
;
; V7A-LABEL: f0:
; V7A: @ %bb.0: @ %E
; V7A-NEXT: movw r2, #257
; V7A-NEXT: movw r3, #257
; V7A-NEXT: tst r0, #1
; V7A-NEXT: movt r2, #257
; V7A-NEXT: and r1, r1, r2
; V7A-NEXT: movt r3, #257
; V7A-NEXT: and r1, r1, r3
; V7A-NEXT: beq .LBB1_3
; V7A-NEXT: @ %bb.1: @ %A
; V7A-NEXT: cmp r1, #0
Expand All @@ -221,8 +223,10 @@ define i32 @f0(i1 %c0, i32 %v) {
; V7A-NEXT: mov r0, #1
; V7A-NEXT: bx lr
; V7A-NEXT: .LBB1_3: @ %B
; V7A-NEXT: mov r0, #0
; V7A-NEXT: mov r0, #1
; V7A-NEXT: cmp r1, #0
; V7A-NEXT: str r0, [r2]
; V7A-NEXT: mov r0, #0
; V7A-NEXT: moveq r0, #1
; V7A-NEXT: bx lr
;
Expand All @@ -237,7 +241,9 @@ define i32 @f0(i1 %c0, i32 %v) {
; V7A-T-NEXT: bxeq lr
; V7A-T-NEXT: b .LBB1_3
; V7A-T-NEXT: .LBB1_2: @ %B
; V7A-T-NEXT: movs r0, #1
; V7A-T-NEXT: tst.w r1, #16843009
; V7A-T-NEXT: str r0, [r2]
; V7A-T-NEXT: itt ne
; V7A-T-NEXT: movne r0, #0
; V7A-T-NEXT: bxne lr
Expand All @@ -247,18 +253,20 @@ define i32 @f0(i1 %c0, i32 %v) {
;
; V6M-LABEL: f0:
; V6M: @ %bb.0: @ %E
; V6M-NEXT: ldr r2, .LCPI1_0
; V6M-NEXT: ands r2, r1
; V6M-NEXT: ldr r3, .LCPI1_0
; V6M-NEXT: ands r3, r1
; V6M-NEXT: lsls r0, r0, #31
; V6M-NEXT: beq .LBB1_3
; V6M-NEXT: @ %bb.1: @ %A
; V6M-NEXT: cmp r2, #0
; V6M-NEXT: cmp r3, #0
; V6M-NEXT: bne .LBB1_5
; V6M-NEXT: @ %bb.2:
; V6M-NEXT: movs r0, #0
; V6M-NEXT: bx lr
; V6M-NEXT: .LBB1_3: @ %B
; V6M-NEXT: cmp r2, #0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: str r0, [r2]
; V6M-NEXT: cmp r3, #0
; V6M-NEXT: beq .LBB1_5
; V6M-NEXT: @ %bb.4:
; V6M-NEXT: movs r0, #0
Expand All @@ -280,6 +288,7 @@ A:

B:
%c2 = icmp eq i32 %a, 0
store i32 1, ptr %p, align 4
br i1 %c2, label %D, label %C

C:
Expand All @@ -294,7 +303,7 @@ X:
}

; Test with a mask that can be encoded both with T32 and A32 instruction sets.
define i32 @f1(i1 %c0, i32 %v) {
define i32 @f1(i1 %c0, i32 %v, ptr %p) {
; V7M-LABEL: f1:
; V7M: @ %bb.0: @ %E
; V7M-NEXT: lsls r0, r0, #31
Expand All @@ -306,7 +315,9 @@ define i32 @f1(i1 %c0, i32 %v) {
; V7M-NEXT: bxeq lr
; V7M-NEXT: b .LBB2_3
; V7M-NEXT: .LBB2_2: @ %B
; V7M-NEXT: movs r0, #1
; V7M-NEXT: tst.w r1, #100663296
; V7M-NEXT: str r0, [r2]
; V7M-NEXT: itt ne
; V7M-NEXT: movne r0, #0
; V7M-NEXT: bxne lr
Expand All @@ -326,8 +337,10 @@ define i32 @f1(i1 %c0, i32 %v) {
; V7A-NEXT: mov r0, #1
; V7A-NEXT: bx lr
; V7A-NEXT: .LBB2_3: @ %B
; V7A-NEXT: mov r0, #0
; V7A-NEXT: mov r0, #1
; V7A-NEXT: tst r1, #100663296
; V7A-NEXT: str r0, [r2]
; V7A-NEXT: mov r0, #0
; V7A-NEXT: moveq r0, #1
; V7A-NEXT: bx lr
;
Expand All @@ -342,7 +355,9 @@ define i32 @f1(i1 %c0, i32 %v) {
; V7A-T-NEXT: bxeq lr
; V7A-T-NEXT: b .LBB2_3
; V7A-T-NEXT: .LBB2_2: @ %B
; V7A-T-NEXT: movs r0, #1
; V7A-T-NEXT: tst.w r1, #100663296
; V7A-T-NEXT: str r0, [r2]
; V7A-T-NEXT: itt ne
; V7A-T-NEXT: movne r0, #0
; V7A-T-NEXT: bxne lr
Expand All @@ -352,19 +367,21 @@ define i32 @f1(i1 %c0, i32 %v) {
;
; V6M-LABEL: f1:
; V6M: @ %bb.0: @ %E
; V6M-NEXT: movs r2, #3
; V6M-NEXT: lsls r2, r2, #25
; V6M-NEXT: ands r2, r1
; V6M-NEXT: movs r3, #3
; V6M-NEXT: lsls r3, r3, #25
; V6M-NEXT: ands r3, r1
; V6M-NEXT: lsls r0, r0, #31
; V6M-NEXT: beq .LBB2_3
; V6M-NEXT: @ %bb.1: @ %A
; V6M-NEXT: cmp r2, #0
; V6M-NEXT: cmp r3, #0
; V6M-NEXT: bne .LBB2_5
; V6M-NEXT: @ %bb.2:
; V6M-NEXT: movs r0, #0
; V6M-NEXT: bx lr
; V6M-NEXT: .LBB2_3: @ %B
; V6M-NEXT: cmp r2, #0
; V6M-NEXT: movs r0, #1
; V6M-NEXT: str r0, [r2]
; V6M-NEXT: cmp r3, #0
; V6M-NEXT: beq .LBB2_5
; V6M-NEXT: @ %bb.4:
; V6M-NEXT: movs r0, #0
Expand All @@ -382,6 +399,7 @@ A:

B:
%c2 = icmp eq i32 %a, 0
store i32 1, ptr %p, align 4
br i1 %c2, label %D, label %C

C:
Expand Down
Loading