-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[SimplifyCFG] Simplify nested branches #97067
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-arm @llvm/pr-subscribers-llvm-transforms

Author: Yingwei Zheng (dtcxzyw)

Changes

This patch folds the following pattern (I don't know what to call this):
```
bb0:
  br i1 %cond1, label %bb1, label %bb2
bb1:
  br i1 %cond2, label %bb3, label %bb4
bb2:
  br i1 %cond2, label %bb4, label %bb3
bb3:
  ...
bb4:
  ...
```
into
```
bb0:
  %cond = xor i1 %cond1, %cond2
  br i1 %cond, label %bb4, label %bb3
bb3:
  ...
bb4:
  ...
```
Alive2: https://alive2.llvm.org/ce/z/5iOJEL

I found this pattern in some Verilator-generated code (Verilator is widely used in RTL simulation). This fold reduces the number of branches and improves the performance of the CPU frontend. To my surprise, this pattern is also common in C/C++ codebases.

Full diff: https://github.com/llvm/llvm-project/pull/97067.diff

2 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 6847bb7502429..092ac3d5e5087 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7468,6 +7468,91 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
return requestResimplify();
+ {
+ // Fold the following pattern:
+ // bb0:
+ // br i1 %cond1, label %bb1, label %bb2
+ // bb1:
+ // br i1 %cond2, label %bb3, label %bb4
+ // bb2:
+ // br i1 %cond2, label %bb4, label %bb3
+ // bb3:
+ // ...
+ // bb4:
+ // ...
+ // into
+ // bb0:
+ // %cond = xor i1 %cond1, %cond2
+ // br i1 %cond, label %bb4, label %bb3
+ // bb3:
+ // ...
+ // bb4:
+ // ...
+ // NOTE: %cond2 always dominates the terminator of bb0.
+
+ BasicBlock *BB1 = BI->getSuccessor(0);
+ BasicBlock *BB2 = BI->getSuccessor(1);
+ auto IsSimpleSuccessor = [BB](BasicBlock *Succ, BranchInst *&SuccBI) {
+ if (Succ == BB)
+ return false;
+ if (Succ->sizeWithoutDebug() > 1)
+ return false;
+ SuccBI = dyn_cast<BranchInst>(Succ->getTerminator());
+ if (!SuccBI || !SuccBI->isConditional())
+ return false;
+ BasicBlock *Succ1 = SuccBI->getSuccessor(0);
+ BasicBlock *Succ2 = SuccBI->getSuccessor(1);
+ return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
+ !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
+ };
+ BranchInst *BB1BI, *BB2BI;
+ if (IsSimpleSuccessor(BB1, BB1BI) && IsSimpleSuccessor(BB2, BB2BI) &&
+ BB1BI->getCondition() == BB2BI->getCondition() &&
+ BB1BI->getSuccessor(0) == BB2BI->getSuccessor(1) &&
+ BB1BI->getSuccessor(1) == BB2BI->getSuccessor(0)) {
+ BasicBlock *BB3 = BB1BI->getSuccessor(0);
+ BasicBlock *BB4 = BB1BI->getSuccessor(1);
+ IRBuilder<> Builder(BI);
+ BI->setCondition(
+ Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
+ BB1->removePredecessor(BB);
+ BI->setSuccessor(0, BB4);
+ BB2->removePredecessor(BB);
+ BI->setSuccessor(1, BB3);
+ if (DTU) {
+ SmallVector<DominatorTree::UpdateType, 4> Updates;
+ Updates.push_back({DominatorTree::Delete, BB, BB1});
+ Updates.push_back({DominatorTree::Insert, BB, BB4});
+ Updates.push_back({DominatorTree::Delete, BB, BB2});
+ Updates.push_back({DominatorTree::Insert, BB, BB3});
+
+ DTU->applyUpdates(Updates);
+ }
+ bool HasWeight = false;
+ uint64_t BBTWeight, BBFWeight;
+ if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
+ HasWeight = true;
+ else
+ BBTWeight = BBFWeight = 1;
+ uint64_t BB1TWeight, BB1FWeight;
+ if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
+ HasWeight = true;
+ else
+ BB1TWeight = BB1FWeight = 1;
+ uint64_t BB2TWeight, BB2FWeight;
+ if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
+ HasWeight = true;
+ else
+ BB2TWeight = BB2FWeight = 1;
+ uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
+ BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
+ if (HasWeight) {
+ FitWeights(Weights);
+ setBranchWeights(BI, Weights[0], Weights[1], /*IsExpected=*/false);
+ }
+ }
+ }
+
return false;
}
diff --git a/llvm/test/Transforms/SimplifyCFG/branch-fold.ll b/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
index 2f5fb4f33013d..491eb52feafc7 100644
--- a/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
+++ b/llvm/test/Transforms/SimplifyCFG/branch-fold.ll
@@ -146,3 +146,294 @@ Succ:
}
declare void @dummy()
+
+define void @fold_nested_branch1(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND1:%.*]], [[COND2:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[BB4:%.*]], label [[BB3:%.*]]
+; CHECK: common.ret:
+; CHECK-NEXT: ret void
+; CHECK: bb3:
+; CHECK-NEXT: call void @sideeffect1()
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: call void @sideeffect2()
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+ br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+ br i1 %cond2, label %bb4, label %bb3
+
+bb3:
+ call void @sideeffect1()
+ ret void
+
+bb4:
+ call void @sideeffect2()
+ ret void
+}
+
+define void @fold_nested_branch2(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[COMMON_RET:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br i1 [[COND2]], label [[BB4:%.*]], label [[BB3]]
+; CHECK: common.ret:
+; CHECK-NEXT: ret void
+; CHECK: bb3:
+; CHECK-NEXT: call void @sideeffect1()
+; CHECK-NEXT: br label [[COMMON_RET]]
+; CHECK: bb4:
+; CHECK-NEXT: call void @sideeffect2()
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+ br i1 %cond2, label %bb3, label %bb5
+
+bb2:
+ br i1 %cond2, label %bb4, label %bb3
+
+bb3:
+ call void @sideeffect1()
+ ret void
+
+bb4:
+ call void @sideeffect2()
+ ret void
+
+bb5:
+ ret void
+}
+
+define void @fold_nested_branch3(i1 %cond1, i1 %cond2, i1 %cond3) {
+; CHECK-LABEL: @fold_nested_branch3(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br i1 [[COND3:%.*]], label [[BB4]], label [[BB3]]
+; CHECK: common.ret:
+; CHECK-NEXT: ret void
+; CHECK: bb3:
+; CHECK-NEXT: call void @sideeffect1()
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: call void @sideeffect2()
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+ br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+ br i1 %cond3, label %bb4, label %bb3
+
+bb3:
+ call void @sideeffect1()
+ ret void
+
+bb4:
+ call void @sideeffect2()
+ ret void
+}
+
+define void @fold_nested_branch4(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: call void @sideeffect1()
+; CHECK-NEXT: br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br i1 [[COND2]], label [[BB4]], label [[BB3]]
+; CHECK: common.ret:
+; CHECK-NEXT: ret void
+; CHECK: bb3:
+; CHECK-NEXT: call void @sideeffect1()
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: call void @sideeffect2()
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+ call void @sideeffect1()
+ br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+ br i1 %cond2, label %bb4, label %bb3
+
+bb3:
+ call void @sideeffect1()
+ ret void
+
+bb4:
+ call void @sideeffect2()
+ ret void
+}
+
+define i32 @fold_nested_branch5(i1 %cond1, i1 %cond2, i32 %x) {
+; CHECK-LABEL: @fold_nested_branch5(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br i1 [[COND2:%.*]], label [[COMMON_RET:%.*]], label [[BB4:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br i1 [[COND2]], label [[BB4]], label [[COMMON_RET]]
+; CHECK: common.ret:
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BB4]] ], [ 0, [[BB1]] ], [ [[X:%.*]], [[BB2]] ]
+; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
+; CHECK: bb4:
+; CHECK-NEXT: call void @sideeffect2()
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+ br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+ br i1 %cond2, label %bb4, label %bb3
+
+bb3:
+ %ret = phi i32 [ 0, %bb1 ], [ %x, %bb2 ]
+ ret i32 %ret
+
+bb4:
+ call void @sideeffect2()
+ ret i32 0
+}
+
+define void @fold_nested_branch6(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch6(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND1_NOT:%.*]] = xor i1 [[COND1:%.*]], true
+; CHECK-NEXT: [[BRMERGE:%.*]] = select i1 [[COND1_NOT]], i1 true, i1 [[COND2:%.*]]
+; CHECK-NEXT: br i1 [[BRMERGE]], label [[BB3:%.*]], label [[BB4:%.*]]
+; CHECK: common.ret:
+; CHECK-NEXT: ret void
+; CHECK: bb3:
+; CHECK-NEXT: call void @sideeffect1()
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: call void @sideeffect2()
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+ br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+ br i1 %cond2, label %bb1, label %bb3
+
+bb3:
+ call void @sideeffect1()
+ ret void
+
+bb4:
+ call void @sideeffect2()
+ ret void
+}
+
+define void @fold_nested_branch7(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch7(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[BB0:%.*]]
+; CHECK: bb0:
+; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB1:%.*]], label [[BB2:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br i1 [[COND2:%.*]], label [[BB3:%.*]], label [[BB4:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br i1 [[COND2]], label [[BB0]], label [[BB3]]
+; CHECK: common.ret:
+; CHECK-NEXT: ret void
+; CHECK: bb3:
+; CHECK-NEXT: call void @sideeffect1()
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: call void @sideeffect2()
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ br label %bb0
+
+bb0:
+ br i1 %cond1, label %bb1, label %bb2
+
+bb1:
+ br i1 %cond2, label %bb3, label %bb4
+
+bb2:
+ br i1 %cond2, label %bb0, label %bb3
+
+bb3:
+ call void @sideeffect1()
+ ret void
+
+bb4:
+ call void @sideeffect2()
+ ret void
+}
+
+; freq(bb4) = 1 * 4 + 2 * 5 = 14
+; freq(bb3) = 1 * 3 + 2 * 6 = 15
+define void @fold_nested_branch_prof(i1 %cond1, i1 %cond2) {
+; CHECK-LABEL: @fold_nested_branch_prof(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND1:%.*]], [[COND2:%.*]]
+; CHECK-NEXT: br i1 [[TMP0]], label [[BB4:%.*]], label [[BB3:%.*]], !prof ![[PROF0:[0-9]+]]
+; CHECK: common.ret:
+; CHECK-NEXT: ret void
+; CHECK: bb3:
+; CHECK-NEXT: call void @sideeffect1()
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: call void @sideeffect2()
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ br i1 %cond1, label %bb1, label %bb2, !prof !0 ; 1:2
+
+bb1:
+ br i1 %cond2, label %bb3, label %bb4, !prof !1 ; 3:4
+
+bb2:
+ br i1 %cond2, label %bb4, label %bb3, !prof !2 ; 5:6
+
+bb3:
+ call void @sideeffect1()
+ ret void
+
+bb4:
+ call void @sideeffect2()
+ ret void
+}
+
+!0 = !{!"branch_weights", i32 1, i32 2}
+!1 = !{!"branch_weights", i32 3, i32 4}
+!2 = !{!"branch_weights", i32 5, i32 6}
+
+;CHECK: ![[PROF0]] = !{!"branch_weights", i32 14, i32 15}
+
+declare void @sideeffect1()
+declare void @sideeffect2()
|
Seems like #83417? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks reasonable, some nits.
There is a test failure in CodeGen/ARM/and-cmp0-sink.ll. |
d1debff
to
f9d7f5f
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/66/builds/806 Here is the relevant piece of the build log for reference:
|
This patch folds the following pattern (I don't know what to call this):
```
bb0:
  br i1 %cond1, label %bb1, label %bb2
bb1:
  br i1 %cond2, label %bb3, label %bb4
bb2:
  br i1 %cond2, label %bb4, label %bb3
bb3:
  ...
bb4:
  ...
```
into
```
bb0:
  %cond = xor i1 %cond1, %cond2
  br i1 %cond, label %bb4, label %bb3
bb3:
  ...
bb4:
  ...
```
Alive2: https://alive2.llvm.org/ce/z/5iOJEL
Closes llvm#97022.
Closes llvm#83417.
I found this pattern in some Verilator-generated code (Verilator is widely used in RTL simulation). This fold reduces the number of branches and improves the performance of the CPU frontend. To my surprise, this pattern is also common in C/C++ codebases.
Affected libraries/applications: cmake/cvc5/freetype/git/gromacs/jq/linux/openblas/openmpi/openssl/php/postgres/ruby/sqlite/wireshark/z3/...
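This patch folds the following pattern (I don't know what to call this):
```
bb0:
  br i1 %cond1, label %bb1, label %bb2
bb1:
  br i1 %cond2, label %bb3, label %bb4
bb2:
  br i1 %cond2, label %bb4, label %bb3
bb3:
  ...
bb4:
  ...
```
into
```
bb0:
  %cond = xor i1 %cond1, %cond2
  br i1 %cond, label %bb4, label %bb3
bb3:
  ...
bb4:
  ...
```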
Alive2: https://alive2.llvm.org/ce/z/5iOJEL
Closes #97022.
Closes #83417.
I found this pattern in some Verilator-generated code (Verilator is widely used in RTL simulation). This fold reduces the number of branches and improves the performance of the CPU frontend. To my surprise, this pattern is also common in C/C++ codebases.
Affected libraries/applications: cmake/cvc5/freetype/git/gromacs/jq/linux/openblas/openmpi/openssl/php/postgres/ruby/sqlite/wireshark/z3/...