-
Notifications
You must be signed in to change notification settings - Fork 15k
[SimplifyCFG] Propagate profile in simplifySwitchOfPowersOfTwo
#165804
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SimplifyCFG] Propagate profile in simplifySwitchOfPowersOfTwo
#165804
Conversation
Favour a `cttz`-indexed table lookup over an indirect jump table when the default switch case is reachable, by branching non-power-of-two inputs to the default case. Proofs: https://alive2.llvm.org/ce/z/HeRAtf.
|
@llvm/pr-subscribers-llvm-transforms Author: Mircea Trofin (mtrofin) ChangesFull diff: https://github.com/llvm/llvm-project/pull/165804.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index b03fb6213d61c..b697a0a888e90 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7632,7 +7632,20 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
auto *DefaultCaseBB = SI->getDefaultDest();
BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
auto It = OrigBB->getTerminator()->getIterator();
+ SmallVector<uint32_t> Weights;
+ auto HasWeights =
+ !ProfcheckDisableMetadataFixes && extractBranchWeights(*SI, Weights);
auto *BI = BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
+ if (HasWeights) {
+ // The new branch weigths are deducible from the switch's as shown below.
+ // We operate in uint64_t domain first to avoid overflow, then let the
+ // canonical weight fitting deal with values over 32 bits. Note that the
+ // "default" target is on the 'false' branch.
+ SmallVector<uint64_t> NewWeights(2);
+ NewWeights[1] = Weights[0];
+ NewWeights[0] = sum_of(static_cast_to<uint64_t>(drop_begin(Weights)));
+ setFittedBranchWeights(*BI, NewWeights, /*IsExpected=*/false);
+ }
// BI is handling the default case for SI, and so should share its DebugLoc.
BI->setDebugLoc(SI->getDebugLoc());
It->eraseFromParent();
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
index aa95b3fd235e5..d0a14176f9810 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
@@ -1,8 +1,13 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt -passes='simplifycfg<switch-to-lookup>' -simplifycfg-require-and-preserve-domtree=1 -S < %s | FileCheck %s
target triple = "x86_64-unknown-linux-gnu"
+;.
+; CHECK: @switch.table.switch_of_powers_two = private unnamed_addr constant [7 x i32] [i32 3, i32 poison, i32 poison, i32 2, i32 1, i32 0, i32 42], align 4
+; CHECK: @switch.table.switch_of_powers_two_default_reachable = private unnamed_addr constant [7 x i32] [i32 3, i32 5, i32 5, i32 2, i32 1, i32 0, i32 42], align 4
+; CHECK: @switch.table.switch_of_powers_two_default_reachable_multipreds = private unnamed_addr constant [7 x i32] [i32 3, i32 poison, i32 poison, i32 2, i32 1, i32 0, i32 42], align 4
+;.
define i32 @switch_of_powers_two(i32 %arg) {
; CHECK-LABEL: define i32 @switch_of_powers_two(
; CHECK-SAME: i32 [[ARG:%.*]]) {
@@ -35,17 +40,17 @@ return:
ret i32 %phi
}
-define i32 @switch_of_powers_two_default_reachable(i32 %arg) {
+define i32 @switch_of_powers_two_default_reachable(i32 %arg) !prof !0 {
; CHECK-LABEL: define i32 @switch_of_powers_two_default_reachable(
-; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-SAME: i32 [[ARG:%.*]]) !prof [[PROF0:![0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]])
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1
-; CHECK-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[RETURN:.*]]
+; CHECK-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[RETURN:.*]], !prof [[PROF1:![0-9]+]]
; CHECK: [[ENTRY_SPLIT]]:
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true)
; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
-; CHECK-NEXT: br i1 [[TMP3]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]], !prof [[PROF1]]
; CHECK: [[SWITCH_LOOKUP]]:
; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64
; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two_default_reachable, i64 0, i64 [[TMP4]]
@@ -62,7 +67,7 @@ entry:
i32 16, label %bb3
i32 32, label %bb4
i32 64, label %bb5
- ]
+ ], !prof !1
default_case: br label %return
bb1: br label %return
@@ -128,3 +133,12 @@ return:
%phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [ %pn, %default_case ]
ret i32 %phi
}
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 3, i32 5, i32 7, i32 11, i32 13, i32 17}
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 53, i32 3}
+;.
diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt
index aef7c0987fda7..83bffc70574a8 100644
--- a/llvm/utils/profcheck-xfail.txt
+++ b/llvm/utils/profcheck-xfail.txt
@@ -1317,8 +1317,6 @@ Transforms/SimpleLoopUnswitch/pr60736.ll
Transforms/SimpleLoopUnswitch/trivial-unswitch-freeze-individual-conditions.ll
Transforms/SimpleLoopUnswitch/trivial-unswitch.ll
Transforms/SimpleLoopUnswitch/trivial-unswitch-logical-and-or.ll
-Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll
-Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
Transforms/StackProtector/cross-dso-cfi-stack-chk-fail.ll
Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
Transforms/StructurizeCFG/hoist-zerocost.ll
|
302b115 to
828f31a
Compare
d07d953 to
d4dd4af
Compare
828f31a to
9c82487
Compare
dc83014 to
5ebce76
Compare
d4dd4af to
40b6404
Compare
5ebce76 to
6a1a84b
Compare
6a1a84b to
3949ad0
Compare
|
I'm tempted to land this to make the profcheck bot green, and continue review on the calculation afterwards. Hope folks are OK with this. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems simple enough to me and specific to PGO builds. If you need to land this to keep the bot green, I think addressing any feedback Postcommit is sufficient.

simplifySwitchOfPowersOfTwo converts (when applicable, see00f5a1e30b) a switch to a conditional branch. Its false case goes to thedefault target of the former switch, and the true case goes to a BB performing acttz. We can calculate the branch weights from the branch weights of the old switch.Issue #147390