-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[SimplifyCFG] Extend simplifySwitchOfPowersOfTwo
to reachable defaults
#161807
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[SimplifyCFG] Extend simplifySwitchOfPowersOfTwo
to reachable defaults
#161807
Conversation
Favour a `cttz`-indexed table lookup over an indirect jump table when the default switch case is reachable, by branching non-power-of-two inputs to the default case. Proofs: https://alive2.llvm.org/ce/z/HeRAtf.
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-llvm-transforms Author: Antonio Frighetto (antoniofrighetto) ChangesFavour a Proofs: https://alive2.llvm.org/ce/z/HeRAtf. Full diff: https://github.com/llvm/llvm-project/pull/161807.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 8bba634521e3e..aeb304195c283 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7422,6 +7422,7 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
/// log2(C)-indexed value table (instead of traditionally emitting a load of the
/// address of the jump target, and indirectly jump to it).
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
+ DomTreeUpdater *DTU,
const DataLayout &DL,
const TargetTransformInfo &TTI) {
Value *Condition = SI->getCondition();
@@ -7444,12 +7445,6 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
if (SI->getNumCases() < 4)
return false;
- // We perform this optimization only for switches with
- // unreachable default case.
- // This assumtion will save us from checking if `Condition` is a power of two.
- if (!SI->defaultDestUnreachable())
- return false;
-
// Check that switch cases are powers of two.
SmallVector<uint64_t, 4> Values;
for (const auto &Case : SI->cases()) {
@@ -7469,6 +7464,24 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
Builder.SetInsertPoint(SI);
+ if (!SI->defaultDestUnreachable()) {
+ // Let non-power-of-two inputs jump to the default case, when the latter is
+ // reachable.
+ auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
+ auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));
+
+ auto *OrigBB = SI->getParent();
+ auto *DefaultCaseBB = SI->getDefaultDest();
+ BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
+ auto It = OrigBB->getTerminator()->getIterator();
+ BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
+ It->eraseFromParent();
+
+ addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
+ if (DTU)
+ DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
+ }
+
// Replace each case with its trailing zeros number.
for (auto &Case : SI->cases()) {
auto *OrigValue = Case.getCaseValue();
@@ -7827,7 +7840,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
simplifySwitchLookup(SI, Builder, DTU, DL, TTI))
return requestResimplify();
- if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
+ if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
return requestResimplify();
if (reduceSwitchRange(SI, Builder, DL, TTI))
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
index 49eb1991ccba2..aa95b3fd235e5 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
@@ -34,3 +34,97 @@ return:
%phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ]
ret i32 %phi
}
+
+define i32 @switch_of_powers_two_default_reachable(i32 %arg) {
+; CHECK-LABEL: define i32 @switch_of_powers_two_default_reachable(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1
+; CHECK-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[RETURN:.*]]
+; CHECK: [[ENTRY_SPLIT]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true)
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
+; CHECK-NEXT: br i1 [[TMP3]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]]
+; CHECK: [[SWITCH_LOOKUP]]:
+; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two_default_reachable, i64 0, i64 [[TMP4]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: br label %[[RETURN]]
+; CHECK: [[RETURN]]:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 5, %[[ENTRY]] ], [ 5, %[[ENTRY_SPLIT]] ], [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ]
+; CHECK-NEXT: ret i32 [[PHI]]
+;
+entry:
+ switch i32 %arg, label %default_case [
+ i32 1, label %bb1
+ i32 8, label %bb2
+ i32 16, label %bb3
+ i32 32, label %bb4
+ i32 64, label %bb5
+ ]
+
+default_case: br label %return
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+
+return:
+ %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [ 5, %default_case ]
+ ret i32 %phi
+}
+
+define i32 @switch_of_powers_two_default_reachable_multipreds(i32 %arg, i1 %cond) {
+; CHECK-LABEL: define i32 @switch_of_powers_two_default_reachable_multipreds(
+; CHECK-SAME: i32 [[ARG:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 [[COND]], label %[[SWITCH:.*]], label %[[RETURN:.*]]
+; CHECK: [[SWITCH]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]])
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1
+; CHECK-NEXT: br i1 [[TMP1]], label %[[SWITCH_SPLIT:.*]], label %[[RETURN]]
+; CHECK: [[SWITCH_SPLIT]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true)
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
+; CHECK-NEXT: [[SWITCH_MASKINDEX:%.*]] = trunc i32 [[TMP2]] to i8
+; CHECK-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i8 121, [[SWITCH_MASKINDEX]]
+; CHECK-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP3]], i1 [[SWITCH_LOBIT]], i1 false
+; CHECK-NEXT: br i1 [[OR_COND]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]]
+; CHECK: [[SWITCH_LOOKUP]]:
+; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64
+; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two_default_reachable_multipreds, i64 0, i64 [[TMP4]]
+; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT: br label %[[RETURN]]
+; CHECK: [[RETURN]]:
+; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ARG]], %[[SWITCH_SPLIT]] ], [ [[ARG]], %[[SWITCH]] ], [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ]
+; CHECK-NEXT: ret i32 [[PHI]]
+;
+entry:
+ br i1 %cond, label %switch, label %default_case
+
+switch:
+ switch i32 %arg, label %default_case [
+ i32 1, label %bb1
+ i32 8, label %bb2
+ i32 16, label %bb3
+ i32 32, label %bb4
+ i32 64, label %bb5
+ ]
+
+default_case:
+ %pn = phi i32 [ 0, %entry ], [ %arg, %switch ]
+ br label %return
+
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+
+return:
+ %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [ %pn, %default_case ]
+ ret i32 %phi
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thank you!
Favour a
cttz
-indexed table lookup over an indirect jump table when the default switch case is reachable, by branching non-power-of-two inputs to the default case.Proofs: https://alive2.llvm.org/ce/z/HeRAtf.