Skip to content

Conversation

antoniofrighetto
Copy link
Contributor

Favour a cttz-indexed table lookup over an indirect jump table also when the default switch case is reachable, by first branching non-power-of-two inputs to the default case.

Proofs: https://alive2.llvm.org/ce/z/HeRAtf.

Favour a `cttz`-indexed table lookup over an indirect jump table also
when the default switch case is reachable, by first branching
non-power-of-two inputs to the default case.

Proofs: https://alive2.llvm.org/ce/z/HeRAtf.
@llvmbot
Copy link
Member

llvmbot commented Oct 3, 2025

@llvm/pr-subscribers-backend-risc-v

@llvm/pr-subscribers-llvm-transforms

Author: Antonio Frighetto (antoniofrighetto)

Changes

Favour a cttz-indexed table lookup over an indirect jump table also when the default switch case is reachable, by first branching non-power-of-two inputs to the default case.

Proofs: https://alive2.llvm.org/ce/z/HeRAtf.


Full diff: https://github.com/llvm/llvm-project/pull/161807.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Utils/SimplifyCFG.cpp (+20-7)
  • (modified) llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll (+94)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 8bba634521e3e..aeb304195c283 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -7422,6 +7422,7 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
 /// log2(C)-indexed value table (instead of traditionally emitting a load of the
 /// address of the jump target, and indirectly jump to it).
 static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
+                                        DomTreeUpdater *DTU,
                                         const DataLayout &DL,
                                         const TargetTransformInfo &TTI) {
   Value *Condition = SI->getCondition();
@@ -7444,12 +7445,6 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
   if (SI->getNumCases() < 4)
     return false;
 
-  // We perform this optimization only for switches with
-  // unreachable default case.
-  // This assumtion will save us from checking if `Condition` is a power of two.
-  if (!SI->defaultDestUnreachable())
-    return false;
-
   // Check that switch cases are powers of two.
   SmallVector<uint64_t, 4> Values;
   for (const auto &Case : SI->cases()) {
@@ -7469,6 +7464,24 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
 
   Builder.SetInsertPoint(SI);
 
+  if (!SI->defaultDestUnreachable()) {
+    // Let non-power-of-two inputs jump to the default case, when the latter is
+    // reachable.
+    auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
+    auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));
+
+    auto *OrigBB = SI->getParent();
+    auto *DefaultCaseBB = SI->getDefaultDest();
+    BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
+    auto It = OrigBB->getTerminator()->getIterator();
+    BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It);
+    It->eraseFromParent();
+
+    addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
+    if (DTU)
+      DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
+  }
+
   // Replace each case with its trailing zeros number.
   for (auto &Case : SI->cases()) {
     auto *OrigValue = Case.getCaseValue();
@@ -7827,7 +7840,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
       simplifySwitchLookup(SI, Builder, DTU, DL, TTI))
     return requestResimplify();
 
-  if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
+  if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
     return requestResimplify();
 
   if (reduceSwitchRange(SI, Builder, DL, TTI))
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
index 49eb1991ccba2..aa95b3fd235e5 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll
@@ -34,3 +34,97 @@ return:
   %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ]
   ret i32 %phi
 }
+
+define i32 @switch_of_powers_two_default_reachable(i32 %arg) {
+; CHECK-LABEL: define i32 @switch_of_powers_two_default_reachable(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[RETURN:.*]]
+; CHECK:       [[ENTRY_SPLIT]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true)
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
+; CHECK-NEXT:    br i1 [[TMP3]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]]
+; CHECK:       [[SWITCH_LOOKUP]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two_default_reachable, i64 0, i64 [[TMP4]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 5, %[[ENTRY]] ], [ 5, %[[ENTRY_SPLIT]] ], [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ]
+; CHECK-NEXT:    ret i32 [[PHI]]
+;
+entry:
+  switch i32 %arg, label %default_case [
+  i32 1,  label %bb1
+  i32 8,  label %bb2
+  i32 16, label %bb3
+  i32 32, label %bb4
+  i32 64, label %bb5
+  ]
+
+default_case: br label %return
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+
+return:
+  %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [ 5, %default_case ]
+  ret i32 %phi
+}
+
+define i32 @switch_of_powers_two_default_reachable_multipreds(i32 %arg, i1 %cond) {
+; CHECK-LABEL: define i32 @switch_of_powers_two_default_reachable_multipreds(
+; CHECK-SAME: i32 [[ARG:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br i1 [[COND]], label %[[SWITCH:.*]], label %[[RETURN:.*]]
+; CHECK:       [[SWITCH]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1
+; CHECK-NEXT:    br i1 [[TMP1]], label %[[SWITCH_SPLIT:.*]], label %[[RETURN]]
+; CHECK:       [[SWITCH_SPLIT]]:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true)
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7
+; CHECK-NEXT:    [[SWITCH_MASKINDEX:%.*]] = trunc i32 [[TMP2]] to i8
+; CHECK-NEXT:    [[SWITCH_SHIFTED:%.*]] = lshr i8 121, [[SWITCH_MASKINDEX]]
+; CHECK-NEXT:    [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1
+; CHECK-NEXT:    [[OR_COND:%.*]] = select i1 [[TMP3]], i1 [[SWITCH_LOBIT]], i1 false
+; CHECK-NEXT:    br i1 [[OR_COND]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]]
+; CHECK:       [[SWITCH_LOOKUP]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two_default_reachable_multipreds, i64 0, i64 [[TMP4]]
+; CHECK-NEXT:    [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ARG]], %[[SWITCH_SPLIT]] ], [ [[ARG]], %[[SWITCH]] ], [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ]
+; CHECK-NEXT:    ret i32 [[PHI]]
+;
+entry:
+  br i1 %cond, label %switch, label %default_case
+
+switch:
+  switch i32 %arg, label %default_case [
+  i32 1,  label %bb1
+  i32 8,  label %bb2
+  i32 16, label %bb3
+  i32 32, label %bb4
+  i32 64, label %bb5
+  ]
+
+default_case:
+  %pn = phi i32 [ 0, %entry ], [ %arg, %switch ]
+  br label %return
+
+bb1: br label %return
+bb2: br label %return
+bb3: br label %return
+bb4: br label %return
+bb5: br label %return
+
+return:
+  %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [ %pn, %default_case ]
+  ret i32 %phi
+}

Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Thank you!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants