diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 8bba634521e3e..aeb304195c283 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7422,6 +7422,7 @@ static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, /// log2(C)-indexed value table (instead of traditionally emitting a load of the /// address of the jump target, and indirectly jump to it). static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, + DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { Value *Condition = SI->getCondition(); @@ -7444,12 +7445,6 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, if (SI->getNumCases() < 4) return false; - // We perform this optimization only for switches with - // unreachable default case. - // This assumtion will save us from checking if `Condition` is a power of two. - if (!SI->defaultDestUnreachable()) - return false; - // Check that switch cases are powers of two. SmallVector Values; for (const auto &Case : SI->cases()) { @@ -7469,6 +7464,24 @@ static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, Builder.SetInsertPoint(SI); + if (!SI->defaultDestUnreachable()) { + // The default case is reachable, so Condition may not be a power of two. + // Branch to the default block unless ctpop(Condition) == 1. 
+ auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition); + auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1)); + + auto *OrigBB = SI->getParent(); + auto *DefaultCaseBB = SI->getDefaultDest(); + BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU); + auto It = OrigBB->getTerminator()->getIterator(); + BranchInst::Create(SplitBB, DefaultCaseBB, IsPow2, It); + It->eraseFromParent(); + + addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB); + if (DTU) + DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}}); + } + // Replace each case with its trailing zeros number. for (auto &Case : SI->cases()) { auto *OrigValue = Case.getCaseValue(); @@ -7827,7 +7840,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { simplifySwitchLookup(SI, Builder, DTU, DL, TTI)) return requestResimplify(); - if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI)) + if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI)) return requestResimplify(); if (reduceSwitchRange(SI, Builder, DL, TTI)) diff --git a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll index c2e632d0e724c..b1cf0e4bcd908 100644 --- a/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll +++ b/llvm/test/Transforms/SimplifyCFG/RISCV/switch-of-powers-of-two.ll @@ -61,30 +61,49 @@ return: ret i32 %p } -; Check that switch's of powers of two range is not reduced if default case is reachable +; Check that a switch of powers of two with a reachable default case has its range reduced +; when Zbb is enabled, by sending non-power-of-two inputs to the default block. 
define i32 @switch_of_powers_reachable_default(i32 %x) { -; CHECK-LABEL: @switch_of_powers_reachable_default( -; CHECK-NEXT: entry: -; CHECK-NEXT: switch i32 [[X:%.*]], label [[RETURN:%.*]] [ -; CHECK-NEXT: i32 1, label [[BB1:%.*]] -; CHECK-NEXT: i32 8, label [[BB2:%.*]] -; CHECK-NEXT: i32 16, label [[BB3:%.*]] -; CHECK-NEXT: i32 32, label [[BB4:%.*]] -; CHECK-NEXT: i32 64, label [[BB5:%.*]] -; CHECK-NEXT: ] -; CHECK: bb1: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: bb2: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: bb3: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: bb4: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: bb5: -; CHECK-NEXT: br label [[RETURN]] -; CHECK: return: -; CHECK-NEXT: [[P:%.*]] = phi i32 [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ -1, [[ENTRY:%.*]] ] -; CHECK-NEXT: ret i32 [[P]] +; RV64I-LABEL: @switch_of_powers_reachable_default( +; RV64I-NEXT: entry: +; RV64I-NEXT: switch i32 [[X:%.*]], label [[RETURN:%.*]] [ +; RV64I-NEXT: i32 1, label [[BB1:%.*]] +; RV64I-NEXT: i32 8, label [[BB2:%.*]] +; RV64I-NEXT: i32 16, label [[BB3:%.*]] +; RV64I-NEXT: i32 32, label [[BB4:%.*]] +; RV64I-NEXT: i32 64, label [[BB5:%.*]] +; RV64I-NEXT: ] +; RV64I: bb1: +; RV64I-NEXT: br label [[RETURN]] +; RV64I: bb2: +; RV64I-NEXT: br label [[RETURN]] +; RV64I: bb3: +; RV64I-NEXT: br label [[RETURN]] +; RV64I: bb4: +; RV64I-NEXT: br label [[RETURN]] +; RV64I: bb5: +; RV64I-NEXT: br label [[RETURN]] +; RV64I: return: +; RV64I-NEXT: [[P:%.*]] = phi i32 [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 1, [[BB3]] ], [ 0, [[BB4]] ], [ 42, [[BB5]] ], [ -1, [[ENTRY:%.*]] ] +; RV64I-NEXT: ret i32 [[P]] +; +; RV64ZBB-LABEL: @switch_of_powers_reachable_default( +; RV64ZBB-NEXT: entry: +; RV64ZBB-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]) +; RV64ZBB-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1 +; RV64ZBB-NEXT: br i1 [[TMP1]], label [[ENTRY_SPLIT:%.*]], label [[RETURN:%.*]] +; RV64ZBB: entry.split: +; RV64ZBB-NEXT: [[TMP2:%.*]] = call i32 
@llvm.cttz.i32(i32 [[X]], i1 true) +; RV64ZBB-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7 +; RV64ZBB-NEXT: br i1 [[TMP3]], label [[SWITCH_LOOKUP:%.*]], label [[RETURN]] +; RV64ZBB: switch.lookup: +; RV64ZBB-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64 +; RV64ZBB-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_reachable_default, i64 0, i64 [[TMP4]] +; RV64ZBB-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 +; RV64ZBB-NEXT: br label [[RETURN]] +; RV64ZBB: return: +; RV64ZBB-NEXT: [[P:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ -1, [[ENTRY_SPLIT]] ], [ [[SWITCH_LOAD]], [[SWITCH_LOOKUP]] ] +; RV64ZBB-NEXT: ret i32 [[P]] ; entry: switch i32 %x, label %default_case [ diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll index 49eb1991ccba2..aa95b3fd235e5 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/switch-of-powers-of-two.ll @@ -34,3 +34,97 @@ return: %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ] ret i32 %phi } + +define i32 @switch_of_powers_two_default_reachable(i32 %arg) { +; CHECK-LABEL: define i32 @switch_of_powers_two_default_reachable( +; CHECK-SAME: i32 [[ARG:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1 +; CHECK-NEXT: br i1 [[TMP1]], label %[[ENTRY_SPLIT:.*]], label %[[RETURN:.*]] +; CHECK: [[ENTRY_SPLIT]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7 +; CHECK-NEXT: br i1 [[TMP3]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]] +; CHECK: [[SWITCH_LOOKUP]]: +; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64 +; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [7 x i32], ptr 
@switch.table.switch_of_powers_two_default_reachable, i64 0, i64 [[TMP4]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 5, %[[ENTRY]] ], [ 5, %[[ENTRY_SPLIT]] ], [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + switch i32 %arg, label %default_case [ + i32 1, label %bb1 + i32 8, label %bb2 + i32 16, label %bb3 + i32 32, label %bb4 + i32 64, label %bb5 + ] + +default_case: br label %return +bb1: br label %return +bb2: br label %return +bb3: br label %return +bb4: br label %return +bb5: br label %return + +return: + %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [ 5, %default_case ] + ret i32 %phi +} + +define i32 @switch_of_powers_two_default_reachable_multipreds(i32 %arg, i1 %cond) { +; CHECK-LABEL: define i32 @switch_of_powers_two_default_reachable_multipreds( +; CHECK-SAME: i32 [[ARG:%.*]], i1 [[COND:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 [[COND]], label %[[SWITCH:.*]], label %[[RETURN:.*]] +; CHECK: [[SWITCH]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.ctpop.i32(i32 [[ARG]]) +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 1 +; CHECK-NEXT: br i1 [[TMP1]], label %[[SWITCH_SPLIT:.*]], label %[[RETURN]] +; CHECK: [[SWITCH_SPLIT]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[ARG]], i1 true) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7 +; CHECK-NEXT: [[SWITCH_MASKINDEX:%.*]] = trunc i32 [[TMP2]] to i8 +; CHECK-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i8 121, [[SWITCH_MASKINDEX]] +; CHECK-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP3]], i1 [[SWITCH_LOBIT]], i1 false +; CHECK-NEXT: br i1 [[OR_COND]], label %[[SWITCH_LOOKUP:.*]], label %[[RETURN]] +; CHECK: [[SWITCH_LOOKUP]]: +; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64 +; CHECK-NEXT: [[SWITCH_GEP:%.*]] 
= getelementptr inbounds [7 x i32], ptr @switch.table.switch_of_powers_two_default_reachable_multipreds, i64 0, i64 [[TMP4]] +; CHECK-NEXT: [[SWITCH_LOAD:%.*]] = load i32, ptr [[SWITCH_GEP]], align 4 +; CHECK-NEXT: br label %[[RETURN]] +; CHECK: [[RETURN]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[ARG]], %[[SWITCH_SPLIT]] ], [ [[ARG]], %[[SWITCH]] ], [ [[SWITCH_LOAD]], %[[SWITCH_LOOKUP]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + br i1 %cond, label %switch, label %default_case + +switch: + switch i32 %arg, label %default_case [ + i32 1, label %bb1 + i32 8, label %bb2 + i32 16, label %bb3 + i32 32, label %bb4 + i32 64, label %bb5 + ] + +default_case: + %pn = phi i32 [ 0, %entry ], [ %arg, %switch ] + br label %return + +bb1: br label %return +bb2: br label %return +bb3: br label %return +bb4: br label %return +bb5: br label %return + +return: + %phi = phi i32 [ 3, %bb1 ], [ 2, %bb2 ], [ 1, %bb3 ], [ 0, %bb4 ], [ 42, %bb5 ], [ %pn, %default_case ] + ret i32 %phi +}