diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h index ee3cc950cdb50..2d0f95741077f 100644 --- a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h +++ b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h @@ -24,6 +24,7 @@ struct SimplifyCFGOptions { int BonusInstThreshold = 1; bool ForwardSwitchCondToPhi = false; bool ConvertSwitchRangeToICmp = false; + bool ConvertSwitchToArithmetic = false; bool ConvertSwitchToLookupTable = false; bool NeedCanonicalLoop = true; bool HoistCommonInsts = false; @@ -48,6 +49,10 @@ struct SimplifyCFGOptions { ConvertSwitchRangeToICmp = B; return *this; } + SimplifyCFGOptions &convertSwitchToArithmetic(bool B) { + ConvertSwitchToArithmetic = B; + return *this; + } SimplifyCFGOptions &convertSwitchToLookupTable(bool B) { ConvertSwitchToLookupTable = B; return *this; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 8cf277657a54a..9d8aaf1207beb 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1017,6 +1017,8 @@ Expected parseSimplifyCFGOptions(StringRef Params) { Result.forwardSwitchCondToPhi(Enable); } else if (ParamName == "switch-range-to-icmp") { Result.convertSwitchRangeToICmp(Enable); + } else if (ParamName == "switch-to-arithmetic") { + Result.convertSwitchToArithmetic(Enable); } else if (ParamName == "switch-to-lookup") { Result.convertSwitchToLookupTable(Enable); } else if (ParamName == "keep-loops") { diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 98821bb1408a7..ec99d7d03148c 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -775,6 +775,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() .convertSwitchRangeToICmp(true) + .convertSwitchToArithmetic(true) .hoistCommonInsts(true) .sinkCommonInsts(true))); FPM.addPass(InstCombinePass()); @@ -1372,6 +1373,7 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() .forwardSwitchCondToPhi(true) .convertSwitchRangeToICmp(true) + .convertSwitchToArithmetic(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) .hoistCommonInsts(true) @@ -1588,6 +1590,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, OptimizePM.addPass( SimplifyCFGPass(SimplifyCFGOptions() .convertSwitchRangeToICmp(true) + .convertSwitchToArithmetic(true) .speculateUnpredictables(true) .hoistLoadsStoresWithCondFaulting(true))); @@ -2172,6 +2175,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // Delete basic blocks, which optimization passes may have killed. LateFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() .convertSwitchRangeToICmp(true) + .convertSwitchToArithmetic(true) .hoistCommonInsts(true) .speculateUnpredictables(true))); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM))); @@ -2355,4 +2359,4 @@ AAManager PassBuilder::buildDefaultAAPipeline() { bool PassBuilder::isInstrumentedPGOUse() const { return (PGOOpt && PGOOpt->Action == PGOOptions::IRUse) || !UseCtxProfile.empty(); -} \ No newline at end of file +} diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 1d015971dfbdf..b702da4ed9151 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -684,8 +684,9 @@ FUNCTION_PASS_WITH_PARAMS( parseSimplifyCFGOptions, "no-speculate-blocks;speculate-blocks;no-simplify-cond-branch;" "simplify-cond-branch;no-forward-switch-cond;forward-switch-cond;" - "no-switch-range-to-icmp;switch-range-to-icmp;no-switch-to-lookup;" - "switch-to-lookup;no-keep-loops;keep-loops;no-hoist-common-insts;" + "no-switch-range-to-icmp;switch-range-to-icmp;no-switch-to-arithmetic;" + "switch-to-arithmetic;no-switch-to-lookup;switch-to-lookup;" + "no-keep-loops;keep-loops;no-hoist-common-insts;" "hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;" "hoist-loads-stores-with-cond-faulting;no-sink-common-insts;" "sink-common-insts;no-speculate-unpredictables;speculate-unpredictables;" diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 60e5df08c6efd..7ffccf73bd39d 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -355,6 +355,8 @@ void SimplifyCFGPass::printPipeline( OS << (Options.ForwardSwitchCondToPhi ? "" : "no-") << "forward-switch-cond;"; OS << (Options.ConvertSwitchRangeToICmp ? "" : "no-") << "switch-range-to-icmp;"; + OS << (Options.ConvertSwitchToArithmetic ? "" : "no-") + << "switch-to-arithmetic;"; OS << (Options.ConvertSwitchToLookupTable ? "" : "no-") << "switch-to-lookup;"; OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;"; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index a1f759dd1df83..d5ead2183d8bf 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -6508,6 +6508,9 @@ class SwitchReplacement { /// Return true if the replacement is a lookup table. bool isLookupTable(); + /// Return true if the replacement is a bit map. + bool isBitMap(); + private: // Depending on the switch, there are different alternatives. enum { @@ -6798,6 +6801,8 @@ Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; } bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; } +bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; } + static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) { // 40% is the default density for building a jump table in optsize/minsize // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this @@ -6963,7 +6968,8 @@ static void reuseTableCompare( /// lookup tables. static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, - const TargetTransformInfo &TTI) { + const TargetTransformInfo &TTI, + bool ConvertSwitchToLookupTable) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); BasicBlock *BB = SI->getParent(); @@ -7128,6 +7134,8 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, bool AnyLookupTables = any_of( PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); }); + bool AnyBitMaps = any_of(PhiToReplacementMap, + [](auto &KV) { return KV.second.isBitMap(); }); // A few conditions prevent the generation of lookup tables: // 1. The target does not support lookup tables. @@ -7140,6 +7148,12 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, Fn->getFnAttribute("no-jump-tables").getValueAsBool())) return false; + // In the early optimization pipeline, disable formation of lookup tables, + // bit maps and mask checks, as they may inhibit further optimization. + if (!ConvertSwitchToLookupTable && + (AnyLookupTables || AnyBitMaps || NeedMask)) + return false; + Builder.SetInsertPoint(SI); // TableIndex is the switch condition - TableIndexOffset if we don't // use the condition directly @@ -7780,14 +7794,10 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI)) return requestResimplify(); - // The conversion from switch to lookup tables results in difficult-to-analyze - // code and makes pruning branches much harder. This is a problem if the - // switch expression itself can still be restricted as a result of inlining or - // CVP. Therefore, only apply this transformation during late stages of the - // optimisation pipeline. - if (Options.ConvertSwitchToLookupTable && - simplifySwitchLookup(SI, Builder, DTU, DL, TTI)) - return requestResimplify(); + if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable) + if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI, + Options.ConvertSwitchToLookupTable)) + return requestResimplify(); if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI)) return requestResimplify(); diff --git a/llvm/test/Other/new-pm-print-pipeline.ll b/llvm/test/Other/new-pm-print-pipeline.ll index 6fa57f17174e9..3536932f4432e 100644 --- a/llvm/test/Other/new-pm-print-pipeline.ll +++ b/llvm/test/Other/new-pm-print-pipeline.ll @@ -50,7 +50,7 @@ ; CHECK-17: function(print,print) ; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg,simplifycfg)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18 -; CHECK-18: function(simplifycfg,simplifycfg) +; CHECK-18: function(simplifycfg,simplifycfg) ; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(loop-vectorize,loop-vectorize)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-19 ; CHECK-19: function(loop-vectorize,loop-vectorize) diff --git a/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll b/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll index c9063d3ec26ff..25267dcc6dbcb 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch-transformations-no-lut.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -passes='simplifycfg' < %s | FileCheck %s --check-prefix=OPTNOLUT +; RUN: opt -S -passes='simplifycfg' < %s | FileCheck %s --check-prefix=OPTNOLUT ; RUN: %if amdgpu-registered-target %{ opt -mtriple=amdgcn--amdpal -S -passes='simplifycfg' < %s | FileCheck %s --check-prefix=TTINOLUT %} ; target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -7,23 +7,11 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" define i32 @linear_transform_with_default(i32 %x) { ; OPTNOLUT-LABEL: define i32 @linear_transform_with_default( ; OPTNOLUT-SAME: i32 [[X:%.*]]) { -; OPTNOLUT-NEXT: [[ENTRY:.*]]: -; OPTNOLUT-NEXT: switch i32 [[X]], label %[[END:.*]] [ -; OPTNOLUT-NEXT: i32 0, label %[[CASE0:.*]] -; OPTNOLUT-NEXT: i32 1, label %[[CASE1:.*]] -; OPTNOLUT-NEXT: i32 2, label %[[CASE2:.*]] -; OPTNOLUT-NEXT: i32 3, label %[[CASE3:.*]] -; OPTNOLUT-NEXT: ] -; OPTNOLUT: [[CASE0]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE1]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE2]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE3]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[END]]: -; OPTNOLUT-NEXT: [[IDX:%.*]] = phi i32 [ 1, %[[CASE0]] ], [ 4, %[[CASE1]] ], [ 7, %[[CASE2]] ], [ 10, %[[CASE3]] ], [ 13, %[[ENTRY]] ] +; OPTNOLUT-NEXT: [[ENTRY:.*:]] +; OPTNOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 4 +; OPTNOLUT-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[X]], 3 +; OPTNOLUT-NEXT: [[SWITCH_OFFSET:%.*]] = add nsw i32 [[SWITCH_IDX_MULT]], 1 +; OPTNOLUT-NEXT: [[IDX:%.*]] = select i1 [[TMP0]], i32 [[SWITCH_OFFSET]], i32 13 ; OPTNOLUT-NEXT: ret i32 [[IDX]] ; ; TTINOLUT-LABEL: define i32 @linear_transform_with_default( @@ -138,26 +126,8 @@ end: define i32 @linear_transform_no_default(i32 %x) { ; OPTNOLUT-LABEL: define i32 @linear_transform_no_default( ; OPTNOLUT-SAME: i32 [[X:%.*]]) { -; OPTNOLUT-NEXT: [[ENTRY:.*]]: -; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [ -; OPTNOLUT-NEXT: i32 0, label %[[END:.*]] -; OPTNOLUT-NEXT: i32 1, label %[[CASE1:.*]] -; OPTNOLUT-NEXT: i32 2, label %[[CASE2:.*]] -; OPTNOLUT-NEXT: i32 3, label %[[CASE3:.*]] -; OPTNOLUT-NEXT: i32 4, label %[[CASE4:.*]] -; OPTNOLUT-NEXT: ] -; OPTNOLUT: [[CASE1]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE2]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE3]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[CASE4]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[DEFAULT]]: -; OPTNOLUT-NEXT: unreachable -; OPTNOLUT: [[END]]: -; OPTNOLUT-NEXT: [[SWITCH_IDX_MULT:%.*]] = phi i32 [ 3, %[[CASE1]] ], [ 6, %[[CASE2]] ], [ 9, %[[CASE3]] ], [ 12, %[[CASE4]] ], [ 0, %[[ENTRY]] ] +; OPTNOLUT-NEXT: [[ENTRY:.*:]] +; OPTNOLUT-NEXT: [[SWITCH_IDX_MULT:%.*]] = mul nsw i32 [[X]], 3 ; OPTNOLUT-NEXT: ret i32 [[SWITCH_IDX_MULT]] ; ; TTINOLUT-LABEL: define i32 @linear_transform_no_default( @@ -350,18 +320,9 @@ end: define i32 @single_value_withdefault(i32 %x) { ; OPTNOLUT-LABEL: define i32 @single_value_withdefault( ; OPTNOLUT-SAME: i32 [[X:%.*]]) { -; OPTNOLUT-NEXT: [[ENTRY:.*]]: -; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [ -; OPTNOLUT-NEXT: i32 0, label %[[END:.*]] -; OPTNOLUT-NEXT: i32 1, label %[[END]] -; OPTNOLUT-NEXT: i32 2, label %[[END]] -; OPTNOLUT-NEXT: i32 3, label %[[END]] -; OPTNOLUT-NEXT: i32 4, label %[[END]] -; OPTNOLUT-NEXT: ] -; OPTNOLUT: [[DEFAULT]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[END]]: -; OPTNOLUT-NEXT: [[DOT:%.*]] = phi i32 [ 3, %[[DEFAULT]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ] +; OPTNOLUT-NEXT: [[ENTRY:.*:]] +; OPTNOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 5 +; OPTNOLUT-NEXT: [[DOT:%.*]] = select i1 [[TMP0]], i32 2, i32 3 ; OPTNOLUT-NEXT: ret i32 [[DOT]] ; ; TTINOLUT-LABEL: define i32 @single_value_withdefault( @@ -401,18 +362,9 @@ end: define i32 @single_value_no_jump_tables(i32 %x) "no-jump-tables"="true" { ; OPTNOLUT-LABEL: define i32 @single_value_no_jump_tables( ; OPTNOLUT-SAME: i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { -; OPTNOLUT-NEXT: [[ENTRY:.*]]: -; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [ -; OPTNOLUT-NEXT: i32 0, label %[[END:.*]] -; OPTNOLUT-NEXT: i32 1, label %[[END]] -; OPTNOLUT-NEXT: i32 2, label %[[END]] -; OPTNOLUT-NEXT: i32 3, label %[[END]] -; OPTNOLUT-NEXT: i32 4, label %[[END]] -; OPTNOLUT-NEXT: ] -; OPTNOLUT: [[DEFAULT]]: -; OPTNOLUT-NEXT: br label %[[END]] -; OPTNOLUT: [[END]]: -; OPTNOLUT-NEXT: [[IDX:%.*]] = phi i32 [ 3, %[[DEFAULT]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ], [ 2, %[[ENTRY]] ] +; OPTNOLUT-NEXT: [[ENTRY:.*:]] +; OPTNOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[X]], 5 +; OPTNOLUT-NEXT: [[IDX:%.*]] = select i1 [[TMP0]], i32 2, i32 3 ; OPTNOLUT-NEXT: ret i32 [[IDX]] ; ; TTINOLUT-LABEL: define i32 @single_value_no_jump_tables( @@ -449,6 +401,60 @@ end: ret i32 %idx } +define i1 @single_value_with_mask(i32 %x) { +; OPTNOLUT-LABEL: define i1 @single_value_with_mask( +; OPTNOLUT-SAME: i32 [[X:%.*]]) { +; OPTNOLUT-NEXT: [[ENTRY:.*]]: +; OPTNOLUT-NEXT: switch i32 [[X]], label %[[DEFAULT:.*]] [ +; OPTNOLUT-NEXT: i32 18, label %[[END:.*]] +; OPTNOLUT-NEXT: i32 21, label %[[END]] +; OPTNOLUT-NEXT: i32 48, label %[[END]] +; OPTNOLUT-NEXT: i32 16, label %[[END]] +; OPTNOLUT-NEXT: ] +; OPTNOLUT: [[DEFAULT]]: +; OPTNOLUT-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 80 +; OPTNOLUT-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i1 false, i1 true +; OPTNOLUT-NEXT: br label %[[END]] +; OPTNOLUT: [[END]]: +; OPTNOLUT-NEXT: [[RES:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ false, %[[ENTRY]] ], [ [[SEL]], %[[DEFAULT]] ] +; OPTNOLUT-NEXT: ret i1 [[RES]] +; +; TTINOLUT-LABEL: define i1 @single_value_with_mask( +; TTINOLUT-SAME: i32 [[X:%.*]]) { +; TTINOLUT-NEXT: [[ENTRY:.*]]: +; TTINOLUT-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i32 [[X]], 16 +; TTINOLUT-NEXT: [[TMP0:%.*]] = icmp ult i32 [[SWITCH_TABLEIDX]], 33 +; TTINOLUT-NEXT: [[SWITCH_MASKINDEX:%.*]] = zext i32 [[SWITCH_TABLEIDX]] to i64 +; TTINOLUT-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i64 4294967333, [[SWITCH_MASKINDEX]] +; TTINOLUT-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i64 [[SWITCH_SHIFTED]] to i1 +; TTINOLUT-NEXT: [[OR_COND:%.*]] = select i1 [[TMP0]], i1 [[SWITCH_LOBIT]], i1 false +; TTINOLUT-NEXT: br i1 [[OR_COND]], label %[[END:.*]], label %[[DEFAULT:.*]] +; TTINOLUT: [[DEFAULT]]: +; TTINOLUT-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 80 +; TTINOLUT-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i1 false, i1 true +; TTINOLUT-NEXT: br label %[[END]] +; TTINOLUT: [[END]]: +; TTINOLUT-NEXT: [[RES:%.*]] = phi i1 [ [[SEL]], %[[DEFAULT]] ], [ false, %[[ENTRY]] ] +; TTINOLUT-NEXT: ret i1 [[RES]] +; +entry: + switch i32 %x, label %default [ + i32 18, label %end + i32 21, label %end + i32 48, label %end + i32 16, label %end + ] + +default: + %cmp = icmp eq i32 %x, 80 + %sel = select i1 %cmp, i1 false, i1 true + br label %end + +end: + %res = phi i1 [ false, %entry ], [ false, %entry ], [ false, %entry ], [ false, %entry ], [ %sel, %default ] + ret i1 %res +} + define i32 @lookup_table(i32 %x) { ; OPTNOLUT-LABEL: define i32 @lookup_table( ; OPTNOLUT-SAME: i32 [[X:%.*]]) {