diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 084d9d87c4778..48055ad6ea7e4 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -7227,6 +7227,7 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest); BranchInst *RangeCheckBranch = nullptr; + BranchInst *CondBranch = nullptr; Builder.SetInsertPoint(SI); const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize); @@ -7241,6 +7242,7 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize)); RangeCheckBranch = Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest()); + CondBranch = RangeCheckBranch; if (DTU) Updates.push_back({DominatorTree::Insert, BB, LookupBB}); } @@ -7279,7 +7281,7 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted"); Value *LoBit = Builder.CreateTrunc( Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit"); - Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest()); + CondBranch = Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest()); if (DTU) { Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB}); Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()}); @@ -7319,19 +7321,32 @@ static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, if (DTU) Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest}); + SmallVector BranchWeights; + const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes && + extractBranchWeights(*SI, BranchWeights); + uint64_t ToLookupWeight = 0; + uint64_t ToDefaultWeight = 0; + // Remove the switch. SmallPtrSet RemovedSuccessors; - for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) { - BasicBlock *Succ = SI->getSuccessor(i); + for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) { + BasicBlock *Succ = SI->getSuccessor(I); - if (Succ == SI->getDefaultDest()) + if (Succ == SI->getDefaultDest()) { + if (HasBranchWeights) + ToDefaultWeight += BranchWeights[I]; continue; + } Succ->removePredecessor(BB); if (DTU && RemovedSuccessors.insert(Succ).second) Updates.push_back({DominatorTree::Delete, BB, Succ}); + if (HasBranchWeights) + ToLookupWeight += BranchWeights[I]; } SI->eraseFromParent(); - + if (HasBranchWeights) + setFittedBranchWeights(*CondBranch, {ToLookupWeight, ToDefaultWeight}, + /*IsExpected=*/false); if (DTU) DTU->applyUpdates(Updates); diff --git a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll index f9e79cabac51d..bee6b375ea11a 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll @@ -1565,14 +1565,14 @@ end: ; lookup (since i3 can only hold values in the range of explicit ; values) and simultaneously trying to generate a branch to deal with ; the fact that we have holes in the range. -define i32 @covered_switch_with_bit_tests(i3) { +define i32 @covered_switch_with_bit_tests(i3) !prof !0 { ; CHECK-LABEL: @covered_switch_with_bit_tests( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SWITCH_TABLEIDX:%.*]] = sub i3 [[TMP0:%.*]], -4 ; CHECK-NEXT: [[SWITCH_MASKINDEX:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i8 ; CHECK-NEXT: [[SWITCH_SHIFTED:%.*]] = lshr i8 -61, [[SWITCH_MASKINDEX]] ; CHECK-NEXT: [[SWITCH_LOBIT:%.*]] = trunc i8 [[SWITCH_SHIFTED]] to i1 -; CHECK-NEXT: br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[L6:%.*]] +; CHECK-NEXT: br i1 [[SWITCH_LOBIT]], label [[SWITCH_LOOKUP:%.*]], label [[L6:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: switch.lookup: ; CHECK-NEXT: [[TMP1:%.*]] = zext i3 [[SWITCH_TABLEIDX]] to i64 ; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [8 x i32], ptr @switch.table.covered_switch_with_bit_tests, i64 0, i64 [[TMP1]] @@ -1588,7 +1588,7 @@ entry: i3 -4, label %l5 i3 3, label %l1 i3 2, label %l1 - ] + ], !prof !1 l1: br label %l2 @@ -2425,3 +2425,10 @@ return: %res = phi i1 [ 0, %bb0 ], [ 1, %bb1 ] ret i1 %res } + +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 3, i32 5, i32 7, i32 11, i32 13} +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 36, i32 3} +;. diff --git a/llvm/test/Transforms/SimplifyCFG/rangereduce.ll b/llvm/test/Transforms/SimplifyCFG/rangereduce.ll index 17d65a4d4fa5e..d1fba91d1e505 100644 --- a/llvm/test/Transforms/SimplifyCFG/rangereduce.ll +++ b/llvm/test/Transforms/SimplifyCFG/rangereduce.ll @@ -1,15 +1,22 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -passes=simplifycfg -simplifycfg-require-and-preserve-domtree=1 -switch-to-lookup -S | FileCheck %s ; RUN: opt < %s -passes='simplifycfg' -S | FileCheck %s target datalayout = "e-n32" -define i32 @test1(i32 %a) { +;. +; CHECK: @switch.table.test1 = private unnamed_addr constant [4 x i32] [i32 11984, i32 1143, i32 99783, i32 99783], align 4 +; CHECK: @switch.table.test3 = private unnamed_addr constant [3 x i32] [i32 11984, i32 1143, i32 99783], align 4 +; CHECK: @switch.table.test6 = private unnamed_addr constant [4 x i32] [i32 99783, i32 99783, i32 1143, i32 11984], align 4 +; CHECK: @switch.table.test8 = private unnamed_addr constant [5 x i32] [i32 11984, i32 1143, i32 99783, i32 8867, i32 99783], align 4 +; CHECK: @switch.table.test9 = private unnamed_addr constant [8 x i32] [i32 99783, i32 8867, i32 99783, i32 8867, i32 8867, i32 8867, i32 11984, i32 1143], align 4 +;. +define i32 @test1(i32 %a) !prof !0 { ; CHECK-LABEL: @test1( ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[A:%.*]], 97 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.fshl.i32(i32 [[TMP1]], i32 [[TMP1]], i32 30) ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 4 -; CHECK-NEXT: br i1 [[TMP3]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]] +; CHECK-NEXT: br i1 [[TMP3]], label [[SWITCH_LOOKUP:%.*]], label [[COMMON_RET:%.*]], !prof [[PROF1:![0-9]+]] ; CHECK: switch.lookup: ; CHECK-NEXT: [[TMP4:%.*]] = zext nneg i32 [[TMP2]] to i64 ; CHECK-NEXT: [[SWITCH_GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @switch.table.test1, i64 0, i64 [[TMP4]] @@ -24,7 +31,7 @@ define i32 @test1(i32 %a) { i32 101, label %two i32 105, label %three i32 109, label %three - ] + ], !prof !1 def: ret i32 8867 @@ -310,3 +317,12 @@ three: ret i32 99783 } +!0 = !{!"function_entry_count", i32 100} +!1 = !{!"branch_weights", i32 5, i32 7, i32 11, i32 13, i32 17} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { optsize } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 100} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 48, i32 5} +;.