From 7550b95bfaf8407dc03b65cf3b09e03502bacc73 Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Wed, 12 Nov 2025 18:38:02 -0500 Subject: [PATCH 1/3] [PGO] Fix zeroed estimated trip count The premise of this patch is that an estimated trip count of 0 is always invalid. Before PR #152775, `llvm::getLoopEstimatedTripCount` never returned 0. PR #152775 changed that behavior but kept documentation saying it returns a positive count. Some passes continue to rely on the previous behavior, as reported in issue #164254. And yet some passes call `llvm::setLoopEstimatedTripCount` with a value of 0. To understand why an estimated trip count might seem like it can be 0 even though it cannot, consider the example of LoopPeel. Given a loop with an estimated trip count of 10, if LoopPeel peels 2 iterations, it seems reasonable that the remaining loop will have an estimated trip count of 8. However, what should the remaining loop's estimated trip count be when peeling 10 iterations? What about 50? Naively, it seems like the answers are 0 and -40, respectively. But neither is valid. Recall that we are talking about estimates. That means the probability is likely *low* but not 0 that execution will reach iteration 11, iteration 51, or the remaining loop. In the unlikely case that it does reach them, it executes them. In other words, if execution reaches the loop header, at least one iteration of the remaining loop executes, and the probability is likely low that more will execute. Thus, a pass like LoopPeel might naively calculate that the remaining loop's estimated trip count is 0, but it must be at least 1. We could try to ensure that all passes never set the estimated trip count as 0. For now, this patch instead: - Asserts that `llvm.loop.estimated_trip_count` never ends up as 0. - If `EstimatedloopInvocationWeight` is not specified, adjusts `llvm::setLoopEstimatedTripCount` to convert 0 to 1. 
- If `EstimatedloopInvocationWeight` is specified, adjusts `llvm::setLoopEstimatedTripCount` to set zeroed branch weights and remove any `llvm.loop.estimated_trip_count`. The effect is that `llvm::getLoopEstimatedTripCount` will return `std::nullopt`. For passes that still use `EstimatedloopInvocationWeight`, this patch thus restores the behavior from before PR #152775. Eventually, no passes should use `EstimatedloopInvocationWeight`. --- llvm/docs/LangRef.rst | 13 +- .../include/llvm/Transforms/Utils/LoopUtils.h | 29 +++- llvm/lib/IR/Verifier.cpp | 7 +- llvm/lib/Transforms/Utils/LoopUtils.cpp | 47 +++-- .../branch-weights-freq/unroll-epilog.ll | 10 +- .../LoopUnroll/runtime-loop-branchweight.ll | 6 +- .../LoopVectorize/AArch64/check-prof-info.ll | 164 +++++++++--------- .../LoopVectorize/branch-weights.ll | 26 ++- .../llvm.loop.estimated_trip_count.ll | 16 +- .../Transforms/Utils/LoopUtilsTest.cpp | 80 +++++++++ 10 files changed, 268 insertions(+), 130 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index ab085ca0b1499..57344cdbbafa7 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -7989,9 +7989,9 @@ this metadata is added (i.e., has been distributed). See ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This metadata records an estimated trip count for the loop. The first operand -is the string ``llvm.loop.estimated_trip_count``. The second operand is an -integer constant of type ``i32`` or smaller specifying the estimate. For -example: +is the string ``llvm.loop.estimated_trip_count``. The second operand is a +positive integer constant of type ``i32`` or smaller specifying the estimate. +For example: .. code-block:: llvm @@ -8033,6 +8033,13 @@ pass should record the new estimates by calling loop, ``llvm::getLoopEstimatedTripCount`` returns its value instead of estimating the trip count from the loop's ``branch_weights`` metadata. 
+Especially after a transformation like loop peeling, the probability of reaching +a loop's header might be very low. Regardless, in the case that it is reached, +at least one iteration will execute, so an estimated trip count of zero is +invalid. Some passes thus rely on non-zero estimated trip counts. +Nevertheless, some passes naively compute it as zero. To avoid misbehavior, +``llvm::setLoopEstimatedTripCount`` converts zero to one. + '``llvm.licm.disable``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 86eb21389756c..8b77e33b1d12c 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -322,6 +322,16 @@ LLVM_ABI TransformationMode hasLICMVersioningTransformation(const Loop *L); LLVM_ABI void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V = 0); +/// Set \p StringMD in the loop metadata of \p TheLoop while keeping other +/// values intact: +/// - If \p V is \c std::nullopt, remove \p StringMD, or do nothing if +/// \p StringMD is not present, perhaps because there is no loop metadata. +/// - Else, set \p StringMD to \p V. Either add \p StringMD if absent, update +/// it if the current value is different, or do nothing if the current value +/// is the same. +LLVM_ABI void setLoopStringMetadata(Loop *TheLoop, const char *StringMD, + std::optional V); + /// Return either: /// - \c std::nullopt, if the implementation is unable to handle the loop form /// of \p L (e.g., \p L must have a latch block that controls the loop exit). @@ -353,14 +363,23 @@ getLoopEstimatedTripCount(Loop *L, /// to handle the loop form of \p L (e.g., \p L must have a latch block that /// controls the loop exit). Otherwise, return true. 
/// -/// In addition, if \p EstimatedLoopInvocationWeight, set the branch weight -/// metadata of \p L to reflect that \p L has an estimated -/// \p EstimatedTripCount iterations and has \c *EstimatedLoopInvocationWeight -/// exit weight through the loop's latch. +/// Some passes rely on estimated trip counts to be non-zero because, once a +/// loop header is reached, at least one iteration will execute. However, some +/// passes naively compute it as zero. To avoid misbehavior, if +/// \p EstimatedTripCount is zero, interpret it as one except as noted below. +/// +/// In addition, if \p EstimatedLoopInvocationWeight: +/// - Set the branch weight metadata of \p L to reflect that \p L has an +/// estimated \p EstimatedTripCount iterations and has +/// \c *EstimatedLoopInvocationWeight exit weight through the loop's latch. +/// - If \p EstimatedTripCount is zero, zero the branch weights, and drop +/// \c llvm.loop.estimated_trip_count entirely. \c getLoopEstimatedTripCount +/// will then return \c std::nullopt. /// /// TODO: Eventually, once all passes have migrated away from setting branch /// weights to indicate estimated trip counts, this function will drop the -/// \p EstimatedLoopInvocationWeight parameter. +/// \p EstimatedLoopInvocationWeight parameter, so its historical handling of +/// \p EstimatedTripCount == 0 should no longer be needed. 
LLVM_ABI bool setLoopEstimatedTripCount( Loop *L, unsigned EstimatedTripCount, std::optional EstimatedLoopInvocationWeight = std::nullopt); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index a4f647409094c..8fd7209b814c0 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1094,9 +1094,10 @@ void Verifier::visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs) { Check(MD.getNumOperands() == 2, "Expected two operands", &MD); auto *Count = dyn_cast_or_null(MD.getOperand(1)); Check(Count && Count->getType()->isIntegerTy() && - cast(Count->getType())->getBitWidth() <= 32, - "Expected second operand to be an integer constant of type i32 or " - "smaller", + cast(Count->getType())->getBitWidth() <= 32 && + !cast(Count->getValue())->isZero(), + "Expected second operand to be a positive integer constant of type " + "i32 or smaller", &MD); } diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 6e60b94be78e3..a5b9184f95fad 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -214,9 +214,15 @@ static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) { /// different. void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD, unsigned V) { + setLoopStringMetadata(TheLoop, StringMD, V); +} + +void llvm::setLoopStringMetadata(Loop *TheLoop, const char *StringMD, + std::optional V) { SmallVector MDs(1); // If the loop already has metadata, retain it. 
MDNode *LoopID = TheLoop->getLoopID(); + bool Found = false; if (LoopID) { for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { MDNode *Node = cast(LoopID->getOperand(i)); @@ -224,21 +230,27 @@ void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD, if (Node->getNumOperands() == 2) { MDString *S = dyn_cast(Node->getOperand(0)); if (S && S->getString() == StringMD) { - ConstantInt *IntMD = - mdconst::extract_or_null(Node->getOperand(1)); - if (IntMD && IntMD->getSExtValue() == V) - // It is already in place. Do nothing. - return; - // We need to update the value, so just skip it here and it will - // be added after copying other existed nodes. + Found = true; + if (V) { + ConstantInt *IntMD = + mdconst::extract_or_null(Node->getOperand(1)); + if (IntMD && IntMD->getSExtValue() == *V) + // It is already in place. Do nothing. + return; + } + // We need to update/remove the value, so just skip it here and it + // will be added/removed after copying other existed nodes. continue; } } MDs.push_back(Node); } } + if (!V && !Found) + return; // Add new metadata. - MDs.push_back(createStringMetadata(TheLoop, StringMD, V)); + if (V) + MDs.push_back(createStringMetadata(TheLoop, StringMD, *V)); // Replace current metadata node with new one. LLVMContext &Context = TheLoop->getHeader()->getContext(); MDNode *NewLoopID = MDNode::get(Context, MDs); @@ -914,6 +926,7 @@ llvm::getLoopEstimatedTripCount(Loop *L, // Return the estimated trip count from metadata unless the metadata is // missing or has no value. if (auto TC = getOptionalIntLoopAttribute(L, LLVMLoopEstimatedTripCount)) { + assert(TC != 0 && "Reached loop header executes at least one iteration"); LLVM_DEBUG(dbgs() << "getLoopEstimatedTripCount: " << LLVMLoopEstimatedTripCount << " metadata has trip " << "count of " << *TC << " for " << DbgLoop(L) << "\n"); @@ -936,8 +949,13 @@ bool llvm::setLoopEstimatedTripCount( if (!LatchBranch) return false; - // Set the metadata. 
- addStringMetadataToLoop(L, LLVMLoopEstimatedTripCount, EstimatedTripCount); + // Set the metadata. Some users of the estimated trip count rely on the value + // to be non-zero. + if (!EstimatedloopInvocationWeight) { + setLoopStringMetadata(L, LLVMLoopEstimatedTripCount, + EstimatedTripCount == 0 ? 1 : EstimatedTripCount); + return true; + } // At the moment, we currently support changing the estimated trip count in // the latch branch's branch weights only. We could extend this API to @@ -946,8 +964,13 @@ bool llvm::setLoopEstimatedTripCount( // TODO: Eventually, once all passes have migrated away from setting branch // weights to indicate estimated trip counts, we will not set branch weights // here at all. - if (!EstimatedloopInvocationWeight) - return true; + + // Set the metadata. Some users of the estimated trip count rely on the value + // to be non-zero. + setLoopStringMetadata(L, LLVMLoopEstimatedTripCount, + EstimatedTripCount == 0 + ? std::nullopt + : std::optional(EstimatedTripCount)); // Calculate taken and exit weights. unsigned LatchExitWeight = 0; diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll index 96b31d801c2f9..3e166e9997638 100644 --- a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll +++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll @@ -111,7 +111,8 @@ do.end: !0 = !{!"branch_weights", i32 1, i32 10} ; ------------------------------------------------------------------------------ -; Check branch weight metadata and estimated trip count metadata. +; Check branch weight metadata and estimated trip count metadata. The minimum +; valid estimated trip count is 1 not 0. 
; ; UR2: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 195225786, i32 1952257862} ; UR4: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 534047398, i32 1613436250} @@ -135,7 +136,7 @@ do.end: ; UR4: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 2} ; UR10: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1} ; UR11: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1} -; UR12: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; UR12: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1} ; UR: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"} ; ; UR2: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1022611260, i32 1124872388} @@ -151,10 +152,9 @@ do.end: ; UR4: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} ; UR10: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} -; UR11: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} +; UR11: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} ; UR12: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} ; ; UR4: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3} -; For UR10, llvm.loop.estimated_trip_count is the same for both loops. -; UR11: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; For UR10 and UR11, llvm.loop.estimated_trip_count is the same for both loops. ; UR12: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 11} diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll index 2f8f98d40e86f..7dda642b9a006 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll @@ -74,6 +74,8 @@ for.end: ; 1073706403 / (1073706403 + 1073777245). 
; CHECK: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1073706403, i32 1073777245} -; 10000%4 = 0 +; 10000%4 = 0, so the probability of reaching the remainder loop header is low. +; If it is reached, at least one iteration will execute. The minimum valid +; estimated trip count is 1. ; CHECK: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} -; CHECK: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; CHECK: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll b/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll index f39c6bd4c0d0d..35a691494e6f5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll @@ -48,11 +48,11 @@ define void @foo_i32(i64 %n) { ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF9:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF8:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC1-LABEL: define void @foo_i32( @@ -90,11 
+90,11 @@ define void @foo_i32(i64 %n) { ; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF10:![0-9]+]] +; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF9:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V2-IC4: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC4: [[FOR_BODY]]: -; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-V2-IC4: [[FOR_COND_CLEANUP]]: ; entry: @@ -124,21 +124,21 @@ define void @foo_i8(i64 %n) { ; CHECK-V1-IC1: [[VECTOR_PH]]: ; CHECK-V1-IC1: br label %[[VECTOR_BODY:.*]] ; CHECK-V1-IC1: [[VECTOR_BODY]]: -; CHECK-V1-IC1: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-V1-IC1: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF8:![0-9]+]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-V1-IC1: [[MIDDLE_BLOCK]]: -; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF9]] +; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF8]] ; CHECK-V1-IC1: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V1-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF12:![0-9]+]] +; CHECK-V1-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label 
%[[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]] ; CHECK-V1-IC1: [[VEC_EPILOG_PH]]: ; CHECK-V1-IC1: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V1-IC1: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V1-IC1: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-V1-IC1: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-V1-IC1: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V1-IC1: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF14:![0-9]+]] +; CHECK-V1-IC1: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF13:![0-9]+]] ; CHECK-V1-IC1: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V1-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1: [[FOR_BODY]]: -; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK-V1-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V1-IC1-FORCE-EPI4-LABEL: define void @foo_i8( @@ -150,21 +150,21 @@ define void @foo_i8(i64 %n) { ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VECTOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[MIDDLE_BLOCK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF12]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label 
%[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF11]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF15:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF14:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF9]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF8]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF10]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF9]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC1-LABEL: define void @foo_i8( @@ -176,21 +176,21 @@ define void @foo_i8(i64 %n) { ; CHECK-V2-IC1: [[VECTOR_PH]]: ; CHECK-V2-IC1: br label %[[VECTOR_BODY:.*]] ; CHECK-V2-IC1: [[VECTOR_BODY]]: -; CHECK-V2-IC1: br i1 [[TMP4:%.*]], label %[[MIDDLE_BLOCK:.*]], label 
%[[VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-V2-IC1: br i1 [[TMP4:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-V2-IC1: [[MIDDLE_BLOCK]]: -; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF13:![0-9]+]] +; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF12:![0-9]+]] ; CHECK-V2-IC1: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V2-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF14:![0-9]+]] +; CHECK-V2-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF13:![0-9]+]] ; CHECK-V2-IC1: [[VEC_EPILOG_PH]]: ; CHECK-V2-IC1: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V2-IC1: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V2-IC1: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-V2-IC1: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK-V2-IC1: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-V2-IC1: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF6]] ; CHECK-V2-IC1: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V2-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC1: [[FOR_BODY]]: -; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF7]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF7]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-V2-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC4-LABEL: define void @foo_i8( @@ -202,21 +202,21 @@ define void @foo_i8(i64 %n) { ; CHECK-V2-IC4: [[VECTOR_PH]]: ; CHECK-V2-IC4: br label 
%[[VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF6]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF6]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-V2-IC4: [[MIDDLE_BLOCK]]: ; CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF1]] ; CHECK-V2-IC4: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF15:![0-9]+]] +; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF14:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_PH]]: ; CHECK-V2-IC4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF17:![0-9]+]] +; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF16:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V2-IC4: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC4: [[FOR_BODY]]: -; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF11]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF10]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-V2-IC4: [[FOR_COND_CLEANUP]]: ; entry: @@ -244,13 +244,13 @@ define void 
@foo_i32_no_bw(i64 %n) { ; CHECK-V1-IC1: [[VECTOR_PH]]: ; CHECK-V1-IC1: br label %[[VECTOR_BODY:.*]] ; CHECK-V1-IC1: [[VECTOR_BODY]]: -; CHECK-V1-IC1: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-V1-IC1: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-V1-IC1: [[MIDDLE_BLOCK]]: ; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]] ; CHECK-V1-IC1: [[SCALAR_PH]]: ; CHECK-V1-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1: [[FOR_BODY]]: -; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-V1-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V1-IC1-FORCE-EPI4-LABEL: define void @foo_i32_no_bw( @@ -262,7 +262,7 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VECTOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[MIDDLE_BLOCK]]: ; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_ITER_CHECK]]: @@ -270,13 +270,13 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; 
CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC1-LABEL: define void @foo_i32_no_bw( @@ -286,13 +286,13 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V2-IC1: [[VECTOR_PH]]: ; CHECK-V2-IC1: br label %[[VECTOR_BODY:.*]] ; CHECK-V2-IC1: [[VECTOR_BODY]]: -; CHECK-V2-IC1: br i1 [[TMP2:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-V2-IC1: br i1 [[TMP2:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-V2-IC1: [[MIDDLE_BLOCK]]: ; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]] ; CHECK-V2-IC1: [[SCALAR_PH]]: ; CHECK-V2-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC1: [[FOR_BODY]]: -; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-V2-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC4-LABEL: define void @foo_i32_no_bw( @@ -304,7 +304,7 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V2-IC4: [[VECTOR_PH]]: ; CHECK-V2-IC4: br label %[[VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP8:%.*]], 
label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-V2-IC4: [[MIDDLE_BLOCK]]: ; CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK-V2-IC4: [[VEC_EPILOG_ITER_CHECK]]: @@ -312,13 +312,13 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V2-IC4: [[VEC_EPILOG_PH]]: ; CHECK-V2-IC4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-V2-IC4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V2-IC4: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC4: [[FOR_BODY]]: -; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK-V2-IC4: [[FOR_COND_CLEANUP]]: ; entry: @@ -347,17 +347,16 @@ for.cond.cleanup: ; preds = %for.body ; CHECK-V1-IC1: [[META4]] = !{!"llvm.loop.estimated_trip_count", i32 128} ; CHECK-V1-IC1: [[PROF5]] = !{!"branch_weights", i32 1, i32 7} ; CHECK-V1-IC1: [[PROF6]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V1-IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META3]], [[META2]], [[META8:![0-9]+]]} -; CHECK-V1-IC1: [[META8]] = !{!"llvm.loop.estimated_trip_count", i32 0} -; CHECK-V1-IC1: [[PROF9]] = !{!"branch_weights", i32 1, i32 31} -; CHECK-V1-IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META3]], [[META11:![0-9]+]]} -; 
CHECK-V1-IC1: [[META11]] = !{!"llvm.loop.estimated_trip_count", i32 32} -; CHECK-V1-IC1: [[PROF12]] = !{!"branch_weights", i32 16, i32 16} -; CHECK-V1-IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META8]], [[META3]]} -; CHECK-V1-IC1: [[PROF14]] = !{!"branch_weights", i32 1, i32 15} -; CHECK-V1-IC1: [[LOOP15]] = distinct !{[[LOOP15]], [[META3]], [[META2]], [[META8]]} -; CHECK-V1-IC1: [[LOOP16]] = distinct !{[[LOOP16]], [[META2]], [[META3]]} -; CHECK-V1-IC1: [[LOOP17]] = distinct !{[[LOOP17]], [[META3]], [[META2]]} +; CHECK-V1-IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META3]], [[META2]]} +; CHECK-V1-IC1: [[PROF8]] = !{!"branch_weights", i32 1, i32 31} +; CHECK-V1-IC1: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META3]], [[META10:![0-9]+]]} +; CHECK-V1-IC1: [[META10]] = !{!"llvm.loop.estimated_trip_count", i32 32} +; CHECK-V1-IC1: [[PROF11]] = !{!"branch_weights", i32 16, i32 16} +; CHECK-V1-IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META3]]} +; CHECK-V1-IC1: [[PROF13]] = !{!"branch_weights", i32 1, i32 15} +; CHECK-V1-IC1: [[LOOP14]] = distinct !{[[LOOP14]], [[META3]], [[META2]]} +; CHECK-V1-IC1: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META3]]} +; CHECK-V1-IC1: [[LOOP16]] = distinct !{[[LOOP16]], [[META3]], [[META2]]} ;. 
; CHECK-V1-IC1-FORCE-EPI4: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} ; CHECK-V1-IC1-FORCE-EPI4: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]], [[META4:![0-9]+]]} @@ -366,20 +365,19 @@ for.cond.cleanup: ; preds = %for.body ; CHECK-V1-IC1-FORCE-EPI4: [[META4]] = !{!"llvm.loop.estimated_trip_count", i32 128} ; CHECK-V1-IC1-FORCE-EPI4: [[PROF5]] = !{!"branch_weights", i32 1, i32 7} ; CHECK-V1-IC1-FORCE-EPI4: [[PROF6]] = !{!"branch_weights", i32 4, i32 4} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META8:![0-9]+]], [[META3]]} -; CHECK-V1-IC1-FORCE-EPI4: [[META8]] = !{!"llvm.loop.estimated_trip_count", i32 0} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF9]] = !{!"branch_weights", i32 1, i32 3} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF10]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META3]], [[META2]], [[META8]]} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF12]] = !{!"branch_weights", i32 1, i32 31} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META3]], [[META14:![0-9]+]]} -; CHECK-V1-IC1-FORCE-EPI4: [[META14]] = !{!"llvm.loop.estimated_trip_count", i32 32} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF15]] = !{!"branch_weights", i32 4, i32 28} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP16]] = distinct !{[[LOOP16]], [[META2]], [[META8]], [[META3]]} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP17]] = distinct !{[[LOOP17]], [[META3]], [[META2]], [[META8]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META3]]} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF8]] = !{!"branch_weights", i32 1, i32 3} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META3]], [[META2]]} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF11]] = !{!"branch_weights", i32 1, i32 31} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META3]], [[META13:![0-9]+]]} +; CHECK-V1-IC1-FORCE-EPI4: 
[[META13]] = !{!"llvm.loop.estimated_trip_count", i32 32} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF14]] = !{!"branch_weights", i32 4, i32 28} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META3]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP16]] = distinct !{[[LOOP16]], [[META3]], [[META2]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META3]]} ; CHECK-V1-IC1-FORCE-EPI4: [[LOOP18]] = distinct !{[[LOOP18]], [[META2]], [[META3]]} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP19]] = distinct !{[[LOOP19]], [[META2]], [[META3]]} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP20]] = distinct !{[[LOOP20]], [[META3]], [[META2]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP19]] = distinct !{[[LOOP19]], [[META3]], [[META2]]} ;. ; CHECK-V2-IC1: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} ; CHECK-V2-IC1: [[PROF1]] = !{!"branch_weights", i32 1, i32 255} @@ -389,17 +387,16 @@ for.cond.cleanup: ; preds = %for.body ; CHECK-V2-IC1: [[META5]] = !{!"llvm.loop.estimated_trip_count", i32 256} ; CHECK-V2-IC1: [[PROF6]] = !{!"branch_weights", i32 1, i32 3} ; CHECK-V2-IC1: [[PROF7]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V2-IC1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]], [[META3]], [[META9:![0-9]+]]} -; CHECK-V2-IC1: [[META9]] = !{!"llvm.loop.estimated_trip_count", i32 0} -; CHECK-V2-IC1: [[PROF10]] = !{!"branch_weights", i32 1, i32 63} -; CHECK-V2-IC1: [[LOOP11]] = distinct !{[[LOOP11]], [[META3]], [[META4]], [[META12:![0-9]+]]} -; CHECK-V2-IC1: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 64} -; CHECK-V2-IC1: [[PROF13]] = !{!"branch_weights", i32 1, i32 15} -; CHECK-V2-IC1: [[PROF14]] = !{!"branch_weights", i32 4, i32 12} -; CHECK-V2-IC1: [[LOOP15]] = distinct !{[[LOOP15]], [[META3]], [[META9]], [[META4]]} -; CHECK-V2-IC1: [[LOOP16]] = distinct !{[[LOOP16]], [[META4]], [[META3]], [[META9]]} -; CHECK-V2-IC1: [[LOOP17]] = distinct !{[[LOOP17]], [[META3]], [[META4]]} -; CHECK-V2-IC1: [[LOOP18]] = distinct !{[[LOOP18]], [[META4]], [[META3]]} +; 
CHECK-V2-IC1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]], [[META3]]} +; CHECK-V2-IC1: [[PROF9]] = !{!"branch_weights", i32 1, i32 63} +; CHECK-V2-IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META3]], [[META4]], [[META11:![0-9]+]]} +; CHECK-V2-IC1: [[META11]] = !{!"llvm.loop.estimated_trip_count", i32 64} +; CHECK-V2-IC1: [[PROF12]] = !{!"branch_weights", i32 1, i32 15} +; CHECK-V2-IC1: [[PROF13]] = !{!"branch_weights", i32 4, i32 12} +; CHECK-V2-IC1: [[LOOP14]] = distinct !{[[LOOP14]], [[META3]], [[META4]]} +; CHECK-V2-IC1: [[LOOP15]] = distinct !{[[LOOP15]], [[META4]], [[META3]]} +; CHECK-V2-IC1: [[LOOP16]] = distinct !{[[LOOP16]], [[META3]], [[META4]]} +; CHECK-V2-IC1: [[LOOP17]] = distinct !{[[LOOP17]], [[META4]], [[META3]]} ;. ; CHECK-V2-IC4: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} ; CHECK-V2-IC4: [[PROF1]] = !{!"branch_weights", i32 1, i32 63} @@ -409,18 +406,17 @@ for.cond.cleanup: ; preds = %for.body ; CHECK-V2-IC4: [[META5]] = !{!"llvm.loop.estimated_trip_count", i32 64} ; CHECK-V2-IC4: [[PROF6]] = !{!"branch_weights", i32 1, i32 15} ; CHECK-V2-IC4: [[PROF7]] = !{!"branch_weights", i32 4, i32 12} -; CHECK-V2-IC4: [[LOOP8]] = distinct !{[[LOOP8]], [[META3]], [[META9:![0-9]+]], [[META4]]} -; CHECK-V2-IC4: [[META9]] = !{!"llvm.loop.estimated_trip_count", i32 0} -; CHECK-V2-IC4: [[PROF10]] = !{!"branch_weights", i32 1, i32 3} -; CHECK-V2-IC4: [[PROF11]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V2-IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META4]], [[META3]], [[META9]]} -; CHECK-V2-IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META3]], [[META4]], [[META14:![0-9]+]]} -; CHECK-V2-IC4: [[META14]] = !{!"llvm.loop.estimated_trip_count", i32 16} -; CHECK-V2-IC4: [[PROF15]] = !{!"branch_weights", i32 8, i32 56} -; CHECK-V2-IC4: [[LOOP16]] = distinct !{[[LOOP16]], [[META3]], [[META9]], [[META4]]} -; CHECK-V2-IC4: [[PROF17]] = !{!"branch_weights", i32 1, i32 7} -; CHECK-V2-IC4: [[LOOP18]] = distinct !{[[LOOP18]], [[META4]], [[META3]], [[META9]]} +; 
CHECK-V2-IC4: [[LOOP8]] = distinct !{[[LOOP8]], [[META3]], [[META4]]} +; CHECK-V2-IC4: [[PROF9]] = !{!"branch_weights", i32 1, i32 3} +; CHECK-V2-IC4: [[PROF10]] = !{!"branch_weights", i32 0, i32 0} +; CHECK-V2-IC4: [[LOOP11]] = distinct !{[[LOOP11]], [[META4]], [[META3]]} +; CHECK-V2-IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META3]], [[META4]], [[META13:![0-9]+]]} +; CHECK-V2-IC4: [[META13]] = !{!"llvm.loop.estimated_trip_count", i32 16} +; CHECK-V2-IC4: [[PROF14]] = !{!"branch_weights", i32 8, i32 56} +; CHECK-V2-IC4: [[LOOP15]] = distinct !{[[LOOP15]], [[META3]], [[META4]]} +; CHECK-V2-IC4: [[PROF16]] = !{!"branch_weights", i32 1, i32 7} +; CHECK-V2-IC4: [[LOOP17]] = distinct !{[[LOOP17]], [[META4]], [[META3]]} +; CHECK-V2-IC4: [[LOOP18]] = distinct !{[[LOOP18]], [[META3]], [[META4]]} ; CHECK-V2-IC4: [[LOOP19]] = distinct !{[[LOOP19]], [[META3]], [[META4]]} -; CHECK-V2-IC4: [[LOOP20]] = distinct !{[[LOOP20]], [[META3]], [[META4]]} -; CHECK-V2-IC4: [[LOOP21]] = distinct !{[[LOOP21]], [[META4]], [[META3]]} +; CHECK-V2-IC4: [[LOOP20]] = distinct !{[[LOOP20]], [[META4]], [[META3]]} ;. 
diff --git a/llvm/test/Transforms/LoopVectorize/branch-weights.ll b/llvm/test/Transforms/LoopVectorize/branch-weights.ll index 4445141549069..1ed2e62d79aae 100644 --- a/llvm/test/Transforms/LoopVectorize/branch-weights.ll +++ b/llvm/test/Transforms/LoopVectorize/branch-weights.ll @@ -43,7 +43,7 @@ define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 { ; MAINVF4IC1_EPI4: br label %[[LOOP:.*]] ; MAINVF4IC1_EPI4: [[LOOP]]: ; MAINVF4IC1_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]] -; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF13:![0-9]+]], !llvm.loop [[LOOP14:![0-9]+]] +; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]] ; MAINVF4IC1_EPI4: [[EXIT_LOOPEXIT]]: ; MAINVF4IC1_EPI4: br label %[[EXIT]] ; MAINVF4IC1_EPI4: [[EXIT]]: @@ -81,12 +81,12 @@ define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 { ; MAINVF4IC2_EPI4: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; MAINVF4IC2_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]] -; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF13:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF12:![0-9]+]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; MAINVF4IC2_EPI4: br label %[[LOOP:.*]] ; MAINVF4IC2_EPI4: [[LOOP]]: ; MAINVF4IC2_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]] -; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF14:![0-9]+]], !llvm.loop [[LOOP15:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF13:![0-9]+]], !llvm.loop [[LOOP14:![0-9]+]] ; MAINVF4IC2_EPI4: [[EXIT_LOOPEXIT]]: ; MAINVF4IC2_EPI4: br 
label %[[EXIT]] ; MAINVF4IC2_EPI4: [[EXIT]]: @@ -127,11 +127,10 @@ exit: ; MAINVF4IC1_EPI4: [[PROF8]] = !{!"branch_weights", i32 1, i32 3} ; MAINVF4IC1_EPI4: [[PROF9]] = !{!"branch_weights", i32 4, i32 0} ; MAINVF4IC1_EPI4: [[PROF10]] = !{!"branch_weights", i32 0, i32 0} -; MAINVF4IC1_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]], [[META12:![0-9]+]]} -; MAINVF4IC1_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0} -; MAINVF4IC1_EPI4: [[PROF13]] = !{!"branch_weights", i32 2, i32 1} -; MAINVF4IC1_EPI4: [[LOOP14]] = distinct !{[[LOOP14]], [[META5]], [[META15:![0-9]+]]} -; MAINVF4IC1_EPI4: [[META15]] = !{!"llvm.loop.estimated_trip_count", i32 3} +; MAINVF4IC1_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]]} +; MAINVF4IC1_EPI4: [[PROF12]] = !{!"branch_weights", i32 2, i32 1} +; MAINVF4IC1_EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]], [[META14:![0-9]+]]} +; MAINVF4IC1_EPI4: [[META14]] = !{!"llvm.loop.estimated_trip_count", i32 3} ;. ; MAINVF4IC2_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13} ; MAINVF4IC2_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1} @@ -144,10 +143,9 @@ exit: ; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 1, i32 7} ; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 4, i32 4} ; MAINVF4IC2_EPI4: [[PROF10]] = !{!"branch_weights", i32 0, i32 0} -; MAINVF4IC2_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]], [[META12:![0-9]+]]} -; MAINVF4IC2_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0} -; MAINVF4IC2_EPI4: [[PROF13]] = !{!"branch_weights", i32 1, i32 3} -; MAINVF4IC2_EPI4: [[PROF14]] = !{!"branch_weights", i32 2, i32 1} -; MAINVF4IC2_EPI4: [[LOOP15]] = distinct !{[[LOOP15]], [[META5]], [[META16:![0-9]+]]} -; MAINVF4IC2_EPI4: [[META16]] = !{!"llvm.loop.estimated_trip_count", i32 3} +; MAINVF4IC2_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]]} +; MAINVF4IC2_EPI4: [[PROF12]] = !{!"branch_weights", i32 1, i32 3} +; MAINVF4IC2_EPI4: 
[[PROF13]] = !{!"branch_weights", i32 2, i32 1} +; MAINVF4IC2_EPI4: [[LOOP14]] = distinct !{[[LOOP14]], [[META5]], [[META15:![0-9]+]]} +; MAINVF4IC2_EPI4: [[META15]] = !{!"llvm.loop.estimated_trip_count", i32 3} ;. diff --git a/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll b/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll index b1e456f5b0ad6..fdc1d8c3c8e14 100644 --- a/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll +++ b/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll @@ -15,7 +15,7 @@ exit: ; GOOD-NOT: {{.}} -; BAD-VALUE: Expected second operand to be an integer constant of type i32 or smaller +; BAD-VALUE: Expected second operand to be a positive integer constant of type i32 or smaller ; BAD-VALUE-NEXT: !1 = !{!"llvm.loop.estimated_trip_count", ; TOO-FEW: Expected two operands @@ -36,12 +36,24 @@ exit: ; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i16 5}' >> %t ; RUN: %{RUN} GOOD -; i32 value. +; i32 arbitrary value. ; RUN: cp %s %t ; RUN: chmod u+w %t ; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i32 5}' >> %t ; RUN: %{RUN} GOOD +; i32 boundary value of 1. +; RUN: cp %s %t +; RUN: chmod u+w %t +; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i32 1}' >> %t +; RUN: %{RUN} GOOD + +; i32 boundary value of 0. +; RUN: cp %s %t +; RUN: chmod u+w %t +; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i32 0}' >> %t +; RUN: not %{RUN} BAD-VALUE + ; i64 value. 
; RUN: cp %s %t ; RUN: chmod u+w %t diff --git a/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp b/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp index ce002e9239960..005b93868212b 100644 --- a/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp +++ b/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp @@ -14,6 +14,7 @@ #include "llvm/AsmParser/Parser.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/Support/SourceMgr.h" #include "gtest/gtest.h" @@ -195,3 +196,82 @@ TEST(LoopUtils, nestedLoopSharedLatchEstimatedTripCount) { EXPECT_EQ(getLoopEstimatedTripCount(Outer), std::nullopt); }); } + +// setLoopEstimatedTripCount implements special handling of zero. +TEST(LoopUtils, zeroEstimatedTripCount) { + LLVMContext C; + const char *IR = + "define void @foo(i1 %c) {\n" + "entry:\n" + " br label %loop0\n" + "loop0:\n" + " br i1 %c, label %loop0, label %loop1\n" + "loop1:\n" + " br i1 %c, label %loop1, label %loop2, !llvm.loop !1\n" + "loop2:\n" + " br i1 %c, label %loop2, label %exit, !prof !5, !llvm.loop !2\n" + "exit:\n" + " ret void\n" + "}\n" + "!1 = distinct !{!1, !3}\n" + "!2 = distinct !{!2, !3, !4}\n" + "!3 = !{!\"foo\", i32 5}\n" + "!4 = !{!\"llvm.loop.estimated_trip_count\", i32 10}\n" + "!5 = !{!\"branch_weights\", i32 1, i32 9}\n" + "\n"; + + // With EstimatedLoopInvocationWeight, setLoopEstimatedTripCount sets zeroed + // branch weights and discards any llvm.loop.estimated_trip_count, so + // getLoopEstimatedTripCount returns std::nullopt. Other loop metadata, if + // any, is not touched. 
+ std::unique_ptr M = parseIR(C, IR); + run(*M, "foo", + [&](Function &F, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI) { + assert(LI.end() - LI.begin() == 3 && "Expected three loops"); + for (Loop *L : LI) { + Instruction &LatchBranch = *L->getLoopLatch()->getTerminator(); + std::optional Foo = getOptionalIntLoopAttribute(L, "foo"); + + EXPECT_EQ(setLoopEstimatedTripCount( + L, 0, /*EstimatedLoopInvocationWeight=*/1), + true); + + SmallVector Weights; + EXPECT_EQ(extractBranchWeights(LatchBranch, Weights), true); + EXPECT_EQ(Weights[0], 0u); + EXPECT_EQ(Weights[1], 0u); + EXPECT_EQ(getOptionalIntLoopAttribute(L, "foo"), Foo); + EXPECT_EQ(getOptionalIntLoopAttribute(L, LLVMLoopEstimatedTripCount), + std::nullopt); + EXPECT_EQ(getLoopEstimatedTripCount(L), std::nullopt); + } + }); + + // Without EstimatedLoopInvocationWeight, setLoopEstimatedTripCount sets + // llvm.loop.estimated_trip_count to 1 and does not touch branch weights or + // other loop metadata. getLoopEstimatedTripCount returns 1. + M = parseIR(C, IR); + run(*M, "foo", + [&](Function &F, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI) { + assert(LI.end() - LI.begin() == 3 && "Expected three loops"); + for (Loop *L : LI) { + Instruction &LatchBranch = *L->getLoopLatch()->getTerminator(); + std::optional Foo = getOptionalIntLoopAttribute(L, "foo"); + SmallVector WeightsOld; + bool HasWeights = extractBranchWeights(LatchBranch, WeightsOld); + + EXPECT_EQ(setLoopEstimatedTripCount(L, 0), true); + + SmallVector WeightsNew; + EXPECT_EQ(extractBranchWeights(LatchBranch, WeightsNew), HasWeights); + if (HasWeights) { + EXPECT_EQ(WeightsNew[0], WeightsOld[0]); + EXPECT_EQ(WeightsNew[1], WeightsOld[1]); + } + EXPECT_EQ(getOptionalIntLoopAttribute(L, "foo"), Foo); + EXPECT_EQ(getOptionalIntLoopAttribute(L, LLVMLoopEstimatedTripCount), + 1); + EXPECT_EQ(getLoopEstimatedTripCount(L), 1); + } + }); +} From e2cc9dc40b8d3d4c62256178114ec69558690e04 Mon Sep 17 00:00:00 2001 From: "Joel E. 
Denny" Date: Wed, 12 Nov 2025 19:42:35 -0500 Subject: [PATCH 2/3] Update tests missed because I didn't build with asserts --- llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll | 2 +- llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll index e95121593e4f7..8d424b13dd278 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll @@ -86,5 +86,5 @@ attributes #1 = { nounwind optsize } ;CHECK: !16 = !{!"branch_weights", i32 3001, i32 1001} ;CHECK: !17 = distinct !{!17, !18, !19, {{.*}}} ;CHECK: !18 = !{!"llvm.loop.peeled.count", i32 4} -;CHECK: !19 = !{!"llvm.loop.estimated_trip_count", i32 0} +;CHECK: !19 = !{!"llvm.loop.estimated_trip_count", i32 1} diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll index dec126f289d32..b2eeddb6e25f2 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll @@ -106,5 +106,5 @@ attributes #1 = { nounwind optsize } ;CHECK: !15 = !{!"branch_weights", i32 3001, i32 1001} ;CHECK: !16 = distinct !{!16, !17, !18, {{.*}}} ;CHECK: !17 = !{!"llvm.loop.peeled.count", i32 4} -;CHECK: !18 = !{!"llvm.loop.estimated_trip_count", i32 0} +;CHECK: !18 = !{!"llvm.loop.estimated_trip_count", i32 1} From f42fcfac6761582fe9c19ab186ee083133a8f9d0 Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Thu, 13 Nov 2025 21:11:03 -0500 Subject: [PATCH 3/3] Rethink fix: Don't convert 0 to 1.
--- llvm/docs/LangRef.rst | 26 ++- .../include/llvm/Transforms/Utils/LoopUtils.h | 27 +-- llvm/lib/IR/Verifier.cpp | 7 +- llvm/lib/Transforms/Utils/LoopUtils.cpp | 55 ++---- .../branch-weights-freq/unroll-epilog.ll | 10 +- .../LoopUnroll/peel-loop-pgo-deopt.ll | 2 +- .../Transforms/LoopUnroll/peel-loop-pgo.ll | 2 +- .../LoopUnroll/runtime-loop-branchweight.ll | 6 +- .../LoopVectorize/AArch64/check-prof-info.ll | 164 +++++++++--------- .../LoopVectorize/branch-weights.ll | 26 +-- .../vectorize-zero-estimated-trip-count.ll | 34 ++++ .../llvm.loop.estimated_trip_count.ll | 4 +- .../Transforms/Utils/LoopUtilsTest.cpp | 21 +-- 13 files changed, 197 insertions(+), 187 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/vectorize-zero-estimated-trip-count.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 57344cdbbafa7..d75ea219c3a2a 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -7989,9 +7989,9 @@ this metadata is added (i.e., has been distributed). See ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This metadata records an estimated trip count for the loop. The first operand -is the string ``llvm.loop.estimated_trip_count``. The second operand is a -positive integer constant of type ``i32`` or smaller specifying the estimate. -For example: +is the string ``llvm.loop.estimated_trip_count``. The second operand is an +integer constant of type ``i32`` or smaller specifying the estimate. For +example: .. code-block:: llvm @@ -8033,12 +8033,20 @@ pass should record the new estimates by calling loop, ``llvm::getLoopEstimatedTripCount`` returns its value instead of estimating the trip count from the loop's ``branch_weights`` metadata. -Especially after a transformation like loop peeling, the probability of reaching -a loop's header might be very low. Regardless, in the case that it is reached, -at least one iteration will execute, so an estimated trip count of zero is -invalid. 
Some passes thus rely on non-zero estimated trip counts. -Nevertheless, some passes naively compute it as zero. To avoid misbehavior, -``llvm::setLoopEstimatedTripCount`` converts zero to one. +Zero +"""" + +Some passes set ``llvm.loop.estimated_trip_count`` to 0. For example, after +peeling 10 or more iterations from a loop with an estimated trip count of 10, +``llvm.loop.estimated_trip_count`` becomes 0 on the remaining loop. It +indicates that, each time execution reaches the peeled iterations, execution is +estimated to exit them without reaching the remaining loop's header. + +Even if the probability of reaching a loop's header is low, if it is reached, it +is the start of an iteration. Consequently, some passes historically assume +that ``llvm::getLoopEstimatedTripCount`` always returns a positive count or +``std::nullopt``. Thus, it returns ``std::nullopt`` when +``llvm.loop.estimated_trip_count`` is 0. '``llvm.licm.disable``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 8b77e33b1d12c..0afba21dfaf81 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -322,21 +322,14 @@ LLVM_ABI TransformationMode hasLICMVersioningTransformation(const Loop *L); LLVM_ABI void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V = 0); -/// Set \p StringMD in the loop metadata of \p TheLoop while keeping other -/// values intact: -/// - If \p V is \c std::nullopt, remove \p StringMD, or do nothing if -/// \p StringMD is not present, perhaps because there is no loop metadata. -/// - Else, set \p StringMD to \p V. Either add \p StringMD if absent, update -/// it if the current value is different, or do nothing if the current value -/// is the same. 
-LLVM_ABI void setLoopStringMetadata(Loop *TheLoop, const char *StringMD, - std::optional V); - /// Return either: /// - \c std::nullopt, if the implementation is unable to handle the loop form /// of \p L (e.g., \p L must have a latch block that controls the loop exit). /// - The value of \c llvm.loop.estimated_trip_count from the loop metadata of -/// \p L, if that metadata is present. +/// \p L, if that metadata is present. In the special case that the value is +/// zero, return \c std::nullopt instead as that is historically what callers +/// expect when a loop is estimated to execute no iterations (i.e., its header +/// is not reached). /// - Else, a new estimate of the trip count from the latch branch weights of /// \p L. /// @@ -363,23 +356,15 @@ getLoopEstimatedTripCount(Loop *L, /// to handle the loop form of \p L (e.g., \p L must have a latch block that /// controls the loop exit). Otherwise, return true. /// -/// Some passes rely on estimated trip counts to be non-zero because, once a -/// loop header is reached, at least one iteration will execute. However, some -/// passes naively compute it as zero. To avoid misbehavior, if -/// \p EstimatedTripCount is zero, interpret it as one. -/// /// In addition, if \p EstimatedLoopInvocationWeight: /// - Set the branch weight metadata of \p L to reflect that \p L has an /// estimated \p EstimatedTripCount iterations and has /// \c *EstimatedLoopInvocationWeight exit weight through the loop's latch. -/// - If \p EstimatedTripCount is zero, zero the branch weights, and drop -/// \c llvm.loop.estimated_trip_count entirely. \c getLoopEstimatedTripCount -/// will then return \c std::nullopt. +/// - If \p EstimatedTripCount is zero, zero the branch weights. 
/// /// TODO: Eventually, once all passes have migrated away from setting branch /// weights to indicate estimated trip counts, this function will drop the -/// \p EstimatedLoopInvocationWeight parameter, so its historical handling of -/// \p EstimatedTripCount == 0 should no longer be needed. +/// \p EstimatedLoopInvocationWeight parameter. LLVM_ABI bool setLoopEstimatedTripCount( Loop *L, unsigned EstimatedTripCount, std::optional<unsigned> EstimatedLoopInvocationWeight = std::nullopt); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 8fd7209b814c0..a4f647409094c 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1094,10 +1094,9 @@ void Verifier::visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs) { Check(MD.getNumOperands() == 2, "Expected two operands", &MD); auto *Count = dyn_cast_or_null<ConstantAsMetadata>(MD.getOperand(1)); Check(Count && Count->getType()->isIntegerTy() && - cast<IntegerType>(Count->getType())->getBitWidth() <= 32 && - !cast<ConstantInt>(Count->getValue())->isZero(), - "Expected second operand to be a positive integer constant of type " - "i32 or smaller", + cast<IntegerType>(Count->getType())->getBitWidth() <= 32, + "Expected second operand to be an integer constant of type i32 or " + "smaller", &MD); } diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index a5b9184f95fad..7d70d9b9834a7 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -214,15 +214,9 @@ static MDNode *createStringMetadata(Loop *TheLoop, StringRef Name, unsigned V) { /// different. void llvm::addStringMetadataToLoop(Loop *TheLoop, const char *StringMD, unsigned V) { - setLoopStringMetadata(TheLoop, StringMD, V); -} - -void llvm::setLoopStringMetadata(Loop *TheLoop, const char *StringMD, - std::optional<unsigned> V) { SmallVector<Metadata *, 4> MDs(1); // If the loop already has metadata, retain it.
MDNode *LoopID = TheLoop->getLoopID(); - bool Found = false; if (LoopID) { for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { MDNode *Node = cast<MDNode>(LoopID->getOperand(i)); @@ -230,27 +224,21 @@ void llvm::setLoopStringMetadata(Loop *TheLoop, const char *StringMD, if (Node->getNumOperands() == 2) { MDString *S = dyn_cast<MDString>(Node->getOperand(0)); if (S && S->getString() == StringMD) { - Found = true; - if (V) { - ConstantInt *IntMD = - mdconst::extract_or_null<ConstantInt>(Node->getOperand(1)); - if (IntMD && IntMD->getSExtValue() == *V) - // It is already in place. Do nothing. - return; - } - // We need to update/remove the value, so just skip it here and it - // will be added/removed after copying other existed nodes. + ConstantInt *IntMD = + mdconst::extract_or_null<ConstantInt>(Node->getOperand(1)); + if (IntMD && IntMD->getSExtValue() == V) + // It is already in place. Do nothing. + return; + // We need to update the value, so just skip it here and it will + // be added after copying other existed nodes. continue; } } MDs.push_back(Node); } } - if (!V && !Found) - return; // Add new metadata. - if (V) - MDs.push_back(createStringMetadata(TheLoop, StringMD, *V)); + MDs.push_back(createStringMetadata(TheLoop, StringMD, V)); // Replace current metadata node with new one. LLVMContext &Context = TheLoop->getHeader()->getContext(); MDNode *NewLoopID = MDNode::get(Context, MDs); @@ -924,13 +912,14 @@ llvm::getLoopEstimatedTripCount(Loop *L, } // Return the estimated trip count from metadata unless the metadata is - // missing or has no value. + // missing or has no value. Return std::nullopt if it's zero. if (auto TC = getOptionalIntLoopAttribute(L, LLVMLoopEstimatedTripCount)) { - assert(TC != 0 && "Reached loop header executes at least one iteration"); LLVM_DEBUG(dbgs() << "getLoopEstimatedTripCount: " << LLVMLoopEstimatedTripCount << " metadata has trip " - << "count of " << *TC << " for " << DbgLoop(L) << "\n"); - return TC; + << "count of " << *TC + << (*TC == 0 ? 
" (returning std::nullopt)" : "") + << " for " << DbgLoop(L) << "\n"); + return *TC == 0 ? std::nullopt : std::optional(*TC); } // Estimate the trip count from latch branch weights. @@ -949,13 +938,8 @@ bool llvm::setLoopEstimatedTripCount( if (!LatchBranch) return false; - // Set the metadata. Some users of the estimated trip count rely on the value - // to be non-zero. - if (!EstimatedloopInvocationWeight) { - setLoopStringMetadata(L, LLVMLoopEstimatedTripCount, - EstimatedTripCount == 0 ? 1 : EstimatedTripCount); - return true; - } + // Set the metadata. + addStringMetadataToLoop(L, LLVMLoopEstimatedTripCount, EstimatedTripCount); // At the moment, we currently support changing the estimated trip count in // the latch branch's branch weights only. We could extend this API to @@ -964,13 +948,8 @@ bool llvm::setLoopEstimatedTripCount( // TODO: Eventually, once all passes have migrated away from setting branch // weights to indicate estimated trip counts, we will not set branch weights // here at all. - - // Set the metadata. Some users of the estimated trip count rely on the value - // to be non-zero. - setLoopStringMetadata(L, LLVMLoopEstimatedTripCount, - EstimatedTripCount == 0 - ? std::nullopt - : std::optional(EstimatedTripCount)); + if (!EstimatedloopInvocationWeight) + return true; // Calculate taken and exit weights. unsigned LatchExitWeight = 0; diff --git a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll index 3e166e9997638..96b31d801c2f9 100644 --- a/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll +++ b/llvm/test/Transforms/LoopUnroll/branch-weights-freq/unroll-epilog.ll @@ -111,8 +111,7 @@ do.end: !0 = !{!"branch_weights", i32 1, i32 10} ; ------------------------------------------------------------------------------ -; Check branch weight metadata and estimated trip count metadata. The minimum -; valid estimated trip count is 1 not 0. 
+; Check branch weight metadata and estimated trip count metadata. ; ; UR2: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 195225786, i32 1952257862} ; UR4: ![[#PROF_UR_GUARD]] = !{!"branch_weights", i32 534047398, i32 1613436250} @@ -136,7 +135,7 @@ do.end: ; UR4: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 2} ; UR10: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1} ; UR11: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1} -; UR12: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1} +; UR12: ![[#LOOP_UR_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0} ; UR: ![[#DISABLE]] = !{!"llvm.loop.unroll.disable"} ; ; UR2: ![[#PROF_RM_GUARD]] = !{!"branch_weights", i32 1022611260, i32 1124872388} @@ -152,9 +151,10 @@ do.end: ; UR4: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} ; UR10: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} -; UR11: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_UR_TC:]], ![[#DISABLE:]]} +; UR11: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} ; UR12: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} ; ; UR4: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 3} -; For UR10 and UR11, llvm.loop.estimated_trip_count is the same for both loops. +; For UR10, llvm.loop.estimated_trip_count is the same for both loops. 
+; UR11: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0} ; UR12: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 11} diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll index 8d424b13dd278..e95121593e4f7 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll @@ -86,5 +86,5 @@ attributes #1 = { nounwind optsize } ;CHECK: !16 = !{!"branch_weights", i32 3001, i32 1001} ;CHECK: !17 = distinct !{!17, !18, !19, {{.*}}} ;CHECK: !18 = !{!"llvm.loop.peeled.count", i32 4} -;CHECK: !19 = !{!"llvm.loop.estimated_trip_count", i32 1} +;CHECK: !19 = !{!"llvm.loop.estimated_trip_count", i32 0} diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll index b2eeddb6e25f2..dec126f289d32 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll @@ -106,5 +106,5 @@ attributes #1 = { nounwind optsize } ;CHECK: !15 = !{!"branch_weights", i32 3001, i32 1001} ;CHECK: !16 = distinct !{!16, !17, !18, {{.*}}} ;CHECK: !17 = !{!"llvm.loop.peeled.count", i32 4} -;CHECK: !18 = !{!"llvm.loop.estimated_trip_count", i32 1} +;CHECK: !18 = !{!"llvm.loop.estimated_trip_count", i32 0} diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll index 7dda642b9a006..2f8f98d40e86f 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-branchweight.ll @@ -74,8 +74,6 @@ for.end: ; 1073706403 / (1073706403 + 1073777245). ; CHECK: ![[#PROF_RM_LATCH]] = !{!"branch_weights", i32 1073706403, i32 1073777245} -; 10000%4 = 0, so the probability of reaching the remainder loop header is low. -; If it is reached, at least one iteration will execute. 
The minimum valid -; estimated trip count is 1. +; 10000%4 = 0 ; CHECK: ![[#LOOP_RM_LATCH]] = distinct !{![[#LOOP_RM_LATCH]], ![[#LOOP_RM_TC:]], ![[#DISABLE:]]} -; CHECK: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 1} +; CHECK: ![[#LOOP_RM_TC]] = !{!"llvm.loop.estimated_trip_count", i32 0} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll b/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll index 35a691494e6f5..f39c6bd4c0d0d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll @@ -48,11 +48,11 @@ define void @foo_i32(i64 %n) { ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF8:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF9:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC1-LABEL: define void @foo_i32( @@ -90,11 +90,11 @@ define void @foo_i32(i64 %n) { ; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] 
; CHECK-V2-IC4: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF9:![0-9]+]] +; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF10:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V2-IC4: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC4: [[FOR_BODY]]: -; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-V2-IC4: [[FOR_COND_CLEANUP]]: ; entry: @@ -124,21 +124,21 @@ define void @foo_i8(i64 %n) { ; CHECK-V1-IC1: [[VECTOR_PH]]: ; CHECK-V1-IC1: br label %[[VECTOR_BODY:.*]] ; CHECK-V1-IC1: [[VECTOR_BODY]]: -; CHECK-V1-IC1: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF8:![0-9]+]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-V1-IC1: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-V1-IC1: [[MIDDLE_BLOCK]]: -; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF8]] +; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF9]] ; CHECK-V1-IC1: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V1-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]] +; CHECK-V1-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF12:![0-9]+]] ; CHECK-V1-IC1: [[VEC_EPILOG_PH]]: ; CHECK-V1-IC1: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V1-IC1: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V1-IC1: br i1 [[TMP9:%.*]], label 
%[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-V1-IC1: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-V1-IC1: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V1-IC1: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF13:![0-9]+]] +; CHECK-V1-IC1: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF14:![0-9]+]] ; CHECK-V1-IC1: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V1-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1: [[FOR_BODY]]: -; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-V1-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V1-IC1-FORCE-EPI4-LABEL: define void @foo_i8( @@ -150,21 +150,21 @@ define void @foo_i8(i64 %n) { ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VECTOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[MIDDLE_BLOCK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF11]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF12]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label 
%[[VEC_EPILOG_PH]], !prof [[PROF14:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF15:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF8]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF9]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF9]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF10]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC1-LABEL: define void @foo_i8( @@ -176,21 +176,21 @@ define void @foo_i8(i64 %n) { ; CHECK-V2-IC1: [[VECTOR_PH]]: ; CHECK-V2-IC1: br label %[[VECTOR_BODY:.*]] ; CHECK-V2-IC1: [[VECTOR_BODY]]: -; CHECK-V2-IC1: br i1 [[TMP4:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-V2-IC1: br i1 [[TMP4:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-V2-IC1: 
[[MIDDLE_BLOCK]]: -; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF12:![0-9]+]] +; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF13:![0-9]+]] ; CHECK-V2-IC1: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V2-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF13:![0-9]+]] +; CHECK-V2-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF14:![0-9]+]] ; CHECK-V2-IC1: [[VEC_EPILOG_PH]]: ; CHECK-V2-IC1: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V2-IC1: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V2-IC1: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-V2-IC1: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-V2-IC1: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-V2-IC1: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF6]] ; CHECK-V2-IC1: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V2-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC1: [[FOR_BODY]]: -; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF7]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF7]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-V2-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC4-LABEL: define void @foo_i8( @@ -202,21 +202,21 @@ define void @foo_i8(i64 %n) { ; CHECK-V2-IC4: [[VECTOR_PH]]: ; CHECK-V2-IC4: br label %[[VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF6]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label 
%[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF6]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-V2-IC4: [[MIDDLE_BLOCK]]: ; CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF1]] ; CHECK-V2-IC4: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF14:![0-9]+]] +; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF15:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_PH]]: ; CHECK-V2-IC4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF16:![0-9]+]] +; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF17:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V2-IC4: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC4: [[FOR_BODY]]: -; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF10]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF11]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-V2-IC4: [[FOR_COND_CLEANUP]]: ; entry: @@ -244,13 +244,13 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V1-IC1: [[VECTOR_PH]]: ; CHECK-V1-IC1: br label %[[VECTOR_BODY:.*]] ; CHECK-V1-IC1: [[VECTOR_BODY]]: -; CHECK-V1-IC1: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop 
[[LOOP15:![0-9]+]] +; CHECK-V1-IC1: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-V1-IC1: [[MIDDLE_BLOCK]]: ; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]] ; CHECK-V1-IC1: [[SCALAR_PH]]: ; CHECK-V1-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1: [[FOR_BODY]]: -; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-V1-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V1-IC1-FORCE-EPI4-LABEL: define void @foo_i32_no_bw( @@ -262,7 +262,7 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VECTOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[MIDDLE_BLOCK]]: ; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_ITER_CHECK]]: @@ -270,13 +270,13 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-V1-IC1-FORCE-EPI4: br 
i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC1-LABEL: define void @foo_i32_no_bw( @@ -286,13 +286,13 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V2-IC1: [[VECTOR_PH]]: ; CHECK-V2-IC1: br label %[[VECTOR_BODY:.*]] ; CHECK-V2-IC1: [[VECTOR_BODY]]: -; CHECK-V2-IC1: br i1 [[TMP2:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-V2-IC1: br i1 [[TMP2:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-V2-IC1: [[MIDDLE_BLOCK]]: ; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]] ; CHECK-V2-IC1: [[SCALAR_PH]]: ; CHECK-V2-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC1: [[FOR_BODY]]: -; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-V2-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC4-LABEL: define void @foo_i32_no_bw( @@ -304,7 +304,7 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V2-IC4: [[VECTOR_PH]]: ; CHECK-V2-IC4: br label %[[VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-V2-IC4: [[MIDDLE_BLOCK]]: ; 
CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK-V2-IC4: [[VEC_EPILOG_ITER_CHECK]]: @@ -312,13 +312,13 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V2-IC4: [[VEC_EPILOG_PH]]: ; CHECK-V2-IC4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-V2-IC4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V2-IC4: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC4: [[FOR_BODY]]: -; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK-V2-IC4: [[FOR_COND_CLEANUP]]: ; entry: @@ -347,16 +347,17 @@ for.cond.cleanup: ; preds = %for.body ; CHECK-V1-IC1: [[META4]] = !{!"llvm.loop.estimated_trip_count", i32 128} ; CHECK-V1-IC1: [[PROF5]] = !{!"branch_weights", i32 1, i32 7} ; CHECK-V1-IC1: [[PROF6]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V1-IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META3]], [[META2]]} -; CHECK-V1-IC1: [[PROF8]] = !{!"branch_weights", i32 1, i32 31} -; CHECK-V1-IC1: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META3]], [[META10:![0-9]+]]} -; CHECK-V1-IC1: [[META10]] = !{!"llvm.loop.estimated_trip_count", i32 32} -; CHECK-V1-IC1: [[PROF11]] = !{!"branch_weights", i32 16, i32 16} -; CHECK-V1-IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META3]]} -; CHECK-V1-IC1: [[PROF13]] = !{!"branch_weights", i32 1, i32 15} -; CHECK-V1-IC1: [[LOOP14]] = distinct !{[[LOOP14]], 
[[META3]], [[META2]]} -; CHECK-V1-IC1: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META3]]} -; CHECK-V1-IC1: [[LOOP16]] = distinct !{[[LOOP16]], [[META3]], [[META2]]} +; CHECK-V1-IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META3]], [[META2]], [[META8:![0-9]+]]} +; CHECK-V1-IC1: [[META8]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; CHECK-V1-IC1: [[PROF9]] = !{!"branch_weights", i32 1, i32 31} +; CHECK-V1-IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META3]], [[META11:![0-9]+]]} +; CHECK-V1-IC1: [[META11]] = !{!"llvm.loop.estimated_trip_count", i32 32} +; CHECK-V1-IC1: [[PROF12]] = !{!"branch_weights", i32 16, i32 16} +; CHECK-V1-IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META8]], [[META3]]} +; CHECK-V1-IC1: [[PROF14]] = !{!"branch_weights", i32 1, i32 15} +; CHECK-V1-IC1: [[LOOP15]] = distinct !{[[LOOP15]], [[META3]], [[META2]], [[META8]]} +; CHECK-V1-IC1: [[LOOP16]] = distinct !{[[LOOP16]], [[META2]], [[META3]]} +; CHECK-V1-IC1: [[LOOP17]] = distinct !{[[LOOP17]], [[META3]], [[META2]]} ;. 
; CHECK-V1-IC1-FORCE-EPI4: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} ; CHECK-V1-IC1-FORCE-EPI4: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]], [[META4:![0-9]+]]} @@ -365,19 +366,20 @@ for.cond.cleanup: ; preds = %for.body ; CHECK-V1-IC1-FORCE-EPI4: [[META4]] = !{!"llvm.loop.estimated_trip_count", i32 128} ; CHECK-V1-IC1-FORCE-EPI4: [[PROF5]] = !{!"branch_weights", i32 1, i32 7} ; CHECK-V1-IC1-FORCE-EPI4: [[PROF6]] = !{!"branch_weights", i32 4, i32 4} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META3]]} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF8]] = !{!"branch_weights", i32 1, i32 3} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META3]], [[META2]]} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF11]] = !{!"branch_weights", i32 1, i32 31} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META3]], [[META13:![0-9]+]]} -; CHECK-V1-IC1-FORCE-EPI4: [[META13]] = !{!"llvm.loop.estimated_trip_count", i32 32} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF14]] = !{!"branch_weights", i32 4, i32 28} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META3]]} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP16]] = distinct !{[[LOOP16]], [[META3]], [[META2]]} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META3]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META8:![0-9]+]], [[META3]]} +; CHECK-V1-IC1-FORCE-EPI4: [[META8]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF9]] = !{!"branch_weights", i32 1, i32 3} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF10]] = !{!"branch_weights", i32 0, i32 0} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META3]], [[META2]], [[META8]]} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF12]] = !{!"branch_weights", i32 1, i32 31} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], 
[[META3]], [[META14:![0-9]+]]} +; CHECK-V1-IC1-FORCE-EPI4: [[META14]] = !{!"llvm.loop.estimated_trip_count", i32 32} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF15]] = !{!"branch_weights", i32 4, i32 28} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP16]] = distinct !{[[LOOP16]], [[META2]], [[META8]], [[META3]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP17]] = distinct !{[[LOOP17]], [[META3]], [[META2]], [[META8]]} ; CHECK-V1-IC1-FORCE-EPI4: [[LOOP18]] = distinct !{[[LOOP18]], [[META2]], [[META3]]} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP19]] = distinct !{[[LOOP19]], [[META3]], [[META2]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP19]] = distinct !{[[LOOP19]], [[META2]], [[META3]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP20]] = distinct !{[[LOOP20]], [[META3]], [[META2]]} ;. ; CHECK-V2-IC1: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} ; CHECK-V2-IC1: [[PROF1]] = !{!"branch_weights", i32 1, i32 255} @@ -387,16 +389,17 @@ for.cond.cleanup: ; preds = %for.body ; CHECK-V2-IC1: [[META5]] = !{!"llvm.loop.estimated_trip_count", i32 256} ; CHECK-V2-IC1: [[PROF6]] = !{!"branch_weights", i32 1, i32 3} ; CHECK-V2-IC1: [[PROF7]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V2-IC1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]], [[META3]]} -; CHECK-V2-IC1: [[PROF9]] = !{!"branch_weights", i32 1, i32 63} -; CHECK-V2-IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META3]], [[META4]], [[META11:![0-9]+]]} -; CHECK-V2-IC1: [[META11]] = !{!"llvm.loop.estimated_trip_count", i32 64} -; CHECK-V2-IC1: [[PROF12]] = !{!"branch_weights", i32 1, i32 15} -; CHECK-V2-IC1: [[PROF13]] = !{!"branch_weights", i32 4, i32 12} -; CHECK-V2-IC1: [[LOOP14]] = distinct !{[[LOOP14]], [[META3]], [[META4]]} -; CHECK-V2-IC1: [[LOOP15]] = distinct !{[[LOOP15]], [[META4]], [[META3]]} -; CHECK-V2-IC1: [[LOOP16]] = distinct !{[[LOOP16]], [[META3]], [[META4]]} -; CHECK-V2-IC1: [[LOOP17]] = distinct !{[[LOOP17]], [[META4]], [[META3]]} +; CHECK-V2-IC1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]], [[META3]], [[META9:![0-9]+]]} +; CHECK-V2-IC1: [[META9]] = 
!{!"llvm.loop.estimated_trip_count", i32 0} +; CHECK-V2-IC1: [[PROF10]] = !{!"branch_weights", i32 1, i32 63} +; CHECK-V2-IC1: [[LOOP11]] = distinct !{[[LOOP11]], [[META3]], [[META4]], [[META12:![0-9]+]]} +; CHECK-V2-IC1: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 64} +; CHECK-V2-IC1: [[PROF13]] = !{!"branch_weights", i32 1, i32 15} +; CHECK-V2-IC1: [[PROF14]] = !{!"branch_weights", i32 4, i32 12} +; CHECK-V2-IC1: [[LOOP15]] = distinct !{[[LOOP15]], [[META3]], [[META9]], [[META4]]} +; CHECK-V2-IC1: [[LOOP16]] = distinct !{[[LOOP16]], [[META4]], [[META3]], [[META9]]} +; CHECK-V2-IC1: [[LOOP17]] = distinct !{[[LOOP17]], [[META3]], [[META4]]} +; CHECK-V2-IC1: [[LOOP18]] = distinct !{[[LOOP18]], [[META4]], [[META3]]} ;. ; CHECK-V2-IC4: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} ; CHECK-V2-IC4: [[PROF1]] = !{!"branch_weights", i32 1, i32 63} @@ -406,17 +409,18 @@ for.cond.cleanup: ; preds = %for.body ; CHECK-V2-IC4: [[META5]] = !{!"llvm.loop.estimated_trip_count", i32 64} ; CHECK-V2-IC4: [[PROF6]] = !{!"branch_weights", i32 1, i32 15} ; CHECK-V2-IC4: [[PROF7]] = !{!"branch_weights", i32 4, i32 12} -; CHECK-V2-IC4: [[LOOP8]] = distinct !{[[LOOP8]], [[META3]], [[META4]]} -; CHECK-V2-IC4: [[PROF9]] = !{!"branch_weights", i32 1, i32 3} -; CHECK-V2-IC4: [[PROF10]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V2-IC4: [[LOOP11]] = distinct !{[[LOOP11]], [[META4]], [[META3]]} -; CHECK-V2-IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META3]], [[META4]], [[META13:![0-9]+]]} -; CHECK-V2-IC4: [[META13]] = !{!"llvm.loop.estimated_trip_count", i32 16} -; CHECK-V2-IC4: [[PROF14]] = !{!"branch_weights", i32 8, i32 56} -; CHECK-V2-IC4: [[LOOP15]] = distinct !{[[LOOP15]], [[META3]], [[META4]]} -; CHECK-V2-IC4: [[PROF16]] = !{!"branch_weights", i32 1, i32 7} -; CHECK-V2-IC4: [[LOOP17]] = distinct !{[[LOOP17]], [[META4]], [[META3]]} -; CHECK-V2-IC4: [[LOOP18]] = distinct !{[[LOOP18]], [[META3]], [[META4]]} +; CHECK-V2-IC4: [[LOOP8]] = distinct !{[[LOOP8]], [[META3]], 
[[META9:![0-9]+]], [[META4]]} +; CHECK-V2-IC4: [[META9]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; CHECK-V2-IC4: [[PROF10]] = !{!"branch_weights", i32 1, i32 3} +; CHECK-V2-IC4: [[PROF11]] = !{!"branch_weights", i32 0, i32 0} +; CHECK-V2-IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META4]], [[META3]], [[META9]]} +; CHECK-V2-IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META3]], [[META4]], [[META14:![0-9]+]]} +; CHECK-V2-IC4: [[META14]] = !{!"llvm.loop.estimated_trip_count", i32 16} +; CHECK-V2-IC4: [[PROF15]] = !{!"branch_weights", i32 8, i32 56} +; CHECK-V2-IC4: [[LOOP16]] = distinct !{[[LOOP16]], [[META3]], [[META9]], [[META4]]} +; CHECK-V2-IC4: [[PROF17]] = !{!"branch_weights", i32 1, i32 7} +; CHECK-V2-IC4: [[LOOP18]] = distinct !{[[LOOP18]], [[META4]], [[META3]], [[META9]]} ; CHECK-V2-IC4: [[LOOP19]] = distinct !{[[LOOP19]], [[META3]], [[META4]]} -; CHECK-V2-IC4: [[LOOP20]] = distinct !{[[LOOP20]], [[META4]], [[META3]]} +; CHECK-V2-IC4: [[LOOP20]] = distinct !{[[LOOP20]], [[META3]], [[META4]]} +; CHECK-V2-IC4: [[LOOP21]] = distinct !{[[LOOP21]], [[META4]], [[META3]]} ;. 
diff --git a/llvm/test/Transforms/LoopVectorize/branch-weights.ll b/llvm/test/Transforms/LoopVectorize/branch-weights.ll index 1ed2e62d79aae..4445141549069 100644 --- a/llvm/test/Transforms/LoopVectorize/branch-weights.ll +++ b/llvm/test/Transforms/LoopVectorize/branch-weights.ll @@ -43,7 +43,7 @@ define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 { ; MAINVF4IC1_EPI4: br label %[[LOOP:.*]] ; MAINVF4IC1_EPI4: [[LOOP]]: ; MAINVF4IC1_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]] -; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]] +; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF13:![0-9]+]], !llvm.loop [[LOOP14:![0-9]+]] ; MAINVF4IC1_EPI4: [[EXIT_LOOPEXIT]]: ; MAINVF4IC1_EPI4: br label %[[EXIT]] ; MAINVF4IC1_EPI4: [[EXIT]]: @@ -81,12 +81,12 @@ define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 { ; MAINVF4IC2_EPI4: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; MAINVF4IC2_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]] -; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF12:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF13:![0-9]+]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; MAINVF4IC2_EPI4: br label %[[LOOP:.*]] ; MAINVF4IC2_EPI4: [[LOOP]]: ; MAINVF4IC2_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]] -; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF13:![0-9]+]], !llvm.loop [[LOOP14:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF14:![0-9]+]], !llvm.loop [[LOOP15:![0-9]+]] ; MAINVF4IC2_EPI4: [[EXIT_LOOPEXIT]]: ; MAINVF4IC2_EPI4: br 
label %[[EXIT]] ; MAINVF4IC2_EPI4: [[EXIT]]: @@ -127,10 +127,11 @@ exit: ; MAINVF4IC1_EPI4: [[PROF8]] = !{!"branch_weights", i32 1, i32 3} ; MAINVF4IC1_EPI4: [[PROF9]] = !{!"branch_weights", i32 4, i32 0} ; MAINVF4IC1_EPI4: [[PROF10]] = !{!"branch_weights", i32 0, i32 0} -; MAINVF4IC1_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]]} -; MAINVF4IC1_EPI4: [[PROF12]] = !{!"branch_weights", i32 2, i32 1} -; MAINVF4IC1_EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]], [[META14:![0-9]+]]} -; MAINVF4IC1_EPI4: [[META14]] = !{!"llvm.loop.estimated_trip_count", i32 3} +; MAINVF4IC1_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]], [[META12:![0-9]+]]} +; MAINVF4IC1_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; MAINVF4IC1_EPI4: [[PROF13]] = !{!"branch_weights", i32 2, i32 1} +; MAINVF4IC1_EPI4: [[LOOP14]] = distinct !{[[LOOP14]], [[META5]], [[META15:![0-9]+]]} +; MAINVF4IC1_EPI4: [[META15]] = !{!"llvm.loop.estimated_trip_count", i32 3} ;. ; MAINVF4IC2_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13} ; MAINVF4IC2_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1} @@ -143,9 +144,10 @@ exit: ; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 1, i32 7} ; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 4, i32 4} ; MAINVF4IC2_EPI4: [[PROF10]] = !{!"branch_weights", i32 0, i32 0} -; MAINVF4IC2_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]]} -; MAINVF4IC2_EPI4: [[PROF12]] = !{!"branch_weights", i32 1, i32 3} -; MAINVF4IC2_EPI4: [[PROF13]] = !{!"branch_weights", i32 2, i32 1} -; MAINVF4IC2_EPI4: [[LOOP14]] = distinct !{[[LOOP14]], [[META5]], [[META15:![0-9]+]]} -; MAINVF4IC2_EPI4: [[META15]] = !{!"llvm.loop.estimated_trip_count", i32 3} +; MAINVF4IC2_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]], [[META12:![0-9]+]]} +; MAINVF4IC2_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; MAINVF4IC2_EPI4: [[PROF13]] = !{!"branch_weights", i32 1, i32 3} +; MAINVF4IC2_EPI4: 
[[PROF14]] = !{!"branch_weights", i32 2, i32 1} +; MAINVF4IC2_EPI4: [[LOOP15]] = distinct !{[[LOOP15]], [[META5]], [[META16:![0-9]+]]} +; MAINVF4IC2_EPI4: [[META16]] = !{!"llvm.loop.estimated_trip_count", i32 3} ;. diff --git a/llvm/test/Transforms/LoopVectorize/vectorize-zero-estimated-trip-count.ll b/llvm/test/Transforms/LoopVectorize/vectorize-zero-estimated-trip-count.ll new file mode 100644 index 0000000000000..436324b4bab7a --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/vectorize-zero-estimated-trip-count.ll @@ -0,0 +1,34 @@ +; Check that an estimated trip count of zero does not crash or otherwise break +; LoopVectorize behavior while it tries to create runtime memory checks inside +; an outer loop. + +; RUN: opt -passes=loop-vectorize -S %s | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +; Look for basic signs that vectorization ran and produced memory checks. +; CHECK: @test( +; CHECK: vector.memcheck: +; CHECK: vector.body: +; CHECK: inner: + +define void @test(ptr addrspace(1) %p, i32 %n) { +entry: + br label %outer +outer: + br label %inner +inner: + %i = phi i32 [ %inc, %inner ], [ 0, %outer ] + store i32 0, ptr addrspace(1) %p + %load = load i32, ptr addrspace(1) null + %inc = add i32 %i, 1 + %cmp = icmp slt i32 %i, %n + br i1 %cmp, label %inner, label %outer.latch +outer.latch: + br i1 %cmp, label %outer, label %exit, !llvm.loop !0 +exit: + ret void +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.estimated_trip_count", i32 0} diff --git a/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll b/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll index fdc1d8c3c8e14..e0ec110efae86 100644 --- a/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll +++ b/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll @@ -15,7 +15,7 @@ exit: ; GOOD-NOT: {{.}} -; BAD-VALUE: Expected second operand to be a positive integer constant of type i32 or smaller +; BAD-VALUE: Expected second operand to be an integer constant of type i32 or smaller ; 
BAD-VALUE-NEXT: !1 = !{!"llvm.loop.estimated_trip_count", ; TOO-FEW: Expected two operands @@ -52,7 +52,7 @@ exit: ; RUN: cp %s %t ; RUN: chmod u+w %t ; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i32 0}' >> %t -; RUN: not %{RUN} BAD-VALUE +; RUN: %{RUN} GOOD ; i64 value. ; RUN: cp %s %t diff --git a/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp b/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp index 005b93868212b..9fc9fb5b5a97e 100644 --- a/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp +++ b/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp @@ -197,7 +197,7 @@ TEST(LoopUtils, nestedLoopSharedLatchEstimatedTripCount) { }); } -// setLoopEstimatedTripCount implements special handling of zero. +// {get,set}LoopEstimatedTripCount implement special handling of zero. TEST(LoopUtils, zeroEstimatedTripCount) { LLVMContext C; const char *IR = @@ -220,10 +220,10 @@ TEST(LoopUtils, zeroEstimatedTripCount) { "!5 = !{!\"branch_weights\", i32 1, i32 9}\n" "\n"; - // With EstimatedLoopInvocationWeight, setLoopEstimatedTripCount sets zeroed - // branch weights and discards any llvm.loop.estimated_trip_count, so - // getLoopEstimatedTripCount returns std::nullopt. Other loop metadata, if - // any, is not touched. + // With EstimatedLoopInvocationWeight, setLoopEstimatedTripCount sets branch + // weights and llvm.loop.estimated_trip_count all to 0, so + // getLoopEstimatedTripCount returns std::nullopt. It does not touch other + // loop metadata, if any. 
std::unique_ptr M = parseIR(C, IR); run(*M, "foo", [&](Function &F, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI) { @@ -242,14 +242,15 @@ TEST(LoopUtils, zeroEstimatedTripCount) { EXPECT_EQ(Weights[1], 0u); EXPECT_EQ(getOptionalIntLoopAttribute(L, "foo"), Foo); EXPECT_EQ(getOptionalIntLoopAttribute(L, LLVMLoopEstimatedTripCount), - std::nullopt); + 0); EXPECT_EQ(getLoopEstimatedTripCount(L), std::nullopt); } }); // Without EstimatedLoopInvocationWeight, setLoopEstimatedTripCount sets - // llvm.loop.estimated_trip_count to 1 and does not touch branch weights or - // other loop metadata. getLoopEstimatedTripCount returns 1. + // llvm.loop.estimated_trip_count to 0, so getLoopEstimatedTripCount returns + // std::nullopt. It does not touch branch weights or other loop metadata, if + // any. M = parseIR(C, IR); run(*M, "foo", [&](Function &F, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI) { @@ -270,8 +271,8 @@ TEST(LoopUtils, zeroEstimatedTripCount) { } EXPECT_EQ(getOptionalIntLoopAttribute(L, "foo"), Foo); EXPECT_EQ(getOptionalIntLoopAttribute(L, LLVMLoopEstimatedTripCount), - 1); - EXPECT_EQ(getLoopEstimatedTripCount(L), 1); + 0); + EXPECT_EQ(getLoopEstimatedTripCount(L), std::nullopt); } }); }