diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 43d31b03932cf..d61ea07830123 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -7840,6 +7840,54 @@ If a loop was successfully processed by the loop distribution pass, this metadata is added (i.e., has been distributed). See :ref:`Transformation Metadata <transformation-metadata>` for details. +'``llvm.loop.estimated_trip_count``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This metadata records an estimated trip count for the loop. The first operand +is the string ``llvm.loop.estimated_trip_count``. The second operand is an +integer constant of type ``i32`` or smaller specifying the estimate. For +example: + +.. code-block:: llvm + + !0 = !{!"llvm.loop.estimated_trip_count", i32 8} + +Purpose +""""""" + +A loop's estimated trip count is an estimate of the average number of loop +iterations (specifically, the number of times the loop's header executes) each +time execution reaches the loop. It is usually only an estimate based on, for +example, profile data. The actual number of iterations might vary widely. + +The estimated trip count serves as a parameter for various loop transformations +and typically helps estimate transformation cost. For example, it can help +determine how many iterations to peel or how aggressively to unroll. + +Initialization and Maintenance +"""""""""""""""""""""""""""""" + +Passes should always interact with estimated trip counts via +``llvm::getLoopEstimatedTripCount`` and ``llvm::setLoopEstimatedTripCount``. + +When the ``llvm.loop.estimated_trip_count`` metadata is not present on a loop, +``llvm::getLoopEstimatedTripCount`` estimates the loop's trip count from the +loop's ``branch_weights`` metadata under the assumption that the latter still +accurately encodes the program's original profile data. 
However, as passes +transform existing loops and create new loops, they must be free to update and +create ``branch_weights`` metadata in a way that maintains accurate block +frequencies. Trip counts estimated from this new ``branch_weights`` metadata +are not necessarily useful to the passes that consume estimated trip counts. + +For this reason, when a pass transforms or creates loops, the pass should +separately estimate new trip counts based on the estimated trip counts that +``llvm::getLoopEstimatedTripCount`` returns at the start of the pass, and the +pass should record the new estimates by calling +``llvm::setLoopEstimatedTripCount``, which creates or updates +``llvm.loop.estimated_trip_count`` metadata. Once this metadata is present on a +loop, ``llvm::getLoopEstimatedTripCount`` returns its value instead of +estimating the trip count from the loop's ``branch_weights`` metadata. + '``llvm.licm.disable``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 33203ad85aa32..4ba31b5545cb2 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -919,8 +919,8 @@ class MDOperand { // Check if MDOperand is of type MDString and equals `Str`. bool equalsStr(StringRef Str) const { - return isa(this->get()) && - cast(this->get())->getString() == Str; + return isa_and_nonnull(get()) && + cast(get())->getString() == Str; } ~MDOperand() { untrack(); } diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h index 61434735506f9..ce9f4c2de2cae 100644 --- a/llvm/include/llvm/IR/ProfDataUtils.h +++ b/llvm/include/llvm/IR/ProfDataUtils.h @@ -30,6 +30,10 @@ struct MDProfLabels { LLVM_ABI static const char *UnknownBranchWeightsMarker; }; +/// Profile-based loop metadata that should be accessed only by using +/// \c llvm::getLoopEstimatedTripCount and \c llvm::setLoopEstimatedTripCount. 
+LLVM_ABI extern const char *LLVMLoopEstimatedTripCount; + /// Checks if an Instruction has MD_prof Metadata LLVM_ABI bool hasProfMD(const Instruction &I); diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 96e3d3d47f2d0..5bef67eb021ca 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -323,22 +323,48 @@ LLVM_ABI TransformationMode hasLICMVersioningTransformation(const Loop *L); LLVM_ABI void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V = 0); -/// Returns a loop's estimated trip count based on branch weight metadata. -/// In addition if \p EstimatedLoopInvocationWeight is not null it is -/// initialized with weight of loop's latch leading to the exit. -/// Returns a valid positive trip count, saturated at UINT_MAX, or std::nullopt -/// when a meaningful estimate cannot be made. +/// Return either: +/// - \c std::nullopt, if the implementation is unable to handle the loop form +/// of \p L (e.g., \p L must have a latch block that controls the loop exit). +/// - The value of \c llvm.loop.estimated_trip_count from the loop metadata of +/// \p L, if that metadata is present. +/// - Else, a new estimate of the trip count from the latch branch weights of +/// \p L. +/// +/// An estimated trip count is always a valid positive trip count, saturated at +/// \c UINT_MAX. +/// +/// In addition, if \p EstimatedLoopInvocationWeight, then either: +/// - Set \c *EstimatedLoopInvocationWeight to the weight of the latch's branch +/// to the loop exit. +/// - Do not set it, and return \c std::nullopt, if the current implementation +/// cannot compute that weight (e.g., if \p L does not have a latch block that +/// controls the loop exit) or the weight is zero (because zero cannot be +/// used to compute new branch weights that reflect the estimated trip count). 
+/// +/// TODO: Eventually, once all passes have migrated away from setting branch +/// weights to indicate estimated trip counts, this function will drop the +/// \p EstimatedLoopInvocationWeight parameter. LLVM_ABI std::optional getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight = nullptr); -/// Set a loop's branch weight metadata to reflect that loop has \p -/// EstimatedTripCount iterations and \p EstimatedLoopInvocationWeight exits -/// through latch. Returns true if metadata is successfully updated, false -/// otherwise. Note that loop must have a latch block which controls loop exit -/// in order to succeed. -LLVM_ABI bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, - unsigned EstimatedLoopInvocationWeight); +/// Set \c llvm.loop.estimated_trip_count with the value \p EstimatedTripCount +/// in the loop metadata of \p L. Return false if the implementation is unable +/// to handle the loop form of \p L (e.g., \p L must have a latch block that +/// controls the loop exit). Otherwise, return true. +/// +/// In addition, if \p EstimatedLoopInvocationWeight, set the branch weight +/// metadata of \p L to reflect that \p L has an estimated +/// \p EstimatedTripCount iterations and has \c *EstimatedLoopInvocationWeight +/// exit weight through the loop's latch. +/// +/// TODO: Eventually, once all passes have migrated away from setting branch +/// weights to indicate estimated trip counts, this function will drop the +/// \p EstimatedLoopInvocationWeight parameter. +LLVM_ABI bool setLoopEstimatedTripCount( + Loop *L, unsigned EstimatedTripCount, + std::optional EstimatedLoopInvocationWeight = std::nullopt); /// Check inner loop (L) backedge count is known to be invariant on all /// iterations of its outer loop. 
If the loop has no parent, this is trivially diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index d0b91d9356613..5827292cee39b 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -95,6 +95,7 @@ const char *MDProfLabels::FunctionEntryCount = "function_entry_count"; const char *MDProfLabels::SyntheticFunctionEntryCount = "synthetic_function_entry_count"; const char *MDProfLabels::UnknownBranchWeightsMarker = "unknown"; +const char *LLVMLoopEstimatedTripCount = "llvm.loop.estimated_trip_count"; bool hasProfMD(const Instruction &I) { return I.hasMetadata(LLVMContext::MD_prof); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 7d362ce308812..c06b60fd2d9a9 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1076,6 +1076,18 @@ void Verifier::visitMDNode(const MDNode &MD, AreDebugLocsAllowed AllowLocs) { } } + // Check llvm.loop.estimated_trip_count. + if (MD.getNumOperands() > 0 && + MD.getOperand(0).equalsStr(LLVMLoopEstimatedTripCount)) { + Check(MD.getNumOperands() == 2, "Expected two operands", &MD); + auto *Count = dyn_cast_or_null(MD.getOperand(1)); + Check(Count && Count->getType()->isIntegerTy() && + cast(Count->getType())->getBitWidth() <= 32, + "Expected second operand to be an integer constant of type i32 or " + "smaller", + &MD); + } + // Check these last, so we diagnose problems in operands first. Check(!MD.isTemporary(), "Expected no forward declarations!", &MD); Check(MD.isResolved(), "All nodes should be resolved!", &MD); diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index b172ef6ba0803..7b1a7ce6995f8 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -804,26 +804,51 @@ static BranchInst *getExpectedExitLoopLatchBranch(Loop *L) { return LatchBR; } -/// Return the estimated trip count for any exiting branch which dominates -/// the loop latch. 
-static std::optional getEstimatedTripCount(BranchInst *ExitingBranch, - Loop *L, - uint64_t &OrigExitWeight) { +struct DbgLoop { + const Loop *L; + explicit DbgLoop(const Loop *L) : L(L) {} +}; + +#ifndef NDEBUG +static inline raw_ostream &operator<<(raw_ostream &OS, DbgLoop D) { + OS << "function "; + D.L->getHeader()->getParent()->printAsOperand(OS, /*PrintType=*/false); + return OS << " " << *D.L; +} +#endif // NDEBUG + +static std::optional estimateLoopTripCount(Loop *L) { + // Currently we take the estimate exit count only from the loop latch, + // ignoring other exiting blocks. This can overestimate the trip count + // if we exit through another exit, but can never underestimate it. + // TODO: incorporate information from other exits + BranchInst *ExitingBranch = getExpectedExitLoopLatchBranch(L); + if (!ExitingBranch) { + LLVM_DEBUG(dbgs() << "estimateLoopTripCount: Failed to find exiting " + << "latch branch of required form in " << DbgLoop(L) + << "\n"); + return std::nullopt; + } + // To estimate the number of times the loop body was executed, we want to // know the number of times the backedge was taken, vs. the number of times // we exited the loop. uint64_t LoopWeight, ExitWeight; - if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight)) + if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight)) { + LLVM_DEBUG(dbgs() << "estimateLoopTripCount: Failed to extract branch " + << "weights for " << DbgLoop(L) << "\n"); return std::nullopt; + } if (L->contains(ExitingBranch->getSuccessor(1))) std::swap(LoopWeight, ExitWeight); - if (!ExitWeight) + if (!ExitWeight) { // Don't have a way to return predicated infinite + LLVM_DEBUG(dbgs() << "estimateLoopTripCount: Failed because of zero exit " + << "probability for " << DbgLoop(L) << "\n"); return std::nullopt; - - OrigExitWeight = ExitWeight; + } // Estimated exit count is a ratio of the loop weight by the weight of the // edge exiting the loop, rounded to nearest. 
@@ -834,43 +859,102 @@ static std::optional getEstimatedTripCount(BranchInst *ExitingBranch, return std::numeric_limits::max(); // Estimated trip count is one plus estimated exit count. - return ExitCount + 1; + uint64_t TC = ExitCount + 1; + LLVM_DEBUG(dbgs() << "estimateLoopTripCount: Estimated trip count of " << TC + << " for " << DbgLoop(L) << "\n"); + return TC; } std::optional llvm::getLoopEstimatedTripCount(Loop *L, unsigned *EstimatedLoopInvocationWeight) { - // Currently we take the estimate exit count only from the loop latch, - // ignoring other exiting blocks. This can overestimate the trip count - // if we exit through another exit, but can never underestimate it. - // TODO: incorporate information from other exits - if (BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L)) { - uint64_t ExitWeight; - if (std::optional EstTripCount = - getEstimatedTripCount(LatchBranch, L, ExitWeight)) { - if (EstimatedLoopInvocationWeight) - *EstimatedLoopInvocationWeight = ExitWeight; - return *EstTripCount; - } + // If EstimatedLoopInvocationWeight, we do not support this loop if + // getExpectedExitLoopLatchBranch returns nullptr. + // + // FIXME: Also, this is a stop-gap solution for nested loops. It avoids + // mistaking LLVMLoopEstimatedTripCount metadata to be for an outer loop when + // it was created for an inner loop. The problem is that loop metadata is + // attached to the branch instruction in the loop latch block, but that can be + // shared by the loops. A solution is to attach loop metadata to loop headers + // instead, but that would be a large change to LLVM. + // + // Until that happens, we work around the problem as follows. + // getExpectedExitLoopLatchBranch (which also guards + // setLoopEstimatedTripCount) returns nullptr for a loop unless the loop has + // one latch and that latch has exactly two successors one of which is an exit + // from the loop. 
If the latch is shared by nested loops, then that condition + // might hold for the inner loop but cannot hold for the outer loop: + // - Because the latch is shared, it must have at least two successors: the + // inner loop header and the outer loop header, which is also an exit for + // the inner loop. That satisfies the condition for the inner loop. + // - To satisfy the condition for the outer loop, the latch must have a third + // successor that is an exit for the outer loop. But that violates the + // condition for both loops. + BranchInst *ExitingBranch = getExpectedExitLoopLatchBranch(L); + if (!ExitingBranch) + return std::nullopt; + + // If requested, either compute *EstimatedLoopInvocationWeight or return + // nullopt if it cannot. + // + // TODO: Eventually, once all passes have migrated away from setting branch + // weights to indicate estimated trip counts, this function will drop the + // EstimatedLoopInvocationWeight parameter. + if (EstimatedLoopInvocationWeight) { + uint64_t LoopWeight = 0, ExitWeight = 0; // Inits expected to be unused. + if (!extractBranchWeights(*ExitingBranch, LoopWeight, ExitWeight)) + return std::nullopt; + if (L->contains(ExitingBranch->getSuccessor(1))) + std::swap(LoopWeight, ExitWeight); + if (!ExitWeight) + return std::nullopt; + *EstimatedLoopInvocationWeight = ExitWeight; } - return std::nullopt; + + // Return the estimated trip count from metadata unless the metadata is + // missing or has no value. + if (auto TC = getOptionalIntLoopAttribute(L, LLVMLoopEstimatedTripCount)) { + LLVM_DEBUG(dbgs() << "getLoopEstimatedTripCount: " + << LLVMLoopEstimatedTripCount << " metadata has trip " + << "count of " << *TC << " for " << DbgLoop(L) << "\n"); + return TC; + } + + // Estimate the trip count from latch branch weights. 
+ return estimateLoopTripCount(L); +} -bool llvm::setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount, - unsigned EstimatedloopInvocationWeight) { - // At the moment, we currently support changing the estimate trip count of - // the latch branch only. We could extend this API to manipulate estimated - // trip counts for any exit. +bool llvm::setLoopEstimatedTripCount( + Loop *L, unsigned EstimatedTripCount, + std::optional EstimatedloopInvocationWeight) { + // If EstimatedLoopInvocationWeight, we do not support this loop if + // getExpectedExitLoopLatchBranch returns nullptr. + // + // FIXME: See comments in getLoopEstimatedTripCount for why this is required + // here regardless of EstimatedLoopInvocationWeight. BranchInst *LatchBranch = getExpectedExitLoopLatchBranch(L); if (!LatchBranch) return false; + // Set the metadata. + addStringMetadataToLoop(L, LLVMLoopEstimatedTripCount, EstimatedTripCount); + + // At the moment, we support changing the estimated trip count in + // the latch branch's branch weights only. We could extend this API to + // manipulate estimated trip counts for any exit. + // + // TODO: Eventually, once all passes have migrated away from setting branch + // weights to indicate estimated trip counts, we will not set branch weights + // here at all. + if (!EstimatedloopInvocationWeight) + return true; + // Calculate taken and exit weights. 
unsigned LatchExitWeight = 0; unsigned BackedgeTakenWeight = 0; - if (EstimatedTripCount > 0) { - LatchExitWeight = EstimatedloopInvocationWeight; + if (EstimatedTripCount != 0) { + LatchExitWeight = *EstimatedloopInvocationWeight; BackedgeTakenWeight = (EstimatedTripCount - 1) * LatchExitWeight; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll b/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll index fc459a376710d..f39c6bd4c0d0d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/check-prof-info.ll @@ -22,11 +22,11 @@ define void @foo_i32(i64 %n) { ; CHECK-V1-IC1: [[VECTOR_BODY]]: ; CHECK-V1-IC1: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF0]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK-V1-IC1: [[MIDDLE_BLOCK]]: -; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF4:![0-9]+]] +; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF5:![0-9]+]] ; CHECK-V1-IC1: [[SCALAR_PH]]: ; CHECK-V1-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1: [[FOR_BODY]]: -; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF5:![0-9]+]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-V1-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V1-IC1-FORCE-EPI4-LABEL: define void @foo_i32( @@ -40,19 +40,19 @@ define void @foo_i32(i64 %n) { ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_BODY]]: ; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF0]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[MIDDLE_BLOCK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof 
[[PROF4:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF5:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF5:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF6:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF9:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF8:![0-9]+]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC1-LABEL: define void @foo_i32( @@ -64,11 +64,11 @@ define void @foo_i32(i64 %n) { ; CHECK-V2-IC1: [[VECTOR_BODY]]: ; CHECK-V2-IC1: br i1 [[TMP2:%.*]], label 
%[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK-V2-IC1: [[MIDDLE_BLOCK]]: -; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF5:![0-9]+]] +; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]], !prof [[PROF6:![0-9]+]] ; CHECK-V2-IC1: [[SCALAR_PH]]: ; CHECK-V2-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC1: [[FOR_BODY]]: -; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-V2-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC4-LABEL: define void @foo_i32( @@ -82,19 +82,19 @@ define void @foo_i32(i64 %n) { ; CHECK-V2-IC4: [[VECTOR_BODY]]: ; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK-V2-IC4: [[MIDDLE_BLOCK]]: -; CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF5:![0-9]+]] +; CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF6:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF6:![0-9]+]] +; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_PH]]: ; CHECK-V2-IC4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP11:%.*]], 
label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF8:![0-9]+]] +; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF10:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V2-IC4: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC4: [[FOR_BODY]]: -; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-V2-IC4: [[FOR_COND_CLEANUP]]: ; entry: @@ -124,21 +124,21 @@ define void @foo_i8(i64 %n) { ; CHECK-V1-IC1: [[VECTOR_PH]]: ; CHECK-V1-IC1: br label %[[VECTOR_BODY:.*]] ; CHECK-V1-IC1: [[VECTOR_BODY]]: -; CHECK-V1-IC1: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-V1-IC1: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-V1-IC1: [[MIDDLE_BLOCK]]: -; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7]] +; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF9]] ; CHECK-V1-IC1: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V1-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF9:![0-9]+]] +; CHECK-V1-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF12:![0-9]+]] ; CHECK-V1-IC1: [[VEC_EPILOG_PH]]: ; CHECK-V1-IC1: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; 
CHECK-V1-IC1: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V1-IC1: br i1 [[TMP15:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-V1-IC1: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-V1-IC1: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V1-IC1: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF11:![0-9]+]] +; CHECK-V1-IC1: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF14:![0-9]+]] ; CHECK-V1-IC1: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V1-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1: [[FOR_BODY]]: -; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF5]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-V1-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V1-IC1-FORCE-EPI4-LABEL: define void @foo_i8( @@ -150,21 +150,21 @@ define void @foo_i8(i64 %n) { ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VECTOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[MIDDLE_BLOCK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF10]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF12]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_ITER_CHECK]]: -; 
CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF12:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF15:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF9]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF8]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF10]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC1-LABEL: define void @foo_i8( @@ -176,21 +176,21 @@ define void @foo_i8(i64 %n) { ; CHECK-V2-IC1: [[VECTOR_PH]]: ; CHECK-V2-IC1: br label %[[VECTOR_BODY:.*]] ; CHECK-V2-IC1: [[VECTOR_BODY]]: -; CHECK-V2-IC1: br i1 [[TMP4:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF8:![0-9]+]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-V2-IC1: br i1 [[TMP4:%.*]], label %[[MIDDLE_BLOCK:.*]], label 
%[[VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK-V2-IC1: [[MIDDLE_BLOCK]]: -; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF10:![0-9]+]] +; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF13:![0-9]+]] ; CHECK-V2-IC1: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V2-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]] +; CHECK-V2-IC1: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF14:![0-9]+]] ; CHECK-V2-IC1: [[VEC_EPILOG_PH]]: ; CHECK-V2-IC1: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V2-IC1: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V2-IC1: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-V2-IC1: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK-V2-IC1: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V2-IC1: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF5]] +; CHECK-V2-IC1: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF6]] ; CHECK-V2-IC1: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V2-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC1: [[FOR_BODY]]: -; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF6]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF7]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-V2-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC4-LABEL: define void @foo_i8( @@ -202,21 +202,21 @@ define void @foo_i8(i64 %n) { ; CHECK-V2-IC4: [[VECTOR_PH]]: ; CHECK-V2-IC4: br label %[[VECTOR_BODY:.*]] ; 
CHECK-V2-IC4: [[VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF5]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF6]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-V2-IC4: [[MIDDLE_BLOCK]]: ; CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF1]] ; CHECK-V2-IC4: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF12:![0-9]+]] +; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF15:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_PH]]: ; CHECK-V2-IC4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF14:![0-9]+]] +; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF17:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V2-IC4: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC4: [[FOR_BODY]]: -; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF9]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !prof [[PROF11]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-V2-IC4: [[FOR_COND_CLEANUP]]: ; entry: @@ -244,13 +244,13 @@ define void @foo_i32_no_bw(i64 %n) { ; 
CHECK-V1-IC1: [[VECTOR_PH]]: ; CHECK-V1-IC1: br label %[[VECTOR_BODY:.*]] ; CHECK-V1-IC1: [[VECTOR_BODY]]: -; CHECK-V1-IC1: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-V1-IC1: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK-V1-IC1: [[MIDDLE_BLOCK]]: ; CHECK-V1-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]] ; CHECK-V1-IC1: [[SCALAR_PH]]: ; CHECK-V1-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1: [[FOR_BODY]]: -; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-V1-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-V1-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V1-IC1-FORCE-EPI4-LABEL: define void @foo_i32_no_bw( @@ -262,21 +262,21 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VECTOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP6:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[MIDDLE_BLOCK]]: ; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF5]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF6]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; 
CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[TMP9:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-V1-IC1-FORCE-EPI4: br i1 [[CMP_N7:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-V1-IC1-FORCE-EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V1-IC1-FORCE-EPI4: br label %[[FOR_BODY:.*]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_BODY]]: -; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-V1-IC1-FORCE-EPI4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK-V1-IC1-FORCE-EPI4: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC1-LABEL: define void @foo_i32_no_bw( @@ -286,13 +286,13 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V2-IC1: [[VECTOR_PH]]: ; CHECK-V2-IC1: br label %[[VECTOR_BODY:.*]] ; CHECK-V2-IC1: [[VECTOR_BODY]]: -; CHECK-V2-IC1: br i1 [[TMP2:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-V2-IC1: br i1 [[TMP2:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK-V2-IC1: [[MIDDLE_BLOCK]]: ; CHECK-V2-IC1: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]] ; CHECK-V2-IC1: [[SCALAR_PH]]: ; CHECK-V2-IC1: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC1: [[FOR_BODY]]: -; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-V2-IC1: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK-V2-IC1: [[FOR_COND_CLEANUP]]: ; ; CHECK-V2-IC4-LABEL: define void @foo_i32_no_bw( 
@@ -304,21 +304,21 @@ define void @foo_i32_no_bw(i64 %n) { ; CHECK-V2-IC4: [[VECTOR_PH]]: ; CHECK-V2-IC4: br label %[[VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP8:%.*]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK-V2-IC4: [[MIDDLE_BLOCK]]: ; CHECK-V2-IC4: br i1 [[CMP_N:%.*]], label %[[FOR_COND_CLEANUP:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK-V2-IC4: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF6]] +; CHECK-V2-IC4: br i1 [[MIN_EPILOG_ITERS_CHECK:%.*]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF7]] ; CHECK-V2-IC4: [[VEC_EPILOG_PH]]: ; CHECK-V2-IC4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK-V2-IC4: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-V2-IC4: br i1 [[TMP11:%.*]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK-V2-IC4: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-V2-IC4: br i1 [[CMP_N10:%.*]], label %[[FOR_COND_CLEANUP]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-V2-IC4: [[VEC_EPILOG_SCALAR_PH]]: ; CHECK-V2-IC4: br label %[[FOR_BODY:.*]] ; CHECK-V2-IC4: [[FOR_BODY]]: -; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-V2-IC4: br i1 [[EXITCOND:%.*]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK-V2-IC4: [[FOR_COND_CLEANUP]]: ; entry: @@ -341,74 +341,86 @@ for.cond.cleanup: ; preds = %for.body !0 = !{!"branch_weights", i32 1, i32 1023} ;. 
; CHECK-V1-IC1: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} -; CHECK-V1-IC1: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]]} +; CHECK-V1-IC1: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]], [[META4:![0-9]+]]} ; CHECK-V1-IC1: [[META2]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK-V1-IC1: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-V1-IC1: [[PROF4]] = !{!"branch_weights", i32 1, i32 7} -; CHECK-V1-IC1: [[PROF5]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V1-IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META3]], [[META2]]} -; CHECK-V1-IC1: [[PROF7]] = !{!"branch_weights", i32 1, i32 31} -; CHECK-V1-IC1: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META3]]} -; CHECK-V1-IC1: [[PROF9]] = !{!"branch_weights", i32 16, i32 16} -; CHECK-V1-IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META3]]} -; CHECK-V1-IC1: [[PROF11]] = !{!"branch_weights", i32 1, i32 15} -; CHECK-V1-IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META3]], [[META2]]} -; CHECK-V1-IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META3]]} -; CHECK-V1-IC1: [[LOOP14]] = distinct !{[[LOOP14]], [[META3]], [[META2]]} +; CHECK-V1-IC1: [[META4]] = !{!"llvm.loop.estimated_trip_count", i32 128} +; CHECK-V1-IC1: [[PROF5]] = !{!"branch_weights", i32 1, i32 7} +; CHECK-V1-IC1: [[PROF6]] = !{!"branch_weights", i32 0, i32 0} +; CHECK-V1-IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META3]], [[META2]], [[META8:![0-9]+]]} +; CHECK-V1-IC1: [[META8]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; CHECK-V1-IC1: [[PROF9]] = !{!"branch_weights", i32 1, i32 31} +; CHECK-V1-IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META3]], [[META11:![0-9]+]]} +; CHECK-V1-IC1: [[META11]] = !{!"llvm.loop.estimated_trip_count", i32 32} +; CHECK-V1-IC1: [[PROF12]] = !{!"branch_weights", i32 16, i32 16} +; CHECK-V1-IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META8]], [[META3]]} +; CHECK-V1-IC1: [[PROF14]] = !{!"branch_weights", i32 1, i32 15} +; 
CHECK-V1-IC1: [[LOOP15]] = distinct !{[[LOOP15]], [[META3]], [[META2]], [[META8]]} +; CHECK-V1-IC1: [[LOOP16]] = distinct !{[[LOOP16]], [[META2]], [[META3]]} +; CHECK-V1-IC1: [[LOOP17]] = distinct !{[[LOOP17]], [[META3]], [[META2]]} ;. ; CHECK-V1-IC1-FORCE-EPI4: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]], [[META4:![0-9]+]]} ; CHECK-V1-IC1-FORCE-EPI4: [[META2]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK-V1-IC1-FORCE-EPI4: [[META3]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF4]] = !{!"branch_weights", i32 1, i32 7} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF5]] = !{!"branch_weights", i32 4, i32 4} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META3]]} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 3} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF8]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP9]] = distinct !{[[LOOP9]], [[META3]], [[META2]]} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF10]] = !{!"branch_weights", i32 1, i32 31} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META3]]} -; CHECK-V1-IC1-FORCE-EPI4: [[PROF12]] = !{!"branch_weights", i32 4, i32 28} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META3]]} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP14]] = distinct !{[[LOOP14]], [[META3]], [[META2]]} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META3]]} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP16]] = distinct !{[[LOOP16]], [[META2]], [[META3]]} -; CHECK-V1-IC1-FORCE-EPI4: [[LOOP17]] = distinct !{[[LOOP17]], [[META3]], [[META2]]} +; CHECK-V1-IC1-FORCE-EPI4: [[META4]] = !{!"llvm.loop.estimated_trip_count", i32 128} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF5]] = !{!"branch_weights", i32 1, i32 7} +; 
CHECK-V1-IC1-FORCE-EPI4: [[PROF6]] = !{!"branch_weights", i32 4, i32 4} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META8:![0-9]+]], [[META3]]} +; CHECK-V1-IC1-FORCE-EPI4: [[META8]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF9]] = !{!"branch_weights", i32 1, i32 3} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF10]] = !{!"branch_weights", i32 0, i32 0} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META3]], [[META2]], [[META8]]} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF12]] = !{!"branch_weights", i32 1, i32 31} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META3]], [[META14:![0-9]+]]} +; CHECK-V1-IC1-FORCE-EPI4: [[META14]] = !{!"llvm.loop.estimated_trip_count", i32 32} +; CHECK-V1-IC1-FORCE-EPI4: [[PROF15]] = !{!"branch_weights", i32 4, i32 28} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP16]] = distinct !{[[LOOP16]], [[META2]], [[META8]], [[META3]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP17]] = distinct !{[[LOOP17]], [[META3]], [[META2]], [[META8]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP18]] = distinct !{[[LOOP18]], [[META2]], [[META3]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP19]] = distinct !{[[LOOP19]], [[META2]], [[META3]]} +; CHECK-V1-IC1-FORCE-EPI4: [[LOOP20]] = distinct !{[[LOOP20]], [[META3]], [[META2]]} ;. 
; CHECK-V2-IC1: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} ; CHECK-V2-IC1: [[PROF1]] = !{!"branch_weights", i32 1, i32 255} -; CHECK-V2-IC1: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]} +; CHECK-V2-IC1: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]], [[META5:![0-9]+]]} ; CHECK-V2-IC1: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK-V2-IC1: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-V2-IC1: [[PROF5]] = !{!"branch_weights", i32 1, i32 3} -; CHECK-V2-IC1: [[PROF6]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V2-IC1: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]} -; CHECK-V2-IC1: [[PROF8]] = !{!"branch_weights", i32 1, i32 63} -; CHECK-V2-IC1: [[LOOP9]] = distinct !{[[LOOP9]], [[META3]], [[META4]]} -; CHECK-V2-IC1: [[PROF10]] = !{!"branch_weights", i32 1, i32 15} -; CHECK-V2-IC1: [[PROF11]] = !{!"branch_weights", i32 4, i32 12} -; CHECK-V2-IC1: [[LOOP12]] = distinct !{[[LOOP12]], [[META3]], [[META4]]} -; CHECK-V2-IC1: [[LOOP13]] = distinct !{[[LOOP13]], [[META4]], [[META3]]} -; CHECK-V2-IC1: [[LOOP14]] = distinct !{[[LOOP14]], [[META3]], [[META4]]} -; CHECK-V2-IC1: [[LOOP15]] = distinct !{[[LOOP15]], [[META4]], [[META3]]} +; CHECK-V2-IC1: [[META5]] = !{!"llvm.loop.estimated_trip_count", i32 256} +; CHECK-V2-IC1: [[PROF6]] = !{!"branch_weights", i32 1, i32 3} +; CHECK-V2-IC1: [[PROF7]] = !{!"branch_weights", i32 0, i32 0} +; CHECK-V2-IC1: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]], [[META3]], [[META9:![0-9]+]]} +; CHECK-V2-IC1: [[META9]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; CHECK-V2-IC1: [[PROF10]] = !{!"branch_weights", i32 1, i32 63} +; CHECK-V2-IC1: [[LOOP11]] = distinct !{[[LOOP11]], [[META3]], [[META4]], [[META12:![0-9]+]]} +; CHECK-V2-IC1: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 64} +; CHECK-V2-IC1: [[PROF13]] = !{!"branch_weights", i32 1, i32 15} +; CHECK-V2-IC1: [[PROF14]] = !{!"branch_weights", i32 4, i32 12} +; CHECK-V2-IC1: [[LOOP15]] = 
distinct !{[[LOOP15]], [[META3]], [[META9]], [[META4]]} +; CHECK-V2-IC1: [[LOOP16]] = distinct !{[[LOOP16]], [[META4]], [[META3]], [[META9]]} +; CHECK-V2-IC1: [[LOOP17]] = distinct !{[[LOOP17]], [[META3]], [[META4]]} +; CHECK-V2-IC1: [[LOOP18]] = distinct !{[[LOOP18]], [[META4]], [[META3]]} ;. ; CHECK-V2-IC4: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} ; CHECK-V2-IC4: [[PROF1]] = !{!"branch_weights", i32 1, i32 63} -; CHECK-V2-IC4: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]} +; CHECK-V2-IC4: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]], [[META5:![0-9]+]]} ; CHECK-V2-IC4: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK-V2-IC4: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK-V2-IC4: [[PROF5]] = !{!"branch_weights", i32 1, i32 15} -; CHECK-V2-IC4: [[PROF6]] = !{!"branch_weights", i32 4, i32 12} -; CHECK-V2-IC4: [[LOOP7]] = distinct !{[[LOOP7]], [[META3]], [[META4]]} -; CHECK-V2-IC4: [[PROF8]] = !{!"branch_weights", i32 1, i32 3} -; CHECK-V2-IC4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0} -; CHECK-V2-IC4: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]], [[META3]]} -; CHECK-V2-IC4: [[LOOP11]] = distinct !{[[LOOP11]], [[META3]], [[META4]]} -; CHECK-V2-IC4: [[PROF12]] = !{!"branch_weights", i32 8, i32 56} -; CHECK-V2-IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META3]], [[META4]]} -; CHECK-V2-IC4: [[PROF14]] = !{!"branch_weights", i32 1, i32 7} -; CHECK-V2-IC4: [[LOOP15]] = distinct !{[[LOOP15]], [[META4]], [[META3]]} -; CHECK-V2-IC4: [[LOOP16]] = distinct !{[[LOOP16]], [[META3]], [[META4]]} -; CHECK-V2-IC4: [[LOOP17]] = distinct !{[[LOOP17]], [[META3]], [[META4]]} -; CHECK-V2-IC4: [[LOOP18]] = distinct !{[[LOOP18]], [[META4]], [[META3]]} +; CHECK-V2-IC4: [[META5]] = !{!"llvm.loop.estimated_trip_count", i32 64} +; CHECK-V2-IC4: [[PROF6]] = !{!"branch_weights", i32 1, i32 15} +; CHECK-V2-IC4: [[PROF7]] = !{!"branch_weights", i32 4, i32 12} +; CHECK-V2-IC4: [[LOOP8]] = distinct !{[[LOOP8]], 
[[META3]], [[META9:![0-9]+]], [[META4]]} +; CHECK-V2-IC4: [[META9]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; CHECK-V2-IC4: [[PROF10]] = !{!"branch_weights", i32 1, i32 3} +; CHECK-V2-IC4: [[PROF11]] = !{!"branch_weights", i32 0, i32 0} +; CHECK-V2-IC4: [[LOOP12]] = distinct !{[[LOOP12]], [[META4]], [[META3]], [[META9]]} +; CHECK-V2-IC4: [[LOOP13]] = distinct !{[[LOOP13]], [[META3]], [[META4]], [[META14:![0-9]+]]} +; CHECK-V2-IC4: [[META14]] = !{!"llvm.loop.estimated_trip_count", i32 16} +; CHECK-V2-IC4: [[PROF15]] = !{!"branch_weights", i32 8, i32 56} +; CHECK-V2-IC4: [[LOOP16]] = distinct !{[[LOOP16]], [[META3]], [[META9]], [[META4]]} +; CHECK-V2-IC4: [[PROF17]] = !{!"branch_weights", i32 1, i32 7} +; CHECK-V2-IC4: [[LOOP18]] = distinct !{[[LOOP18]], [[META4]], [[META3]], [[META9]]} +; CHECK-V2-IC4: [[LOOP19]] = distinct !{[[LOOP19]], [[META3]], [[META4]]} +; CHECK-V2-IC4: [[LOOP20]] = distinct !{[[LOOP20]], [[META3]], [[META4]]} +; CHECK-V2-IC4: [[LOOP21]] = distinct !{[[LOOP21]], [[META4]], [[META3]]} ;. 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll index d261827d4e111..439e1f181b5df 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll @@ -46,7 +46,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 99, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 1 ; CHECK-NEXT: [[ICMP17:%.*]] = icmp eq i64 [[AND]], 0 -; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[LOOP_LATCH]], !prof [[PROF5:![0-9]+]] +; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[LOOP_LATCH]], !prof [[PROF6:![0-9]+]] ; CHECK: bb18: ; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1 ; CHECK-NEXT: [[GETELEMENTPTR19:%.*]] = getelementptr inbounds i64, ptr [[ARR]], i64 [[OR]] @@ -55,7 +55,7 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 { ; CHECK: loop.latch: ; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 ; CHECK-NEXT: [[ICMP22:%.*]] = icmp eq i64 [[IV_NEXT]], 90 -; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[LOOP_HEADER]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[LOOP_HEADER]], !prof [[PROF7:![0-9]+]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: bb6: ; CHECK-NEXT: ret void ; @@ -96,10 +96,12 @@ attributes #0 = {"target-cpu"="haswell" "target-features"="+avx2" } ;. 
; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 127} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 23} -; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]} +; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]], [[META5:![0-9]+]]} ; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[PROF5]] = !{!"branch_weights", i32 1, i32 1} -; CHECK: [[PROF6]] = !{!"branch_weights", i32 0, i32 0} -; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META4]], [[META3]]} +; CHECK: [[META5]] = !{!"llvm.loop.estimated_trip_count", i32 24} +; CHECK: [[PROF6]] = !{!"branch_weights", i32 1, i32 1} +; CHECK: [[PROF7]] = !{!"branch_weights", i32 0, i32 0} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META4]], [[META3]], [[META9:![0-9]+]]} +; CHECK: [[META9]] = !{!"llvm.loop.estimated_trip_count", i32 0} ;. diff --git a/llvm/test/Transforms/LoopVectorize/branch-weights.ll b/llvm/test/Transforms/LoopVectorize/branch-weights.ll index 7ae06953c5544..4445141549069 100644 --- a/llvm/test/Transforms/LoopVectorize/branch-weights.ll +++ b/llvm/test/Transforms/LoopVectorize/branch-weights.ll @@ -27,23 +27,23 @@ define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 { ; MAINVF4IC1_EPI4: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]] ; MAINVF4IC1_EPI4: [[MIDDLE_BLOCK]]: ; MAINVF4IC1_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]] -; MAINVF4IC1_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]] +; MAINVF4IC1_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF8:![0-9]+]] ; MAINVF4IC1_EPI4: [[VEC_EPILOG_ITER_CHECK]]: ; MAINVF4IC1_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4 -; MAINVF4IC1_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label 
%[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]] +; MAINVF4IC1_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF9:![0-9]+]] ; MAINVF4IC1_EPI4: [[VEC_EPILOG_PH]]: ; MAINVF4IC1_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; MAINVF4IC1_EPI4: [[VEC_EPILOG_VECTOR_BODY]]: ; MAINVF4IC1_EPI4: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]] -; MAINVF4IC1_EPI4: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] +; MAINVF4IC1_EPI4: br i1 [[TMP12]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] ; MAINVF4IC1_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; MAINVF4IC1_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]] -; MAINVF4IC1_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF7]] +; MAINVF4IC1_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF8]] ; MAINVF4IC1_EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; MAINVF4IC1_EPI4: br label %[[LOOP:.*]] ; MAINVF4IC1_EPI4: [[LOOP]]: ; MAINVF4IC1_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]] -; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF11:![0-9]+]], !llvm.loop [[LOOP12:![0-9]+]] +; MAINVF4IC1_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF13:![0-9]+]], !llvm.loop [[LOOP14:![0-9]+]] ; MAINVF4IC1_EPI4: [[EXIT_LOOPEXIT]]: ; MAINVF4IC1_EPI4: br label %[[EXIT]] ; MAINVF4IC1_EPI4: [[EXIT]]: @@ -70,23 +70,23 @@ define void @f0(i8 %n, i32 %len, ptr %p) !prof !0 { ; MAINVF4IC2_EPI4: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF3:![0-9]+]], !llvm.loop [[LOOP4:![0-9]+]] ; MAINVF4IC2_EPI4: [[MIDDLE_BLOCK]]: ; MAINVF4IC2_EPI4: [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], 
[[N_VEC]] -; MAINVF4IC2_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF7:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]], !prof [[PROF8:![0-9]+]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_ITER_CHECK]]: ; MAINVF4IC2_EPI4: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING:%.*]], 4 -; MAINVF4IC2_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF9:![0-9]+]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_PH]]: ; MAINVF4IC2_EPI4: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_VECTOR_BODY]]: ; MAINVF4IC2_EPI4: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT6:%.*]], [[N_VEC3:%.*]] -; MAINVF4IC2_EPI4: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !prof [[PROF10:![0-9]+]], !llvm.loop [[LOOP11:![0-9]+]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; MAINVF4IC2_EPI4: [[CMP_N8:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC3]] -; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF11:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[CMP_N8]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]], !prof [[PROF13:![0-9]+]] ; MAINVF4IC2_EPI4: [[VEC_EPILOG_SCALAR_PH]]: ; MAINVF4IC2_EPI4: br label %[[LOOP:.*]] ; MAINVF4IC2_EPI4: [[LOOP]]: ; MAINVF4IC2_EPI4: [[CMP_LOOP:%.*]] = icmp ult i32 [[I32:%.*]], [[LEN]] -; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label %[[EXIT_LOOPEXIT]], !prof [[PROF12:![0-9]+]], !llvm.loop [[LOOP13:![0-9]+]] +; MAINVF4IC2_EPI4: br i1 [[CMP_LOOP]], label %[[LOOP]], label 
%[[EXIT_LOOPEXIT]], !prof [[PROF14:![0-9]+]], !llvm.loop [[LOOP15:![0-9]+]] ; MAINVF4IC2_EPI4: [[EXIT_LOOPEXIT]]: ; MAINVF4IC2_EPI4: br label %[[EXIT]] ; MAINVF4IC2_EPI4: [[EXIT]]: @@ -120,28 +120,34 @@ exit: ; MAINVF4IC1_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1} ; MAINVF4IC1_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127} ; MAINVF4IC1_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 307} -; MAINVF4IC1_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]} +; MAINVF4IC1_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META7:![0-9]+]]} ; MAINVF4IC1_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1} ; MAINVF4IC1_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"} -; MAINVF4IC1_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 3} -; MAINVF4IC1_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 0} -; MAINVF4IC1_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0} -; MAINVF4IC1_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]} -; MAINVF4IC1_EPI4: [[PROF11]] = !{!"branch_weights", i32 2, i32 1} -; MAINVF4IC1_EPI4: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]]} +; MAINVF4IC1_EPI4: [[META7]] = !{!"llvm.loop.estimated_trip_count", i32 308} +; MAINVF4IC1_EPI4: [[PROF8]] = !{!"branch_weights", i32 1, i32 3} +; MAINVF4IC1_EPI4: [[PROF9]] = !{!"branch_weights", i32 4, i32 0} +; MAINVF4IC1_EPI4: [[PROF10]] = !{!"branch_weights", i32 0, i32 0} +; MAINVF4IC1_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]], [[META12:![0-9]+]]} +; MAINVF4IC1_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; MAINVF4IC1_EPI4: [[PROF13]] = !{!"branch_weights", i32 2, i32 1} +; MAINVF4IC1_EPI4: [[LOOP14]] = distinct !{[[LOOP14]], [[META5]], [[META15:![0-9]+]]} +; MAINVF4IC1_EPI4: [[META15]] = !{!"llvm.loop.estimated_trip_count", i32 3} ;. 
; MAINVF4IC2_EPI4: [[PROF0]] = !{!"function_entry_count", i64 13} ; MAINVF4IC2_EPI4: [[PROF1]] = !{!"branch_weights", i32 12, i32 1} ; MAINVF4IC2_EPI4: [[PROF2]] = !{!"branch_weights", i32 1, i32 127} ; MAINVF4IC2_EPI4: [[PROF3]] = !{!"branch_weights", i32 1, i32 153} -; MAINVF4IC2_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]]} +; MAINVF4IC2_EPI4: [[LOOP4]] = distinct !{[[LOOP4]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META7:![0-9]+]]} ; MAINVF4IC2_EPI4: [[META5]] = !{!"llvm.loop.isvectorized", i32 1} ; MAINVF4IC2_EPI4: [[META6]] = !{!"llvm.loop.unroll.runtime.disable"} -; MAINVF4IC2_EPI4: [[PROF7]] = !{!"branch_weights", i32 1, i32 7} -; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 4, i32 4} -; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 0, i32 0} -; MAINVF4IC2_EPI4: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]], [[META6]]} -; MAINVF4IC2_EPI4: [[PROF11]] = !{!"branch_weights", i32 1, i32 3} -; MAINVF4IC2_EPI4: [[PROF12]] = !{!"branch_weights", i32 2, i32 1} -; MAINVF4IC2_EPI4: [[LOOP13]] = distinct !{[[LOOP13]], [[META5]]} +; MAINVF4IC2_EPI4: [[META7]] = !{!"llvm.loop.estimated_trip_count", i32 154} +; MAINVF4IC2_EPI4: [[PROF8]] = !{!"branch_weights", i32 1, i32 7} +; MAINVF4IC2_EPI4: [[PROF9]] = !{!"branch_weights", i32 4, i32 4} +; MAINVF4IC2_EPI4: [[PROF10]] = !{!"branch_weights", i32 0, i32 0} +; MAINVF4IC2_EPI4: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]], [[META6]], [[META12:![0-9]+]]} +; MAINVF4IC2_EPI4: [[META12]] = !{!"llvm.loop.estimated_trip_count", i32 0} +; MAINVF4IC2_EPI4: [[PROF13]] = !{!"branch_weights", i32 1, i32 3} +; MAINVF4IC2_EPI4: [[PROF14]] = !{!"branch_weights", i32 2, i32 1} +; MAINVF4IC2_EPI4: [[LOOP15]] = distinct !{[[LOOP15]], [[META5]], [[META16:![0-9]+]]} +; MAINVF4IC2_EPI4: [[META16]] = !{!"llvm.loop.estimated_trip_count", i32 3} ;. 
diff --git a/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll b/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll new file mode 100644 index 0000000000000..3c0bc8a39ebeb --- /dev/null +++ b/llvm/test/Verifier/llvm.loop.estimated_trip_count.ll @@ -0,0 +1,61 @@ +; Test "llvm.loop.estimated_trip_count" validation + +; DEFINE: %{RUN} = opt -passes=verify %t -disable-output 2>&1 | \ +; DEFINE: FileCheck %s -allow-empty -check-prefix + +define void @test() { +entry: + br label %body +body: + br i1 0, label %body, label %exit, !llvm.loop !0 +exit: + ret void +} +!0 = distinct !{!0, !1} + +; GOOD-NOT: {{.}} + +; BAD-VALUE: Expected second operand to be an integer constant of type i32 or smaller +; BAD-VALUE-NEXT: !1 = !{!"llvm.loop.estimated_trip_count", + +; TOO-FEW: Expected two operands +; TOO-FEW-NEXT: !1 = !{!"llvm.loop.estimated_trip_count"} + +; TOO-MANY: Expected two operands +; TOO-MANY-NEXT: !1 = !{!"llvm.loop.estimated_trip_count", i32 5, i32 5} + +; No value. +; RUN: cp %s %t +; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count"}' >> %t +; RUN: not %{RUN} TOO-FEW + +; i16 value. +; RUN: cp %s %t +; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i16 5}' >> %t +; RUN: %{RUN} GOOD + +; i32 value. +; RUN: cp %s %t +; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i32 5}' >> %t +; RUN: %{RUN} GOOD + +; i64 value. +; RUN: cp %s %t +; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i64 5}' >> %t +; RUN: not %{RUN} BAD-VALUE + +; MDString value. +; RUN: cp %s %t +; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", !"5"}' >> %t +; RUN: not %{RUN} BAD-VALUE + +; MDNode value. +; RUN: cp %s %t +; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", !2}' >> %t +; RUN: echo '!2 = !{i32 5}' >> %t +; RUN: not %{RUN} BAD-VALUE + +; Too many values. 
+; RUN: cp %s %t
+; RUN: echo '!1 = !{!"llvm.loop.estimated_trip_count", i32 5, i32 5}' >> %t
+; RUN: not %{RUN} TOO-MANY
diff --git a/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp b/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp
index c22a3582bee86..ce002e9239960 100644
--- a/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp
+++ b/llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp
@@ -142,3 +142,59 @@ TEST(LoopUtils, IsKnownNonPositiveInLoopTest) {
         EXPECT_EQ(isKnownNonPositiveInLoop(ArgSCEV, L, SE), true);
       });
 }
+
+// The inner and outer loop here share a latch.  Because any loop metadata must
+// be attached to that latch, loop metadata cannot distinguish between the two
+// loops.  Until that problem is solved (by moving loop metadata to loops'
+// header blocks instead), getLoopEstimatedTripCount and
+// setLoopEstimatedTripCount must refuse to operate on at least one of the two
+// loops.  They choose to reject the outer loop here because the latch does not
+// exit it.
+TEST(LoopUtils, nestedLoopSharedLatchEstimatedTripCount) {
+  LLVMContext C;
+  std::unique_ptr<Module> M =
+      parseIR(C, "declare i1 @f()\n"
+                 "declare i1 @g()\n"
+                 "define void @foo() {\n"
+                 "entry:\n"
+                 "  br label %outer\n"
+                 "outer:\n"
+                 "  %c0 = call i1 @f()\n"
+                 "  br i1 %c0, label %inner, label %exit, !prof !0\n"
+                 "inner:\n"
+                 "  %c1 = call i1 @g()\n"
+                 "  br i1 %c1, label %inner, label %outer, !prof !1\n"
+                 "exit:\n"
+                 "  ret void\n"
+                 "}\n"
+                 "!0 = !{!\"branch_weights\", i32 100, i32 1}\n"
+                 "!1 = !{!\"branch_weights\", i32 4, i32 1}\n"
+                 "\n");
+
+  run(*M, "foo",
+      [&](Function &F, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI) {
+        // Use fatal gtest assertions rather than assert() so the structural
+        // preconditions are still checked when NDEBUG is defined.
+        ASSERT_EQ(LI.end() - LI.begin(), 1) << "Expected one outer loop";
+        Loop *Outer = *LI.begin();
+        ASSERT_EQ(Outer->end() - Outer->begin(), 1)
+            << "Expected one inner loop";
+        Loop *Inner = *Outer->begin();
+
+        // Even before llvm.loop.estimated_trip_count is added to either loop,
+        // getLoopEstimatedTripCount rejects the outer loop.
+        EXPECT_EQ(getLoopEstimatedTripCount(Inner), 5);
+        EXPECT_EQ(getLoopEstimatedTripCount(Outer), std::nullopt);
+
+        // setLoopEstimatedTripCount for the inner loop does not affect
+        // getLoopEstimatedTripCount for the outer loop.
+        EXPECT_EQ(setLoopEstimatedTripCount(Inner, 100), true);
+        EXPECT_EQ(getLoopEstimatedTripCount(Inner), 100);
+        EXPECT_EQ(getLoopEstimatedTripCount(Outer), std::nullopt);
+
+        // setLoopEstimatedTripCount rejects the outer loop.
+        EXPECT_EQ(setLoopEstimatedTripCount(Outer, 999), false);
+        EXPECT_EQ(getLoopEstimatedTripCount(Inner), 100);
+        EXPECT_EQ(getLoopEstimatedTripCount(Outer), std::nullopt);
+      });
+}