diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index a402d0eb805c1..633d077e64927 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -2126,9 +2126,10 @@ static void unswitchNontrivialInvariants( Loop &L, Instruction &TI, ArrayRef Invariants, IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, - function_ref)> UnswitchCB, + function_ref)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, - function_ref DestroyLoopCB, bool InsertFreeze) { + function_ref DestroyLoopCB, bool InsertFreeze, + bool InjectedCondition) { auto *ParentBB = TI.getParent(); BranchInst *BI = dyn_cast(&TI); SwitchInst *SI = BI ? nullptr : cast(&TI); @@ -2581,7 +2582,7 @@ static void unswitchNontrivialInvariants( for (Loop *UpdatedL : llvm::concat(NonChildClonedLoops, HoistedLoops)) if (UpdatedL->getParentLoop() == ParentL) SibLoops.push_back(UpdatedL); - UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops); + UnswitchCB(IsStillLoop, PartiallyInvariant, InjectedCondition, SibLoops); if (MSSAU && VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); @@ -2979,13 +2980,6 @@ static bool shouldTryInjectInvariantCondition( /// the metadata. bool shouldTryInjectBasingOnMetadata(const BranchInst *BI, const BasicBlock *TakenSucc) { - // Skip branches that have already been unswithed this way. After successful - // unswitching of injected condition, we will still have a copy of this loop - // which looks exactly the same as original one. To prevent the 2nd attempt - // of unswitching it in the same pass, mark this branch as "nothing to do - // here". 
- if (BI->hasMetadata("llvm.invariant.condition.injection.disabled")) - return false; SmallVector Weights; if (!extractBranchWeights(*BI, Weights)) return false; @@ -3068,13 +3062,9 @@ injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L, Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock); Builder.SetInsertPoint(CheckBlock); - auto *NewTerm = Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0), - TI->getSuccessor(1)); - + Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0), + TI->getSuccessor(1)); TI->eraseFromParent(); - // Prevent infinite unswitching. - NewTerm->setMetadata("llvm.invariant.condition.injection.disabled", - MDNode::get(BB->getContext(), {})); // Fixup phis. for (auto &I : *InLoopSucc) { @@ -3442,7 +3432,7 @@ static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, static bool unswitchBestCondition( Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, - function_ref)> UnswitchCB, + function_ref)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, function_ref DestroyLoopCB) { // Collect all invariant conditions within this loop (as opposed to an inner @@ -3452,9 +3442,10 @@ static bool unswitchBestCondition( Instruction *PartialIVCondBranch = nullptr; collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo, PartialIVCondBranch, L, LI, AA, MSSAU); - collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo, - PartialIVCondBranch, L, DT, LI, AA, - MSSAU); + if (!findOptionMDForLoop(&L, "llvm.loop.unswitch.injection.disable")) + collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo, + PartialIVCondBranch, L, DT, LI, AA, + MSSAU); // If we didn't find any candidates, we're done. 
if (UnswitchCandidates.empty()) return false; @@ -3475,8 +3466,11 @@ static bool unswitchBestCondition( return false; } - if (Best.hasPendingInjection()) + bool InjectedCondition = false; + if (Best.hasPendingInjection()) { Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU); + InjectedCondition = true; + } assert(!Best.hasPendingInjection() && "All injections should have been done by now!"); @@ -3504,7 +3498,7 @@ static bool unswitchBestCondition( << ") terminator: " << *Best.TI << "\n"); unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT, LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB, - InsertFreeze); + InsertFreeze, InjectedCondition); return true; } @@ -3533,7 +3527,7 @@ static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, - function_ref)> UnswitchCB, + function_ref)> UnswitchCB, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, function_ref DestroyLoopCB) { @@ -3548,7 +3542,8 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) { // If we unswitched successfully we will want to clean up the loop before // processing it further so just mark it as unswitched and return. - UnswitchCB(/*CurrentLoopValid*/ true, false, {}); + UnswitchCB(/*CurrentLoopValid*/ true, /*PartiallyInvariant*/ false, + /*InjectedCondition*/ false, {}); return true; } @@ -3644,6 +3639,7 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid, bool PartiallyInvariant, + bool InjectedCondition, ArrayRef NewLoops) { // If we did a non-trivial unswitch, we have added new (cloned) loops. 
if (!NewLoops.empty()) @@ -3663,6 +3659,16 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, Context, L.getLoopID(), {"llvm.loop.unswitch.partial"}, {DisableUnswitchMD}); L.setLoopID(NewLoopID); + } else if (InjectedCondition) { + // Do the same for injection of invariant conditions. + auto &Context = L.getHeader()->getContext(); + MDNode *DisableUnswitchMD = MDNode::get( + Context, + MDString::get(Context, "llvm.loop.unswitch.injection.disable")); + MDNode *NewLoopID = makePostTransformationMetadata( + Context, L.getLoopID(), {"llvm.loop.unswitch.injection"}, + {DisableUnswitchMD}); + L.setLoopID(NewLoopID); } else U.revisitCurrentLoop(); } else @@ -3755,6 +3761,7 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { auto *SE = SEWP ? &SEWP->getSE() : nullptr; auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant, + bool InjectedCondition, ArrayRef NewLoops) { // If we did a non-trivial unswitch, we have added new (cloned) loops. for (auto *NewL : NewLoops) @@ -3765,9 +3772,9 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { // but it is the best we can do in the old PM. if (CurrentLoopValid) { // If the current loop has been unswitched using a partially invariant - // condition, we should not re-add the current loop to avoid unswitching - // on the same condition again. - if (!PartiallyInvariant) + // condition or injected invariant condition, we should not re-add the + // current loop to avoid unswitching on the same condition again. 
+ if (!PartiallyInvariant && !InjectedCondition) LPM.addLoop(*L); } else LPM.markLoopAsDeleted(*L); diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions-exponential.ll b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions-exponential.ll new file mode 100644 index 0000000000000..bd88b4030d675 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions-exponential.ll @@ -0,0 +1,260 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes='simple-loop-unswitch' < %s | FileCheck %s + +; Make sure invariant condition injection does not result in exponential +; size increase. + +; FIXME: It probably shouldn't result in linear size increase either. + +define void @ham(i64 %arg) { +; CHECK-LABEL: define void @ham( +; CHECK-SAME: i64 [[ARG:%.*]]) { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[INJECTED_COND:%.*]] = icmp ule i64 [[ARG]], [[ARG]] +; CHECK-NEXT: [[INJECTED_COND_FR:%.*]] = freeze i1 [[INJECTED_COND]] +; CHECK-NEXT: br i1 [[INJECTED_COND_FR]], label [[BB_SPLIT_US:%.*]], label [[BB_SPLIT:%.*]] +; CHECK: bb.split.us: +; CHECK-NEXT: [[INJECTED_COND1:%.*]] = icmp ule i64 [[ARG]], [[ARG]] +; CHECK-NEXT: [[INJECTED_COND1_FR:%.*]] = freeze i1 [[INJECTED_COND1]] +; CHECK-NEXT: br i1 [[INJECTED_COND1_FR]], label [[BB_SPLIT_US_SPLIT_US:%.*]], label [[BB_SPLIT_US_SPLIT:%.*]] +; CHECK: bb.split.us.split.us: +; CHECK-NEXT: [[INJECTED_COND2:%.*]] = icmp ule i64 [[ARG]], [[ARG]] +; CHECK-NEXT: [[INJECTED_COND2_FR:%.*]] = freeze i1 [[INJECTED_COND2]] +; CHECK-NEXT: br i1 [[INJECTED_COND2_FR]], label [[BB_SPLIT_US_SPLIT_US_SPLIT_US:%.*]], label [[BB_SPLIT_US_SPLIT_US_SPLIT:%.*]] +; CHECK: bb.split.us.split.us.split.us: +; CHECK-NEXT: [[INJECTED_COND3:%.*]] = icmp ule i64 [[ARG]], [[ARG]] +; CHECK-NEXT: [[INJECTED_COND3_FR:%.*]] = freeze i1 [[INJECTED_COND3]] +; CHECK-NEXT: br i1 [[INJECTED_COND3_FR]], label 
[[BB_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT_US:%.*]], label [[BB_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT:%.*]] +; CHECK: bb.split.us.split.us.split.us.split.us: +; CHECK-NEXT: [[INJECTED_COND4:%.*]] = icmp ule i64 [[ARG]], [[ARG]] +; CHECK-NEXT: [[INJECTED_COND4_FR:%.*]] = freeze i1 [[INJECTED_COND4]] +; CHECK-NEXT: br i1 [[INJECTED_COND4_FR]], label [[BB_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT_US:%.*]], label [[BB_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT:%.*]] +; CHECK: bb.split.us.split.us.split.us.split.us.split.us: +; CHECK-NEXT: br label [[BB1_US_US_US_US_US:%.*]] +; CHECK: bb1.us.us.us.us.us: +; CHECK-NEXT: [[PHI_US_US_US_US_US:%.*]] = phi i64 [ 0, [[BB_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT_US]] ], [ [[ADD_US_US_US_US_US:%.*]], [[BB20_US_US_US_US_US:%.*]] ] +; CHECK-NEXT: [[ADD_US_US_US_US_US]] = add nuw i64 [[PHI_US_US_US_US_US]], 1 +; CHECK-NEXT: [[ICMP_US_US_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US_US_US]], [[ARG]] +; CHECK-NEXT: br i1 [[ICMP_US_US_US_US_US]], label [[BB2_US_US_US_US_US:%.*]], label [[BB21_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT_US:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK: bb2.us.us.us.us.us: +; CHECK-NEXT: [[ICMP3_US_US_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US_US_US]], [[ARG]] +; CHECK-NEXT: br label [[BB4_US_US_US_US_US:%.*]] +; CHECK: bb4.us.us.us.us.us: +; CHECK-NEXT: [[ICMP5_US_US_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US_US_US]], [[ARG]] +; CHECK-NEXT: br label [[BB6_US_US_US_US_US:%.*]] +; CHECK: bb6.us.us.us.us.us: +; CHECK-NEXT: [[ICMP7_US_US_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US_US_US]], [[ARG]] +; CHECK-NEXT: br label [[BB8_US_US_US_US_US:%.*]] +; CHECK: bb8.us.us.us.us.us: +; CHECK-NEXT: [[ICMP9_US_US_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US_US_US]], [[ARG]] +; CHECK-NEXT: br label [[BB10_US_US_US_US_US:%.*]] +; CHECK: bb10.us.us.us.us.us: +; CHECK-NEXT: [[ICMP11_US_US_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US_US_US]], [[ARG]] +; CHECK-NEXT: br label [[BB20_US_US_US_US_US]] +; CHECK: bb20.us.us.us.us.us: +; 
CHECK-NEXT: br label [[BB1_US_US_US_US_US]] +; CHECK: bb21.split.us.split.us.split.us.split.us.split.us: +; CHECK-NEXT: br label [[BB21_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT_US:%.*]] +; CHECK: bb.split.us.split.us.split.us.split.us.split: +; CHECK-NEXT: br label [[BB1_US_US_US_US:%.*]] +; CHECK: bb1.us.us.us.us: +; CHECK-NEXT: [[PHI_US_US_US_US:%.*]] = phi i64 [ 0, [[BB_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT]] ], [ [[ADD_US_US_US_US:%.*]], [[BB20_US_US_US_US:%.*]] ] +; CHECK-NEXT: [[ADD_US_US_US_US]] = add nuw i64 [[PHI_US_US_US_US]], 1 +; CHECK-NEXT: [[ICMP_US_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US_US]], [[ARG]] +; CHECK-NEXT: br i1 [[ICMP_US_US_US_US]], label [[BB2_US_US_US_US:%.*]], label [[BB21_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT:%.*]], !prof [[PROF0]] +; CHECK: bb2.us.us.us.us: +; CHECK-NEXT: [[ICMP3_US_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US_US]], [[ARG]] +; CHECK-NEXT: br label [[BB4_US_US_US_US:%.*]] +; CHECK: bb4.us.us.us.us: +; CHECK-NEXT: [[ICMP5_US_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US_US]], [[ARG]] +; CHECK-NEXT: br label [[BB6_US_US_US_US:%.*]] +; CHECK: bb6.us.us.us.us: +; CHECK-NEXT: [[ICMP7_US_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US_US]], [[ARG]] +; CHECK-NEXT: br label [[BB8_US_US_US_US:%.*]] +; CHECK: bb8.us.us.us.us: +; CHECK-NEXT: [[ICMP9_US_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US_US]], [[ARG]] +; CHECK-NEXT: br label [[BB10_US_US_US_US:%.*]] +; CHECK: bb10.us.us.us.us: +; CHECK-NEXT: [[ICMP11_US_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US_US]], [[ARG]] +; CHECK-NEXT: br label [[BB10_US_US_US_US_CHECK:%.*]] +; CHECK: bb10.us.us.us.us.check: +; CHECK-NEXT: br i1 [[ICMP11_US_US_US_US]], label [[BB20_US_US_US_US]], label [[BB21_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT]] +; CHECK: bb20.us.us.us.us: +; CHECK-NEXT: br label [[BB1_US_US_US_US]], !llvm.loop [[LOOP1:![0-9]+]] +; CHECK: bb21.split.us.split.us.split.us.split.us.split: +; CHECK-NEXT: br label [[BB21_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT_US]] +; CHECK: 
bb21.split.us.split.us.split.us.split.us: +; CHECK-NEXT: br label [[BB21_SPLIT_US_SPLIT_US_SPLIT_US:%.*]] +; CHECK: bb.split.us.split.us.split.us.split: +; CHECK-NEXT: br label [[BB1_US_US_US:%.*]] +; CHECK: bb1.us.us.us: +; CHECK-NEXT: [[PHI_US_US_US:%.*]] = phi i64 [ 0, [[BB_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT]] ], [ [[ADD_US_US_US:%.*]], [[BB20_US_US_US:%.*]] ] +; CHECK-NEXT: [[ADD_US_US_US]] = add nuw i64 [[PHI_US_US_US]], 1 +; CHECK-NEXT: [[ICMP_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US]], [[ARG]] +; CHECK-NEXT: br i1 [[ICMP_US_US_US]], label [[BB2_US_US_US:%.*]], label [[BB21_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT:%.*]], !prof [[PROF0]] +; CHECK: bb2.us.us.us: +; CHECK-NEXT: [[ICMP3_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US]], [[ARG]] +; CHECK-NEXT: br label [[BB4_US_US_US:%.*]] +; CHECK: bb4.us.us.us: +; CHECK-NEXT: [[ICMP5_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US]], [[ARG]] +; CHECK-NEXT: br label [[BB6_US_US_US:%.*]] +; CHECK: bb6.us.us.us: +; CHECK-NEXT: [[ICMP7_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US]], [[ARG]] +; CHECK-NEXT: br label [[BB8_US_US_US:%.*]] +; CHECK: bb8.us.us.us: +; CHECK-NEXT: [[ICMP9_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US]], [[ARG]] +; CHECK-NEXT: br label [[BB8_US_US_US_CHECK:%.*]] +; CHECK: bb8.us.us.us.check: +; CHECK-NEXT: br i1 [[ICMP9_US_US_US]], label [[BB10_US_US_US:%.*]], label [[BB21_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT]] +; CHECK: bb10.us.us.us: +; CHECK-NEXT: [[ICMP11_US_US_US:%.*]] = icmp ult i64 [[PHI_US_US_US]], [[ARG]] +; CHECK-NEXT: br i1 [[ICMP11_US_US_US]], label [[BB20_US_US_US]], label [[BB21_SPLIT_US_SPLIT_US_SPLIT_US_SPLIT]], !prof [[PROF0]] +; CHECK: bb20.us.us.us: +; CHECK-NEXT: br label [[BB1_US_US_US]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: bb21.split.us.split.us.split.us.split: +; CHECK-NEXT: br label [[BB21_SPLIT_US_SPLIT_US_SPLIT_US]] +; CHECK: bb21.split.us.split.us.split.us: +; CHECK-NEXT: br label [[BB21_SPLIT_US_SPLIT_US:%.*]] +; CHECK: bb.split.us.split.us.split: +; CHECK-NEXT: br label 
[[BB1_US_US:%.*]] +; CHECK: bb1.us.us: +; CHECK-NEXT: [[PHI_US_US:%.*]] = phi i64 [ 0, [[BB_SPLIT_US_SPLIT_US_SPLIT]] ], [ [[ADD_US_US:%.*]], [[BB20_US_US:%.*]] ] +; CHECK-NEXT: [[ADD_US_US]] = add nuw i64 [[PHI_US_US]], 1 +; CHECK-NEXT: [[ICMP_US_US:%.*]] = icmp ult i64 [[PHI_US_US]], [[ARG]] +; CHECK-NEXT: br i1 [[ICMP_US_US]], label [[BB2_US_US:%.*]], label [[BB21_SPLIT_US_SPLIT_US_SPLIT:%.*]], !prof [[PROF0]] +; CHECK: bb2.us.us: +; CHECK-NEXT: [[ICMP3_US_US:%.*]] = icmp ult i64 [[PHI_US_US]], [[ARG]] +; CHECK-NEXT: br label [[BB4_US_US:%.*]] +; CHECK: bb4.us.us: +; CHECK-NEXT: [[ICMP5_US_US:%.*]] = icmp ult i64 [[PHI_US_US]], [[ARG]] +; CHECK-NEXT: br label [[BB6_US_US:%.*]] +; CHECK: bb6.us.us: +; CHECK-NEXT: [[ICMP7_US_US:%.*]] = icmp ult i64 [[PHI_US_US]], [[ARG]] +; CHECK-NEXT: br label [[BB6_US_US_CHECK:%.*]] +; CHECK: bb6.us.us.check: +; CHECK-NEXT: br i1 [[ICMP7_US_US]], label [[BB8_US_US:%.*]], label [[BB21_SPLIT_US_SPLIT_US_SPLIT]] +; CHECK: bb8.us.us: +; CHECK-NEXT: [[ICMP9_US_US:%.*]] = icmp ult i64 [[PHI_US_US]], [[ARG]] +; CHECK-NEXT: br i1 [[ICMP9_US_US]], label [[BB10_US_US:%.*]], label [[BB21_SPLIT_US_SPLIT_US_SPLIT]], !prof [[PROF0]] +; CHECK: bb10.us.us: +; CHECK-NEXT: [[ICMP11_US_US:%.*]] = icmp ult i64 [[PHI_US_US]], [[ARG]] +; CHECK-NEXT: br i1 [[ICMP11_US_US]], label [[BB20_US_US]], label [[BB21_SPLIT_US_SPLIT_US_SPLIT]], !prof [[PROF0]] +; CHECK: bb20.us.us: +; CHECK-NEXT: br label [[BB1_US_US]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: bb21.split.us.split.us.split: +; CHECK-NEXT: br label [[BB21_SPLIT_US_SPLIT_US]] +; CHECK: bb21.split.us.split.us: +; CHECK-NEXT: br label [[BB21_SPLIT_US:%.*]] +; CHECK: bb.split.us.split: +; CHECK-NEXT: br label [[BB1_US:%.*]] +; CHECK: bb1.us: +; CHECK-NEXT: [[PHI_US:%.*]] = phi i64 [ 0, [[BB_SPLIT_US_SPLIT]] ], [ [[ADD_US:%.*]], [[BB20_US:%.*]] ] +; CHECK-NEXT: [[ADD_US]] = add nuw i64 [[PHI_US]], 1 +; CHECK-NEXT: [[ICMP_US:%.*]] = icmp ult i64 [[PHI_US]], [[ARG]] +; CHECK-NEXT: br i1 [[ICMP_US]], label 
[[BB2_US:%.*]], label [[BB21_SPLIT_US_SPLIT:%.*]], !prof [[PROF0]] +; CHECK: bb2.us: +; CHECK-NEXT: [[ICMP3_US:%.*]] = icmp ult i64 [[PHI_US]], [[ARG]] +; CHECK-NEXT: br label [[BB4_US:%.*]] +; CHECK: bb4.us: +; CHECK-NEXT: [[ICMP5_US:%.*]] = icmp ult i64 [[PHI_US]], [[ARG]] +; CHECK-NEXT: br label [[BB4_US_CHECK:%.*]] +; CHECK: bb4.us.check: +; CHECK-NEXT: br i1 [[ICMP5_US]], label [[BB6_US:%.*]], label [[BB22_SPLIT_US:%.*]] +; CHECK: bb6.us: +; CHECK-NEXT: [[ICMP7_US:%.*]] = icmp ult i64 [[PHI_US]], [[ARG]] +; CHECK-NEXT: br i1 [[ICMP7_US]], label [[BB8_US:%.*]], label [[BB21_SPLIT_US_SPLIT]], !prof [[PROF0]] +; CHECK: bb8.us: +; CHECK-NEXT: [[ICMP9_US:%.*]] = icmp ult i64 [[PHI_US]], [[ARG]] +; CHECK-NEXT: br i1 [[ICMP9_US]], label [[BB10_US:%.*]], label [[BB21_SPLIT_US_SPLIT]], !prof [[PROF0]] +; CHECK: bb10.us: +; CHECK-NEXT: [[ICMP11_US:%.*]] = icmp ult i64 [[PHI_US]], [[ARG]] +; CHECK-NEXT: br i1 [[ICMP11_US]], label [[BB20_US]], label [[BB21_SPLIT_US_SPLIT]], !prof [[PROF0]] +; CHECK: bb20.us: +; CHECK-NEXT: br label [[BB1_US]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: bb21.split.us.split: +; CHECK-NEXT: br label [[BB21_SPLIT_US]] +; CHECK: bb21.split.us: +; CHECK-NEXT: br label [[BB21:%.*]] +; CHECK: bb22.split.us: +; CHECK-NEXT: br label [[BB22:%.*]] +; CHECK: bb.split: +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 0, [[BB_SPLIT]] ], [ [[ADD:%.*]], [[BB20:%.*]] ] +; CHECK-NEXT: [[ADD]] = add nuw i64 [[PHI]], 1 +; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[PHI]], [[ARG]] +; CHECK-NEXT: br i1 [[ICMP]], label [[BB2:%.*]], label [[BB21_SPLIT:%.*]], !prof [[PROF0]] +; CHECK: bb2: +; CHECK-NEXT: [[ICMP3:%.*]] = icmp ult i64 [[PHI]], [[ARG]] +; CHECK-NEXT: br label [[BB2_CHECK:%.*]] +; CHECK: bb2.check: +; CHECK-NEXT: br i1 [[ICMP3]], label [[BB4:%.*]], label [[BB21_SPLIT]] +; CHECK: bb4: +; CHECK-NEXT: [[ICMP5:%.*]] = icmp ult i64 [[PHI]], [[ARG]] +; CHECK-NEXT: br i1 [[ICMP5]], label [[BB6:%.*]], label 
[[BB22_SPLIT:%.*]], !prof [[PROF0]] +; CHECK: bb6: +; CHECK-NEXT: [[ICMP7:%.*]] = icmp ult i64 [[PHI]], [[ARG]] +; CHECK-NEXT: br i1 [[ICMP7]], label [[BB8:%.*]], label [[BB21_SPLIT]], !prof [[PROF0]] +; CHECK: bb8: +; CHECK-NEXT: [[ICMP9:%.*]] = icmp ult i64 [[PHI]], [[ARG]] +; CHECK-NEXT: br i1 [[ICMP9]], label [[BB10:%.*]], label [[BB21_SPLIT]], !prof [[PROF0]] +; CHECK: bb10: +; CHECK-NEXT: [[ICMP11:%.*]] = icmp ult i64 [[PHI]], [[ARG]] +; CHECK-NEXT: br i1 [[ICMP11]], label [[BB20]], label [[BB21_SPLIT]], !prof [[PROF0]] +; CHECK: bb20: +; CHECK-NEXT: br label [[BB1]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: bb21.split: +; CHECK-NEXT: br label [[BB21]] +; CHECK: bb21: +; CHECK-NEXT: call void @zot() +; CHECK-NEXT: ret void +; CHECK: bb22.split: +; CHECK-NEXT: br label [[BB22]] +; CHECK: bb22: +; CHECK-NEXT: call void @zot() +; CHECK-NEXT: ret void +; +bb: + br label %bb1 + +bb1: ; preds = %bb20, %bb + %phi = phi i64 [ 0, %bb ], [ %add, %bb20 ] + %add = add nuw i64 %phi, 1 + %icmp = icmp ult i64 %phi, %arg + br i1 %icmp, label %bb2, label %bb21, !prof !0 + +bb2: ; preds = %bb1 + %icmp3 = icmp ult i64 %phi, %arg + br i1 %icmp3, label %bb4, label %bb21, !prof !0 + +bb4: ; preds = %bb2 + %icmp5 = icmp ult i64 %phi, %arg + br i1 %icmp5, label %bb6, label %bb22, !prof !0 + +bb6: ; preds = %bb4 + %icmp7 = icmp ult i64 %phi, %arg + br i1 %icmp7, label %bb8, label %bb21, !prof !0 + +bb8: ; preds = %bb6 + %icmp9 = icmp ult i64 %phi, %arg + br i1 %icmp9, label %bb10, label %bb21, !prof !0 + +bb10: ; preds = %bb8 + %icmp11 = icmp ult i64 %phi, %arg + br i1 %icmp11, label %bb20, label %bb21, !prof !0 + +bb20: ; preds = %bb10 + br label %bb1 + +bb21: ; preds = %bb10, %bb8, %bb6, %bb2, %bb1 + call void @zot() + ret void + +bb22: ; preds = %bb4 + call void @zot() + ret void +} + +declare void @zot() + +!0 = !{!"branch_weights", i32 2000, i32 1} diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll 
b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll index 615f36027450a..536e0c6a0e74a 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true -passes="loop(simple-loop-unswitch),simplifycfg" | FileCheck %s ; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true -passes="loop-mssa(simple-loop-unswitch),simplifycfg" -verify-memoryssa | FileCheck %s @@ -29,13 +29,13 @@ define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr noun ; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[GUARDED:%.*]], label [[COMMON_RET]], !prof [[PROF1]] ; CHECK: guarded: ; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[EL]], [[X]] -; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !llvm.invariant.condition.injection.disabled !0 +; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]] ; CHECK: backedge: ; CHECK-NEXT: [[ARR_PTR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[EL]] ; CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: common.ret: ; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BACKEDGE]] ], [ 0, [[GUARDED_US]] ], [ -1, [[LOOP]] ], [ -1, [[LOOP_US]] ], [ -2, [[GUARDED]] ] ; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] @@ -160,7 +160,7 @@ define i32 @test_01_constants(ptr noundef %p, ptr noundef %arr, ptr noundef %x_p ; 
CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], 1000 -; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: common.ret: ; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BACKEDGE]] ], [ 0, [[GUARDED_US]] ], [ -1, [[LOOP]] ], [ -1, [[LOOP_US]] ] ; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] @@ -210,7 +210,7 @@ define i32 @test_01_neg_degenerate_profile(ptr noundef %p, i32 noundef %n, i32 n ; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[GUARDED:%.*]], label [[COMMON_RET:%.*]], !prof [[PROF1]] ; CHECK: guarded: ; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[EL]], [[X]] -; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !prof [[PROF2:![0-9]+]] +; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !prof [[PROF5:![0-9]+]] ; CHECK: backedge: ; CHECK-NEXT: [[ARR_PTR:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 [[EL]] ; CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4 @@ -267,7 +267,7 @@ define i32 @test_01_neg_cold(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[GUARDED:%.*]], label [[COMMON_RET:%.*]], !prof [[PROF1]] ; CHECK: guarded: ; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[EL]], [[X]] -; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !prof [[PROF3:![0-9]+]] +; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !prof [[PROF6:![0-9]+]] ; CHECK: backedge: ; CHECK-NEXT: [[ARR_PTR:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 [[EL]] ; CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4 @@ -321,10 +321,10 @@ define i32 @test_01_neg_overflowing_metadata(ptr noundef %p, i32 noundef %n, i32 ; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 
[[IV]] ; CHECK-NEXT: [[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4 ; CHECK-NEXT: [[BOUND_CHECK:%.*]] = icmp ult i32 [[EL]], [[LIMIT:%.*]] -; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[GUARDED:%.*]], label [[COMMON_RET:%.*]], !prof [[PROF4:![0-9]+]] +; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[GUARDED:%.*]], label [[COMMON_RET:%.*]], !prof [[PROF7:![0-9]+]] ; CHECK: guarded: ; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[EL]], [[X]] -; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !prof [[PROF4]] +; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !prof [[PROF7]] ; CHECK: backedge: ; CHECK-NEXT: [[ARR_PTR:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 [[EL]] ; CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4 @@ -395,13 +395,13 @@ define i32 @test_02(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr noun ; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[GUARDED:%.*]], label [[COMMON_RET]], !prof [[PROF1]] ; CHECK: guarded: ; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[EL]], [[X]] -; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !llvm.invariant.condition.injection.disabled !0 +; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]] ; CHECK: backedge: ; CHECK-NEXT: [[ARR_PTR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[EL]] ; CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: common.ret: ; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BACKEDGE]] ], [ 0, [[GUARDED_US]] ], [ -1, [[LOOP]] ], [ -1, [[LOOP_US]] ], [ -2, [[GUARDED]] ] ; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] @@ -465,13 +465,13 @@ define i32 @test_02_inverse(ptr noundef %p, i32 
noundef %n, i32 noundef %limit, ; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[GUARDED:%.*]], label [[COMMON_RET]], !prof [[PROF1]] ; CHECK: guarded: ; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp uge i32 [[EL]], [[X]] -; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[COMMON_RET]], label [[BACKEDGE]], !llvm.invariant.condition.injection.disabled !0 +; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[COMMON_RET]], label [[BACKEDGE]] ; CHECK: backedge: ; CHECK-NEXT: [[ARR_PTR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[EL]] ; CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: common.ret: ; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BACKEDGE]] ], [ 0, [[GUARDED_US]] ], [ -1, [[LOOP]] ], [ -1, [[LOOP_US]] ], [ -2, [[GUARDED]] ] ; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] @@ -519,7 +519,7 @@ define i32 @test_03(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr noun ; CHECK-NEXT: [[EL_PTR_US:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[IV_US]] ; CHECK-NEXT: [[EL_US:%.*]] = load i32, ptr [[EL_PTR_US]], align 4 ; CHECK-NEXT: [[BOUND_CHECK_US:%.*]] = icmp slt i32 [[EL_US]], 0 -; CHECK-NEXT: br i1 [[BOUND_CHECK_US]], label [[COMMON_RET:%.*]], label [[GUARDED_US]], !prof [[PROF5:![0-9]+]] +; CHECK-NEXT: br i1 [[BOUND_CHECK_US]], label [[COMMON_RET:%.*]], label [[GUARDED_US]], !prof [[PROF10:![0-9]+]] ; CHECK: guarded.us: ; CHECK-NEXT: [[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[X]] ; CHECK-NEXT: [[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 [[EL_US]] @@ -532,16 +532,16 @@ define i32 @test_03(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr noun ; CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] ; CHECK-NEXT: [[EL:%.*]] = load i32, ptr 
[[EL_PTR]], align 4 ; CHECK-NEXT: [[BOUND_CHECK:%.*]] = icmp slt i32 [[EL]], 0 -; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[COMMON_RET]], label [[GUARDED:%.*]], !prof [[PROF5]] +; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[COMMON_RET]], label [[GUARDED:%.*]], !prof [[PROF10]] ; CHECK: guarded: ; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[EL]], [[X]] -; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !llvm.invariant.condition.injection.disabled !0 +; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]] ; CHECK: backedge: ; CHECK-NEXT: [[ARR_PTR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[EL]] ; CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: common.ret: ; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BACKEDGE]] ], [ 0, [[GUARDED_US]] ], [ -1, [[LOOP]] ], [ -1, [[LOOP_US]] ], [ -2, [[GUARDED]] ] ; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] @@ -589,7 +589,7 @@ define i32 @test_04(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr noun ; CHECK-NEXT: [[EL_PTR_US:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 [[IV_US]] ; CHECK-NEXT: [[EL_US:%.*]] = load i8, ptr [[EL_PTR_US]], align 4 ; CHECK-NEXT: [[BOUND_CHECK_US:%.*]] = icmp slt i8 [[EL_US]], 0 -; CHECK-NEXT: br i1 [[BOUND_CHECK_US]], label [[COMMON_RET:%.*]], label [[GUARDED_US]], !prof [[PROF5]] +; CHECK-NEXT: br i1 [[BOUND_CHECK_US]], label [[COMMON_RET:%.*]], label [[GUARDED_US]], !prof [[PROF10]] ; CHECK: guarded.us: ; CHECK-NEXT: [[EL_WIDE_US:%.*]] = zext i8 [[EL_US]] to i32 ; CHECK-NEXT: [[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_WIDE_US]], [[X]] @@ -603,17 +603,17 @@ define i32 @test_04(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr noun ; 
CHECK-NEXT: [[EL_PTR:%.*]] = getelementptr i8, ptr [[P]], i32 [[IV]] ; CHECK-NEXT: [[EL:%.*]] = load i8, ptr [[EL_PTR]], align 4 ; CHECK-NEXT: [[BOUND_CHECK:%.*]] = icmp slt i8 [[EL]], 0 -; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[COMMON_RET]], label [[GUARDED:%.*]], !prof [[PROF5]] +; CHECK-NEXT: br i1 [[BOUND_CHECK]], label [[COMMON_RET]], label [[GUARDED:%.*]], !prof [[PROF10]] ; CHECK: guarded: ; CHECK-NEXT: [[EL_WIDE:%.*]] = zext i8 [[EL]] to i32 ; CHECK-NEXT: [[RANGE_CHECK:%.*]] = icmp ult i32 [[EL_WIDE]], [[X]] -; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]], !llvm.invariant.condition.injection.disabled !0 +; CHECK-NEXT: br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[COMMON_RET]] ; CHECK: backedge: ; CHECK-NEXT: [[ARR_PTR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[EL_WIDE]] ; CHECK-NEXT: store i32 [[IV]], ptr [[ARR_PTR]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]] +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[COMMON_RET]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: common.ret: ; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[BACKEDGE]] ], [ 0, [[GUARDED_US]] ], [ -1, [[LOOP]] ], [ -1, [[LOOP_US]] ], [ -2, [[GUARDED]] ] ; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] @@ -650,3 +650,18 @@ bound_check_failed: ; preds = %loop range_check_failed: ; preds = %guarded ret i32 -2 } +;. 
+; CHECK: [[META0:![0-9]+]] = !{} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 100, i32 1} +; CHECK: [[LOOP2]] = distinct !{!2, !3} +; CHECK: [[META3:![0-9]+]] = !{!"llvm.loop.unswitch.injection.disable"} +; CHECK: [[LOOP4]] = distinct !{!4, !3} +; CHECK: [[PROF5]] = !{!"branch_weights", i32 0, i32 0} +; CHECK: [[PROF6]] = !{!"branch_weights", i32 2, i32 3} +; CHECK: [[PROF7]] = !{!"branch_weights", i32 -1, i32 -1000} +; CHECK: [[LOOP8]] = distinct !{!8, !3} +; CHECK: [[LOOP9]] = distinct !{!9, !3} +; CHECK: [[PROF10]] = !{!"branch_weights", i32 1, i32 100} +; CHECK: [[LOOP11]] = distinct !{!11, !3} +; CHECK: [[LOOP12]] = distinct !{!12, !3} +;.