diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index 6171f3391cb2d..4245441101dc1 100644 --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -160,8 +160,10 @@ class UnrollCostEstimator { const SmallPtrSetImpl &EphValues, unsigned BEInsns); - /// Whether it is legal to unroll this loop. - LLVM_ABI bool canUnroll() const; + /// Whether it is legal to unroll this loop. If \p ORE and \p L are provided, + /// emit an optimization remark on failure. + LLVM_ABI bool canUnroll(OptimizationRemarkEmitter *ORE = nullptr, + const Loop *L = nullptr) const; uint64_t getRolledLoopSize() const { return LoopSize.getValue(); } diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 9fdf7ef1b0a86..7dd38b9da1721 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -711,20 +711,28 @@ UnrollCostEstimator::UnrollCostEstimator( LoopSize = BEInsns + 1; } -bool UnrollCostEstimator::canUnroll() const { +bool UnrollCostEstimator::canUnroll(OptimizationRemarkEmitter *ORE, + const Loop *L) const { + auto ReportCannotUnroll = [&](StringRef Reason) { + LLVM_DEBUG(dbgs().indent(1) << "Not unrolling: " << Reason << ".\n"); + if (ORE && L) + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "CannotUnrollLoop", + L->getStartLoc(), L->getHeader()) + << "unable to unroll loop: " << Reason; + }); + }; + if (Convergence == ConvergenceKind::ExtendedLoop) { - LLVM_DEBUG(dbgs().indent(1) - << "Not unrolling: contains convergent operations.\n"); + ReportCannotUnroll("contains convergent operations"); return false; } if (!LoopSize.isValid()) { - LLVM_DEBUG(dbgs().indent(1) - << "Not unrolling: loop size could not be computed.\n"); + ReportCannotUnroll("loop size could not be computed"); return false; } if (NotDuplicatable) { - LLVM_DEBUG(dbgs().indent(1) - << "Not unrolling: contains non-duplicatable instructions.\n"); + ReportCannotUnroll("contains non-duplicatable instructions"); return false; } return true; @@ -802,7 +810,8 @@ static std::optional shouldPragmaUnroll(Loop *L, const UnrollPragmaInfo &PInfo, const unsigned TripMultiple, const unsigned TripCount, unsigned MaxTripCount, const UnrollCostEstimator UCE, - const TargetTransformInfo::UnrollingPreferences &UP) { + const TargetTransformInfo::UnrollingPreferences &UP, + OptimizationRemarkEmitter *ORE) { // Using unroll pragma // 1st priority is unroll count set by "unroll-count" option. @@ -832,6 +841,15 @@ shouldPragmaUnroll(Loop *L, const UnrollPragmaInfo &PInfo, << "Not unrolling with pragma count " << PInfo.PragmaCount << ": remainder not allowed, count does not divide trip " << "multiple " << TripMultiple << ".\n"); + ORE->emit([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, "PragmaUnrollCountRejected", + L->getStartLoc(), L->getHeader()) + << "may be unable to unroll loop with count " + << ore::NV("PragmaCount", PInfo.PragmaCount) + << ": remainder loop is not allowed and count does not divide " + "trip multiple " + << ore::NV("TripMultiple", TripMultiple); + }); } if (PInfo.PragmaFullUnroll) { @@ -842,6 +860,14 @@ shouldPragmaUnroll(Loop *L, const UnrollPragmaInfo &PInfo, if (TripCount > PragmaUnrollFullMaxIterations) { LLVM_DEBUG(dbgs().indent(2) << "Won't unroll; trip count is too large.\n"); + ORE->emit([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, + "PragmaFullUnrollTripCountTooLarge", + L->getStartLoc(), L->getHeader()) + << "may be unable to fully unroll loop: trip count " + << ore::NV("TripCount", TripCount) << " exceeds limit " + << ore::NV("Limit", PragmaUnrollFullMaxIterations); + }); return std::nullopt; } @@ -851,6 +877,12 @@ shouldPragmaUnroll(Loop *L, const UnrollPragmaInfo &PInfo, } LLVM_DEBUG(dbgs().indent(2) << "Not fully unrolling: unknown trip count.\n"); + ORE->emit([&]() { + return OptimizationRemarkAnalysis(DEBUG_TYPE, + "PragmaFullUnrollUnknownTripCount", + L->getStartLoc(), L->getHeader()) + << "may be unable to fully unroll loop: trip count is unknown"; + }); } if (PInfo.PragmaEnableUnroll && !TripCount && MaxTripCount && @@ -1031,7 +1063,7 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, // 2nd priority is unroll count set by pragma. LLVM_DEBUG(dbgs().indent(1) << "Trying pragma unroll...\n"); if (auto UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, - MaxTripCount, UCE, UP)) { + MaxTripCount, UCE, UP, ORE)) { UP.Count = *UnrollFactor; if (PInfo.UserUnrollCount || (PInfo.PragmaCount > 0)) { @@ -1108,43 +1140,10 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, LLVM_DEBUG(dbgs().indent(1) << "Trying partial unroll...\n"); if (auto UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP)) { UP.Count = *UnrollFactor; - - if ((PInfo.PragmaFullUnroll || PInfo.PragmaEnableUnroll) && TripCount && - UP.Count != TripCount) - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, - "FullUnrollAsDirectedTooLarge", - L->getStartLoc(), L->getHeader()) - << "unable to fully unroll loop as directed by unroll metadata " - "because unrolled size is too large"; - }); - - if (UP.PartialThreshold != NoThreshold) { - if (UP.Count == 0) { - if (PInfo.PragmaEnableUnroll) - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, - "UnrollAsDirectedTooLarge", - L->getStartLoc(), L->getHeader()) - << "unable to unroll loop as directed by " - "llvm.loop.unroll.enable metadata because unrolled size " - "is too large"; - }); - } - } return; } assert(TripCount == 0 && "All cases when TripCount is constant should be covered here."); - if (PInfo.PragmaFullUnroll) - ORE->emit([&]() { - return OptimizationRemarkMissed( - DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount", - L->getStartLoc(), L->getHeader()) - << "unable to fully unroll loop as directed by " - "llvm.loop.unroll.full metadata because loop has a runtime " - "trip count"; - }); // 7th priority is runtime unrolling. LLVM_DEBUG(dbgs().indent(1) << "Trying runtime unroll...\n"); @@ -1206,22 +1205,6 @@ void llvm::computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, "multiple, " << TripMultiple << ". Reducing unroll count from " << OrigCount << " to " << UP.Count << ".\n"); - - using namespace ore; - - if (PInfo.PragmaCount > 0 && !UP.AllowRemainder) - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, - "DifferentUnrollCountFromDirected", - L->getStartLoc(), L->getHeader()) - << "Unable to unroll loop the number of times directed by " - "llvm.loop.unroll.count metadata because remainder loop is " - "restricted (that could be architecture specific or because " - "the loop contains a convergent instruction) and so must " - "have an unroll count that divides the loop trip multiple of " - << NV("TripMultiple", TripMultiple) << ". Unrolling instead " - << NV("UnrollCount", UP.Count) << " time(s)."; - }); } if (UP.Count > UP.MaxCount) @@ -1291,6 +1274,13 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, if (!L->isLoopSimplifyForm()) { LLVM_DEBUG(dbgs().indent(1) << "Not unrolling loop which is not in loop-simplify form.\n"); + if (TM & TM_ForcedByUser) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NotInLoopSimplifyForm", + L->getStartLoc(), L->getHeader()) + << "unable to unroll loop: not in loop-simplify form"; + }); + } return LoopUnrollResult::Unmodified; } @@ -1316,6 +1306,13 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0) && !OptForSize) { LLVM_DEBUG(dbgs().indent(1) << "Not unrolling: all thresholds are zero.\n"); + if (TM & TM_ForcedByUser) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnrollThresholdsZero", + L->getStartLoc(), L->getHeader()) + << "unable to unroll loop: unroll threshold is zero"; + }); + } return LoopUnrollResult::Unmodified; } @@ -1323,7 +1320,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, CodeMetrics::collectEphemeralValues(L, &AC, EphValues); UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns); - if (!UCE.canUnroll()) + if (!UCE.canUnroll((TM & TM_ForcedByUser) ? &ORE : nullptr, L)) return LoopUnrollResult::Unmodified; unsigned LoopSize = UCE.getRolledLoopSize(); @@ -1337,6 +1334,14 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, if (UCE.NumInlineCandidates != 0) { LLVM_DEBUG(dbgs().indent(1) << "Not unrolling loop with inlinable calls.\n"); + if (TM & TM_ForcedByUser) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, + "InlineCandidatesPreventUnroll", + L->getStartLoc(), L->getHeader()) + << "unable to unroll loop: contains inlinable calls"; + }); + } return LoopUnrollResult::Unmodified; } @@ -1390,6 +1395,13 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, if (!UP.Count) { LLVM_DEBUG(dbgs().indent(1) << "Not unrolling: no viable strategy found.\n"); + if (TM & TM_ForcedByUser) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "NoUnrollStrategy", + L->getStartLoc(), L->getHeader()) + << "unable to unroll loop: no viable unroll count found"; + }); + } return LoopUnrollResult::Unmodified; } @@ -1435,6 +1447,8 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, // Save loop properties before it is transformed. MDNode *OrigLoopID = L->getLoopID(); UnrollPragmaInfo PInfo(L); + DebugLoc LoopStartLoc = L->getStartLoc(); + BasicBlock *LoopHeader = L->getHeader(); // Unroll the loop. Loop *RemainderLoop = nullptr; @@ -1451,8 +1465,37 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, ULO.AddAdditionalAccumulators = UP.AddAdditionalAccumulators; LoopUnrollResult UnrollResult = UnrollLoop( L, ULO, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop, AA); - if (UnrollResult == LoopUnrollResult::Unmodified) + if (UnrollResult == LoopUnrollResult::Unmodified) { + if (PInfo.ExplicitUnroll) { + LLVM_DEBUG(dbgs().indent(1) + << "Failed to unroll loop as explicitly requested.\n"); + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "FailedToUnrollAsRequested", + LoopStartLoc, LoopHeader) + << "failed to unroll loop as explicitly requested"; + }); + } return LoopUnrollResult::Unmodified; + } + + if (PInfo.PragmaFullUnroll && ULO.Count != TripCount) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "FullUnrollAsDirectedFailed", + LoopStartLoc, LoopHeader) + << "unable to fully unroll loop as directed; " + << "unrolled by factor " << ore::NV("UnrollCount", ULO.Count); + }); + } + if (PInfo.PragmaCount > 0 && ULO.Count != PInfo.PragmaCount) { + ORE.emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, "UnrollCountDiffers", + LoopStartLoc, LoopHeader) + << "unable to unroll loop with requested count " + << ore::NV("RequestedCount", PInfo.PragmaCount) + << "; unrolled by factor " + << ore::NV("UnrollCount", ULO.Count); + }); + } if (RemainderLoop) { std::optional RemainderLoopID = diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index ab35d217f0d93..d11ae25aba3d0 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -620,11 +620,15 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, << NV("UnrollCount", ULO.Count) << " iterations"; }); } else { - LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " - << ULO.Count); - if (ULO.Runtime) - LLVM_DEBUG(dbgs() << " with run-time trip count"); - LLVM_DEBUG(dbgs() << "!\n"); + LLVM_DEBUG({ + dbgs() << "UNROLLING loop %" << Header->getName() << " by " << ULO.Count; + if (ULO.Runtime) { + dbgs() << " with run-time trip count"; + if (ULO.UnrollRemainder) + dbgs() << " (remainder unrolled)"; + } + dbgs() << "!\n"; + }); if (ORE) ORE->emit([&]() { @@ -632,7 +636,8 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, L->getHeader()); Diag << "unrolled loop by a factor of " << NV("UnrollCount", ULO.Count); if (ULO.Runtime) - Diag << " with run-time trip count"; + Diag << " with run-time trip count" + << (ULO.UnrollRemainder ? " (remainder unrolled)" : ""); return Diag; }); } diff --git a/llvm/test/Transforms/LoopUnroll/debug.ll b/llvm/test/Transforms/LoopUnroll/debug-and-remarks.ll similarity index 83% rename from llvm/test/Transforms/LoopUnroll/debug.ll rename to llvm/test/Transforms/LoopUnroll/debug-and-remarks.ll index 6b611952c0935..41cd79f9873d0 100644 --- a/llvm/test/Transforms/LoopUnroll/debug.ll +++ b/llvm/test/Transforms/LoopUnroll/debug-and-remarks.ll @@ -1,12 +1,33 @@ -; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace -; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-allow-partial < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PARTIAL-ALLOW -; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-count=4 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=USER-COUNT -; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-count=9999 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=USER-COUNT-EXCEED -; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-peel-count=2 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=EXPLICIT-PEEL -; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-threshold=0 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=ZERO-THRESH -; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-full-max-count=2 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=MAX-COUNT -; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-allow-partial -unroll-partial-threshold=4 < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PARTIAL-NOPROFIT -; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -unroll-allow-remainder=false < %s 2>&1 | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PRAGMA-NOREMAINDER +; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \ +; RUN: -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll < %s 2>&1 \ +; RUN: | FileCheck %s --match-full-lines --strict-whitespace +; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \ +; RUN: -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-allow-partial < %s 2>&1 \ +; RUN: | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PARTIAL-ALLOW +; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \ +; RUN: -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-count=4 < %s 2>&1 \ +; RUN: | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=USER-COUNT +; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \ +; RUN: -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-count=9999 < %s 2>&1 \ +; RUN: | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=USER-COUNT-EXCEED +; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \ +; RUN: -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-peel-count=2 < %s 2>&1 \ +; RUN: | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=EXPLICIT-PEEL +; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \ +; RUN: -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-threshold=0 < %s 2>&1 \ +; RUN: | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=ZERO-THRESH +; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \ +; RUN: -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-full-max-count=2 < %s 2>&1 \ +; RUN: | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=MAX-COUNT +; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \ +; RUN: -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-allow-partial -unroll-partial-threshold=4 < %s 2>&1 \ +; RUN: | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PARTIAL-NOPROFIT +; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \ +; RUN: -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-allow-remainder=false < %s 2>&1 \ +; RUN: | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=PRAGMA-NOREMAINDER +; RUN: opt -disable-output -passes=loop-unroll -debug-only=loop-unroll -pass-remarks=loop-unroll \ +; RUN: -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -unroll-remainder < %s 2>&1 \ +; RUN: | FileCheck %s --match-full-lines --strict-whitespace --check-prefix=REMAINDER ; REQUIRES: asserts @@ -16,6 +37,7 @@ ; CHECK-NEXT: Explicit unroll requested: pragma-full ; CHECK-NEXT: Trying pragma unroll... ; CHECK-NEXT: Not fully unrolling: unknown trip count. +; CHECK-NEXT:remark: :0:0: may be unable to fully unroll loop: trip count is unknown ; CHECK-NEXT: Trying full unroll... ; CHECK-NEXT: Trying upper-bound unroll... ; CHECK-NEXT: Trying loop peeling... @@ -23,6 +45,7 @@ ; CHECK-NEXT: Trying runtime unroll... ; CHECK-NEXT: Will not try to unroll loop with runtime trip count because -unroll-runtime not given ; CHECK-NEXT: Not unrolling: no viable strategy found. +; CHECK-NEXT:remark: :0:0: unable to unroll loop: no viable unroll count found define i32 @pragma_full_unroll_unknown_tc(ptr %A, i32 %n) { entry: @@ -77,6 +100,7 @@ exit: ; CHECK-LABEL:Loop Unroll: F[extended_convergence] Loop %for.body (depth=1) ; CHECK-NEXT: Not unrolling: contains convergent operations. +; CHECK-NEXT:remark: :0:0: unable to unroll loop: contains convergent operations declare void @convergent_func() convergent declare token @llvm.experimental.convergence.anchor() @@ -103,6 +127,7 @@ exit: ; CHECK-LABEL:Loop Unroll: F[noduplicate_prevents_unroll] Loop %for.body (depth=1) ; CHECK-NEXT: Not unrolling: contains non-duplicatable instructions. +; CHECK-NEXT:remark: :0:0: unable to unroll loop: contains non-duplicatable instructions declare void @noduplicate_func() noduplicate @@ -127,6 +152,7 @@ exit: ; CHECK-LABEL:Loop Unroll: F[indirectbr_loop] Loop %for.body (depth=1) ; CHECK-NEXT: Not unrolling loop which is not in loop-simplify form. +; CHECK-NEXT:remark: :0:0: unable to unroll loop: not in loop-simplify form define i32 @indirectbr_loop(ptr %A, ptr %target) { entry: @@ -150,6 +176,7 @@ exit: ; CHECK-LABEL:Loop Unroll: F[inline_prevents_unroll] Loop %for.body (depth=1) ; CHECK-NEXT:Loop Size = 8 ; CHECK-NEXT: Not unrolling loop with inlinable calls. +; CHECK-NEXT:remark: :0:0: unable to unroll loop: contains inlinable calls define internal i32 @single_use_helper(i32 %x) { %add = add i32 %x, 42 @@ -197,6 +224,7 @@ exit: ; CHECK-NEXT: Profitable after cost analysis. ; CHECK-NEXT: Exiting block %for.body: TripCount=10, TripMultiple=0, BreakoutTrip=0 ; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 10! +; CHECK-NEXT:remark: :0:0: completely unrolled loop with 10 iterations define i32 @full_unroll_profitability_analysis(ptr %A, ptr %B) { entry: @@ -296,6 +324,7 @@ exit: ; CHECK-NEXT: Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}. ; CHECK-NEXT: Exiting block %for.body: TripCount=4, TripMultiple=0, BreakoutTrip=0 ; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 4! +; CHECK-NEXT:remark: :0:0: completely unrolled loop with 4 iterations define i32 @full_unroll_size_under_threshold(ptr %A) { entry: @@ -323,6 +352,7 @@ exit: ; CHECK-NEXT: Fully unrolling with trip count: 6. ; CHECK-NEXT: Exiting block %for.body: TripCount=6, TripMultiple=0, BreakoutTrip=0 ; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 6! +; CHECK-NEXT:remark: :0:0: completely unrolled loop with 6 iterations define i32 @pragma_full_known_tc(ptr %A) { entry: @@ -350,6 +380,7 @@ exit: ; CHECK-NEXT: Unrolling with pragma count: 3. ; CHECK-NEXT: Exiting block %for.body: TripCount=12, TripMultiple=0, BreakoutTrip=0 ; CHECK-NEXT:UNROLLING loop %for.body by 3! +; CHECK-NEXT:remark: :0:0: unrolled loop by a factor of 3 define i32 @pragma_count_unroll(ptr %A) { entry: @@ -430,6 +461,7 @@ exit: ; CHECK-NEXT: Unrolling with max trip count: 3. ; CHECK-NEXT: Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1 ; CHECK-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 3! +; CHECK-NEXT:remark: :0:0: completely unrolled loop with 3 iterations define i32 @upper_bound_unroll(ptr %A, i32 %n) { entry: @@ -526,6 +558,7 @@ exit: ; CHECK-NEXT: Trying loop peeling... ; CHECK-NEXT: Peeling with count: 1. ; CHECK-NEXT:PEELING loop %for.header with iteration count 1! +; CHECK-NEXT:remark: :0:0: peeled loop by 1 iterations declare void @foo() @@ -567,6 +600,22 @@ exit: ; CHECK-NEXT: Runtime unrolling with count: 8 ; CHECK-NEXT: Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1 ; CHECK:UNROLLING loop %for.body by 8 with run-time trip count! +; CHECK-NEXT:remark: :0:0: unrolled loop by a factor of 8 with run-time trip count +; +; REMAINDER-LABEL:Loop Unroll: F[runtime_unroll_simple] Loop %for.body (depth=1) +; REMAINDER-NEXT:Loop Size = 6 +; REMAINDER-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1 +; REMAINDER-NEXT: Explicit unroll requested: pragma-enable +; REMAINDER-NEXT: Trying pragma unroll... +; REMAINDER-NEXT: Trying full unroll... +; REMAINDER-NEXT: Trying upper-bound unroll... +; REMAINDER-NEXT: Trying loop peeling... +; REMAINDER-NEXT: Trying partial unroll... +; REMAINDER-NEXT: Trying runtime unroll... +; REMAINDER-NEXT: Runtime unrolling with count: 8 +; REMAINDER-NEXT: Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1 +; REMAINDER:UNROLLING loop %for.body by 8 with run-time trip count (remainder unrolled)! +; REMAINDER-NEXT:remark: :0:0: unrolled loop by a factor of 8 with run-time trip count (remainder unrolled) define i32 @runtime_unroll_simple(ptr %A, i32 %n) { entry: @@ -602,6 +651,7 @@ exit: ; PARTIAL-ALLOW-NEXT: Partially unrolling with count: {{[0-9]+}} ; PARTIAL-ALLOW-NEXT: Exiting block %for.body: TripCount=200, TripMultiple=0, BreakoutTrip=0 ; PARTIAL-ALLOW-NEXT:UNROLLING loop %for.body by {{[0-9]+}}! +; PARTIAL-ALLOW-NEXT:remark: :0:0: unrolled loop by a factor of {{[0-9]+}} define i32 @partial_unroll_cost_analysis(ptr %A) { entry: @@ -627,6 +677,7 @@ exit: ; CHECK-NEXT: Explicit unroll requested: pragma-full ; CHECK-NEXT: Trying pragma unroll... ; CHECK-NEXT: Won't unroll; trip count is too large. +; CHECK-NEXT:remark: :0:0: may be unable to fully unroll loop: trip count 1000001 exceeds limit 1000000 ; CHECK-NEXT: Trying full unroll... ; CHECK-NEXT: Unrolled size {{[0-9]+}} exceeds threshold {{[0-9]+}}; checking for cost benefit. ; CHECK-NEXT: Not analyzing loop cost: trip count too large. @@ -637,6 +688,8 @@ exit: ; CHECK-NEXT: Partially unrolling with count: {{[0-9]+}} ; CHECK-NEXT: Exiting block %for.body: TripCount=1000001, TripMultiple=0, BreakoutTrip=0 ; CHECK-NEXT:UNROLLING loop %for.body by {{[0-9]+}}! +; CHECK-NEXT:remark: :0:0: unrolled loop by a factor of {{[0-9]+}} +; CHECK-NEXT:remark: :0:0: unable to fully unroll loop as directed; unrolled by factor {{[0-9]+}} define i32 @pragma_full_tc_too_large(ptr %A) { entry: @@ -729,6 +782,7 @@ exit: ; USER-COUNT-NEXT: Unrolling with user-specified count: 4. ; USER-COUNT-NEXT: Exiting block %for.body: TripCount=12, TripMultiple=0, BreakoutTrip=0 ; USER-COUNT-NEXT:UNROLLING loop %for.body by 4! +; USER-COUNT-NEXT:remark: :0:0: unrolled loop by a factor of 4 ; ; USER-COUNT-EXCEED-LABEL:Loop Unroll: F[user_count_unroll] Loop %for.body (depth=1) ; USER-COUNT-EXCEED-NEXT:Loop Size = 6 @@ -740,6 +794,7 @@ exit: ; USER-COUNT-EXCEED-NEXT: Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}. ; USER-COUNT-EXCEED-NEXT: Exiting block %for.body: TripCount=12, TripMultiple=0, BreakoutTrip=0 ; USER-COUNT-EXCEED-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 12! +; USER-COUNT-EXCEED-NEXT:remark: :0:0: completely unrolled loop with 12 iterations define i32 @user_count_unroll(ptr %A) { entry: @@ -764,6 +819,7 @@ exit: ; EXPLICIT-PEEL-NEXT: Computing unroll count: TripCount=0, MaxTripCount=2147483647, TripMultiple=1 ; EXPLICIT-PEEL-NEXT: Using explicit peel count: 2. ; EXPLICIT-PEEL-NEXT:PEELING loop %for.body with iteration count 2! +; EXPLICIT-PEEL-NEXT:remark: :0:0: peeled loop by 2 iterations define i32 @explicit_peel_count(ptr %A, i32 %n) { entry: @@ -787,6 +843,7 @@ exit: ; ZERO-THRESH-LABEL:Loop Unroll: F[zero_thresh_unroll] Loop %for.body (depth=1) ; ZERO-THRESH-NEXT: Not unrolling: all thresholds are zero. +; ZERO-THRESH-NEXT:remark: :0:0: unable to unroll loop: unroll threshold is zero define i32 @zero_thresh_unroll(ptr %A) { entry: @@ -800,7 +857,7 @@ for.body: %add = add i32 %sum, %load %inc = add i32 %i, 1 %cmp = icmp ult i32 %inc, 8 - br i1 %cmp, label %for.body, label %exit + br i1 %cmp, label %for.body, label %exit, !llvm.loop !16 exit: ret i32 %add @@ -875,10 +932,13 @@ exit: ; PRAGMA-NOREMAINDER-NEXT: Explicit unroll requested: pragma-count(3) ; PRAGMA-NOREMAINDER-NEXT: Trying pragma unroll... ; PRAGMA-NOREMAINDER-NEXT: Not unrolling with pragma count 3: remainder not allowed, count does not divide trip multiple 10. +; PRAGMA-NOREMAINDER-NEXT:remark: :0:0: may be unable to unroll loop with count 3: remainder loop is not allowed and count does not divide trip multiple 10 ; PRAGMA-NOREMAINDER-NEXT: Trying full unroll... ; PRAGMA-NOREMAINDER-NEXT: Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}. ; PRAGMA-NOREMAINDER-NEXT: Exiting block %for.body: TripCount=10, TripMultiple=0, BreakoutTrip=0 ; PRAGMA-NOREMAINDER-NEXT:COMPLETELY UNROLLING loop %for.body with trip count 10! +; PRAGMA-NOREMAINDER-NEXT:remark: :0:0: completely unrolled loop with 10 iterations +; PRAGMA-NOREMAINDER-NEXT:remark: :0:0: unable to unroll loop with requested count 3; unrolled by factor 10 define i32 @pragma_count_no_remainder(ptr %A) { entry: @@ -898,6 +958,36 @@ exit: ret i32 %add } +; CHECK-LABEL:Loop Unroll: F[header_address_taken] Loop %for.body (depth=1) +; CHECK-NEXT:Loop Size = 6 +; CHECK-NEXT: Computing unroll count: TripCount=4, MaxTripCount=0, TripMultiple=4 +; CHECK-NEXT: Explicit unroll requested: pragma-enable +; CHECK-NEXT: Trying pragma unroll... +; CHECK-NEXT: Trying full unroll... +; CHECK-NEXT: Unrolling: size {{[0-9]+}} < threshold {{[0-9]+}}. +; CHECK-NEXT: Won't unroll loop: address of header block is taken. +; CHECK-NEXT: Failed to unroll loop as explicitly requested. +; CHECK-NEXT:remark: :0:0: failed to unroll loop as explicitly requested + +define i32 @header_address_taken(ptr %A) { +entry: + store ptr blockaddress(@header_address_taken, %for.body), ptr %A + br label %for.body + +for.body: + %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %sum = phi i32 [ 0, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i + %load = load i32, ptr %arrayidx + %add = add i32 %sum, %load + %inc = add i32 %i, 1 + %cmp = icmp ult i32 %inc, 4 + br i1 %cmp, label %for.body, label %exit, !llvm.loop !15 + +exit: + ret i32 %add +} + !0 = distinct !{!0, !3} !1 = distinct !{!1, !4} !2 = distinct !{!2, !4} @@ -913,3 +1003,5 @@ exit: !12 = distinct !{!12, !3} !13 = distinct !{!13, !4} !14 = distinct !{!14, !6} +!15 = distinct !{!15, !4} +!16 = distinct !{!16, !4}