diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index deb0dc2d57a86..70b9ff33c5d55 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1124,7 +1124,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
                 const TargetTransformInfo &TTI, AssumptionCache &AC,
                 OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
                 ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
-                bool OnlyWhenForced, bool ForgetAllSCEV,
+                bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV,
                 std::optional<unsigned> ProvidedCount,
                 std::optional<unsigned> ProvidedThreshold,
                 std::optional<bool> ProvidedAllowPartial,
@@ -1304,6 +1304,14 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
     return LoopUnrollResult::Unmodified;
   }
 
+  // LoopFullUnrollPass must never unroll partially or at run time: give up
+  // when Count falls short of the exact trip count or of the max trip count.
+  if (OnlyFullUnroll && (UP.Count < TripCount || UP.Count < MaxTripCount)) {
+    LLVM_DEBUG(
+        dbgs() << "Not attempting partial/runtime unroll in FullLoopUnroll.\n");
+    return LoopUnrollResult::Unmodified;
+  }
+
   // At this point, UP.Runtime indicates that run-time unrolling is allowed.
   // However, we only want to actually perform it if we don't know the trip
   // count and the unroll count doesn't divide the known trip multiple.
@@ -1420,10 +1428,10 @@ class LoopUnroll : public LoopPass { LoopUnrollResult Result = tryToUnrollLoop( L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel, - OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold, - ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, - ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling, - ProvidedFullUnrollMaxCount); + /*OnlyFullUnroll*/ false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount, + ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime, + ProvidedUpperBound, ProvidedAllowPeeling, + ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount); if (Result == LoopUnrollResult::FullyUnrolled) LPM.markLoopAsDeleted(*L); @@ -1497,8 +1505,8 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE, /*BFI*/ nullptr, /*PSI*/ nullptr, - /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced, - ForgetSCEV, /*Count*/ std::nullopt, + /*PreserveLCSSA*/ true, OptLevel, /*OnlyFullUnroll*/ true, + OnlyWhenForced, ForgetSCEV, /*Count*/ std::nullopt, /*Threshold*/ std::nullopt, /*AllowPartial*/ false, /*Runtime*/ false, /*UpperBound*/ false, /*AllowPeeling*/ true, @@ -1623,8 +1631,9 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, // flavors of unrolling during construction time (by setting UnrollOpts). 
LoopUnrollResult Result = tryToUnrollLoop( &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI, - /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced, - UnrollOpts.ForgetSCEV, /*Count*/ std::nullopt, + /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*OnlyFullUnroll*/ false, + UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV, + /*Count*/ std::nullopt, /*Threshold*/ std::nullopt, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling, UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount); diff --git a/llvm/test/Transforms/LoopUnroll/full-unroll-avoid-partial.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-avoid-partial.ll new file mode 100644 index 0000000000000..7f266a754d1bc --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/full-unroll-avoid-partial.ll @@ -0,0 +1,94 @@ +; RUN: opt -S -passes=loop-unroll --debug-only=loop-unroll < %s 2>&1 | FileCheck %s -check-prefix=LOOP-UNROLL +; RUN: opt -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' --debug-only=loop-unroll < %s 2>&1 | FileCheck %s -check-prefix=LOOP-UNROLL-FULL + +; REQUIRES: asserts + +%struct.HIP_vector_type = type { %union.anon } +%union.anon = type { <2 x float> } + + +; LOOP-UNROLL-LABEL: Loop Unroll: F[pragma_unroll] Loop %for.body +; LOOP-UNROLL-NEXT: Loop Size = 9 +; LOOP-UNROLL-NEXT: runtime unrolling with count: 8 +; LOOP-UNROLL-NEXT: Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1 +; LOOP-UNROLL-NEXT: Trying runtime unrolling on Loop: +; LOOP-UNROLL-NEXT: Loop at depth 1 containing: %for.body
+; LOOP-UNROLL-NEXT: Using epilog remainder. +; LOOP-UNROLL-NEXT: UNROLLING loop %for.body by 8 with run-time trip count! + +; LOOP-UNROLL-FULL-LABEL: Loop Unroll: F[pragma_unroll] Loop %for.body +; LOOP-UNROLL-FULL-NEXT: Loop Size = 9 +; LOOP-UNROLL-FULL-NEXT: runtime unrolling with count: 8 +; LOOP-UNROLL-FULL-NEXT: Not attempting partial/runtime unroll in FullLoopUnroll +define void @pragma_unroll(ptr %queue, i32 %num_elements) { +entry: + %cmp5 = icmp sgt i32 %num_elements, 0 + br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body + %i.06 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %add = add nuw nsw i32 %i.06, 1 + %idxprom = zext i32 %add to i64 + %arrayidx = getelementptr inbounds %struct.HIP_vector_type, ptr %queue, i64 %idxprom + %idxprom1 = zext i32 %i.06 to i64 + %arrayidx2 = getelementptr inbounds %struct.HIP_vector_type, ptr %queue, i64 %idxprom1 + %0 = load i64, ptr %arrayidx, align 8 + store i64 %0, ptr %arrayidx2, align 8 + %exitcond = icmp ne i32 %add, %num_elements + br i1 %exitcond, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !1 +} + +; LOOP-UNROLL-LABEL: Loop Unroll: F[pragma_unroll_count1] Loop %for.body +; LOOP-UNROLL-NEXT: Loop Size = 9 +; LOOP-UNROLL-NEXT: Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1 +; LOOP-UNROLL-NEXT: Trying runtime unrolling on Loop: +; LOOP-UNROLL-NEXT: Loop at depth 1 containing: %for.body
+; LOOP-UNROLL-NEXT: Using epilog remainder. +; LOOP-UNROLL-NEXT: UNROLLING loop %for.body by 5 with run-time trip count! + +; LOOP-UNROLL-FULL-LABEL: Loop Unroll: F[pragma_unroll_count1] Loop %for.body +; LOOP-UNROLL-FULL-NEXT: Loop Size = 9 +; LOOP-UNROLL-FULL-NEXT: Not attempting partial/runtime unroll in FullLoopUnroll +define void @pragma_unroll_count1(ptr %queue, i32 %num_elements) { +entry: + %cmp5 = icmp sgt i32 %num_elements, 0 + br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body + %i.06 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ] + %add = add nuw nsw i32 %i.06, 1 + %idxprom = zext i32 %add to i64 + %arrayidx = getelementptr inbounds %struct.HIP_vector_type, ptr %queue, i64 %idxprom + %idxprom1 = zext i32 %i.06 to i64 + %arrayidx2 = getelementptr inbounds %struct.HIP_vector_type, ptr %queue, i64 %idxprom1 + %0 = load i64, ptr %arrayidx, align 8 + store i64 %0, ptr %arrayidx2, align 8 + %exitcond = icmp ne i32 %add, %num_elements + br i1 %exitcond, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !3 +} + +; LOOP-UNROLL: llvm.loop.unroll.disable +; LOOP-UNROLL-FULL: llvm.loop.unroll.enable +!0 = !{!"llvm.loop.unroll.enable"} +!1 = distinct !{!1, !0} + +!2 = !{!"llvm.loop.unroll.count", i32 5} +!3 = distinct !{!3, !2} diff --git a/llvm/test/Transforms/LoopUnroll/revisit.ll b/llvm/test/Transforms/LoopUnroll/revisit.ll index 80a4917b2d7f0..de1f02ac997da 100644 --- a/llvm/test/Transforms/LoopUnroll/revisit.ll +++ b/llvm/test/Transforms/LoopUnroll/revisit.ll @@ -1,17 +1,11 @@ ; This test checks that nested loops are revisited in various scenarios when ; unrolling. 
Note that if we ever start doing outer loop peeling a test case -; for that should be added here that will look essentially like a hybrid of the -; current two cases. +; for that should be added here. ; ; RUN: opt < %s -disable-output -debug-pass-manager 2>&1 \ ; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ ; RUN: | FileCheck %s -; -; Also run in a special mode that visits children. -; RUN: opt < %s -disable-output -debug-pass-manager -unroll-revisit-child-loops 2>&1 \ -; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \ -; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-CHILDREN - +; ; Basic test is fully unrolled and we revisit the post-unroll new sibling ; loops, including the ones that used to be child loops. define void @full_unroll(ptr %ptr) { @@ -76,81 +70,3 @@ l0.latch: exit: ret void } - -; Now we test forced runtime partial unrolling with metadata. Here we end up -; duplicating child loops without changing their structure and so they aren't by -; default visited, but will be visited with a special parameter. 
-define void @partial_unroll(i32 %count, ptr %ptr) { -; CHECK-LABEL: OptimizationRemarkEmitterAnalysis on partial_unroll -; CHECK-NOT: LoopFullUnrollPass - -entry: - br label %l0 - -l0: - %cond.0 = load volatile i1, ptr %ptr - br i1 %cond.0, label %l0.0.ph, label %exit - -l0.0.ph: - br label %l0.0 - -l0.0: - %iv = phi i32 [ %iv.next, %l0.0.latch ], [ 0, %l0.0.ph ] - %iv.next = add i32 %iv, 1 - br label %l0.0.0.ph - -l0.0.0.ph: - br label %l0.0.0 - -l0.0.0: - %cond.0.0.0 = load volatile i1, ptr %ptr - br i1 %cond.0.0.0, label %l0.0.0, label %l0.0.1.ph -; CHECK: LoopFullUnrollPass on l0.0.0 -; CHECK-NOT: LoopFullUnrollPass - -l0.0.1.ph: - br label %l0.0.1 - -l0.0.1: - %cond.0.0.1 = load volatile i1, ptr %ptr - br i1 %cond.0.0.1, label %l0.0.1, label %l0.0.latch -; CHECK: LoopFullUnrollPass on l0.0.1 -; CHECK-NOT: LoopFullUnrollPass - -l0.0.latch: - %cmp = icmp slt i32 %iv.next, %count - br i1 %cmp, label %l0.0, label %l0.latch, !llvm.loop !1 -; CHECK: LoopFullUnrollPass on l0.0 -; CHECK-NOT: LoopFullUnrollPass -; -; Partial unrolling occurs which introduces both new child loops and new sibling -; loops. We only visit the child loops in a special mode, not by default. -; CHECK-CHILDREN: LoopFullUnrollPass on l0.0.0 -; CHECK-CHILDREN-NOT: LoopFullUnrollPass -; CHECK-CHILDREN: LoopFullUnrollPass on l0.0.1 -; CHECK-CHILDREN-NOT: LoopFullUnrollPass -; CHECK-CHILDREN: LoopFullUnrollPass on l0.0.0.1 -; CHECK-CHILDREN-NOT: LoopFullUnrollPass -; CHECK-CHILDREN: LoopFullUnrollPass on l0.0.1.1 -; CHECK-CHILDREN-NOT: LoopFullUnrollPass -; -; When we revisit children, we also revisit the current loop. -; CHECK-CHILDREN: LoopFullUnrollPass on l0.0 -; CHECK-CHILDREN-NOT: LoopFullUnrollPass -; -; Revisit the children of the outer loop that are part of the epilogue. 
-; -; CHECK: LoopFullUnrollPass on l0.0.1.epil -; CHECK-NOT: LoopFullUnrollPass -; CHECK: LoopFullUnrollPass on l0.0.0.epil -; CHECK-NOT: LoopFullUnrollPass -l0.latch: - br label %l0 -; CHECK: LoopFullUnrollPass on l0 -; CHECK-NOT: LoopFullUnrollPass - -exit: - ret void -} -!1 = !{!1, !2} -!2 = !{!"llvm.loop.unroll.count", i32 2}