Skip to content

Commit

Permalink
[LoopUnroll] Prevent LoopFullUnrollPass to perform partial/runtime unrolling
Browse files Browse the repository at this point in the history

LoopFullUnrollPass was performing runtime unrolling in certain cases when
'#pragma unroll' was specified. This patch fixes that by introducing a new
parameter to tryToUnrollLoop() that differentiates between LoopUnrollPass and
LoopFullUnrollPass. It is based on the discussion here:
(https://discourse.llvm.org/t/loop-unroller-fails-to-unroll-loop/69834)

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D148071
  • Loading branch information
Yashwant Singh authored and Yashwant Singh committed Apr 13, 2023
1 parent 18bfc92 commit aea2a14
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 95 deletions.
27 changes: 18 additions & 9 deletions llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1124,7 +1124,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
const TargetTransformInfo &TTI, AssumptionCache &AC,
OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
bool OnlyWhenForced, bool ForgetAllSCEV,
bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV,
std::optional<unsigned> ProvidedCount,
std::optional<unsigned> ProvidedThreshold,
std::optional<bool> ProvidedAllowPartial,
Expand All @@ -1133,6 +1133,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
std::optional<bool> ProvidedAllowPeeling,
std::optional<bool> ProvidedAllowProfileBasedPeeling,
std::optional<unsigned> ProvidedFullUnrollMaxCount) {

LLVM_DEBUG(dbgs() << "Loop Unroll: F["
<< L->getHeader()->getParent()->getName() << "] Loop %"
<< L->getHeader()->getName() << "\n");
Expand Down Expand Up @@ -1304,6 +1305,13 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
return LoopUnrollResult::Unmodified;
}

// Do not attempt partial/runtime unrolling in FullLoopUnrolling
if (OnlyFullUnroll && !(UP.Count >= MaxTripCount)) {
LLVM_DEBUG(
dbgs() << "Not attempting partial/runtime unroll in FullLoopUnroll.\n");
return LoopUnrollResult::Unmodified;
}

// At this point, UP.Runtime indicates that run-time unrolling is allowed.
// However, we only want to actually perform it if we don't know the trip
// count and the unroll count doesn't divide the known trip multiple.
Expand Down Expand Up @@ -1420,10 +1428,10 @@ class LoopUnroll : public LoopPass {

LoopUnrollResult Result = tryToUnrollLoop(
L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold,
ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
ProvidedFullUnrollMaxCount);
/*OnlyFullUnroll*/ false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount,
ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
ProvidedUpperBound, ProvidedAllowPeeling,
ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount);

if (Result == LoopUnrollResult::FullyUnrolled)
LPM.markLoopAsDeleted(*L);
Expand Down Expand Up @@ -1497,8 +1505,8 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
bool Changed =
tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE,
/*BFI*/ nullptr, /*PSI*/ nullptr,
/*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
ForgetSCEV, /*Count*/ std::nullopt,
/*PreserveLCSSA*/ true, OptLevel, /*OnlyFullUnroll*/ true,
OnlyWhenForced, ForgetSCEV, /*Count*/ std::nullopt,
/*Threshold*/ std::nullopt, /*AllowPartial*/ false,
/*Runtime*/ false, /*UpperBound*/ false,
/*AllowPeeling*/ true,
Expand Down Expand Up @@ -1623,8 +1631,9 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
// flavors of unrolling during construction time (by setting UnrollOpts).
LoopUnrollResult Result = tryToUnrollLoop(
&L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
/*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
UnrollOpts.ForgetSCEV, /*Count*/ std::nullopt,
/*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*OnlyFullUnroll*/ false,
UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV,
/*Count*/ std::nullopt,
/*Threshold*/ std::nullopt, UnrollOpts.AllowPartial,
UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling,
UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount);
Expand Down
94 changes: 94 additions & 0 deletions llvm/test/Transforms/LoopUnroll/full-unroll-avoid-partial.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
; RUN: opt -S -passes=loop-unroll --debug-only=loop-unroll < %s 2>&1 | FileCheck %s -check-prefix=LOOP-UNROLL
; RUN: opt -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' --debug-only=loop-unroll < %s 2>&1 | FileCheck %s -check-prefix=LOOP-UNROLL-FULL

; REQUIRES: asserts

%struct.HIP_vector_type = type { %union.anon }
%union.anon = type { <2 x float> }


; LOOP-UNROLL-LABEL: Loop Unroll: F[pragma_unroll] Loop %for.body
; LOOP-UNROLL-NEXT: Loop Size = 9
; LOOP-UNROLL-NEXT: runtime unrolling with count: 8
; LOOP-UNROLL-NEXT: Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1
; LOOP-UNROLL-NEXT: Trying runtime unrolling on Loop:
; LOOP-UNROLL-NEXT: Loop at depth 1 containing: %for.body<header><latch><exiting>
; LOOP-UNROLL-NEXT: Using epilog remainder.
; LOOP-UNROLL-NEXT: UNROLLING loop %for.body by 8 with run-time trip count!

; LOOP-UNROLL-FULL-LABEL: Loop Unroll: F[pragma_unroll] Loop %for.body
; LOOP-UNROLL-FULL-NEXT: Loop Size = 9
; LOOP-UNROLL-FULL-NEXT: runtime unrolling with count: 8
; LOOP-UNROLL-FULL-NEXT: Not attempting partial/runtime unroll in FullLoopUnroll
; Test input: a single-block loop whose trip count is the runtime value
; %num_elements. Each iteration loads an i64 from element i+1 of %queue and
; stores it to element i. The back edge carries !llvm.loop !1, which is defined
; at the end of this file as llvm.loop.unroll.enable (the function name suggests
; this corresponds to a bare '#pragma unroll' with no explicit count).
;
; The checks preceding this function expect the plain loop-unroll run to
; runtime-unroll the loop by 8, and the loop-unroll-full run to stop with the
; "Not attempting partial/runtime unroll" debug message instead.
define void @pragma_unroll(ptr %queue, i32 %num_elements) {
entry:
; Skip the loop entirely unless %num_elements is strictly positive (signed).
%cmp5 = icmp sgt i32 %num_elements, 0
br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader: ; preds = %entry
br label %for.body

for.cond.cleanup.loopexit: ; preds = %for.body
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
ret void

for.body: ; preds = %for.body.preheader, %for.body
; %i.06 is the induction variable: 0 on entry, %add (i + 1) on the back edge.
%i.06 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
%add = add nuw nsw i32 %i.06, 1
; Address of element i+1 (source) and element i (destination) of %queue.
%idxprom = zext i32 %add to i64
%arrayidx = getelementptr inbounds %struct.HIP_vector_type, ptr %queue, i64 %idxprom
%idxprom1 = zext i32 %i.06 to i64
%arrayidx2 = getelementptr inbounds %struct.HIP_vector_type, ptr %queue, i64 %idxprom1
%0 = load i64, ptr %arrayidx, align 8
store i64 %0, ptr %arrayidx2, align 8
; Keep looping while i+1 != %num_elements; !1 requests unrolling of this loop.
%exitcond = icmp ne i32 %add, %num_elements
br i1 %exitcond, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !1
}

; LOOP-UNROLL-LABEL: Loop Unroll: F[pragma_unroll_count1] Loop %for.body
; LOOP-UNROLL-NEXT: Loop Size = 9
; LOOP-UNROLL-NEXT: Exiting block %for.body: TripCount=0, TripMultiple=1, BreakoutTrip=1
; LOOP-UNROLL-NEXT: Trying runtime unrolling on Loop:
; LOOP-UNROLL-NEXT: Loop at depth 1 containing: %for.body<header><latch><exiting>
; LOOP-UNROLL-NEXT: Using epilog remainder.
; LOOP-UNROLL-NEXT: UNROLLING loop %for.body by 5 with run-time trip count!

; LOOP-UNROLL-FULL-LABEL: Loop Unroll: F[pragma_unroll_count1] Loop %for.body
; LOOP-UNROLL-FULL-NEXT: Loop Size = 9
; LOOP-UNROLL-FULL-NEXT: Not attempting partial/runtime unroll in FullLoopUnroll
; Test input: the same single-block copy loop as @pragma_unroll (loads an i64
; from element i+1 of %queue and stores it to element i, with a runtime trip
; count of %num_elements), but the back edge carries !llvm.loop !3, which is
; defined at the end of this file as llvm.loop.unroll.count with a count of 5.
;
; The checks preceding this function expect the plain loop-unroll run to unroll
; the loop by 5 with a run-time trip count, and the loop-unroll-full run to stop
; with the "Not attempting partial/runtime unroll" debug message instead.
define void @pragma_unroll_count1(ptr %queue, i32 %num_elements) {
entry:
; Skip the loop entirely unless %num_elements is strictly positive (signed).
%cmp5 = icmp sgt i32 %num_elements, 0
br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader: ; preds = %entry
br label %for.body

for.cond.cleanup.loopexit: ; preds = %for.body
br label %for.cond.cleanup

for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
ret void

for.body: ; preds = %for.body.preheader, %for.body
; %i.06 is the induction variable: 0 on entry, %add (i + 1) on the back edge.
%i.06 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
%add = add nuw nsw i32 %i.06, 1
; Address of element i+1 (source) and element i (destination) of %queue.
%idxprom = zext i32 %add to i64
%arrayidx = getelementptr inbounds %struct.HIP_vector_type, ptr %queue, i64 %idxprom
%idxprom1 = zext i32 %i.06 to i64
%arrayidx2 = getelementptr inbounds %struct.HIP_vector_type, ptr %queue, i64 %idxprom1
%0 = load i64, ptr %arrayidx, align 8
store i64 %0, ptr %arrayidx2, align 8
; Keep looping while i+1 != %num_elements; !3 requests an unroll count of 5.
%exitcond = icmp ne i32 %add, %num_elements
br i1 %exitcond, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !3
}

; LOOP-UNROLL: llvm.loop.unroll.disable
; LOOP-UNROLL-FULL: llvm.loop.unroll.enable
!0 = !{!"llvm.loop.unroll.enable"}
!1 = distinct !{!1, !0}

!2 = !{!"llvm.loop.unroll.count", i32 5}
!3 = distinct !{!3, !2}
88 changes: 2 additions & 86 deletions llvm/test/Transforms/LoopUnroll/revisit.ll
Original file line number Diff line number Diff line change
@@ -1,17 +1,11 @@
; This test checks that nested loops are revisited in various scenarios when
; unrolling. Note that if we ever start doing outer loop peeling a test case
; for that should be added here that will look essentially like a hybrid of the
; current two cases.
; for that should be added here.
;
; RUN: opt < %s -disable-output -debug-pass-manager 2>&1 \
; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
; RUN: | FileCheck %s
;
; Also run in a special mode that visits children.
; RUN: opt < %s -disable-output -debug-pass-manager -unroll-revisit-child-loops 2>&1 \
; RUN: -passes='require<opt-remark-emit>,loop(loop-unroll-full)' \
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-CHILDREN

;
; Basic test is fully unrolled and we revisit the post-unroll new sibling
; loops, including the ones that used to be child loops.
define void @full_unroll(ptr %ptr) {
Expand Down Expand Up @@ -76,81 +70,3 @@ l0.latch:
exit:
ret void
}

; Now we test forced runtime partial unrolling with metadata. Here we end up
; duplicating child loops without changing their structure and so they aren't by
; default visited, but will be visited with a special parameter.
; Loop nest built by this function:
;   l0             - outer loop; keeps running while a volatile i1 loaded from
;                    %ptr is true, otherwise branches to %exit.
;   l0.0           - middle loop; increments %iv by 1 per iteration and repeats
;                    while %iv.next < %count (signed). Its back edge carries
;                    !llvm.loop !1, which the metadata following this function
;                    ties to llvm.loop.unroll.count with a count of 2.
;   l0.0.0, l0.0.1 - two sibling inner loops executed in sequence inside l0.0,
;                    each repeating while a volatile i1 loaded from %ptr is true.
; The FileCheck directives interleaved with the blocks below list the loops the
; pass manager is expected to report visiting, in order.
define void @partial_unroll(i32 %count, ptr %ptr) {
; CHECK-LABEL: OptimizationRemarkEmitterAnalysis on partial_unroll
; CHECK-NOT: LoopFullUnrollPass

entry:
br label %l0

l0:
; Outer loop header: the volatile load decides between another trip and exit.
%cond.0 = load volatile i1, ptr %ptr
br i1 %cond.0, label %l0.0.ph, label %exit

l0.0.ph:
br label %l0.0

l0.0:
; Middle loop header: %iv starts at 0 and takes %iv.next on the back edge.
%iv = phi i32 [ %iv.next, %l0.0.latch ], [ 0, %l0.0.ph ]
%iv.next = add i32 %iv, 1
br label %l0.0.0.ph

l0.0.0.ph:
br label %l0.0.0

l0.0.0:
; First inner loop: self-loop while the volatile load yields true.
%cond.0.0.0 = load volatile i1, ptr %ptr
br i1 %cond.0.0.0, label %l0.0.0, label %l0.0.1.ph
; CHECK: LoopFullUnrollPass on l0.0.0
; CHECK-NOT: LoopFullUnrollPass

l0.0.1.ph:
br label %l0.0.1

l0.0.1:
; Second inner loop: self-loop while the volatile load yields true.
%cond.0.0.1 = load volatile i1, ptr %ptr
br i1 %cond.0.0.1, label %l0.0.1, label %l0.0.latch
; CHECK: LoopFullUnrollPass on l0.0.1
; CHECK-NOT: LoopFullUnrollPass

l0.0.latch:
; Middle loop latch: the back edge to l0.0 carries the unroll-count metadata.
%cmp = icmp slt i32 %iv.next, %count
br i1 %cmp, label %l0.0, label %l0.latch, !llvm.loop !1
; CHECK: LoopFullUnrollPass on l0.0
; CHECK-NOT: LoopFullUnrollPass
;
; Partial unrolling occurs which introduces both new child loops and new sibling
; loops. We only visit the child loops in a special mode, not by default.
; CHECK-CHILDREN: LoopFullUnrollPass on l0.0.0
; CHECK-CHILDREN-NOT: LoopFullUnrollPass
; CHECK-CHILDREN: LoopFullUnrollPass on l0.0.1
; CHECK-CHILDREN-NOT: LoopFullUnrollPass
; CHECK-CHILDREN: LoopFullUnrollPass on l0.0.0.1
; CHECK-CHILDREN-NOT: LoopFullUnrollPass
; CHECK-CHILDREN: LoopFullUnrollPass on l0.0.1.1
; CHECK-CHILDREN-NOT: LoopFullUnrollPass
;
; When we revisit children, we also revisit the current loop.
; CHECK-CHILDREN: LoopFullUnrollPass on l0.0
; CHECK-CHILDREN-NOT: LoopFullUnrollPass
;
; Revisit the children of the outer loop that are part of the epilogue.
;
; CHECK: LoopFullUnrollPass on l0.0.1.epil
; CHECK-NOT: LoopFullUnrollPass
; CHECK: LoopFullUnrollPass on l0.0.0.epil
; CHECK-NOT: LoopFullUnrollPass
l0.latch:
; Outer loop latch: unconditional back edge to l0.
br label %l0
; CHECK: LoopFullUnrollPass on l0
; CHECK-NOT: LoopFullUnrollPass

exit:
ret void
}
!1 = !{!1, !2}
!2 = !{!"llvm.loop.unroll.count", i32 2}

0 comments on commit aea2a14

Please sign in to comment.