Skip to content

Commit

Permalink
Enable non-power-of-2 #pragma unroll counts.
Browse files Browse the repository at this point in the history
Patch by Evgeny Stupachenko.

Differential Revision: http://reviews.llvm.org/D18202

llvm-svn: 264407
  • Loading branch information
David L Kreitzer committed Mar 25, 2016
1 parent ddca355 commit 8d441eb
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 26 deletions.
9 changes: 4 additions & 5 deletions llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
Expand Up @@ -684,11 +684,6 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
}

if (HasPragma) {
if (PragmaCount != 0)
// If loop has an unroll count pragma mark loop as unrolled to prevent
// unrolling beyond that requested by the pragma.
SetLoopAlreadyUnrolled(L);

// Emit optimization remarks if we are unable to unroll the loop
// as directed by a pragma.
DebugLoc LoopLoc = L->getStartLoc();
Expand Down Expand Up @@ -738,6 +733,10 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
TripMultiple, LI, SE, &DT, &AC, PreserveLCSSA))
return false;

// If the loop has an unroll count pragma, mark the loop as unrolled to
// prevent unrolling beyond that requested by the pragma.
if (HasPragma && PragmaCount != 0)
SetLoopAlreadyUnrolled(L);
return true;
}

Expand Down
52 changes: 31 additions & 21 deletions llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
Expand Up @@ -117,10 +117,10 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,

assert(Count != 0 && "nonsensical Count!");

// If BECount <u (Count - 1) then (BECount + 1) & (Count - 1) == (BECount + 1)
// (since Count is a power of 2). This means %xtraiter is (BECount + 1) and
// all of the iterations of this loop were executed by the prologue. Note
// that if BECount <u (Count - 1) then (BECount + 1) cannot unsigned-overflow.
// If BECount <u (Count - 1) then (BECount + 1) % Count == (BECount + 1)
// This means %xtraiter is (BECount + 1) and all of the iterations of this
// loop were executed by the prologue. Note that if BECount <u (Count - 1)
// then (BECount + 1) cannot unsigned-overflow.
Value *BrLoopExit =
B.CreateICmpULT(BECount, ConstantInt::get(BECount->getType(), Count - 1));
BasicBlock *Exit = L->getUniqueExitBlock();
Expand Down Expand Up @@ -319,11 +319,6 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR))
return false;

// We only handle cases when the unroll factor is a power of 2.
// Count is the loop unroll factor, the number of extra copies added + 1.
if (!isPowerOf2_32(Count))
return false;

// This constraint lets us deal with an overflowing trip count easily; see the
// comment on ModVal below.
if (Log2_32(Count) > BEWidth)
Expand All @@ -349,18 +344,33 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count,
PreHeaderBR);

IRBuilder<> B(PreHeaderBR);
Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");

// If ModVal is zero, we know that either
// 1. There are no iterations to be run in the prologue loop.
// OR
// 2. The addition computing TripCount overflowed.
//
// If (2) is true, we know that TripCount really is (1 << BEWidth) and so the
// number of iterations that remain to be run in the original loop is a
// multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
// explicitly check this above).

Value *ModVal;
// Calculate ModVal = (BECount + 1) % Count.
// Note that TripCount is BECount + 1.
if (isPowerOf2_32(Count)) {
ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
// If ModVal is zero, we know that either
// 1. There are no iterations to be run in the prologue loop.
// OR
// 2. The addition computing TripCount overflowed.
//
// If (2) is true, we know that TripCount really is (1 << BEWidth) and so
// the number of iterations that remain to be run in the original loop is a
// multiple of Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we
// explicitly check this above).
} else {
// Since (BECount + 1) can overflow (unsigned), we instead compute
// (BECount % Count) + 1, which cannot overflow because BECount % Count < Count.
Value *ModValTmp = B.CreateURem(BECount,
ConstantInt::get(BECount->getType(),
Count));
Value *ModValAdd = B.CreateAdd(ModValTmp,
ConstantInt::get(ModValTmp->getType(), 1));
// At this point (BECount % Count) + 1 may be equal to Count.
// To handle this case we take the remainder modulo Count one more time.
ModVal = B.CreateURem(ModValAdd,
ConstantInt::get(BECount->getType(), Count),
"xtraiter");
}
Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod");

// Branch to either the extra iterations or the cloned/unrolled loop.
Expand Down
37 changes: 37 additions & 0 deletions llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll
Expand Up @@ -322,3 +322,40 @@ for.end: ; preds = %for.body, %entry
ret void
}
!15 = !{!15, !14}

; #pragma clang loop unroll_count(3)
; Loop has a runtime trip count. Runtime unrolling should occur and loop
; should be duplicated (original and 3x unrolled).
;
; CHECK-LABEL: @runtime_loop_with_count3(
; CHECK: for.body.prol:
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
; CHECK: for.body
; CHECK: store
; CHECK: store
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
; Test input for the unroll-count-3 pragma case. The loop below adds 1 to
; each of the first %b i32 elements starting at %a; its trip count depends on
; the runtime value of %b.
define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
entry:
; Skip the loop entirely unless %b is positive (signed compare).
%cmp3 = icmp sgt i32 %b, 0
br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !16

for.body: ; preds = %entry, %for.body
; 64-bit induction variable: 0 when entered from %entry, otherwise the
; incremented value carried around the back edge.
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
; a[i] = a[i] + 1. This is the only store in the loop body, which is what the
; store-counting FileCheck directives above this function rely on.
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* %arrayidx, align 4
; Advance the index, then leave the loop once the 32-bit truncation of the
; next index equals %b.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %b
; The back-edge branch carries the loop metadata !16 defined below.
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !16

for.end: ; preds = %for.body, %entry
ret void
}
; Loop metadata node: self-referential first operand, followed by the
; unroll-count hint !17.
!16 = !{!16, !17}
; Unroll count hint with value 3 (a non-power-of-2 count).
!17 = !{!"llvm.loop.unroll.count", i32 3}

0 comments on commit 8d441eb

Please sign in to comment.