Skip to content

Commit d0c9580

Browse files
author
Evgeniy Brevnov
committed
[LV] Unroll factor is expected to be > 0
LV fails with an assertion checking that UF > 0. We already set UF to 1 if it is 0, except in the case when IC > MaxInterleaveCount (MaxInterleaveCount can itself be 0 when the known trip count is smaller than VF). The fix is to set UF to 1 in that case as well. Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D87679
1 parent 1e4d882 commit d0c9580

File tree

2 files changed

+39
-8
lines changed

2 files changed

+39
-8
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5598,26 +5598,35 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
55985598
}
55995599

56005600
// If trip count is known or estimated compile time constant, limit the
5601-
// interleave count to be less than the trip count divided by VF.
5601+
// interleave count to be less than the trip count divided by VF, provided it
5602+
// is at least 1.
56025603
if (BestKnownTC) {
56035604
MaxInterleaveCount =
56045605
std::min(*BestKnownTC / VF.getKnownMinValue(), MaxInterleaveCount);
5606+
// Make sure MaxInterleaveCount is greater than 0.
5607+
MaxInterleaveCount = std::max(1u, MaxInterleaveCount);
56055608
}
56065609

5610+
assert(MaxInterleaveCount > 0 &&
5611+
"Maximum interleave count must be greater than 0");
5612+
5613+
// Clamp the calculated IC to be between 1 and the max interleave count
5614+
// that the target and trip count allows.
5615+
if (IC > MaxInterleaveCount)
5616+
IC = MaxInterleaveCount;
5617+
else
5618+
// Make sure IC is greater than 0.
5619+
IC = std::max(1u, IC);
5620+
5621+
assert(IC > 0 && "Interleave count must be greater than 0.");
5622+
56075623
// If we did not calculate the cost for VF (because the user selected the VF)
56085624
// then we calculate the cost of VF here.
56095625
if (LoopCost == 0)
56105626
LoopCost = expectedCost(VF).first;
56115627

56125628
assert(LoopCost && "Non-zero loop cost expected");
56135629

5614-
// Clamp the calculated IC to be between the 1 and the max interleave count
5615-
// that the target and trip count allows.
5616-
if (IC > MaxInterleaveCount)
5617-
IC = MaxInterleaveCount;
5618-
else if (IC < 1)
5619-
IC = 1;
5620-
56215630
// Interleave if we vectorized this loop and there is a reduction that could
56225631
// benefit from interleaving.
56235632
if (VF.isVector() && HasReductions) {
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
; RUN: opt -S -loop-vectorize -mtriple=s390x-linux-gnu -tiny-trip-count-interleave-threshold=4 -vectorizer-min-trip-count=8 < %s | FileCheck %s
2+
; RUN: opt -S -passes=loop-vectorize -mtriple=s390x-linux-gnu -tiny-trip-count-interleave-threshold=4 -vectorizer-min-trip-count=8 < %s | FileCheck %s
3+
4+
define i32 @main(i32 %arg, i8** nocapture readnone %arg1) #0 {
5+
;CHECK: vector.body:
6+
entry:
7+
%0 = alloca i8, align 1
8+
br label %loop
9+
10+
loop:
11+
%storemerge.i.i = phi i8 [ 0, %entry ], [ %tmp12.i.i, %loop ]
12+
store i8 %storemerge.i.i, i8* %0, align 2
13+
%tmp8.i.i = icmp ult i8 %storemerge.i.i, 8
14+
%tmp12.i.i = add nuw nsw i8 %storemerge.i.i, 1
15+
br i1 %tmp8.i.i, label %loop, label %ret
16+
17+
ret:
18+
ret i32 0
19+
}
20+
21+
attributes #0 = { "target-cpu"="z13" }
22+

0 commit comments

Comments
 (0)