Skip to content

Commit

Permalink
[LV] Interleaving should not exceed estimated loop trip count.
Browse files Browse the repository at this point in the history
Currently we may do iterleaving by more than estimated trip count
coming from the profile or computed maximum trip count. The solution is to
use "best known" trip count instead of exact one in interleaving analysis.

Patch by Evgeniy Brevnov.

Differential Revision: https://reviews.llvm.org/D67948
  • Loading branch information
topperc committed Oct 28, 2019
1 parent 3f34573 commit 18824d2
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 13 deletions.
24 changes: 12 additions & 12 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,9 +200,10 @@ static cl::opt<bool> EnableMaskedInterleavedMemAccesses(
"enable-masked-interleaved-mem-accesses", cl::init(false), cl::Hidden,
cl::desc("Enable vectorization on masked interleaved memory accesses in a loop"));

/// We don't interleave loops with a known constant trip count below this
/// number.
static const unsigned TinyTripCountInterleaveThreshold = 128;
static cl::opt<unsigned> TinyTripCountInterleaveThreshold(
"tiny-trip-count-interleave-threshold", cl::init(128), cl::Hidden,
cl::desc("We don't interleave loops with a estimated constant trip count "
"below this number"));

static cl::opt<unsigned> ForceTargetNumScalarRegs(
"force-target-num-scalar-regs", cl::init(0), cl::Hidden,
Expand Down Expand Up @@ -5143,9 +5144,10 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
if (Legal->getMaxSafeDepDistBytes() != -1U)
return 1;

// Do not interleave loops with a relatively small trip count.
unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
if (TC > 1 && TC < TinyTripCountInterleaveThreshold)
// Do not interleave loops with a relatively small known or estimated trip
// count.
auto BestKnownTC = getSmallBestKnownTC(*PSE.getSE(), TheLoop);
if (BestKnownTC && *BestKnownTC < TinyTripCountInterleaveThreshold)
return 1;

RegisterUsage R = calculateRegisterUsage({VF})[0];
Expand Down Expand Up @@ -5208,12 +5210,10 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(unsigned VF,
MaxInterleaveCount = ForceTargetMaxVectorInterleaveFactor;
}

// If the trip count is constant, limit the interleave count to be less than
// the trip count divided by VF.
if (TC > 0) {
assert(TC >= VF && "VF exceeds trip count?");
if ((TC / VF) < MaxInterleaveCount)
MaxInterleaveCount = (TC / VF);
// If trip count is known or estimated compile time constant, limit the
// interleave count to be less than the trip count divided by VF.
if (BestKnownTC) {
MaxInterleaveCount = std::min(*BestKnownTC / VF, MaxInterleaveCount);
}

// If we did not calculate the cost for VF (because the user selected the VF)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

; CHECK: remark: no_fpmath.c:6:11: loop not vectorized: cannot prove it is safe to reorder floating-point operations (hotness: 300)
; CHECK: remark: no_fpmath.c:6:14: loop not vectorized
; CHECK: remark: no_fpmath.c:17:14: vectorized loop (vectorization width: 2, interleaved count: 2) (hotness: 300)
; CHECK: remark: no_fpmath.c:17:14: vectorized loop (vectorization width: 2, interleaved count: 1) (hotness: 300)

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
Expand Down

0 comments on commit 18824d2

Please sign in to comment.