[LAA] Pass access type to getPtrStride()
Pass the access type to getPtrStride(), so it is not determined
from the pointer element type. Many cases still fetch the element
type at a higher level though, so this only partially addresses
the issue.
nikic committed Sep 11, 2021
1 parent 314b5a0 commit 45c4673
Showing 8 changed files with 47 additions and 37 deletions.
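For orientation, here is a minimal sketch (not part of this commit; the helper name isUnitStrideAccess is hypothetical) of how a call site supplies the access type under the new getPtrStride() signature. It leans on the getLoadStorePointerOperand() and getLoadStoreType() helpers that the updated call sites below also use:

// Sketch only: derive the access type from the memory instruction rather than
// from the pointer's element type, then query the stride in units of that type.
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static bool isUnitStrideAccess(PredicatedScalarEvolution &PSE, Instruction *I,
                               const Loop *L) {
  Value *Ptr = getLoadStorePointerOperand(I); // null if I is not a load/store
  if (!Ptr)
    return false;
  Type *AccessTy = getLoadStoreType(I);       // type actually loaded or stored
  // The returned stride is measured in units of AccessTy's alloc size, not of
  // the pointer element type.
  return getPtrStride(PSE, AccessTy, Ptr, L) == 1;
}

In-tree callers obtain the type directly from the instruction in the same way (for example LD->getType() or getLoadStoreType(&I) in the diffs below).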
7 changes: 4 additions & 3 deletions llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -670,8 +670,8 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
const ValueToValueMap &PtrToStride,
Value *Ptr);

-/// If the pointer has a constant stride return it in units of its
-/// element size. Otherwise return zero.
+/// If the pointer has a constant stride return it in units of the access type
+/// size. Otherwise return zero.
///
/// Ensure that it does not wrap in the address space, assuming the predicate
/// associated with \p PSE is true.
@@ -680,7 +680,8 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
/// to \p PtrToStride and therefore add further predicates to \p PSE.
/// The \p Assume parameter indicates if we are allowed to make additional
/// run-time assumptions.
-int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
+int64_t getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
+const Loop *Lp,
const ValueToValueMap &StridesMap = ValueToValueMap(),
bool Assume = false, bool ShouldCheckWrap = true);

2 changes: 1 addition & 1 deletion llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -340,7 +340,7 @@ class LoopVectorizationLegality {
/// -1 - Address is consecutive, and decreasing.
/// NOTE: This method must only be used before modifying the original scalar
/// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
-int isConsecutivePtr(Value *Ptr) const;
+int isConsecutivePtr(Type *AccessTy, Value *Ptr) const;

/// Returns true if the value V is uniform within the loop.
bool isUniform(Value *V);
37 changes: 21 additions & 16 deletions llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -658,7 +658,8 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE,
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
return true;

-int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
+Type *AccessTy = Ptr->getType()->getPointerElementType();
+int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides);
if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
return true;

@@ -1025,15 +1026,17 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
}

/// Check whether the access through \p Ptr has a constant stride.
-int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
-const Loop *Lp, const ValueToValueMap &StridesMap,
-bool Assume, bool ShouldCheckWrap) {
+int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy,
+Value *Ptr, const Loop *Lp,
+const ValueToValueMap &StridesMap, bool Assume,
+bool ShouldCheckWrap) {
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
+unsigned AddrSpace = Ty->getPointerAddressSpace();

-// Make sure that the pointer does not point to aggregate types.
-auto *PtrTy = cast<PointerType>(Ty);
-if (PtrTy->getElementType()->isAggregateType()) {
+// Make sure we're not accessing an aggregate type.
+// TODO: Why? This doesn't make any sense.
+if (AccessTy->isAggregateType()) {
LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
<< *Ptr << "\n");
return 0;
@@ -1070,8 +1073,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
isNoWrapAddRec(Ptr, AR, PSE, Lp);
if (!IsNoWrapAddRec && !IsInBoundsGEP &&
-NullPointerIsDefined(Lp->getHeader()->getParent(),
-PtrTy->getAddressSpace())) {
+NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace)) {
if (Assume) {
PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
IsNoWrapAddRec = true;
@@ -1099,7 +1101,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
}

auto &DL = Lp->getHeader()->getModule()->getDataLayout();
-int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
+int64_t Size = DL.getTypeAllocSize(AccessTy);
const APInt &APStepVal = C->getAPInt();

// Huge step value - give up.
@@ -1119,7 +1121,7 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
// zero we know that this won't happen without triggering undefined behavior.
if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
(IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
-PtrTy->getAddressSpace()))) {
+AddrSpace))) {
if (Assume) {
// We can avoid this case by adding a run-time check.
LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
@@ -1477,6 +1479,8 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
Value *BPtr = B.getPointer();
bool AIsWrite = A.getInt();
bool BIsWrite = B.getInt();
+Type *ATy = APtr->getType()->getPointerElementType();
+Type *BTy = BPtr->getType()->getPointerElementType();

// Two reads are independent.
if (!AIsWrite && !BIsWrite)
@@ -1487,8 +1491,10 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
BPtr->getType()->getPointerAddressSpace())
return Dependence::Unknown;

-int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
-int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
+int64_t StrideAPtr =
+getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides, true);
+int64_t StrideBPtr =
+getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides, true);

const SCEV *Src = PSE.getSCEV(APtr);
const SCEV *Sink = PSE.getSCEV(BPtr);
@@ -1497,6 +1503,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
// dependence.
if (StrideAPtr < 0) {
std::swap(APtr, BPtr);
+std::swap(ATy, BTy);
std::swap(Src, Sink);
std::swap(AIsWrite, BIsWrite);
std::swap(AIdx, BIdx);
@@ -1518,8 +1525,6 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return Dependence::Unknown;
}

-Type *ATy = APtr->getType()->getPointerElementType();
-Type *BTy = BPtr->getType()->getPointerElementType();
auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
uint64_t Stride = std::abs(StrideAPtr);
@@ -1981,7 +1986,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
// words may be written to the same address.
bool IsReadOnlyPtr = false;
if (Seen.insert(Ptr).second ||
-!getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
+!getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides)) {
++NumReads;
IsReadOnlyPtr = true;
}
7 changes: 4 additions & 3 deletions llvm/lib/Analysis/VectorUtils.cpp
@@ -986,7 +986,7 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
// wrap around the address space we would do a memory access at nullptr
// even without the transformation. The wrapping checks are therefore
// deferred until after we've formed the interleaved groups.
-int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
+int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, Strides,
/*Assume=*/true, /*ShouldCheckWrap=*/false);

const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
@@ -1205,8 +1205,9 @@ void InterleavedAccessInfo::analyzeInterleaving(
Instruction *Member = Group->getMember(Index);
assert(Member && "Group member does not exist");
Value *MemberPtr = getLoadStorePointerOperand(Member);
-if (getPtrStride(PSE, MemberPtr, TheLoop, Strides, /*Assume=*/false,
-/*ShouldCheckWrap=*/true))
+Type *AccessTy = getLoadStoreType(Member);
+if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
+/*Assume=*/false, /*ShouldCheckWrap=*/true))
return false;
LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
<< FirstOrLast
5 changes: 3 additions & 2 deletions llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -2060,8 +2060,9 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
return false;
}
if (isa<StoreInst>(I) || isa<LoadInst>(I)) {
-Value *Ptr = isa<LoadInst>(I) ? I.getOperand(0) : I.getOperand(1);
-int64_t NextStride = getPtrStride(PSE, Ptr, L);
+Value *Ptr = getLoadStorePointerOperand(&I);
+Type *AccessTy = getLoadStoreType(&I);
+int64_t NextStride = getPtrStride(PSE, AccessTy, Ptr, L);
if (NextStride == 1) {
// TODO: for now only allow consecutive strides of 1. We could support
// other strides as long as it is uniform, but let's keep it simple
4 changes: 2 additions & 2 deletions llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -108,8 +108,8 @@ struct StoreToLoadForwardingCandidate {
// Currently we only support accesses with unit stride. FIXME: we should be
// able to handle non-unit stride as well as long as the stride is equal to
// the dependence distance.
-if (getPtrStride(PSE, LoadPtr, L) != 1 ||
-getPtrStride(PSE, StorePtr, L) != 1)
+if (getPtrStride(PSE, LoadType, LoadPtr, L) != 1 ||
+getPtrStride(PSE, LoadType, StorePtr, L) != 1)
return false;

auto &DL = Load->getParent()->getModule()->getDataLayout();
6 changes: 4 additions & 2 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -419,7 +419,8 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
return false;
}

-int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) const {
+int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
+Value *Ptr) const {
const ValueToValueMap &Strides =
getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();

@@ -428,7 +429,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) const {
llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
PGSOQueryType::IRPass);
bool CanAddPredicate = !OptForSize;
-int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
+int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
+CanAddPredicate, false);
if (Stride == 1 || Stride == -1)
return Stride;
return 0;
16 changes: 8 additions & 8 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1492,14 +1492,14 @@ class LoopVectorizationCostModel {
/// Returns true if the target machine supports masked store operation
/// for the given \p DataType and kind of access to \p Ptr.
bool isLegalMaskedStore(Type *DataType, Value *Ptr, Align Alignment) const {
-return Legal->isConsecutivePtr(Ptr) &&
+return Legal->isConsecutivePtr(DataType, Ptr) &&
TTI.isLegalMaskedStore(DataType, Alignment);
}

/// Returns true if the target machine supports masked load operation
/// for the given \p DataType and kind of access to \p Ptr.
bool isLegalMaskedLoad(Type *DataType, Value *Ptr, Align Alignment) const {
-return Legal->isConsecutivePtr(Ptr) &&
+return Legal->isConsecutivePtr(DataType, Ptr) &&
TTI.isLegalMaskedLoad(DataType, Alignment);
}

@@ -5334,9 +5334,10 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
assert((LI || SI) && "Invalid memory instruction");

auto *Ptr = getLoadStorePointerOperand(I);
+auto *ScalarTy = getLoadStoreType(I);

// In order to be widened, the pointer should be consecutive, first of all.
-if (!Legal->isConsecutivePtr(Ptr))
+if (!Legal->isConsecutivePtr(ScalarTy, Ptr))
return false;

// If the instruction is a store located in a predicated block, it will be
@@ -5347,7 +5348,6 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened(
// If the instruction's allocated size doesn't equal it's type size, it
// requires padding and will be scalarized.
auto &DL = I->getModule()->getDataLayout();
-auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
if (hasIrregularType(ScalarTy, DL))
return false;

@@ -7088,7 +7088,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
Value *Ptr = getLoadStorePointerOperand(I);
unsigned AS = getLoadStoreAddressSpace(I);
-int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+int ConsecutiveStride = Legal->isConsecutivePtr(ValTy, Ptr);
enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;

assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
@@ -7474,8 +7474,8 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
// We assume that widening is the best solution when possible.
if (memoryInstructionCanBeWidened(&I, VF)) {
InstructionCost Cost = getConsecutiveMemOpCost(&I, VF);
-int ConsecutiveStride =
-Legal->isConsecutivePtr(getLoadStorePointerOperand(&I));
+int ConsecutiveStride = Legal->isConsecutivePtr(
+getLoadStoreType(&I), getLoadStorePointerOperand(&I));
assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) &&
"Expected consecutive stride.");
InstWidening Decision =
@@ -7975,7 +7975,7 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
// Check if the pointer operand of a load or store instruction is
// consecutive.
if (auto *Ptr = getLoadStorePointerOperand(Inst))
-return Legal->isConsecutivePtr(Ptr);
+return Legal->isConsecutivePtr(getLoadStoreType(Inst), Ptr);
return false;
}

