Fixed consecutive memory access detection in Loop Vectorizer.
It did not correctly handle cases without a GEP.

The following loop wasn't vectorized:

for (int i=0; i<len; i++)
  *to++ = *from++;

I use getPtrStride() to find the stride of the memory access, and return 0 if the stride is not 1 or -1.
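
For illustration (not part of the original patch; the indexed variant and both function names are hypothetical), the difference between the two source forms: with explicit indexing the access address is a GEP driven by the induction variable, which the old code could walk, while in the pointer-bump form each access uses the running pointer PHI itself, so the stride has to be derived from the pointer's own evolution.

// Sketch for illustration only; copy_indexed/copy_bump are made-up names.
// Indexed form: the load/store address is a GEP indexed by i.
void copy_indexed(float *to, const float *from, int len) {
  for (int i = 0; i < len; i++)
    to[i] = from[i];
}

// Pointer-bump form (the case fixed here): the address of each access is the
// running pointer itself; its stride is visible only through its SCEV.
void copy_bump(float *to, const float *from, int len) {
  for (int i = 0; i < len; i++)
    *to++ = *from++;
}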

Differential revision: http://reviews.llvm.org/D20789

llvm-svn: 273257
Elena Demikhovsky committed Jun 21, 2016
1 parent d1188f7 · commit 9823c99
Showing 4 changed files with 57 additions and 85 deletions.
llvm/include/llvm/Analysis/LoopAccessAnalysis.h (3 additions, 1 deletion)
@@ -679,9 +679,11 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
 /// to \p PtrToStride and therefore add further predicates to \p PSE.
 /// The \p Assume parameter indicates if we are allowed to make additional
 /// run-time assumptions.
+/// The \p ShouldCheckWrap indicates that we should ensure that address
+/// calculation does not wrap.
 int getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
                  const ValueToValueMap &StridesMap = ValueToValueMap(),
-                 bool Assume = false);
+                 bool Assume = false, bool ShouldCheckWrap = true);

 /// \brief Returns true if the memory operations \p A and \p B are consecutive.
 /// This is a simple API that does not depend on the analysis pass.
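
A hypothetical call site for the widened interface (a sketch, not code from this patch; PSE, Ptr, TheLoop, and Strides are assumed to be in scope):

// Default query: a stride is reported only when the address computation is
// also known not to wrap (see the LoopAccessAnalysis.cpp change below).
int StrideChecked = getPtrStride(PSE, Ptr, TheLoop, Strides);

// Relaxed query: skip the wrap check when the caller only needs the
// direction of the access and validates wrapping separately, as the new
// isConsecutivePtr() in LoopVectorize.cpp does.
int StrideRelaxed = getPtrStride(PSE, Ptr, TheLoop, Strides,
                                 /*Assume=*/true, /*ShouldCheckWrap=*/false);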
llvm/lib/Analysis/LoopAccessAnalysis.cpp (4 additions, 4 deletions)
@@ -866,7 +866,7 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
 /// \brief Check whether the access through \p Ptr has a constant stride.
 int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
                        const Loop *Lp, const ValueToValueMap &StridesMap,
-                       bool Assume) {
+                       bool Assume, bool ShouldCheckWrap) {
   Type *Ty = Ptr->getType();
   assert(Ty->isPointerTy() && "Unexpected non-ptr");

@@ -905,9 +905,9 @@ int llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
   // to access the pointer value "0" which is undefined behavior in address
   // space 0, therefore we can also vectorize this case.
   bool IsInBoundsGEP = isInBoundsGep(Ptr);
-  bool IsNoWrapAddRec =
-      PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
-      isNoWrapAddRec(Ptr, AR, PSE, Lp);
+  bool IsNoWrapAddRec = !ShouldCheckWrap ||
+                        PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
+                        isNoWrapAddRec(Ptr, AR, PSE, Lp);
   bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
   if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
     if (Assume) {
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (7 additions, 80 deletions)
@@ -2156,87 +2156,13 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
 }

 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
-  assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
-  auto *SE = PSE.getSE();
-  // Make sure that the pointer does not point to structs.
-  if (Ptr->getType()->getPointerElementType()->isAggregateType())
-    return 0;
-
-  // If this value is a pointer induction variable, we know it is consecutive.
-  PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
-  if (Phi && Inductions.count(Phi)) {
-    InductionDescriptor II = Inductions[Phi];
-    return II.getConsecutiveDirection();
-  }
-
-  GetElementPtrInst *Gep = getGEPInstruction(Ptr);
-  if (!Gep)
-    return 0;
-
-  unsigned NumOperands = Gep->getNumOperands();
-  Value *GpPtr = Gep->getPointerOperand();
-  // If this GEP value is a consecutive pointer induction variable and all of
-  // the indices are constant, then we know it is consecutive.
-  Phi = dyn_cast<PHINode>(GpPtr);
-  if (Phi && Inductions.count(Phi)) {
-
-    // Make sure that the pointer does not point to structs.
-    PointerType *GepPtrType = cast<PointerType>(GpPtr->getType());
-    if (GepPtrType->getElementType()->isAggregateType())
-      return 0;
-
-    // Make sure that all of the index operands are loop invariant.
-    for (unsigned i = 1; i < NumOperands; ++i)
-      if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
-        return 0;
-
-    InductionDescriptor II = Inductions[Phi];
-    return II.getConsecutiveDirection();
-  }
-
-  unsigned InductionOperand = getGEPInductionOperand(Gep);
-
-  // Check that all of the gep indices are uniform except for our induction
-  // operand.
-  for (unsigned i = 0; i != NumOperands; ++i)
-    if (i != InductionOperand &&
-        !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
-      return 0;
-
-  // We can emit wide load/stores only if the last non-zero index is the
-  // induction variable.
-  const SCEV *Last = nullptr;
-  if (!getSymbolicStrides() || !getSymbolicStrides()->count(Gep))
-    Last = PSE.getSCEV(Gep->getOperand(InductionOperand));
-  else {
-    // Because of the multiplication by a stride we can have a s/zext cast.
-    // We are going to replace this stride by 1 so the cast is safe to ignore.
-    //
-    //  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-    //  %0 = trunc i64 %indvars.iv to i32
-    //  %mul = mul i32 %0, %Stride1
-    //  %idxprom = zext i32 %mul to i64  << Safe cast.
-    //  %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom
-    //
-    Last = replaceSymbolicStrideSCEV(PSE, *getSymbolicStrides(),
-                                     Gep->getOperand(InductionOperand), Gep);
-    if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last))
-      Last =
-          (C->getSCEVType() == scSignExtend || C->getSCEVType() == scZeroExtend)
-              ? C->getOperand()
-              : Last;
-  }
-  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
-    const SCEV *Step = AR->getStepRecurrence(*SE);
-
-    // The memory is consecutive because the last index is consecutive
-    // and all other indices are loop invariant.
-    if (Step->isOne())
-      return 1;
-    if (Step->isAllOnesValue())
-      return -1;
-  }
+  const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() :
+    ValueToValueMap();
+
+  int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
+  if (Stride == 1 || Stride == -1)
+    return Stride;
   return 0;
 }
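
Why this works for the GEP-less loop above: getPtrStride() classifies the pointer by its SCEV, not by its defining instruction. A rough sketch of the shape of that classification (assumptions: Ptr, Lp, and PSE in scope as in getPtrStride(); symbolic strides, wrap checks, and the Assume path are ignored; this is not the verbatim LoopAccessAnalysis code):

// Sketch only: derive a unit stride from the pointer's add recurrence.
const DataLayout &DL = Lp->getHeader()->getModule()->getDataLayout();
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
if (!AR || AR->getLoop() != Lp)
  return 0;   // pointer is not an affine recurrence in this loop
const SCEVConstant *Step =
    dyn_cast<SCEVConstant>(AR->getStepRecurrence(*PSE.getSE()));
if (!Step)
  return 0;   // non-constant step
int64_t StepVal = Step->getValue()->getSExtValue();
int64_t Size = DL.getTypeAllocSize(Ptr->getType()->getPointerElementType());
// In the test below, the pointer PHIs advance by one float (4 bytes) per
// iteration, so StepVal == Size and the reported stride is +1.
return (StepVal == Size) ? 1 : (StepVal == -Size) ? -1 : 0;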

@@ -2617,7 +2543,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
       Ptr = Builder.Insert(Gep2);
     } else { // No GEP
       // Use the induction element ptr.
-      assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+      assert(isa<SCEVAddRecExpr>(PSE.getSE()->getSCEV(Ptr)) &&
+             "Invalid induction ptr");
       setDebugLocFromInst(Builder, Ptr);
       VectorParts &PtrVal = getVectorValue(Ptr);
       Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
llvm/test/Transforms/LoopVectorize/consec_no_gep.ll (new file, 43 additions)
@@ -0,0 +1,43 @@
; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -instcombine -S | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

;; Check consecutive memory access without preceding GEP instruction

; for (int i=0; i<len; i++) {
; *to++ = *from++;
; }

; CHECK-LABEL: @consecutive_no_gep(
; CHECK: vector.body
; CHECK: %[[index:.*]] = phi i64 [ 0, %vector.ph ]
; CHECK: getelementptr float, float* %{{.*}}, i64 %[[index]]
; CHECK: load <4 x float>

define void @consecutive_no_gep(float* noalias nocapture readonly %from, float* noalias nocapture %to, i32 %len) #0 {
entry:
%cmp2 = icmp sgt i32 %len, 0
br i1 %cmp2, label %for.body.preheader, label %for.end

for.body.preheader: ; preds = %entry
br label %for.body

for.body: ; preds = %for.body.preheader, %for.body
%i.05 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
%from.addr.04 = phi float* [ %incdec.ptr, %for.body ], [ %from, %for.body.preheader ]
%to.addr.03 = phi float* [ %incdec.ptr1, %for.body ], [ %to, %for.body.preheader ]
%incdec.ptr = getelementptr inbounds float, float* %from.addr.04, i64 1
%val = load float, float* %from.addr.04, align 4
%incdec.ptr1 = getelementptr inbounds float, float* %to.addr.03, i64 1
store float %val, float* %to.addr.03, align 4
%inc = add nsw i32 %i.05, 1
%cmp = icmp slt i32 %inc, %len
br i1 %cmp, label %for.body, label %for.end.loopexit

for.end.loopexit: ; preds = %for.body
br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
ret void
}
