262 changes: 129 additions & 133 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -355,10 +355,9 @@ class InnerLoopVectorizer {
/// element.
virtual Value *getBroadcastInstrs(Value *V);

/// This function adds 0, 1, 2 ... to each vector element, starting at zero.
/// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...).
/// The sequence starts at StartIndex.
virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
/// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...)
/// to each vector element of Val. The sequence starts at StartIndex.
virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step);

/// When we go over instructions in the basic block we rely on previous
/// values within the current basic block or on loop invariant values.
Expand Down Expand Up @@ -479,7 +478,7 @@ class InnerLoopUnroller : public InnerLoopVectorizer {
bool IfPredicateStore = false) override;
void vectorizeMemoryInstruction(Instruction *Instr) override;
Value *getBroadcastInstrs(Value *V) override;
Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate) override;
Value *getStepVector(Value *Val, int StartIdx, Value *Step) override;
Value *reverseVector(Value *Vec) override;
};

Expand Down Expand Up @@ -603,11 +602,9 @@ class LoopVectorizationLegality {

/// This enum represents the kinds of inductions that we support.
enum InductionKind {
IK_NoInduction, ///< Not an induction variable.
IK_IntInduction, ///< Integer induction variable. Step = 1.
IK_ReverseIntInduction, ///< Reverse int induction variable. Step = -1.
IK_PtrInduction, ///< Pointer induction var. Step = sizeof(elem).
IK_ReversePtrInduction ///< Reverse ptr indvar. Step = - sizeof(elem).
IK_NoInduction, ///< Not an induction variable.
IK_IntInduction, ///< Integer induction variable. Step = C.
IK_PtrInduction ///< Pointer induction var. Step = C / sizeof(elem).
};

// This enum represents the kind of minmax reduction.
Expand Down Expand Up @@ -697,12 +694,67 @@ class LoopVectorizationLegality {

/// A struct for saving information about induction variables.
struct InductionInfo {
InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
InductionInfo() : StartValue(nullptr), IK(IK_NoInduction) {}
InductionInfo(Value *Start, InductionKind K, ConstantInt *Step)
: StartValue(Start), IK(K), StepValue(Step) {
assert(IK != IK_NoInduction && "Not an induction");
assert(StartValue && "StartValue is null");
assert(StepValue && !StepValue->isZero() && "StepValue is zero");
assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
"StartValue is not a pointer for pointer induction");
assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
"StartValue is not an integer for integer induction");
assert(StepValue->getType()->isIntegerTy() &&
"StepValue is not an integer");
}
InductionInfo()
: StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}

/// Get the consecutive direction. Returns:
/// 0 - unknown or non-consecutive.
/// 1 - consecutive and increasing.
/// -1 - consecutive and decreasing.
int getConsecutiveDirection() const {
if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
return StepValue->getSExtValue();
return 0;
}

/// Compute the transformed value of Index at offset StartValue using step
/// StepValue.
/// For integer induction, returns StartValue + Index * StepValue.
/// For pointer induction, returns StartValue[Index * StepValue].
/// FIXME: The newly created binary instructions should contain nsw/nuw
/// flags, which can be found from the original scalar operations.
Value *transform(IRBuilder<> &B, Value *Index) const {
switch (IK) {
case IK_IntInduction:
assert(Index->getType() == StartValue->getType() &&
"Index type does not match StartValue type");
if (StepValue->isMinusOne())
return B.CreateSub(StartValue, Index);
if (!StepValue->isOne())
Index = B.CreateMul(Index, StepValue);
return B.CreateAdd(StartValue, Index);

case IK_PtrInduction:
if (StepValue->isMinusOne())
Index = B.CreateNeg(Index);
else if (!StepValue->isOne())
Index = B.CreateMul(Index, StepValue);
return B.CreateGEP(StartValue, Index);

case IK_NoInduction:
default:
return nullptr;
}
}

/// Start value.
TrackingVH<Value> StartValue;
/// Induction kind.
InductionKind IK;
/// Step value.
ConstantInt *StepValue;
};

/// ReductionList contains the reduction descriptors for all
Expand Down Expand Up @@ -822,9 +874,9 @@ class LoopVectorizationLegality {
/// pattern corresponding to a min(X, Y) or max(X, Y).
static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I,
ReductionInstDesc &Prev);
/// Returns the induction kind of Phi. This function may return NoInduction
/// if the PHI is not an induction variable.
InductionKind isInductionVariable(PHINode *Phi);
/// Returns the induction kind of Phi and record the step. This function may
/// return NoInduction if the PHI is not an induction variable.
InductionKind isInductionVariable(PHINode *Phi, ConstantInt *&StepValue);

/// \brief Collect memory access with loop invariant strides.
///
Expand Down Expand Up @@ -1592,27 +1644,32 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
return Shuf;
}

Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx,
bool Negate) {
Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
Value *Step) {
assert(Val->getType()->isVectorTy() && "Must be a vector");
assert(Val->getType()->getScalarType()->isIntegerTy() &&
"Elem must be an integer");
assert(Step->getType() == Val->getType()->getScalarType() &&
"Step has wrong type");
// Create the types.
Type *ITy = Val->getType()->getScalarType();
VectorType *Ty = cast<VectorType>(Val->getType());
int VLen = Ty->getNumElements();
SmallVector<Constant*, 8> Indices;

// Create a vector of consecutive numbers from zero to VF.
for (int i = 0; i < VLen; ++i) {
int64_t Idx = Negate ? (-i) : i;
Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx, Negate));
}
for (int i = 0; i < VLen; ++i)
Indices.push_back(ConstantInt::get(ITy, StartIdx + i));

// Add the consecutive indices to the vector value.
Constant *Cv = ConstantVector::get(Indices);
assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
return Builder.CreateAdd(Val, Cv, "induction");
Step = Builder.CreateVectorSplat(VLen, Step);
assert(Step->getType() == Val->getType() && "Invalid step vec");
// FIXME: The newly created binary instructions should contain nsw/nuw flags,
// which can be found from the original scalar operations.
Step = Builder.CreateMul(Cv, Step);
return Builder.CreateAdd(Val, Step, "induction");
}

/// \brief Find the operand of the GEP that should be checked for consecutive
Expand Down Expand Up @@ -1650,10 +1707,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
if (Phi && Inductions.count(Phi)) {
InductionInfo II = Inductions[Phi];
if (IK_PtrInduction == II.IK)
return 1;
else if (IK_ReversePtrInduction == II.IK)
return -1;
return II.getConsecutiveDirection();
}

GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
Expand All @@ -1678,10 +1732,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
return 0;

InductionInfo II = Inductions[Phi];
if (IK_PtrInduction == II.IK)
return 1;
else if (IK_ReversePtrInduction == II.IK)
return -1;
return II.getConsecutiveDirection();
}

unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
Expand Down Expand Up @@ -2496,33 +2547,13 @@ void InnerLoopVectorizer::createEmptyLoop() {
Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
II.StartValue->getType(),
"cast.crd");
EndValue = BypassBuilder.CreateAdd(CRD, II.StartValue , "ind.end");
break;
}
case LoopVectorizationLegality::IK_ReverseIntInduction: {
// Convert the CountRoundDown variable to the PHI size.
Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
II.StartValue->getType(),
"cast.crd");
// Handle reverse integer induction counter.
EndValue = BypassBuilder.CreateSub(II.StartValue, CRD, "rev.ind.end");
EndValue = II.transform(BypassBuilder, CRD);
EndValue->setName("ind.end");
break;
}
case LoopVectorizationLegality::IK_PtrInduction: {
// For pointer induction variables, calculate the offset using
// the end index.
EndValue = BypassBuilder.CreateGEP(II.StartValue, CountRoundDown,
"ptr.ind.end");
break;
}
case LoopVectorizationLegality::IK_ReversePtrInduction: {
// The value at the end of the loop for the reverse pointer is calculated
// by creating a GEP with a negative index starting from the start value.
Value *Zero = ConstantInt::get(CountRoundDown->getType(), 0);
Value *NegIdx = BypassBuilder.CreateSub(Zero, CountRoundDown,
"rev.ind.end");
EndValue = BypassBuilder.CreateGEP(II.StartValue, NegIdx,
"rev.ptr.ind.end");
EndValue = II.transform(BypassBuilder, CountRoundDown);
EndValue->setName("ptr.ind.end");
break;
}
}// end of case
Expand Down Expand Up @@ -3137,6 +3168,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
LoopVectorizationLegality::InductionInfo II =
Legal->getInductionVars()->lookup(P);

// FIXME: The newly created binary instructions should contain nsw/nuw flags,
// which can be found from the original scalar operations.
switch (II.IK) {
case LoopVectorizationLegality::IK_NoInduction:
llvm_unreachable("Unknown induction");
Expand All @@ -3154,80 +3187,42 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx,
"normalized.idx");
NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy);
Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx,
"offset.idx");
Broadcasted = II.transform(Builder, NormalizedIdx);
Broadcasted->setName("offset.idx");
}
Broadcasted = getBroadcastInstrs(Broadcasted);
// After broadcasting the induction variable we need to make the vector
// consecutive by adding 0, 1, 2, etc.
for (unsigned part = 0; part < UF; ++part)
Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
Entry[part] = getStepVector(Broadcasted, VF * part, II.StepValue);
return;
}
case LoopVectorizationLegality::IK_ReverseIntInduction:
case LoopVectorizationLegality::IK_PtrInduction:
case LoopVectorizationLegality::IK_ReversePtrInduction:
// Handle reverse integer and pointer inductions.
Value *StartIdx = ExtendedIdx;
// This is the normalized GEP that starts counting at zero.
Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
"normalized.idx");

// Handle the reverse integer induction variable case.
if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) {
IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType());
Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy,
"resize.norm.idx");
Value *ReverseInd = Builder.CreateSub(II.StartValue, CNI,
"reverse.idx");

// This is a new value so do not hoist it out.
Value *Broadcasted = getBroadcastInstrs(ReverseInd);
// After broadcasting the induction variable we need to make the
// vector consecutive by adding ... -3, -2, -1, 0.
for (unsigned part = 0; part < UF; ++part)
Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
true);
return;
}

// Handle the pointer induction variable case.
assert(P->getType()->isPointerTy() && "Unexpected type.");

// Is this a reverse induction ptr or a consecutive induction ptr.
bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction ==
II.IK);

// This is the normalized GEP that starts counting at zero.
Value *NormalizedIdx =
Builder.CreateSub(Induction, ExtendedIdx, "normalized.idx");
// This is the vector of results. Notice that we don't generate
// vector geps because scalar geps result in better code.
for (unsigned part = 0; part < UF; ++part) {
if (VF == 1) {
int EltIndex = (part) * (Reverse ? -1 : 1);
int EltIndex = part;
Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
Value *GlobalIdx;
if (Reverse)
GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
else
GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");

Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
"next.gep");
Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
Value *SclrGep = II.transform(Builder, GlobalIdx);
SclrGep->setName("next.gep");
Entry[part] = SclrGep;
continue;
}

Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
for (unsigned int i = 0; i < VF; ++i) {
int EltIndex = (i + part * VF) * (Reverse ? -1 : 1);
int EltIndex = i + part * VF;
Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
Value *GlobalIdx;
if (!Reverse)
GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
else
GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");

Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
"next.gep");
Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
Value *SclrGep = II.transform(Builder, GlobalIdx);
SclrGep->setName("next.gep");
VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
Builder.getInt32(i),
"insert.gep");
Expand All @@ -3247,7 +3242,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// Nothing to do for PHIs and BR, since we already took care of the
// loop control flow instructions.
continue;
case Instruction::PHI:{
case Instruction::PHI: {
// Vectorize PHINodes.
widenPHIInstruction(it, Entry, UF, VF, PV);
continue;
Expand Down Expand Up @@ -3368,8 +3363,12 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
CI->getType());
Value *Broadcasted = getBroadcastInstrs(ScalarCast);
LoopVectorizationLegality::InductionInfo II =
Legal->getInductionVars()->lookup(OldInduction);
Constant *Step =
ConstantInt::getSigned(CI->getType(), II.StepValue->getSExtValue());
for (unsigned Part = 0; Part < UF; ++Part)
Entry[Part] = getConsecutiveVector(Broadcasted, VF * Part, false);
Entry[Part] = getStepVector(Broadcasted, VF * Part, Step);
propagateMetadata(Entry, it);
break;
}
Expand Down Expand Up @@ -3716,8 +3715,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {

// This is the value coming from the preheader.
Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
ConstantInt *StepValue = nullptr;
// Check if this is an induction variable.
InductionKind IK = isInductionVariable(Phi);
InductionKind IK = isInductionVariable(Phi, StepValue);

if (IK_NoInduction != IK) {
// Get the widest type.
Expand All @@ -3727,7 +3727,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy);

// Int inductions are special because we only allow one IV.
if (IK == IK_IntInduction) {
if (IK == IK_IntInduction && StepValue->isOne()) {
// Use the phi node with the widest type as induction. Use the last
// one if there are multiple (no good reason for doing this other
// than it is expedient).
Expand All @@ -3736,7 +3736,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
}

DEBUG(dbgs() << "LV: Found an induction variable.\n");
Inductions[Phi] = InductionInfo(StartValue, IK);
Inductions[Phi] = InductionInfo(StartValue, IK, StepValue);

// Until we explicitly handle the case of an induction variable with
// an outside loop user we have to give up vectorizing this loop.
Expand Down Expand Up @@ -5287,7 +5287,8 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
}

LoopVectorizationLegality::InductionKind
LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
ConstantInt *&StepValue) {
Type *PhiTy = Phi->getType();
// We only handle integer and pointer inductions variables.
if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
Expand All @@ -5300,36 +5301,32 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
return IK_NoInduction;
}
const SCEV *Step = AR->getStepRecurrence(*SE);

// Integer inductions need to have a stride of one.
if (PhiTy->isIntegerTy()) {
if (Step->isOne())
return IK_IntInduction;
if (Step->isAllOnesValue())
return IK_ReverseIntInduction;
return IK_NoInduction;
}

const SCEV *Step = AR->getStepRecurrence(*SE);
// Calculate the pointer stride and check if it is consecutive.
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
if (!C)
return IK_NoInduction;

ConstantInt *CV = C->getValue();
if (PhiTy->isIntegerTy()) {
StepValue = CV;
return IK_IntInduction;
}

assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
Type *PointerElementType = PhiTy->getPointerElementType();
// The pointer stride cannot be determined if the pointer element type is not
// sized.
if (!PointerElementType->isSized())
return IK_NoInduction;

uint64_t Size = DL->getTypeAllocSize(PointerElementType);
if (C->getValue()->equalsInt(Size))
return IK_PtrInduction;
else if (C->getValue()->equalsInt(0 - Size))
return IK_ReversePtrInduction;

return IK_NoInduction;
int64_t Size = static_cast<int64_t>(DL->getTypeAllocSize(PointerElementType));
int64_t CVSize = CV->getSExtValue();
if (CVSize % Size)
return IK_NoInduction;
StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
return IK_PtrInduction;
}

bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
Expand Down Expand Up @@ -6311,11 +6308,10 @@ Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) {
return V;
}

Value *InnerLoopUnroller::getConsecutiveVector(Value* Val, int StartIdx,
bool Negate) {
Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step) {
// When unrolling and the VF is 1, we only need to add a simple scalar.
Type *ITy = Val->getType();
assert(!ITy->isVectorTy() && "Val must be a scalar");
Constant *C = ConstantInt::get(ITy, StartIdx, Negate);
return Builder.CreateAdd(Val, C, "induction");
Constant *C = ConstantInt::get(ITy, StartIdx);
return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction");
}
150 changes: 150 additions & 0 deletions llvm/test/Transforms/LoopVectorize/arbitrary-induction-step.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
; RUN: opt -S < %s -loop-vectorize 2>&1 | FileCheck %s
; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 | FileCheck %s --check-prefix=FORCE-VEC

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnueabi"

; Test integer induction variable of step 2:
; for (int i = 0; i < 1024; i+=2) {
; int tmp = *A++;
; sum += i * tmp;
; }

; CHECK-LABEL: @ind_plus2(
; CHECK: load <4 x i32>*
; CHECK: load <4 x i32>*
; CHECK: mul nsw <4 x i32>
; CHECK: mul nsw <4 x i32>
; CHECK: add nsw <4 x i32>
; CHECK: add nsw <4 x i32>
; CHECK: %index.next = add i64 %index, 8
; CHECK: icmp eq i64 %index.next, 512

; FORCE-VEC-LABEL: @ind_plus2(
; FORCE-VEC: %wide.load = load <2 x i32>*
; FORCE-VEC: mul nsw <2 x i32>
; FORCE-VEC: add nsw <2 x i32>
; FORCE-VEC: %index.next = add i64 %index, 2
; FORCE-VEC: icmp eq i64 %index.next, 512
define i32 @ind_plus2(i32* %A) {
entry:
br label %for.body

for.body: ; preds = %entry, %for.body
%A.addr = phi i32* [ %A, %entry ], [ %inc.ptr, %for.body ]
%i = phi i32 [ 0, %entry ], [ %add1, %for.body ]
%sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
%inc.ptr = getelementptr inbounds i32* %A.addr, i64 1
%0 = load i32* %A.addr, align 4
%mul = mul nsw i32 %0, %i
%add = add nsw i32 %mul, %sum
%add1 = add nsw i32 %i, 2
%cmp = icmp slt i32 %add1, 1024
br i1 %cmp, label %for.body, label %for.end

for.end: ; preds = %for.body
%add.lcssa = phi i32 [ %add, %for.body ]
ret i32 %add.lcssa
}


; Test integer induction variable of step -2:
; for (int i = 1024; i > 0; i-=2) {
; int tmp = *A++;
; sum += i * tmp;
; }

; CHECK-LABEL: @ind_minus2(
; CHECK: load <4 x i32>*
; CHECK: load <4 x i32>*
; CHECK: mul nsw <4 x i32>
; CHECK: mul nsw <4 x i32>
; CHECK: add nsw <4 x i32>
; CHECK: add nsw <4 x i32>
; CHECK: %index.next = add i64 %index, 8
; CHECK: icmp eq i64 %index.next, 512

; FORCE-VEC-LABEL: @ind_minus2(
; FORCE-VEC: %wide.load = load <2 x i32>*
; FORCE-VEC: mul nsw <2 x i32>
; FORCE-VEC: add nsw <2 x i32>
; FORCE-VEC: %index.next = add i64 %index, 2
; FORCE-VEC: icmp eq i64 %index.next, 512
define i32 @ind_minus2(i32* %A) {
entry:
br label %for.body

for.body: ; preds = %entry, %for.body
%A.addr = phi i32* [ %A, %entry ], [ %inc.ptr, %for.body ]
%i = phi i32 [ 1024, %entry ], [ %sub, %for.body ]
%sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
%inc.ptr = getelementptr inbounds i32* %A.addr, i64 1
%0 = load i32* %A.addr, align 4
%mul = mul nsw i32 %0, %i
%add = add nsw i32 %mul, %sum
%sub = add nsw i32 %i, -2
%cmp = icmp sgt i32 %i, 2
br i1 %cmp, label %for.body, label %for.end

for.end: ; preds = %for.body
%add.lcssa = phi i32 [ %add, %for.body ]
ret i32 %add.lcssa
}


; Test pointer induction variable of step 2. As currently we don't support
; masked load/store, vectorization is possible but not beneficial. If loop
; vectorization is not enforced, LV will only do interleave.
; for (int i = 0; i < 1024; i++) {
; int tmp0 = *A++;
; int tmp1 = *A++;
; sum += tmp0 * tmp1;
; }

; CHECK-LABEL: @ptr_ind_plus2(
; CHECK: load i32*
; CHECK: load i32*
; CHECK: load i32*
; CHECK: load i32*
; CHECK: mul nsw i32
; CHECK: mul nsw i32
; CHECK: add nsw i32
; CHECK: add nsw i32
; CHECK: %index.next = add i64 %index, 2
; CHECK: %21 = icmp eq i64 %index.next, 1024

; FORCE-VEC-LABEL: @ptr_ind_plus2(
; FORCE-VEC: load i32*
; FORCE-VEC: insertelement <2 x i32>
; FORCE-VEC: load i32*
; FORCE-VEC: insertelement <2 x i32>
; FORCE-VEC: load i32*
; FORCE-VEC: insertelement <2 x i32>
; FORCE-VEC: load i32*
; FORCE-VEC: insertelement <2 x i32>
; FORCE-VEC: mul nsw <2 x i32>
; FORCE-VEC: add nsw <2 x i32>
; FORCE-VEC: %index.next = add i64 %index, 2
; FORCE-VEC: icmp eq i64 %index.next, 1024
define i32 @ptr_ind_plus2(i32* %A) {
entry:
br label %for.body

for.body: ; preds = %for.body, %entry
%A.addr = phi i32* [ %A, %entry ], [ %inc.ptr1, %for.body ]
%sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
%i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%inc.ptr = getelementptr inbounds i32* %A.addr, i64 1
%0 = load i32* %A.addr, align 4
%inc.ptr1 = getelementptr inbounds i32* %A.addr, i64 2
%1 = load i32* %inc.ptr, align 4
%mul = mul nsw i32 %1, %0
%add = add nsw i32 %mul, %sum
%inc = add nsw i32 %i, 1
%exitcond = icmp eq i32 %inc, 1024
br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body
%add.lcssa = phi i32 [ %add, %for.body ]
ret i32 %add.lcssa
}
3 changes: 1 addition & 2 deletions llvm/test/Transforms/LoopVectorize/gcc-examples.ll
Original file line number Diff line number Diff line change
Expand Up @@ -388,9 +388,8 @@ define void @example12() nounwind uwtable ssp {
ret void
}

; Can't vectorize because of reductions.
;CHECK-LABEL: @example13(
;CHECK-NOT: <4 x i32>
;CHECK: <4 x i32>
;CHECK: ret void
define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %out) nounwind uwtable ssp {
br label %.preheader
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/LoopVectorize/reverse_induction.ll
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ loopend:
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %normalized.idx = sub i64 %index, 0
; CHECK: %reverse.idx = sub i64 1023, %normalized.idx
; CHECK: %offset.idx = sub i64 1023, %normalized.idx
; CHECK: trunc i64 %index to i8

define void @reverse_forward_induction_i64_i8() {
Expand All @@ -124,7 +124,7 @@ while.end:
; CHECK: vector.body:
; CHECK: %index = phi i64 [ 129, %vector.ph ], [ %index.next, %vector.body ]
; CHECK: %normalized.idx = sub i64 %index, 129
; CHECK: %reverse.idx = sub i64 1023, %normalized.idx
; CHECK: %offset.idx = sub i64 1023, %normalized.idx
; CHECK: trunc i64 %index to i8

define void @reverse_forward_induction_i64_i8_signed() {
Expand Down