Skip to content

Commit

Permalink
[LV] Let recordVectorLoopValueForInductionCast to check if IV was cre…
Browse files Browse the repository at this point in the history
…ated from the cast.

Summary:
It turned out to be error-prone to expect the callers to handle that - better to
leave the decision to this routine and make the required data to be explicitly
passed to the function.

This handles the case that was missed in the r322473 and fixes the assert
mentioned in PR36524.

Reviewers: dorit, mssimpso, Ayal, dcaballe

Reviewed By: dcaballe

Subscribers: Ka-Ka, hiraditya, dneilson, hsaito, llvm-commits

Differential Revision: https://reviews.llvm.org/D43812

llvm-svn: 327960
  • Loading branch information
aelovikov-intel committed Mar 20, 2018
1 parent 1bbe00e commit 8b8253f
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 15 deletions.
52 changes: 37 additions & 15 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Expand Up @@ -542,9 +542,9 @@ class InnerLoopVectorizer {
/// Compute scalar induction steps. \p ScalarIV is the scalar induction
/// variable on which to base the steps, \p Step is the size of the step, and
/// \p EntryVal is the value from the original loop that maps to the steps.
/// Note that \p EntryVal doesn't have to be an induction variable (e.g., it
/// can be a truncate instruction).
void buildScalarSteps(Value *ScalarIV, Value *Step, Value *EntryVal,
/// Note that \p EntryVal doesn't have to be an induction variable - it
/// can also be a truncate instruction.
void buildScalarSteps(Value *ScalarIV, Value *Step, Instruction *EntryVal,
const InductionDescriptor &ID);

/// Create a vector induction phi node based on an existing scalar one. \p
Expand All @@ -571,10 +571,20 @@ class InnerLoopVectorizer {
/// vector loop for both the Phi and the cast.
/// If \p VectorLoopValue is a scalarized value, \p Lane is also specified,
/// Otherwise, \p VectorLoopValue is a widened/vectorized value.
void recordVectorLoopValueForInductionCast (const InductionDescriptor &ID,
Value *VectorLoopValue,
unsigned Part,
unsigned Lane = UINT_MAX);
///
/// \p EntryVal is the value from the original loop that maps to the vector
/// phi node and is used to distinguish what is the IV currently being
/// processed - original one (if \p EntryVal is a phi corresponding to the
/// original IV) or the "newly-created" one based on the proof mentioned above
/// (see also buildScalarSteps() and createVectorIntOrFPInductionPHI()). In the
/// latter case \p EntryVal is a TruncInst and we must not record anything for
/// that IV, but it's error-prone to expect callers of this routine to care
/// about that, hence this explicit parameter.
void recordVectorLoopValueForInductionCast(const InductionDescriptor &ID,
const Instruction *EntryVal,
Value *VectorLoopValue,
unsigned Part,
unsigned Lane = UINT_MAX);

/// Generate a shuffle sequence that will reverse the vector Vec.
virtual Value *reverseVector(Value *Vec);
Expand Down Expand Up @@ -2368,6 +2378,8 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {

void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
const InductionDescriptor &II, Value *Step, Instruction *EntryVal) {
assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
"Expected either an induction phi-node or a truncate of it!");
Value *Start = II.getStartValue();

// Construct the initial value of the vector IV in the vector loop preheader
Expand Down Expand Up @@ -2421,8 +2433,7 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(

if (isa<TruncInst>(EntryVal))
addMetadata(LastInduction, EntryVal);
else
recordVectorLoopValueForInductionCast(II, LastInduction, Part);
recordVectorLoopValueForInductionCast(II, EntryVal, LastInduction, Part);

LastInduction = cast<Instruction>(addFastMathFlag(
Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")));
Expand Down Expand Up @@ -2456,8 +2467,20 @@ bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const {
}

void InnerLoopVectorizer::recordVectorLoopValueForInductionCast(
const InductionDescriptor &ID, Value *VectorLoopVal, unsigned Part,
unsigned Lane) {
const InductionDescriptor &ID, const Instruction *EntryVal,
Value *VectorLoopVal, unsigned Part, unsigned Lane) {
assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
"Expected either an induction phi-node or a truncate of it!");

// This induction variable is not the phi from the original loop but the
// newly-created IV based on the proof that casted Phi is equal to the
// uncasted Phi in the vectorized loop (under a runtime guard possibly). It
// re-uses the same InductionDescriptor that original IV uses but we don't
// have to do any recording in this case - that is done when original IV is
// processed.
if (isa<TruncInst>(EntryVal))
return;

const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
if (Casts.empty())
return;
Expand Down Expand Up @@ -2554,8 +2577,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
VectorLoopValueMap.setVectorValue(EntryVal, Part, EntryPart);
if (Trunc)
addMetadata(EntryPart, Trunc);
else
recordVectorLoopValueForInductionCast(ID, EntryPart, Part);
recordVectorLoopValueForInductionCast(ID, EntryVal, EntryPart, Part);
}
}

Expand Down Expand Up @@ -2626,7 +2648,7 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
}

void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
Value *EntryVal,
Instruction *EntryVal,
const InductionDescriptor &ID) {
// We shouldn't have to build scalar steps if we aren't vectorizing.
assert(VF > 1 && "VF should be greater than one");
Expand Down Expand Up @@ -2661,7 +2683,7 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step));
auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul));
VectorLoopValueMap.setScalarValue(EntryVal, {Part, Lane}, Add);
recordVectorLoopValueForInductionCast(ID, Add, Part, Lane);
recordVectorLoopValueForInductionCast(ID, EntryVal, Add, Part, Lane);
}
}
}
Expand Down
39 changes: 39 additions & 0 deletions llvm/test/Transforms/LoopVectorize/X86/pr36524.ll
@@ -0,0 +1,39 @@
; RUN: opt -S -scev-version-unknown -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"

define void @foo() {
; CHECK-LABEL: @foo(
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 2, i64 3, i64 4, i64 5>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 2, [[INDEX]]
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 2
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 3
; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 2, [[INDEX]]
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[OFFSET_IDX1]] to i32
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[TMP11]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 0
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
;
entry:
br label %loop

loop:
%0 = phi i64 [ 2, %entry ], [ %3, %loop ]
%1 = and i64 %0, 4294967295
%2 = trunc i64 %0 to i32
%3 = add nuw nsw i64 %1, 1
%4 = icmp sgt i32 %2, 80
br i1 %4, label %exit, label %loop

exit:
ret void
}

0 comments on commit 8b8253f

Please sign in to comment.