Skip to content

Commit

Permalink
Revert "Recommit "[LV] Induction Variable does not remain scalar under tail-folding.""
Browse files Browse the repository at this point in the history

This reverts commit ae45b4d. It causes miscompilations; a test case is on the mailing list.
  • Loading branch information
d0k committed May 8, 2020
1 parent 13a1b3c commit f936457
Show file tree
Hide file tree
Showing 28 changed files with 1,375 additions and 3,175 deletions.
11 changes: 4 additions & 7 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Expand Up @@ -1909,9 +1909,11 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, TruncInst *Trunc) {
return;
}

// All IV users are scalar instructions, so only emit a scalar IV, not a
// vectorised IV.
// If we haven't yet vectorized the induction variable, splat the scalar
// induction variable, and build the necessary step vectors.
// TODO: Don't do it unless the vectorized IV is really required.
Value *ScalarIV = CreateScalarIV(Step);
CreateSplatIV(ScalarIV, Step);
buildScalarSteps(ScalarIV, Step, EntryVal, ID);
}

Expand Down Expand Up @@ -4587,11 +4589,6 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) {
if (Induction.second.getKind() == InductionDescriptor::IK_PtrInduction)
continue;

// If tail-folding is applied, the primary induction variable will be used
// to feed a vector compare.
if (Ind == Legal->getPrimaryInduction() && foldTailByMasking())
continue;

// Determine if all users of the induction variable are scalar after
// vectorization.
auto ScalarInd = llvm::all_of(Ind->users(), [&](User *U) -> bool {
Expand Down
Expand Up @@ -16,6 +16,9 @@

; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph
; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i32> undef, i32 %index, i32 0
; FORCED-NEXT: %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> undef, <2 x i32> zeroinitializer
; FORCED-NEXT: %induction = add <2 x i32> %broadcast.splat, <i32 0, i32 1>
; FORCED-NEXT: %0 = add i32 %index, 0
; FORCED-NEXT: %1 = extractvalue { i64, i64 } %sv, 0
; FORCED-NEXT: %2 = extractvalue { i64, i64 } %sv, 0
Expand Down Expand Up @@ -65,6 +68,9 @@ declare float @pow(float, float) readnone nounwind

; FORCED-LABEL: vector.body: ; preds = %vector.body, %vector.ph
; FORCED-NEXT: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; FORCED-NEXT: %broadcast.splatinsert = insertelement <2 x i32> undef, i32 %index, i32 0
; FORCED-NEXT: %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> undef, <2 x i32> zeroinitializer
; FORCED-NEXT: %induction = add <2 x i32> %broadcast.splat, <i32 0, i32 1>
; FORCED-NEXT: %0 = add i32 %index, 0
; FORCED-NEXT: %1 = extractvalue { float, float } %sv, 0
; FORCED-NEXT: %2 = extractvalue { float, float } %sv, 0
Expand Down
6 changes: 6 additions & 0 deletions llvm/test/Transforms/LoopVectorize/AArch64/pr36032.ll
Expand Up @@ -65,9 +65,15 @@ define void @_Z1dv() local_unnamed_addr #0 {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP0]], [[INDEX]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[OFFSET_IDX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[OFFSET_IDX4:%.*]] = add i64 [[TMP0]], [[INDEX]]
; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[OFFSET_IDX4]] to i32
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i32> undef, i32 [[TMP18]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT5]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION7:%.*]] = add <4 x i32> [[BROADCAST_SPLAT6]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[TMP18]], 0
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[CONV]], [[TMP19]]
; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64
Expand Down
3 changes: 3 additions & 0 deletions llvm/test/Transforms/LoopVectorize/ARM/sphinx.ll
Expand Up @@ -44,6 +44,9 @@ define i32 @test(float* nocapture readonly %x) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1>
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[X:%.*]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 0
Expand Down
Expand Up @@ -14,7 +14,8 @@ target triple = "hexagon"

; CHECK-LABEL: @test1
; CHECK: vector.body:
; CHECK: icmp ule <64 x i32> %vec.ind
; CHECK: %induction = add <64 x i32>
; CHECK: icmp ule <64 x i32> %induction
; CHECK-NOT: load <{{.*}} x i32>


Expand Down
3 changes: 3 additions & 0 deletions llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll
Expand Up @@ -15,6 +15,9 @@ define dso_local void @test(i32* %Arr, i32 signext %Len) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 [[TMP1]]
Expand Down
Expand Up @@ -19,10 +19,12 @@ define void @func_21() {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ]
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 undef, i32 0>, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[PRED_STORE_CONTINUE4]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE4]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> undef, i64 [[INDEX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1>
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[VEC_IND]], <i64 4, i64 4>
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i64> [[INDUCTION]], <i64 4, i64 4>
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0
; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
Expand Down Expand Up @@ -59,7 +61,6 @@ define void @func_21() {
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 6
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; CHECK: middle.block:
Expand Down
22 changes: 3 additions & 19 deletions llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
Expand Up @@ -19,6 +19,9 @@ define void @f1() {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[OFFSET_IDX]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> undef, <2 x i32> zeroinitializer
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i16> [[BROADCAST_SPLAT]], <i16 0, i16 1>
; CHECK-NEXT: [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[TMP0]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [2 x i16*], [2 x i16*]* @b, i16 0, i64 [[TMP1]]
Expand All @@ -29,25 +32,6 @@ define void @f1() {
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 2
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 2, 2
; CHECK-NEXT: br i1 [[CMP_N]], label [[BB3:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[BB1:%.*]] ]
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2:
; CHECK-NEXT: [[C_1_0:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[_TMP9:%.*]], [[BB2]] ]
; CHECK-NEXT: [[_TMP1:%.*]] = zext i16 0 to i64
; CHECK-NEXT: [[_TMP2:%.*]] = getelementptr [1 x %rec8], [1 x %rec8]* @a, i16 0, i64 [[_TMP1]]
; CHECK-NEXT: [[_TMP4:%.*]] = bitcast %rec8* [[_TMP2]] to i16*
; CHECK-NEXT: [[_TMP6:%.*]] = sext i16 [[C_1_0]] to i64
; CHECK-NEXT: [[_TMP7:%.*]] = getelementptr [2 x i16*], [2 x i16*]* @b, i16 0, i64 [[_TMP6]]
; CHECK-NEXT: store i16* [[_TMP4]], i16** [[_TMP7]]
; CHECK-NEXT: [[_TMP9]] = add nsw i16 [[C_1_0]], 1
; CHECK-NEXT: [[_TMP11:%.*]] = icmp slt i16 [[_TMP9]], 2
; CHECK-NEXT: br i1 [[_TMP11]], label [[BB2]], label [[BB3]], !llvm.loop !2
; CHECK: bb3:
; CHECK-NEXT: ret void
;

bb1:
br label %bb2
Expand Down
Expand Up @@ -97,6 +97,9 @@ define double @sumIfVector(double* nocapture readonly %arr) {
; AVX: vector.body:
; AVX-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX-NEXT: [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ]
; AVX-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
; AVX-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
; AVX-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; AVX-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; AVX-NEXT: [[TMP1:%.*]] = getelementptr double, double* [[ARR:%.*]], i32 [[TMP0]]
; AVX-NEXT: [[TMP2:%.*]] = getelementptr double, double* [[TMP1]], i32 0
Expand Down

0 comments on commit f936457

Please sign in to comment.