diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index ce86096f1a97b..162a3c4b195e5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1257,22 +1257,7 @@ class VPInstruction : public VPRecipeWithIRFlags { } /// Returns true if the recipe only uses the first lane of operand \p Op. - bool onlyFirstLaneUsed(const VPValue *Op) const override { - assert(is_contained(operands(), Op) && - "Op must be an operand of the recipe"); - if (getOperand(0) != Op) - return false; - switch (getOpcode()) { - default: - return false; - case VPInstruction::ActiveLaneMask: - case VPInstruction::CalculateTripCountMinusVF: - case VPInstruction::CanonicalIVIncrementForPart: - case VPInstruction::BranchOnCount: - return true; - }; - llvm_unreachable("switch should return"); - } + bool onlyFirstLaneUsed(const VPValue *Op) const override; /// Returns true if the recipe only uses the first part of operand \p Op. bool onlyFirstPartUsed(const VPValue *Op) const override { diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 297b58d8abc47..9ee0cb2bd6153 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -515,6 +515,26 @@ void VPInstruction::execute(VPTransformState &State) { State.set(this, GeneratedValue, Part); } } +bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { + assert(is_contained(operands(), Op) && "Op must be an operand of the recipe"); + if (Instruction::isBinaryOp(getOpcode())) + return vputils::onlyFirstLaneUsed(this); + + switch (getOpcode()) { + default: + return false; + case Instruction::ICmp: + // TODO: Cover additional opcodes. + return vputils::onlyFirstLaneUsed(this); + case VPInstruction::ActiveLaneMask: + case VPInstruction::CalculateTripCountMinusVF: + case VPInstruction::CanonicalIVIncrementForPart: + case VPInstruction::BranchOnCount: + // TODO: Cover additional operands. 
+ return getOperand(0) == Op; + }; + llvm_unreachable("switch should return"); +} #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPInstruction::dump() const { diff --git a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll index e81fb66239bd4..f05ec30619c5d 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45679-fold-tail-by-masking.ll @@ -67,7 +67,7 @@ define void @pr45679(ptr %A) optsize { ; CHECK-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14 -; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -129,7 +129,7 @@ define void @pr45679(ptr %A) optsize { ; VF2UF2-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1 ; VF2UF2-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 ; VF2UF2-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14 -; VF2UF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] +; VF2UF2-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; VF2UF2: exit: ; VF2UF2-NEXT: ret void ; @@ -139,46 +139,42 @@ define void @pr45679(ptr %A) optsize { ; VF1UF4: vector.ph: ; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]] ; VF1UF4: vector.body: -; VF1UF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ] -; VF1UF4-NEXT: [[VEC_IV:%.*]] = add i32 [[INDEX]], 0 -; VF1UF4-NEXT: [[VEC_IV4:%.*]] = add i32 [[INDEX]], 1 -; VF1UF4-NEXT: [[VEC_IV5:%.*]] = add i32 [[INDEX]], 2 -; VF1UF4-NEXT: [[VEC_IV6:%.*]] = add i32 [[INDEX]], 3 -; VF1UF4-NEXT: [[TMP0:%.*]] = icmp ule i32 [[VEC_IV]], 13 -; VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i32 [[VEC_IV4]], 13 -; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i32 [[VEC_IV5]], 13 -; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i32 [[VEC_IV6]], 13 -; VF1UF4-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; VF1UF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] +; VF1UF4-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; VF1UF4-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1 +; VF1UF4-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2 +; VF1UF4-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3 +; VF1UF4-NEXT: [[TMP4:%.*]] = icmp ule i32 [[TMP0]], 13 +; VF1UF4-NEXT: [[TMP5:%.*]] = icmp ule i32 [[TMP1]], 13 +; VF1UF4-NEXT: [[TMP6:%.*]] = icmp ule i32 [[TMP2]], 13 +; VF1UF4-NEXT: [[TMP7:%.*]] = icmp ule i32 [[TMP3]], 13 +; VF1UF4-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VF1UF4: pred.store.if: -; VF1UF4-NEXT: [[INDUCTION:%.*]] = add i32 [[INDEX]], 0 -; VF1UF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDUCTION]] -; VF1UF4-NEXT: store i32 13, ptr [[TMP4]], align 1 +; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] +; VF1UF4-NEXT: store i32 13, ptr [[TMP8]], align 1 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]] ; VF1UF4: pred.store.continue: -; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] -; VF1UF4: pred.store.if4: -; VF1UF4-NEXT: [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1 -; VF1UF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
i32, ptr [[A]], i32 [[INDUCTION1]] -; VF1UF4-NEXT: store i32 13, ptr [[TMP5]], align 1 -; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE8]] -; VF1UF4: pred.store.continue5: -; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] -; VF1UF4: pred.store.if6: -; VF1UF4-NEXT: [[INDUCTION2:%.*]] = add i32 [[INDEX]], 2 -; VF1UF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDUCTION2]] -; VF1UF4-NEXT: store i32 13, ptr [[TMP6]], align 1 -; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE10]] -; VF1UF4: pred.store.continue7: -; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]] -; VF1UF4: pred.store.if8: -; VF1UF4-NEXT: [[INDUCTION3:%.*]] = add i32 [[INDEX]], 3 -; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDUCTION3]] -; VF1UF4-NEXT: store i32 13, ptr [[TMP7]], align 1 -; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE12]] -; VF1UF4: pred.store.continue9: +; VF1UF4-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] +; VF1UF4: pred.store.if1: +; VF1UF4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP1]] +; VF1UF4-NEXT: store i32 13, ptr [[TMP9]], align 1 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE2]] +; VF1UF4: pred.store.continue2: +; VF1UF4-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] +; VF1UF4: pred.store.if3: +; VF1UF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP2]] +; VF1UF4-NEXT: store i32 13, ptr [[TMP10]], align 1 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE4]] +; VF1UF4: pred.store.continue4: +; VF1UF4-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] +; VF1UF4: pred.store.if5: +; VF1UF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP3]] +; VF1UF4-NEXT: store i32 13, ptr [[TMP11]], align 1 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE6]] +; VF1UF4: pred.store.continue6: ; VF1UF4-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 -; VF1UF4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; VF1UF4-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF1UF4-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 +; VF1UF4-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VF1UF4: middle.block: ; VF1UF4-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; VF1UF4: scalar.ph: @@ -190,7 +186,7 @@ define void @pr45679(ptr %A) optsize { ; VF1UF4-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1 ; VF1UF4-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1 ; VF1UF4-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], 14 -; VF1UF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]] +; VF1UF4-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; VF1UF4: exit: ; VF1UF4-NEXT: ret void ; @@ -356,54 +352,50 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF1UF4: vector.ph: ; VF1UF4-NEXT: br label [[VECTOR_BODY:%.*]] ; VF1UF4: vector.body: -; VF1UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ] -; VF1UF4-NEXT: [[VEC_IV:%.*]] = add i64 [[INDEX]], 0 -; VF1UF4-NEXT: [[VEC_IV4:%.*]] = add i64 [[INDEX]], 1 -; VF1UF4-NEXT: [[VEC_IV5:%.*]] = add i64 [[INDEX]], 2 -; VF1UF4-NEXT: [[VEC_IV6:%.*]] = add i64 [[INDEX]], 3 -; VF1UF4-NEXT: [[TMP0:%.*]] = icmp ule i64 [[VEC_IV]], 13 -; 
VF1UF4-NEXT: [[TMP1:%.*]] = icmp ule i64 [[VEC_IV4]], 13 -; VF1UF4-NEXT: [[TMP2:%.*]] = icmp ule i64 [[VEC_IV5]], 13 -; VF1UF4-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV6]], 13 -; VF1UF4-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; VF1UF4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] +; VF1UF4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; VF1UF4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; VF1UF4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; VF1UF4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; VF1UF4-NEXT: [[TMP4:%.*]] = icmp ule i64 [[TMP0]], 13 +; VF1UF4-NEXT: [[TMP5:%.*]] = icmp ule i64 [[TMP1]], 13 +; VF1UF4-NEXT: [[TMP6:%.*]] = icmp ule i64 [[TMP2]], 13 +; VF1UF4-NEXT: [[TMP7:%.*]] = icmp ule i64 [[TMP3]], 13 +; VF1UF4-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; VF1UF4: pred.store.if: -; VF1UF4-NEXT: [[INDUCTION:%.*]] = add i64 [[INDEX]], 0 -; VF1UF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDUCTION]] -; VF1UF4-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -; VF1UF4-NEXT: store i64 [[TMP5]], ptr [[B:%.*]], align 8 +; VF1UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; VF1UF4-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 +; VF1UF4-NEXT: store i64 [[TMP9]], ptr [[B:%.*]], align 8 ; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE]] ; VF1UF4: pred.store.continue: -; VF1UF4-NEXT: [[TMP6:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_STORE_IF]] ] -; VF1UF4-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] -; VF1UF4: pred.store.if4: -; VF1UF4-NEXT: [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1 -; VF1UF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDUCTION1]] -; VF1UF4-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 -; VF1UF4-NEXT: store i64 [[TMP8]], ptr [[B]], align 8 -; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE8]] -; VF1UF4: pred.store.continue5: -; VF1UF4-NEXT: [[TMP9:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP8]], [[PRED_STORE_IF7]] ] -; VF1UF4-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]] -; VF1UF4: pred.store.if6: -; VF1UF4-NEXT: [[INDUCTION2:%.*]] = add i64 [[INDEX]], 2 -; VF1UF4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDUCTION2]] -; VF1UF4-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 -; VF1UF4-NEXT: store i64 [[TMP11]], ptr [[B]], align 8 -; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE10]] -; VF1UF4: pred.store.continue7: -; VF1UF4-NEXT: [[TMP12:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE8]] ], [ [[TMP11]], [[PRED_STORE_IF9]] ] -; VF1UF4-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]] -; VF1UF4: pred.store.if8: -; VF1UF4-NEXT: [[INDUCTION3:%.*]] = add i64 [[INDEX]], 3 -; VF1UF4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDUCTION3]] -; VF1UF4-NEXT: [[TMP14:%.*]] = load i64, ptr [[TMP13]], align 8 -; VF1UF4-NEXT: store i64 [[TMP14]], ptr [[B]], align 8 -; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE12]] -; VF1UF4: pred.store.continue9: -; VF1UF4-NEXT: [[TMP15:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE10]] ], [ [[TMP14]], [[PRED_STORE_IF11]] ] +; VF1UF4-NEXT: [[TMP10:%.*]] = phi i64 [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_STORE_IF]] ] +; VF1UF4-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label 
[[PRED_STORE_CONTINUE2:%.*]] +; VF1UF4: pred.store.if1: +; VF1UF4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; VF1UF4-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +; VF1UF4-NEXT: store i64 [[TMP12]], ptr [[B]], align 8 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE2]] +; VF1UF4: pred.store.continue2: +; VF1UF4-NEXT: [[TMP13:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE]] ], [ [[TMP12]], [[PRED_STORE_IF1]] ] +; VF1UF4-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] +; VF1UF4: pred.store.if3: +; VF1UF4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] +; VF1UF4-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 8 +; VF1UF4-NEXT: store i64 [[TMP15]], ptr [[B]], align 8 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE4]] +; VF1UF4: pred.store.continue4: +; VF1UF4-NEXT: [[TMP16:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE2]] ], [ [[TMP15]], [[PRED_STORE_IF3]] ] +; VF1UF4-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] +; VF1UF4: pred.store.if5: +; VF1UF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; VF1UF4-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP17]], align 8 +; VF1UF4-NEXT: store i64 [[TMP18]], ptr [[B]], align 8 +; VF1UF4-NEXT: br label [[PRED_STORE_CONTINUE6]] +; VF1UF4: pred.store.continue6: +; VF1UF4-NEXT: [[TMP19:%.*]] = phi i64 [ poison, [[PRED_STORE_CONTINUE4]] ], [ [[TMP18]], [[PRED_STORE_IF5]] ] ; VF1UF4-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; VF1UF4-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; VF1UF4-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF1UF4-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; VF1UF4-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF1UF4: middle.block: ; VF1UF4-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; VF1UF4: scalar.ph: @@ -416,7 +408,7 @@ define void @load_variant(ptr noalias %a, ptr noalias %b) { ; VF1UF4-NEXT: store i64 [[V]], ptr [[B]], align 8 ; VF1UF4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; VF1UF4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 14 -; VF1UF4-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; VF1UF4-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; VF1UF4: for.end: ; VF1UF4-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll index c07512644f721..0c659a550b31e 100644 --- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll +++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll @@ -16,43 +16,39 @@ define void @VF1-VPlanExe(ptr %dst) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE9:%.*]] ] -; CHECK-NEXT: [[VEC_IV:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[VEC_IV1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[VEC_IV2:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[VEC_IV3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i64 [[VEC_IV]], 14 -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i64 [[VEC_IV1]], 14 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ule i64 [[VEC_IV2]], 14 -; 
CHECK-NEXT: [[TMP3:%.*]] = icmp ule i64 [[VEC_IV3]], 14 -; CHECK-NEXT: br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ule i64 [[TMP0]], 14 +; CHECK-NEXT: [[TMP5:%.*]] = icmp ule i64 [[TMP1]], 14 +; CHECK-NEXT: [[TMP6:%.*]] = icmp ule i64 [[TMP2]], 14 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ule i64 [[TMP3]], 14 +; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP4]] -; CHECK-NEXT: store i32 0, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP0]] +; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] -; CHECK: pred.store.if4: -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP6]] -; CHECK-NEXT: store i32 0, ptr [[TMP7]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]] -; CHECK: pred.store.continue5: -; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]] -; CHECK: pred.store.if6: -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP8]] +; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] +; CHECK: pred.store.if1: +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP1]] ; CHECK-NEXT: store i32 0, ptr [[TMP9]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE7]] -; CHECK: pred.store.continue7: -; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] -; CHECK: pred.store.if8: -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP10]] +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] +; CHECK: pred.store.continue2: +; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] +; CHECK: pred.store.if3: +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP2]] +; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] +; CHECK: pred.store.continue4: +; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] +; CHECK: pred.store.if5: +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP3]] ; CHECK-NEXT: store i32 0, ptr [[TMP11]], align 4 -; CHECK-NEXT: br label [[PRED_STORE_CONTINUE9]] -; CHECK: pred.store.continue9: +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] +; CHECK: pred.store.continue6: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -69,7 +65,7 @@ define void 
@VF1-VPlanExe(ptr %dst) { ; CHECK-NEXT: store i32 0, ptr [[DST_PTR]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 15 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; entry: br label %for.body @@ -138,7 +134,7 @@ define void @VF1-VPWidenCanonicalIVRecipeExe(ptr %ptr1) { ; CHECK: pred.store.continue12: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -151,7 +147,7 @@ define void @VF1-VPWidenCanonicalIVRecipeExe(ptr %ptr1) { ; CHECK-NEXT: store double 0.000000e+00, ptr [[ADDR]], align 8 ; CHECK-NEXT: [[PTR]] = getelementptr inbounds double, ptr [[ADDR]], i64 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[PTR]], [[PTR2]] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; entry: %ptr2 = getelementptr inbounds double, ptr %ptr1, i64 15
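
For context, here is a minimal standalone sketch of the dispatch the patched `VPInstruction::onlyFirstLaneUsed` performs. It uses hypothetical stand-in types (`Inst`, `Opc`) rather than the real VPlan classes, and `allUsersUseOnlyFirstLane` stands in for `vputils::onlyFirstLaneUsed`: for lane-wise opcodes (binary ops, `icmp`) the lane demand on an operand mirrors the lane demand on the result, so the query is forwarded to the users; for the listed VPInstruction opcodes only operand 0 is consumed as a scalar (first-lane) value. This is an illustration under those assumptions, not the LLVM implementation itself.

```cpp
// Hypothetical, simplified stand-ins for the VPlan types; not the real LLVM API.
#include <algorithm>
#include <cassert>
#include <vector>

enum class Opc { Add, Mul, ICmp, ActiveLaneMask, BranchOnCount, Load };

struct Inst {
  Opc Opcode;
  std::vector<const Inst *> Operands; // values this instruction reads
  std::vector<const Inst *> Users;    // instructions that read this value
};

bool onlyFirstLaneUsed(const Inst &I, const Inst *Op); // forward declaration

// Stand-in for vputils::onlyFirstLaneUsed: true if every user of V only
// demands the first lane of V.
bool allUsersUseOnlyFirstLane(const Inst *V) {
  return std::all_of(V->Users.begin(), V->Users.end(),
                     [V](const Inst *U) { return onlyFirstLaneUsed(*U, V); });
}

// Sketch of the dispatch in the patched onlyFirstLaneUsed.
bool onlyFirstLaneUsed(const Inst &I, const Inst *Op) {
  assert(std::find(I.Operands.begin(), I.Operands.end(), Op) !=
             I.Operands.end() &&
         "Op must be an operand of the instruction");
  switch (I.Opcode) {
  // Lane-wise opcodes: the operand's first lane suffices exactly when only
  // the first lane of the result is used, so ask the users.
  case Opc::Add:
  case Opc::Mul:
  case Opc::ICmp:
    return allUsersUseOnlyFirstLane(&I);
  // These opcodes consume only a scalar (first-lane) value via operand 0.
  case Opc::ActiveLaneMask:
  case Opc::BranchOnCount:
    return I.Operands.front() == Op;
  default:
    return false;
  }
}

int main() {
  // A branch-on-count only needs the first lane of its counter operand.
  Inst Counter{Opc::Add, {}, {}};
  Inst Branch{Opc::BranchOnCount, {&Counter}, {}};
  Counter.Users.push_back(&Branch);
  assert(onlyFirstLaneUsed(Branch, &Counter));
  // The counter itself is lane-wise; since its sole user only needs the
  // first lane, so does the counter's (hypothetical) operand.
  return 0;
}
```

This mirrors why the test updates above lose the widened `VEC_IV` adds for VF1: once the lane-wise compares report that only the first lane of the induction increments is used, the per-lane `add`/`icmp` pairs are generated directly from the scalar index.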