From 6ac6bc171f70b6fd92138eb2157b0faec186a60e Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Fri, 7 Nov 2025 12:55:05 +0000 Subject: [PATCH] [VPlan] Narrow wide stores in narrowToSingleScalars --- .../Transforms/Vectorize/VPlanTransforms.cpp | 41 ++- .../AArch64/conditional-branches-cost.ll | 74 ++--- .../AArch64/divs-with-scalable-vfs.ll | 24 +- .../AArch64/eliminate-tail-predication.ll | 22 +- .../AArch64/epilog-vectorization-factors.ll | 93 +++--- .../epilog-vectorization-widen-inductions.ll | 16 +- ...-vectorization-fix-scalar-resume-values.ll | 44 +-- .../extractvalue-no-scalarization-required.ll | 22 +- .../AArch64/force-target-instruction-cost.ll | 122 +++---- .../AArch64/induction-costs-sve.ll | 39 ++- .../AArch64/interleave-with-gaps.ll | 8 +- .../LoopVectorize/AArch64/partial-reduce.ll | 16 +- .../LoopVectorize/AArch64/store-costs-sve.ll | 96 ++---- .../LoopVectorize/AArch64/sve-epilog-vect.ll | 298 ++++++------------ .../AArch64/sve-epilog-vscale-fixed.ll | 104 ++---- .../AArch64/sve-interleaved-accesses.ll | 54 ++-- .../sve-interleaved-masked-accesses.ll | 21 +- .../AArch64/sve-live-out-pointer-induction.ll | 19 +- .../LoopVectorize/AArch64/sve-tail-folding.ll | 4 +- .../AArch64/sve-widen-extractvalue.ll | 8 +- .../AArch64/tail-folding-styles.ll | 4 +- ...-interleave-to-widen-memory-multi-block.ll | 6 +- ...ctor-loop-backedge-elimination-epilogue.ll | 66 ++-- .../LoopVectorize/RISCV/dead-ops-cost.ll | 3 +- .../LoopVectorize/RISCV/safe-dep-distance.ll | 3 +- .../LoopVectorize/RISCV/scalable-tailfold.ll | 45 +-- .../LoopVectorize/RISCV/uniform-load-store.ll | 244 +++----------- .../WebAssembly/induction-branch-cost.ll | 43 +-- .../LoopVectorize/X86/constant-fold.ll | 17 +- .../X86/invariant-load-gather.ll | 8 +- .../X86/invariant-store-vectorization.ll | 38 +-- .../Transforms/LoopVectorize/X86/pr35432.ll | 56 +--- .../Transforms/LoopVectorize/X86/pr54634.ll | 42 +-- .../LoopVectorize/X86/reduction-crash.ll | 4 +- .../LoopVectorize/X86/scatter_crash.ll | 115 ++++--- .../LoopVectorize/consecutive-ptr-uniforms.ll | 8 +- .../dont-fold-tail-for-const-TC.ll | 6 +- .../dont-fold-tail-for-divisible-TC.ll | 4 +- .../epilog-vectorization-scev-expansion.ll | 4 +- .../extract-from-end-vector-constant.ll | 4 +- ...irst-order-recurrence-dead-instructions.ll | 4 +- .../LoopVectorize/first-order-recurrence.ll | 12 +- .../Transforms/LoopVectorize/hints-trans.ll | 4 +- ...nd-sink-mem-ops-with-invariant-pointers.ll | 4 +- .../LoopVectorize/if-pred-stores.ll | 2 +- .../invariant-store-vectorization-2.ll | 9 +- .../invariant-store-vectorization.ll | 20 +- .../LoopVectorize/iv_outside_user.ll | 6 +- .../Transforms/LoopVectorize/loop-form.ll | 18 +- .../min-trip-count-known-via-scev.ll | 4 +- .../nested-loops-scev-expansion.ll | 4 +- ...o-fold-tail-by-masking-iv-external-uses.ll | 2 +- .../optimal-epilog-vectorization.ll | 66 +--- llvm/test/Transforms/LoopVectorize/optsize.ll | 52 +-- .../outer-loop-inner-latch-successors.ll | 34 +- .../LoopVectorize/outer-loop-wide-phis.ll | 10 +- llvm/test/Transforms/LoopVectorize/pr37248.ll | 4 +- llvm/test/Transforms/LoopVectorize/pr50686.ll | 20 +- .../LoopVectorize/predicate-switch.ll | 32 +- .../reuse-lcssa-phi-scev-expansion.ll | 2 +- .../runtime-check-needed-but-empty.ll | 2 +- .../Transforms/LoopVectorize/runtime-check.ll | 2 +- ...ntime-checks-difference-simplifications.ll | 24 +- .../LoopVectorize/runtime-checks-hoist.ll | 4 +- .../LoopVectorize/scalable-iv-outside-user.ll | 4 +- .../LoopVectorize/scalable-lifetime.ll | 4 +- .../scev-exit-phi-invalidation.ll | 2 +- .../LoopVectorize/scev-predicate-reasoning.ll | 6 +- .../LoopVectorize/skeleton-lcssa-crash.ll | 2 +- .../trip-count-expansion-may-introduce-ub.ll | 26 +- .../LoopVectorize/trunc-extended-icmps.ll | 4 +- .../Transforms/LoopVectorize/uniform-blend.ll | 4 +- .../uniform_across_vf_induction1.ll | 4 +- .../uniform_across_vf_induction1_and.ll | 4 +- .../uniform_across_vf_induction1_div_urem.ll | 4 +- .../uniform_across_vf_induction1_lshr.ll | 12 +- .../uniform_across_vf_induction2.ll | 4 +- .../unused-blend-mask-for-first-operand.ll | 12 +- .../Transforms/LoopVectorize/vector-geps.ll | 4 +- ...p-backedge-elimination-outside-iv-users.ll | 16 +- .../version-stride-with-integer-casts.ll | 10 +- .../widen-gep-all-indices-invariant.ll | 16 +- 82 files changed, 835 insertions(+), 1489 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 48bd697397f41..b6e8e2a4da9ba 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1390,33 +1390,52 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) { for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) { - if (!isa(&R)) + if (!isa(&R)) continue; + auto *RepR = dyn_cast(&R); if (RepR && (RepR->isSingleScalar() || RepR->isPredicated())) continue; - - auto *RepOrWidenR = cast(&R); - if (RepR && isa(RepR->getUnderlyingInstr()) && - vputils::isSingleScalar(RepR->getOperand(1))) { - auto *Clone = new VPReplicateRecipe( - RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(), - true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/); - Clone->insertBefore(RepOrWidenR); + auto *Store = dyn_cast(&R); + + if ((RepR && isa(RepR->getUnderlyingInstr()) && + vputils::isSingleScalar(RepR->getOperand(1))) || + (Store && !Store->isMasked() && + vputils::isSingleScalar(Store->getStoredValue()))) { + StoreInst *UnderlyingInstr = + RepR ? cast(RepR->getUnderlyingInstr()) + : cast(&Store->getIngredient()); + SmallVector Operands; + if (RepR) + append_range(Operands, RepR->operands()); + else + Operands = {Store->getStoredValue(), Store->getAddr()}; + VPValue *OperandToCheck = + RepR ? RepR->getOperand(1) : Store->getStoredValue(); + VPIRMetadata Metadata = + RepR ? VPIRMetadata(*RepR) : VPIRMetadata(*Store); + auto *Clone = new VPReplicateRecipe(UnderlyingInstr, Operands, + true /*IsSingleScalar*/, + nullptr /*Mask*/, Metadata); + Clone->insertBefore(&R); unsigned ExtractOpc = - vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1)) + vputils::isUniformAcrossVFsAndUFs(OperandToCheck) ? VPInstruction::ExtractLastElement : VPInstruction::ExtractLastLanePerPart; auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)}); Ext->insertBefore(Clone); Clone->setOperand(0, Ext); - RepR->eraseFromParent(); + R.eraseFromParent(); continue; } // Skip recipes that aren't single scalars or don't have only their // scalar results used. In the latter case, we would introduce extra // broadcasts. + auto *RepOrWidenR = dyn_cast(&R); + if (!RepOrWidenR) + continue; if (!vputils::isSingleScalar(RepOrWidenR) || !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) { return U->usesScalars(RepOrWidenR) || diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index f16351720b20f..72323df2e3354 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -251,37 +251,23 @@ define void @latch_branch_cost(ptr %dst) { ; DEFAULT-LABEL: define void @latch_branch_cost( ; DEFAULT-SAME: ptr [[DST:%.*]]) { ; DEFAULT-NEXT: [[ITER_CHECK:.*:]] -; DEFAULT-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] -; DEFAULT: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: -; DEFAULT-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] +; DEFAULT-NEXT: br label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: ; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] ; DEFAULT: [[VECTOR_BODY]]: -; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], %[[VECTOR_BODY]] ] +; DEFAULT-NEXT: [[INDEX1:%.*]] = add i64 [[INDEX]], 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i32 16 -; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP2]], align 1 -; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP5]], align 1 -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 -; DEFAULT-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 -; DEFAULT-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; DEFAULT: [[MIDDLE_BLOCK]]: -; DEFAULT-NEXT: br i1 false, [[EXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] -; DEFAULT: [[VEC_EPILOG_ITER_CHECK]]: -; DEFAULT-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF5:![0-9]+]] -; DEFAULT: [[VEC_EPILOG_PH]]: -; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; DEFAULT-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] -; DEFAULT: [[VEC_EPILOG_VECTOR_BODY]]: -; DEFAULT-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX1]] -; DEFAULT-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP8]], align 1 -; DEFAULT-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4 +; DEFAULT-NEXT: store i8 0, ptr [[TMP2]], align 1 +; DEFAULT-NEXT: store i8 0, ptr [[TMP8]], align 1 +; DEFAULT-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX]], 2 ; DEFAULT-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 100 -; DEFAULT-NEXT: br i1 [[TMP10]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; DEFAULT: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; DEFAULT-NEXT: br i1 true, [[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] -; DEFAULT: [[VEC_EPILOG_SCALAR_PH]]: +; DEFAULT-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; DEFAULT: [[MIDDLE_BLOCK]]: +; DEFAULT-NEXT: br label %[[EXIT:.*]] +; DEFAULT: [[EXIT]]: +; DEFAULT-NEXT: ret void ; ; PRED-LABEL: define void @latch_branch_cost( ; PRED-SAME: ptr [[DST:%.*]]) { @@ -430,14 +416,14 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt ; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE37:.*]] ] -; DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META8:![0-9]+]] +; DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META5:![0-9]+]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT28]], <4 x i32> poison, <4 x i32> zeroinitializer -; DEFAULT-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META11:![0-9]+]] +; DEFAULT-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META8:![0-9]+]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP19]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; DEFAULT-NEXT: [[TMP6:%.*]] = or <4 x i32> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT29]] -; DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]] +; DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META10:![0-9]+]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT30]], <4 x i32> poison, <4 x i32> zeroinitializer ; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt <4 x i32> [[BROADCAST_SPLAT31]], [[TMP6]] @@ -446,34 +432,34 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt ; DEFAULT-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; DEFAULT: [[PRED_STORE_IF]]: ; DEFAULT-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 -; DEFAULT-NEXT: store i32 [[TMP11]], ptr [[E]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META17:![0-9]+]] +; DEFAULT-NEXT: store i32 [[TMP11]], ptr [[E]], align 4, !alias.scope [[META12:![0-9]+]], !noalias [[META14:![0-9]+]] ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]] ; DEFAULT: [[PRED_STORE_CONTINUE]]: ; DEFAULT-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1 ; DEFAULT-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]] ; DEFAULT: [[PRED_STORE_IF32]]: ; DEFAULT-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 -; DEFAULT-NEXT: store i32 [[TMP13]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]] +; DEFAULT-NEXT: store i32 [[TMP13]], ptr [[E]], align 4, !alias.scope [[META12]], !noalias [[META14]] ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE33]] ; DEFAULT: [[PRED_STORE_CONTINUE33]]: ; DEFAULT-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2 ; DEFAULT-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35:.*]] ; DEFAULT: [[PRED_STORE_IF34]]: ; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 -; DEFAULT-NEXT: store i32 [[TMP15]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]] +; DEFAULT-NEXT: store i32 [[TMP15]], ptr [[E]], align 4, !alias.scope [[META12]], !noalias [[META14]] ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]] ; DEFAULT: [[PRED_STORE_CONTINUE35]]: ; DEFAULT-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3 ; DEFAULT-NEXT: br i1 [[TMP21]], label %[[PRED_STORE_IF36:.*]], label %[[PRED_STORE_CONTINUE37]] ; DEFAULT: [[PRED_STORE_IF36]]: ; DEFAULT-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0 -; DEFAULT-NEXT: store i32 [[TMP22]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]] +; DEFAULT-NEXT: store i32 [[TMP22]], ptr [[E]], align 4, !alias.scope [[META12]], !noalias [[META14]] ; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE37]] ; DEFAULT: [[PRED_STORE_CONTINUE37]]: -; DEFAULT-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr align 4 [[TMP16]], <4 x i1> [[TMP8]]), !alias.scope [[META19:![0-9]+]], !noalias [[META20:![0-9]+]] +; DEFAULT-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr align 4 [[TMP16]], <4 x i1> [[TMP8]]), !alias.scope [[META16:![0-9]+]], !noalias [[META17:![0-9]+]] ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; DEFAULT-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] @@ -550,7 +536,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; DEFAULT-NEXT: store <8 x double> [[TMP3]], ptr [[NEXT_GEP]], align 8 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; DEFAULT-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 -; DEFAULT-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[SCALAR_PH:.*]] ; DEFAULT: [[SCALAR_PH]]: @@ -660,16 +646,16 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) { ; COMMON-NEXT: store i8 6, ptr [[TMP6]], align 1 ; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE12]] ; COMMON: [[PRED_STORE_CONTINUE12]]: -; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[EXIT:.*]] +; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]] ; COMMON: [[PRED_STORE_IF13]]: ; COMMON-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7 ; COMMON-NEXT: store i8 7, ptr [[TMP7]], align 1 -; COMMON-NEXT: br label %[[EXIT]] +; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE14]] +; COMMON: [[PRED_STORE_CONTINUE14]]: +; COMMON-NEXT: br label %[[MIDDLE_BLOCK:.*]] +; COMMON: [[MIDDLE_BLOCK]]: +; COMMON-NEXT: br label %[[EXIT:.*]] ; COMMON: [[EXIT]]: -; COMMON-NEXT: br label %[[SCALAR_PH:.*]] -; COMMON: [[SCALAR_PH]]: -; COMMON-NEXT: br label %[[EXIT1:.*]] -; COMMON: [[EXIT1]]: ; COMMON-NEXT: ret void ; entry: @@ -866,7 +852,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) ; DEFAULT-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP80]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP80]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] @@ -1112,7 +1098,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) { ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; DEFAULT-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; DEFAULT-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: br label %[[SCALAR_PH:.*]] ; DEFAULT: [[SCALAR_PH]]: @@ -1287,7 +1273,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 { ; DEFAULT-NEXT: store [[TMP23]], ptr [[TMP24]], align 1 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; DEFAULT-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: ; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll index 72e813b62025f..589d1f1e77a90 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll @@ -7,10 +7,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-SAME: ptr [[DST:%.*]], i32 [[X:%.*]], i64 [[M:%.*]], i64 [[CONV6:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64 -; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP10]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 8) -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] ; CHECK: [[VECTOR_SCEVCHECK]]: ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[N]], -1 @@ -20,9 +17,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP18:%.*]] = sdiv i64 [[M]], [[CONV6]] ; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP18]] to i32 @@ -31,17 +26,20 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP24:%.*]] = sub i64 [[INDEX]], [[TMP22]] +; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP22]] ; CHECK-NEXT: [[TMP26:%.*]] = trunc i64 [[TMP24]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP11]] to i32 ; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP26]] +; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP28]], [[TMP13]] ; CHECK-NEXT: [[TMP32:%.*]] = sext i32 [[TMP30]] to i64 +; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP15]] to i64 ; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP32]] -; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP38:%.*]] = shl nuw i64 [[TMP37]], 1 -; CHECK-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP34]], i64 [[TMP38]] -; CHECK-NEXT: store zeroinitializer, ptr [[TMP34]], align 8 -; CHECK-NEXT: store zeroinitializer, ptr [[TMP39]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP34]], align 8 +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP19]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP40]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll index 14c53cd89c922..9e0f1d95edd2d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/eliminate-tail-predication.ll @@ -9,32 +9,14 @@ define void @f1(ptr %A) #0 { ; CHECK-LABEL: define void @f1 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: store splat (i32 1), ptr [[TMP2]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[N_VEC]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll index a3b7392dd280f..6e82e6d83236e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll @@ -54,7 +54,7 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[ITERATIONS]], 8 @@ -71,7 +71,7 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no ; CHECK-NEXT: store <8 x i8> [[TMP26]], ptr [[TMP27]], align 1 ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 8 ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC10]] -; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]] ; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -89,7 +89,7 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no ; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX6]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[ITERATIONS]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -157,13 +157,13 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: store <8 x i16> [[TMP14]], ptr [[TMP19]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[ITERATIONS]], 4 @@ -180,7 +180,7 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: store <4 x i16> [[TMP26]], ptr [[TMP27]], align 1 ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 4 ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC10]] -; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]] ; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -198,7 +198,7 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX6]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[ITERATIONS]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -266,13 +266,13 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP19]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[ITERATIONS]], 4 @@ -289,7 +289,7 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: store <4 x i32> [[TMP26]], ptr [[TMP27]], align 1 ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 4 ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC10]] -; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]] ; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -307,7 +307,7 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n ; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX6]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[ITERATIONS]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -369,7 +369,7 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF14:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] @@ -382,7 +382,7 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) { ; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP13]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: @@ -397,7 +397,7 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) { ; CHECK-NEXT: store i8 [[SELECT]], ptr [[GEP_B]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 20 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -422,14 +422,15 @@ exit: ; TODO: Should not try to create dead main vector loop. define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" { ; CHECK-LABEL: @trip_count_based_on_ptrtoint( -; CHECK-NEXT: iter.check: +; CHECK-NEXT: entry: ; CHECK-NEXT: [[PTR_START:%.*]] = inttoptr i64 [[X:%.*]] to ptr ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[X]], 40 ; CHECK-NEXT: [[PTR_END:%.*]] = inttoptr i64 [[ADD]] to ptr ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[ADD]], [[X]] ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: ; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[ADD]] to i2 ; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[X]] to i2 @@ -437,61 +438,41 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" { ; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i64 ; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP6]], 0 ; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] -; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[N_VEC]], 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[TMP9]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP7]], 4 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP7]], 8 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP7]], 12 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 8 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 12 -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[NEXT_GEP]], align 4 -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP8]], align 4 -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP9]], align 4 -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP10]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[N_VEC]], 4 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[TMP12]] -; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] -; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF1:%.*]] = urem i64 [[TMP2]], 4 -; CHECK-NEXT: [[N_VEC2:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF1]] -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC2]], 4 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[TMP13]] -; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] -; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX3]], 4 ; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[NEXT_GEP4]], align 4 -; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX3]], 4 -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC2]] -; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] -; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC2]] -; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] -; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], [[VECTOR_SCEVCHECK]] ], [ [[PTR_START]], [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: store i32 0, ptr [[NEXT_GEP]], align 4 +; CHECK-NEXT: store i32 0, ptr [[IND_END]], align 4 +; CHECK-NEXT: store i32 0, ptr [[TMP14]], align 4 +; CHECK-NEXT: store i32 0, ptr [[NEXT_GEP4]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[PTR_START]], [[ENTRY:%.*]] ], [ [[PTR_START]], [[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: store i32 0, ptr [[IV]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = getelementptr i8, ptr [[IV]], i64 4 ; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV]], [[PTR_END]] -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll index d23e3c29b59e5..747fa985de198 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -37,8 +37,8 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) { ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]]) ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]]) ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2 -; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1 -; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1 +; CHECK-NEXT: store i8 0, ptr [[NEXT_GEP]], align 1 +; CHECK-NEXT: store i8 0, ptr [[TMP15]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} @@ -46,7 +46,7 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) { ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END4:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000 -; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000 @@ -64,7 +64,7 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) { ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP22]]) ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP23]]) -; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[NEXT_GEP7]], align 1 +; CHECK-NEXT: store i8 0, ptr [[NEXT_GEP7]], align 1 ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2 ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT9]], 10000 ; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} @@ -136,7 +136,7 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 2 @@ -221,7 +221,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END5:%.*]] = add i64 [[START]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -304,7 +304,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) { ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 2 @@ -416,7 +416,7 @@ define void @test_widen_truncated_induction(ptr %A) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[VEC_EPILOG_RESUME_VAL]] to i8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll index cb4e99332c04b..c64e3a9d0977e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll @@ -8,53 +8,37 @@ target triple = "aarch64" define void @epilogue_vectorization_fix_scalar_resume_values(ptr %dst, i64 %n) { ; CHECK-LABEL: define void @epilogue_vectorization_fix_scalar_resume_values( ; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) { -; CHECK-NEXT: [[ITER_CHECK:.*]]: +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[REM:%.*]] = urem i64 [[N]], 3 -; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] -; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: -; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[REM]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[REM]], 32 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[REM]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[REM]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP0]], align 1 -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP1]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP1]] +; CHECK-NEXT: store i8 0, ptr [[TMP0]], align 1 +; CHECK-NEXT: store i8 0, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[REM]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] -; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] -; CHECK: [[VEC_EPILOG_PH]]: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] -; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX1]] -; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 0 -; CHECK-NEXT: br i1 [[TMP4]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-NEXT: [[CMP_N3:%.*]] = icmp eq i64 [[REM]], 0 -; CHECK-NEXT: br i1 [[CMP_N3]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] -; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[GEP_DST_IV:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]] ; CHECK-NEXT: store i8 0, ptr [[GEP_DST_IV]], align 1 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[REM]] -; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[EXIT_COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll index 5970608794b55..efcb2d34df676 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll @@ -30,23 +30,20 @@ define void @test1(ptr %dst, {i64, i64} %sv) { ; FORCED-NEXT: br label %[[VECTOR_PH:.*]] ; FORCED: [[VECTOR_PH]]: ; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { i64, i64 } [[SV]], 0 -; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0 -; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { i64, i64 } [[SV]], 1 -; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0 -; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer -; FORCED-NEXT: [[TMP1:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]] +; FORCED-NEXT: [[TMP5:%.*]] = add i64 [[TMP0]], [[TMP4]] ; FORCED-NEXT: br label %[[VECTOR_BODY:.*]] ; FORCED: [[VECTOR_BODY]]: ; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FORCED-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]] -; FORCED-NEXT: store <2 x i64> [[TMP1]], ptr [[TMP2]], align 4 +; FORCED-NEXT: store i64 [[TMP5]], ptr [[TMP2]], align 4 ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; FORCED: [[MIDDLE_BLOCK]]: -; FORCED-NEXT: br [[EXIT:label %.*]] -; FORCED: [[SCALAR_PH:.*:]] +; FORCED-NEXT: br label %[[EXIT:.*]] +; FORCED: [[EXIT]]: +; FORCED-NEXT: ret void ; entry: br label %loop.body @@ -99,10 +96,11 @@ define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) { ; FORCED-NEXT: store <2 x float> [[TMP2]], ptr [[TMP1]], align 4 ; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 -; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; FORCED: [[MIDDLE_BLOCK]]: -; FORCED-NEXT: br [[EXIT:label %.*]] -; FORCED: [[SCALAR_PH:.*:]] +; FORCED-NEXT: br label %[[EXIT:.*]] +; FORCED: [[EXIT]]: +; FORCED-NEXT: ret void ; entry: br label %loop.body @@ -121,3 +119,5 @@ loop.body: exit: ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CM: {{.*}} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index 068f82c7db670..256beede2c661 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -47,61 +47,17 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) { ; CHECK-NEXT: [[A_EXT:%.*]] = zext i8 [[A]] to i64 ; CHECK-NEXT: [[START:%.*]] = call i64 @llvm.umin.i64(i64 [[B]], i64 [[A_EXT]]) ; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[START]], 0 -; CHECK-NEXT: br i1 [[C]], label %[[EXIT:.*]], label %[[ITER_CHECK:.*]] -; CHECK: [[ITER_CHECK]]: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[START]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] -; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: -; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[START]], 16 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[START]], 16 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[START]], [[N_MOD_VF]] -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[INDEX]] -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[NEXT_GEP1]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[START]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] -; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[IND_END:%.*]] = sub i64 [[START]], [[N_VEC]] -; CHECK-NEXT: [[IND_END2:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[N_VEC]] -; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF4:![0-9]+]] -; CHECK: [[VEC_EPILOG_PH]]: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[START]], 4 -; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[START]], [[N_MOD_VF2]] -; CHECK-NEXT: [[IND_END1:%.*]] = sub i64 [[START]], [[N_VEC3]] -; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[N_VEC3]] -; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] -; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[INDEX4]] -; CHECK-NEXT: store <4 x i8> zeroinitializer, ptr [[NEXT_GEP5]], align 1 -; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX4]], 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP5]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[START]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N7]], label %[[EXIT_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] -; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END1]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL8:%.*]] = phi ptr [ [[IND_END5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END2]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], %[[ITER_CHECK]] ] +; CHECK-NEXT: br i1 [[C]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]] +; CHECK: [[LOOP_PREHEADER]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL8]], %[[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[START]], %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ], [ [[PTR_START]], %[[LOOP_PREHEADER]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1 ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 1 ; CHECK-NEXT: store i8 0, ptr [[PTR_IV]], align 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 0 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT_LOOPEXIT]]: ; CHECK-NEXT: br label %[[EXIT]] ; CHECK: [[EXIT]]: @@ -197,26 +153,26 @@ define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y. ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 ; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: -; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7:![0-9]+]], !noalias [[META10:![0-9]+]] +; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META3:![0-9]+]], !noalias [[META6:![0-9]+]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 ; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF42:.*]], label %[[PRED_STORE_CONTINUE43:.*]] ; CHECK: [[PRED_STORE_IF42]]: -; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META7]], !noalias [[META10]] +; CHECK-NEXT: store i64 0, ptr [[DST_1]], align 8, !alias.scope [[META3]], !noalias [[META6]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE43]] ; CHECK: [[PRED_STORE_CONTINUE43]]: ; CHECK-NEXT: [[TMP13:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP11]], <2 x i1> zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP13]], i32 0 ; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_STORE_IF44:.*]], label %[[PRED_STORE_CONTINUE45:.*]] ; CHECK: [[PRED_STORE_IF44]]: -; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15:![0-9]+]], !noalias [[META16:![0-9]+]] +; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META11:![0-9]+]], !noalias [[META12:![0-9]+]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE45]] ; CHECK: [[PRED_STORE_CONTINUE45]]: ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP13]], i32 1 ; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_STORE_IF46:.*]], label %[[PRED_STORE_CONTINUE47:.*]] ; CHECK: [[PRED_STORE_IF46]]: -; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META15]], !noalias [[META16]] +; CHECK-NEXT: store i64 0, ptr [[DST_3]], align 8, !alias.scope [[META11]], !noalias [[META12]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE47]] ; CHECK: [[PRED_STORE_CONTINUE47]]: ; CHECK-NEXT: [[TMP21:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer @@ -226,14 +182,14 @@ define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y. ; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF48:.*]], label %[[PRED_STORE_CONTINUE49:.*]] ; CHECK: [[PRED_STORE_IF48]]: ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x i64> [[PREDPHI58]], i32 0 -; CHECK-NEXT: store i64 [[TMP29]], ptr [[DST_2]], align 8, !alias.scope [[META17:![0-9]+]], !noalias [[META18:![0-9]+]] +; CHECK-NEXT: store i64 [[TMP29]], ptr [[DST_2]], align 8, !alias.scope [[META13:![0-9]+]], !noalias [[META14:![0-9]+]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE49]] ; CHECK: [[PRED_STORE_CONTINUE49]]: ; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP23]], i32 1 ; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF50:.*]], label %[[PRED_STORE_CONTINUE51:.*]] ; CHECK: [[PRED_STORE_IF50]]: ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i64> [[PREDPHI58]], i32 1 -; CHECK-NEXT: store i64 [[TMP31]], ptr [[DST_2]], align 8, !alias.scope [[META17]], !noalias [[META18]] +; CHECK-NEXT: store i64 [[TMP31]], ptr [[DST_2]], align 8, !alias.scope [[META13]], !noalias [[META14]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE51]] ; CHECK: [[PRED_STORE_CONTINUE51]]: ; CHECK-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP5]], <2 x i1> [[TMP6]], <2 x i1> zeroinitializer @@ -241,20 +197,20 @@ define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y. ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP37]], i32 0 ; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF52:.*]], label %[[PRED_STORE_CONTINUE53:.*]] ; CHECK: [[PRED_STORE_IF52]]: -; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19:![0-9]+]] -; CHECK-NEXT: store i64 [[TMP22]], ptr [[DST]], align 8, !alias.scope [[META20:![0-9]+]], !noalias [[META19]] +; CHECK-NEXT: [[TMP22:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META15:![0-9]+]] +; CHECK-NEXT: store i64 [[TMP22]], ptr [[DST]], align 8, !alias.scope [[META16:![0-9]+]], !noalias [[META15]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE53]] ; CHECK: [[PRED_STORE_CONTINUE53]]: ; CHECK-NEXT: [[TMP44:%.*]] = extractelement <2 x i1> [[TMP37]], i32 1 ; CHECK-NEXT: br i1 [[TMP44]], label %[[PRED_STORE_IF54:.*]], label %[[PRED_STORE_CONTINUE55]] ; CHECK: [[PRED_STORE_IF54]]: -; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META19]] -; CHECK-NEXT: store i64 [[TMP24]], ptr [[DST]], align 8, !alias.scope [[META20]], !noalias [[META19]] +; CHECK-NEXT: [[TMP24:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META15]] +; CHECK-NEXT: store i64 [[TMP24]], ptr [[DST]], align 8, !alias.scope [[META16]], !noalias [[META15]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE55]] ; CHECK: [[PRED_STORE_CONTINUE55]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 -; CHECK-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: @@ -295,7 +251,7 @@ define void @test_exit_branch_cost(ptr %dst, ptr noalias %x.ptr, ptr noalias %y. ; CHECK: [[LOOP_LATCH]]: ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 64 -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -371,7 +327,7 @@ define void @invalid_legacy_cost(i64 %N, ptr %x) #0 { ; CHECK-NEXT: store <2 x ptr> [[TMP8]], ptr [[TMP9]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] @@ -385,7 +341,7 @@ define void @invalid_legacy_cost(i64 %N, ptr %x) #0 { ; CHECK-NEXT: store ptr [[TMP12]], ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: [[FOR_END]]: ; CHECK-NEXT: ret void ; @@ -413,26 +369,22 @@ declare i64 @llvm.umin.i64(i64, i64) ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} -; CHECK: [[PROF4]] = !{!"branch_weights", i32 4, i32 12} -; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} -; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]} -; CHECK: [[META7]] = !{[[META8:![0-9]+]]} -; CHECK: [[META8]] = distinct !{[[META8]], [[META9:![0-9]+]]} -; CHECK: [[META9]] = distinct !{[[META9]], !"LVerDomain"} -; CHECK: [[META10]] = !{[[META11:![0-9]+]], [[META12:![0-9]+]], [[META13:![0-9]+]], [[META14:![0-9]+]]} -; CHECK: [[META11]] = distinct !{[[META11]], [[META9]]} -; CHECK: [[META12]] = distinct !{[[META12]], [[META9]]} -; CHECK: [[META13]] = distinct !{[[META13]], [[META9]]} -; CHECK: [[META14]] = distinct !{[[META14]], [[META9]]} -; CHECK: [[META15]] = !{[[META11]]} -; CHECK: [[META16]] = !{[[META12]], [[META13]], [[META14]]} -; CHECK: [[META17]] = !{[[META12]]} -; CHECK: [[META18]] = !{[[META13]], [[META14]]} -; CHECK: [[META19]] = !{[[META14]]} -; CHECK: [[META20]] = !{[[META13]]} -; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]], [[META2]]} -; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]]} -; CHECK: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]], [[META2]]} -; CHECK: [[LOOP24]] = distinct !{[[LOOP24]], [[META2]], [[META1]]} +; CHECK: [[META3]] = !{[[META4:![0-9]+]]} +; CHECK: [[META4]] = distinct !{[[META4]], [[META5:![0-9]+]]} +; CHECK: [[META5]] = distinct !{[[META5]], !"LVerDomain"} +; CHECK: [[META6]] = !{[[META7:![0-9]+]], [[META8:![0-9]+]], [[META9:![0-9]+]], [[META10:![0-9]+]]} +; CHECK: [[META7]] = distinct !{[[META7]], [[META5]]} +; CHECK: [[META8]] = distinct !{[[META8]], [[META5]]} +; CHECK: [[META9]] = distinct !{[[META9]], [[META5]]} +; CHECK: [[META10]] = distinct !{[[META10]], [[META5]]} +; CHECK: [[META11]] = !{[[META7]]} +; CHECK: [[META12]] = !{[[META8]], [[META9]], [[META10]]} +; CHECK: [[META13]] = !{[[META8]]} +; CHECK: [[META14]] = !{[[META9]], [[META10]]} +; CHECK: [[META15]] = !{[[META10]]} +; CHECK: [[META16]] = !{[[META9]]} +; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]], [[META2]]} +; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]]} +; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]], [[META2]]} +; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll index cfc6cc87a2a21..a1092e7bb9369 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll @@ -212,7 +212,10 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; DEFAULT-NEXT: [[ENTRY:.*]]: ; DEFAULT-NEXT: [[MUL_X:%.*]] = add i32 [[X]], 1 ; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 +; DEFAULT-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; DEFAULT-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 3 +; DEFAULT-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP14]], i64 28) +; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[UMAX]] ; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] ; DEFAULT: [[VECTOR_SCEVCHECK]]: ; DEFAULT-NEXT: [[TMP1:%.*]] = sub i32 -1, [[X]] @@ -232,22 +235,36 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 { ; DEFAULT-NEXT: [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]] ; DEFAULT-NEXT: br i1 [[TMP12]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] ; DEFAULT: [[VECTOR_PH]]: -; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 +; DEFAULT-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; DEFAULT-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 +; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 +; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; DEFAULT-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 2 +; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP17]] ; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; DEFAULT-NEXT: [[TMP18:%.*]] = trunc [[BROADCAST_SPLAT]] to +; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[MUL_X]], i64 0 +; DEFAULT-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; DEFAULT-NEXT: [[TMP27:%.*]] = call @llvm.stepvector.nxv4i32() +; DEFAULT-NEXT: [[TMP28:%.*]] = mul [[TMP27]], splat (i32 1) +; DEFAULT-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP28]] ; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[TMP13:%.*]] = trunc i64 [[INDEX]] to i32 -; DEFAULT-NEXT: [[TMP14:%.*]] = add i32 [[TMP13]], 1 -; DEFAULT-NEXT: [[TMP15:%.*]] = mul i32 [[MUL_X]], [[TMP13]] -; DEFAULT-NEXT: [[TMP16:%.*]] = mul i32 [[MUL_X]], [[TMP14]] -; DEFAULT-NEXT: [[TMP17:%.*]] = zext i32 [[TMP15]] to i64 -; DEFAULT-NEXT: [[TMP18:%.*]] = zext i32 [[TMP16]] to i64 -; DEFAULT-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP17]] -; DEFAULT-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP18]] +; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; DEFAULT-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[TMP18]] +; DEFAULT-NEXT: [[TMP29:%.*]] = mul [[BROADCAST_SPLAT2]], [[VEC_IND]] +; DEFAULT-NEXT: [[TMP22:%.*]] = mul [[BROADCAST_SPLAT2]], [[STEP_ADD]] +; DEFAULT-NEXT: [[TMP23:%.*]] = zext [[TMP29]] to +; DEFAULT-NEXT: [[TMP24:%.*]] = zext [[TMP22]] to +; DEFAULT-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[DST]], [[TMP23]] +; DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[DST]], [[TMP24]] +; DEFAULT-NEXT: [[TMP19:%.*]] = extractelement [[TMP25]], i32 0 ; DEFAULT-NEXT: store i32 1, ptr [[TMP19]], align 4 +; DEFAULT-NEXT: [[TMP20:%.*]] = extractelement [[TMP26]], i32 0 ; DEFAULT-NEXT: store i32 1, ptr [[TMP20]], align 4 -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] +; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[TMP18]] ; DEFAULT-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; DEFAULT: [[MIDDLE_BLOCK]]: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll index 5d80353fcadd8..256230866fc9a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll @@ -31,7 +31,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3 ; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP6]]) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP8]], align 1 +; CHECK-NEXT: store i8 0, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[TMP10:%.*]] = zext <16 x i32> [[TMP7]] to <16 x i64> ; CHECK-NEXT: [[TMP11]] = or <16 x i64> [[VEC_PHI]], [[TMP10]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 @@ -74,7 +74,7 @@ define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i3 ; CHECK-NEXT: [[TMP30:%.*]] = call @llvm.umin.nxv2i32( [[TMP24]], [[TMP29]]) ; CHECK-NEXT: [[TMP31:%.*]] = call @llvm.umin.nxv2i32( [[TMP24]], [[TMP30]]) ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX7]] -; CHECK-NEXT: store zeroinitializer, ptr [[TMP32]], align 1 +; CHECK-NEXT: store i8 0, ptr [[TMP32]], align 1 ; CHECK-NEXT: [[TMP34:%.*]] = zext [[TMP31]] to ; CHECK-NEXT: [[TMP35]] = or [[VEC_PHI8]], [[TMP34]] ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX7]], [[TMP17]] @@ -162,7 +162,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no ; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP5]]) ; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP6]]) ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP26]], align 1 +; CHECK-NEXT: store i8 0, ptr [[TMP26]], align 1 ; CHECK-NEXT: [[TMP10:%.*]] = zext <16 x i32> [[TMP7]] to <16 x i64> ; CHECK-NEXT: [[TMP11]] = or <16 x i64> [[VEC_PHI]], [[TMP10]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 @@ -205,7 +205,7 @@ define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr no ; CHECK-NEXT: [[TMP30:%.*]] = call @llvm.umin.nxv2i32( [[TMP24]], [[TMP29]]) ; CHECK-NEXT: [[TMP31:%.*]] = call @llvm.umin.nxv2i32( [[TMP24]], [[TMP30]]) ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX7]] -; CHECK-NEXT: store zeroinitializer, ptr [[TMP32]], align 1 +; CHECK-NEXT: store i8 0, ptr [[TMP32]], align 1 ; CHECK-NEXT: [[TMP34:%.*]] = zext [[TMP31]] to ; CHECK-NEXT: [[TMP35]] = or [[VEC_PHI8]], [[TMP34]] ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX7]], [[TMP17]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll index 46ec858d7455c..adbe94d5c8c13 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll @@ -889,7 +889,7 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ [[TMP2]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] ; CHECK-INTERLEAVE1-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[D]], [[INDEX]] ; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-INTERLEAVE1-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP3]], align 1 +; CHECK-INTERLEAVE1-NEXT: store i8 0, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP5]] = add <16 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT]] ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -922,8 +922,8 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[D]], [[VEC_PHI1]] ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[OFFSET_IDX]] ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16 -; CHECK-INTERLEAVED-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP3]], align 1 -; CHECK-INTERLEAVED-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP5]], align 1 +; CHECK-INTERLEAVED-NEXT: store i8 0, ptr [[TMP3]], align 1 +; CHECK-INTERLEAVED-NEXT: store i8 0, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP6]] = add <16 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT]] ; CHECK-INTERLEAVED-NEXT: [[TMP7]] = add <16 x i32> [[VEC_PHI2]], [[BROADCAST_SPLAT]] ; CHECK-INTERLEAVED-NEXT: [[TMP21]] = add nuw i32 [[VEC_PHI1]], 32 @@ -960,7 +960,7 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 { ; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi [ [[TMP8]], [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[FOR_BODY]] ] ; CHECK-MAXBW-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[D]], [[INDEX]] ; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-MAXBW-NEXT: store zeroinitializer, ptr [[TMP9]], align 1 +; CHECK-MAXBW-NEXT: store i8 0, ptr [[TMP9]], align 1 ; CHECK-MAXBW-NEXT: [[TMP11]] = add [[VEC_PHI]], [[BROADCAST_SPLAT]] ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]] ; CHECK-MAXBW-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -1010,7 +1010,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP2]], [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[FOR_BODY]] ] ; CHECK-INTERLEAVE1-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[D]], [[INDEX]] ; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-INTERLEAVE1-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP4]], align 1 +; CHECK-INTERLEAVE1-NEXT: store i8 0, ptr [[TMP4]], align 1 ; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32> ; CHECK-INTERLEAVE1-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP5]]) ; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 @@ -1043,8 +1043,8 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[D]], [[VEC_PHI1]] ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[OFFSET_IDX]] ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 16 -; CHECK-INTERLEAVED-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP4]], align 1 -; CHECK-INTERLEAVED-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP6]], align 1 +; CHECK-INTERLEAVED-NEXT: store i8 0, ptr [[TMP4]], align 1 +; CHECK-INTERLEAVED-NEXT: store i8 0, ptr [[TMP6]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP5]]) ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE2]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP5]]) @@ -1081,7 +1081,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 { ; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi [ [[TMP8]], [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[FOR_BODY]] ] ; CHECK-MAXBW-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[D]], [[INDEX]] ; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-MAXBW-NEXT: store zeroinitializer, ptr [[TMP10]], align 1 +; CHECK-MAXBW-NEXT: store i8 0, ptr [[TMP10]], align 1 ; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext [[BROADCAST_SPLAT]] to ; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32( [[VEC_PHI]], [[TMP9]]) ; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll index 0d8a1021bd438..fd5cc45c7e647 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll @@ -8,66 +8,31 @@ target triple = "arm64-apple-macosx14.0.0" define void @cost_store_i8(ptr %dst) #0 { ; DEFAULT-LABEL: define void @cost_store_i8( ; DEFAULT-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] { -; DEFAULT-NEXT: iter.check: -; DEFAULT-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP10]], 3 -; DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 101, [[TMP13]] -; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] -; DEFAULT: vector.main.loop.iter.check: -; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5 -; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 101, [[TMP1]] -; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: br label [[VECTOR_PH:%.*]] ; DEFAULT: vector.ph: -; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 -; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 101, [[TMP3]] -; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 101, [[N_MOD_VF]] ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 ; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 -; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP4]], i64 [[TMP6]] -; DEFAULT-NEXT: store zeroinitializer, ptr [[TMP4]], align 1 -; DEFAULT-NEXT: store zeroinitializer, ptr [[TMP7]], align 1 -; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; DEFAULT-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; DEFAULT-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP0]] +; DEFAULT-NEXT: store i8 0, ptr [[TMP4]], align 1 +; DEFAULT-NEXT: store i8 0, ptr [[TMP2]], align 1 +; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; DEFAULT-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; DEFAULT-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; DEFAULT: middle.block: -; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 101, [[N_VEC]] -; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; DEFAULT: vec.epilog.iter.check: -; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP13]] -; DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] -; DEFAULT: vec.epilog.ph: -; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; DEFAULT-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; DEFAULT-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 8 -; DEFAULT-NEXT: [[N_MOD_VF2:%.*]] = urem i64 101, [[TMP12]] -; DEFAULT-NEXT: [[N_VEC3:%.*]] = sub i64 101, [[N_MOD_VF2]] ; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] -; DEFAULT: vec.epilog.vector.body: -; DEFAULT-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX1]] -; DEFAULT-NEXT: store zeroinitializer, ptr [[TMP9]], align 1 -; DEFAULT-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP12]] -; DEFAULT-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC3]] -; DEFAULT-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; DEFAULT: vec.epilog.middle.block: -; DEFAULT-NEXT: [[CMP_N6:%.*]] = icmp eq i64 101, [[N_VEC3]] -; DEFAULT-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] -; DEFAULT: vec.epilog.scalar.ph: -; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; DEFAULT: scalar.ph: ; DEFAULT-NEXT: br label [[LOOP:%.*]] ; DEFAULT: loop: -; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ 100, [[VEC_EPILOG_VECTOR_BODY]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] ; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]] ; DEFAULT-NEXT: store i8 0, ptr [[GEP]], align 1 ; DEFAULT-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 100 -; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; DEFAULT: exit: ; DEFAULT-NEXT: ret void ; @@ -136,22 +101,22 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]] ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6:![0-9]+]] +; DEFAULT-NEXT: [[TMP1:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META4:![0-9]+]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x i64> poison, i64 [[TMP1]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT2]], <16 x i64> poison, <16 x i32> zeroinitializer ; DEFAULT-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[BROADCAST_SPLAT3]] to <16 x i8> ; DEFAULT-NEXT: [[TMP3:%.*]] = and <16 x i8> [[TMP2]], [[TMP0]] ; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] ; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16 -; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]] -; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP5]], align 1, !alias.scope [[META9]], !noalias [[META6]] +; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1, !alias.scope [[META7:![0-9]+]], !noalias [[META4]] +; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP5]], align 1, !alias.scope [[META7]], !noalias [[META4]] ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 ; DEFAULT-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992 -; DEFAULT-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; DEFAULT: middle.block: ; DEFAULT-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; DEFAULT: vec.epilog.iter.check: -; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] +; DEFAULT-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF10:![0-9]+]] ; DEFAULT: vec.epilog.ph: ; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <8 x i16> poison, i16 [[X]], i64 0 @@ -160,16 +125,16 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; DEFAULT: vec.epilog.vector.body: ; DEFAULT-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; DEFAULT-NEXT: [[TMP8:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META6]] +; DEFAULT-NEXT: [[TMP8:%.*]] = load i64, ptr [[SRC]], align 8, !alias.scope [[META4]] ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <8 x i64> poison, i64 [[TMP8]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT7]], <8 x i64> poison, <8 x i32> zeroinitializer ; DEFAULT-NEXT: [[TMP9:%.*]] = trunc <8 x i64> [[BROADCAST_SPLAT8]] to <8 x i8> ; DEFAULT-NEXT: [[TMP10:%.*]] = and <8 x i8> [[TMP9]], [[TMP7]] ; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX6]] -; DEFAULT-NEXT: store <8 x i8> [[TMP10]], ptr [[TMP11]], align 1, !alias.scope [[META9]], !noalias [[META6]] +; DEFAULT-NEXT: store <8 x i8> [[TMP10]], ptr [[TMP11]], align 1, !alias.scope [[META7]], !noalias [[META4]] ; DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 8 ; DEFAULT-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT9]], 1000 -; DEFAULT-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; DEFAULT-NEXT: br i1 [[TMP12]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; DEFAULT: vec.epilog.middle.block: ; DEFAULT-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; DEFAULT: vec.epilog.scalar.ph: @@ -185,7 +150,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT-NEXT: store i8 [[TRUNC]], ptr [[GEP]], align 1 ; DEFAULT-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000 -; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]] +; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]] ; DEFAULT: exit: ; DEFAULT-NEXT: ret void ; @@ -268,17 +233,16 @@ attributes #1 = { vscale_range(1,16) "target-features"="+sve" } ; DEFAULT: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; DEFAULT: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; DEFAULT: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; DEFAULT: [[PROF3]] = !{!"branch_weights", i32 8, i32 24} -; DEFAULT: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; DEFAULT: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} -; DEFAULT: [[META6]] = !{[[META7:![0-9]+]]} -; DEFAULT: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]} -; DEFAULT: [[META8]] = distinct !{[[META8]], !"LVerDomain"} -; DEFAULT: [[META9]] = !{[[META10:![0-9]+]]} -; DEFAULT: [[META10]] = distinct !{[[META10]], [[META8]]} +; DEFAULT: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} +; DEFAULT: [[META4]] = !{[[META5:![0-9]+]]} +; DEFAULT: [[META5]] = distinct !{[[META5]], [[META6:![0-9]+]]} +; DEFAULT: [[META6]] = distinct !{[[META6]], !"LVerDomain"} +; DEFAULT: [[META7]] = !{[[META8:![0-9]+]]} +; DEFAULT: [[META8]] = distinct !{[[META8]], [[META6]]} +; DEFAULT: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} +; DEFAULT: [[PROF10]] = !{!"branch_weights", i32 8, i32 24} ; DEFAULT: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]} -; DEFAULT: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} -; DEFAULT: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]} +; DEFAULT: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]]} ;. ; PRED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; PRED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll index 9c8a18725afa4..93ded33c05d70 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -21,80 +21,59 @@ target triple = "aarch64-linux-gnu" define void @main_vf_vscale_x_16(ptr %A) #0 { ; CHECK-LABEL: @main_vf_vscale_x_16( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP6]] -; CHECK-NEXT: store splat (i8 1), ptr [[TMP4]], align 1 -; CHECK-NEXT: store splat (i8 1), ptr [[TMP7]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_ITER_CHECK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: store i8 1, ptr [[TMP4]], align 1 +; CHECK-NEXT: store i8 1, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: +; CHECK: exit: +; CHECK-NEXT: ret void ; ; CHECK-VF8-LABEL: @main_vf_vscale_x_16( ; CHECK-VF8-NEXT: iter.check: ; CHECK-VF8-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK-VF8: vector.main.loop.iter.check: -; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5 -; CHECK-VF8-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] -; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF8-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-VF8: vector.ph: -; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 -; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF8: vector.body: ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF8-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 ; CHECK-VF8-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-VF8-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 -; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP6]] -; CHECK-VF8-NEXT: store splat (i8 1), ptr [[TMP4]], align 1 -; CHECK-VF8-NEXT: store splat (i8 1), ptr [[TMP7]], align 1 -; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-VF8-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-VF8-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-VF8-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]] +; CHECK-VF8-NEXT: store i8 1, ptr [[TMP4]], align 1 +; CHECK-VF8-NEXT: store i8 1, ptr [[TMP2]], align 1 +; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-VF8-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-VF8-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VF8: middle.block: -; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] -; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK-VF8-NEXT: br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK-VF8: vec.epilog.iter.check: -; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] +; CHECK-VF8-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK-VF8: vec.epilog.ph: -; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK-VF8: vec.epilog.vector.body: ; CHECK-VF8-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX1]] -; CHECK-VF8-NEXT: store <8 x i8> splat (i8 1), ptr [[TMP9]], align 1 +; CHECK-VF8-NEXT: store i8 1, ptr [[TMP9]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 ; CHECK-VF8-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024 ; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-VF8: vec.epilog.scalar.ph: -; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 1024, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-VF8-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-VF8: for.body: ; @@ -118,74 +97,57 @@ define void @main_vf_vscale_x_2_no_epi_iteration(ptr %A) #0 vscale_range(8, 8) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 [[TMP4]] -; CHECK-NEXT: store splat (i64 1), ptr [[TMP2]], align 1 -; CHECK-NEXT: store splat (i64 1), ptr [[TMP5]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: store i64 1, ptr [[TMP2]], align 1 +; CHECK-NEXT: store i64 1, ptr [[TMP4]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]] -; CHECK: scalar.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: +; CHECK: exit: +; CHECK-NEXT: ret void ; ; CHECK-VF8-LABEL: @main_vf_vscale_x_2_no_epi_iteration( ; CHECK-VF8-NEXT: iter.check: ; CHECK-VF8-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK-VF8: vector.main.loop.iter.check: -; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 -; CHECK-VF8-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] -; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF8-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-VF8: vector.ph: -; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 -; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF8: vector.body: ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF8-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 ; CHECK-VF8-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-VF8-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 -; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP6]] -; CHECK-VF8-NEXT: store splat (i64 1), ptr [[TMP4]], align 1 -; CHECK-VF8-NEXT: store splat (i64 1), ptr [[TMP7]], align 1 -; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-VF8-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-VF8-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-VF8-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] +; CHECK-VF8-NEXT: store i64 1, ptr [[TMP4]], align 1 +; CHECK-VF8-NEXT: store i64 1, ptr [[TMP2]], align 1 +; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-VF8-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-VF8-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-VF8: middle.block: -; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] -; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK-VF8-NEXT: br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK-VF8: vec.epilog.iter.check: -; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] +; CHECK-VF8-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK-VF8: vec.epilog.ph: -; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK-VF8: vec.epilog.vector.body: ; CHECK-VF8-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX1]] -; CHECK-VF8-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP9]], align 1 +; CHECK-VF8-NEXT: store i64 1, ptr [[TMP9]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 ; CHECK-VF8-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 1024 ; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-VF8: vec.epilog.scalar.ph: -; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 1024, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-VF8-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-VF8: for.body: ; @@ -220,54 +182,28 @@ exit: ; VScaleForTuning (set by mcpu/mtune). define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) { ; CHECK-LABEL: @main_vf_vscale_x_2( -; CHECK-NEXT: iter.check: -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] -; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 -; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N:%.*]], 2 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP6]] -; CHECK-NEXT: store splat (i64 1), ptr [[TMP4]], align 1 -; CHECK-NEXT: store splat (i64 1), ptr [[TMP7]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: store i64 1, ptr [[TMP4]], align 1 +; CHECK-NEXT: store i64 1, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_ITER_CHECK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]] -; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 8 -; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] -; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] -; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX4]] -; CHECK-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP9]], align 1 -; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], 8 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] -; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; @@ -276,25 +212,20 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) { ; CHECK-VF8-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 ; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK-VF8: vector.main.loop.iter.check: -; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 -; CHECK-VF8-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-VF8-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], 2 ; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-VF8: vector.ph: -; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 -; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 ; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF8: vector.body: ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF8-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 ; CHECK-VF8-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-VF8-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 1 -; CHECK-VF8-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 [[TMP6]] -; CHECK-VF8-NEXT: store splat (i64 1), ptr [[TMP4]], align 1 -; CHECK-VF8-NEXT: store splat (i64 1), ptr [[TMP7]], align 1 -; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-VF8-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] +; CHECK-VF8-NEXT: store i64 1, ptr [[TMP4]], align 1 +; CHECK-VF8-NEXT: store i64 1, ptr [[TMP2]], align 1 +; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-VF8-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF8-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-VF8: middle.block: @@ -311,7 +242,7 @@ define void @main_vf_vscale_x_2(ptr %A, i64 %n) #0 vscale_range(8, 8) { ; CHECK-VF8: vec.epilog.vector.body: ; CHECK-VF8-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-VF8-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX4]] -; CHECK-VF8-NEXT: store <8 x i64> splat (i64 1), ptr [[TMP9]], align 1 +; CHECK-VF8-NEXT: store i64 1, ptr [[TMP9]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], 8 ; CHECK-VF8-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]] ; CHECK-VF8-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -342,103 +273,62 @@ exit: ; uses an incorrect resume value. define void @test_pr57912_pointer_induction(ptr %start) #0 { ; CHECK-LABEL: @test_pr57912_pointer_induction( -; CHECK-NEXT: iter.check: -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] -; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 10000, [[TMP1]] -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP3]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX1:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP5]] -; CHECK-NEXT: store zeroinitializer, ptr [[NEXT_GEP]], align 1 -; CHECK-NEXT: store zeroinitializer, ptr [[TMP6]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] -; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7]] -; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 10000 -; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] -; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX1]] -; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr [[NEXT_GEP2]], align 1 -; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 8 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 10000 -; CHECK-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] -; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] -; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[TMP8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK]] ] +; CHECK-NEXT: store i8 0, ptr [[NEXT_GEP]], align 1 +; CHECK-NEXT: store i8 0, ptr [[NEXT_GEP2]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 +; CHECK-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: middle.block: ; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: +; CHECK: exit: +; CHECK-NEXT: ret void ; ; CHECK-VF8-LABEL: @test_pr57912_pointer_induction( ; CHECK-VF8-NEXT: iter.check: ; CHECK-VF8-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; CHECK-VF8: vector.main.loop.iter.check: -; CHECK-VF8-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5 -; CHECK-VF8-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 10000, [[TMP1]] -; CHECK-VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF8-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-VF8: vector.ph: -; CHECK-VF8-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 -; CHECK-VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 10000, [[TMP3]] -; CHECK-VF8-NEXT: [[N_VEC:%.*]] = sub i64 10000, [[N_MOD_VF]] ; CHECK-VF8-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF8: vector.body: ; CHECK-VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF8-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 ; CHECK-VF8-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[INDEX]] -; CHECK-VF8-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-VF8-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 4 -; CHECK-VF8-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 [[TMP5]] -; CHECK-VF8-NEXT: store zeroinitializer, ptr [[NEXT_GEP]], align 1 -; CHECK-VF8-NEXT: store zeroinitializer, ptr [[TMP6]], align 1 -; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-VF8-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-VF8-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-VF8-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP0]] +; CHECK-VF8-NEXT: store i8 0, ptr [[NEXT_GEP]], align 1 +; CHECK-VF8-NEXT: store i8 0, ptr [[NEXT_GEP1]], align 1 +; CHECK-VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-VF8-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 +; CHECK-VF8-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK-VF8: middle.block: -; CHECK-VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, [[N_VEC]] -; CHECK-VF8-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK-VF8-NEXT: br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK-VF8: vec.epilog.iter.check: -; CHECK-VF8-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] -; CHECK-VF8-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-VF8-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] +; CHECK-VF8-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 10000 +; CHECK-VF8-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK-VF8: vec.epilog.ph: -; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-VF8-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-VF8-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 10000 ; CHECK-VF8-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK-VF8: vec.epilog.vector.body: ; CHECK-VF8-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-VF8-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX1]] -; CHECK-VF8-NEXT: store <8 x i8> zeroinitializer, ptr [[NEXT_GEP2]], align 1 +; CHECK-VF8-NEXT: store i8 0, ptr [[NEXT_GEP2]], align 1 ; CHECK-VF8-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 8 ; CHECK-VF8-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 10000 ; CHECK-VF8-NEXT: br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-VF8: vec.epilog.middle.block: ; CHECK-VF8-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK-VF8: vec.epilog.scalar.ph: -; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-VF8-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[TMP8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[ITER_CHECK]] ] ; CHECK-VF8-NEXT: br label [[LOOP:%.*]] ; CHECK-VF8: loop: @@ -497,13 +387,13 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1 ; CHECK-NEXT: store [[TMP13]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF14:![0-9]+]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF8:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2 @@ -519,7 +409,7 @@ define void @trip_count_vscale(ptr noalias %a, ptr noalias %b) vscale_range(1, 1 ; CHECK-NEXT: store <2 x float> [[TMP17]], ptr [[TMP16]], align 4 ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC5]] -; CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N10]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -621,7 +511,7 @@ define void @trip_count_vscale_no_epilogue_iterations(ptr noalias %a, ptr noalia ; CHECK-NEXT: store [[TMP11]], ptr [[TMP9]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll index 4706798c525bd..faf340debbc98 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vscale-fixed.ll @@ -26,110 +26,54 @@ target triple = "aarch64-linux-gnu" define void @main_vf_vscale_x_16(ptr %A, i64 %n) #0 { ; CHECK-LABEL: @main_vf_vscale_x_16( -; CHECK-NEXT: iter.check: -; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N:%.*]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] -; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 5 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 32 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP5]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[TMP6]] -; CHECK-NEXT: store splat (i8 1), ptr [[TMP4]], align 1 -; CHECK-NEXT: store splat (i8 1), ptr [[TMP7]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: store i8 1, ptr [[TMP4]], align 1 +; CHECK-NEXT: store i8 1, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[VEC_EPILOG_ITER_CHECK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] -; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 8 -; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] -; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] -; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX1]] -; CHECK-NEXT: store <8 x i8> splat (i8 1), ptr [[TMP9]], align 1 -; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 8 -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] -; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; ; CHECK-EPILOG-PREFER-SCALABLE-LABEL: @main_vf_vscale_x_16( -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: iter.check: -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] -; CHECK-EPILOG-PREFER-SCALABLE: vector.main.loop.iter.check: -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 5 -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP3]] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: entry: +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N:%.*]], 2 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK-EPILOG-PREFER-SCALABLE: vector.ph: -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 32 -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP5]] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-EPILOG-PREFER-SCALABLE: vector.body: ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 1 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP8:%.*]] = shl nuw i64 [[TMP7]], 4 -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 [[TMP8]] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: store splat (i8 1), ptr [[TMP6]], align 1 -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: store splat (i8 1), ptr [[TMP9]], align 1 -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP0]] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: store i8 1, ptr [[TMP6]], align 1 +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: store i8 1, ptr [[TMP2]], align 1 +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[TMP10]], label [[VEC_EPILOG_ITER_CHECK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-EPILOG-PREFER-SCALABLE: middle.block: ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.iter.check: -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP1]] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] -; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.ph: -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP14:%.*]] = mul nuw i64 [[TMP13]], 8 -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], [[TMP14]] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] -; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.vector.body: -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX4]] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: store splat (i8 1), ptr [[TMP15]], align 1 -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX4]], [[TMP14]] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC3]] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[TMP16]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.middle.block: -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] -; CHECK-EPILOG-PREFER-SCALABLE: vec.epilog.scalar.ph: -; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_PH]] +; CHECK-EPILOG-PREFER-SCALABLE: scalar.ph: +; CHECK-EPILOG-PREFER-SCALABLE-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ] ; CHECK-EPILOG-PREFER-SCALABLE-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-EPILOG-PREFER-SCALABLE: for.body: ; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index 28d2a278de498..53777f72d5b85 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -802,8 +802,6 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -811,11 +809,11 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) #1 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], [[VEC_IND]] -; CHECK-NEXT: [[TMP14:%.*]] = extractelement [[TMP12]], i64 0 -; CHECK-NEXT: [[DOTSPLIT:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], [[VEC_IND]] +; CHECK-NEXT: [[DOTSPLIT:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], [[VEC_IND]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, [[DOTSPLIT]], i64 4 -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], align 4 [[TMP12]], splat (i1 true)) +; CHECK-NEXT: [[TMP10:%.*]] = extractelement [[VEC_IND]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP10]] +; CHECK-NEXT: store i32 [[Z:%.*]], ptr [[TMP14]], align 4 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP14]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 @@ -949,8 +947,6 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 { ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i64 [[TMP3]], i64 [[N_MOD_VF]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP6]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer @@ -958,11 +954,12 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) #1 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P:%.*]], [[VEC_IND]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 -8 -; CHECK-NEXT: [[DOTSPLIT:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], [[VEC_IND]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 -8 +; CHECK-NEXT: [[DOTSPLIT:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], [[VEC_IND]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, [[DOTSPLIT]], i64 4 -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], align 4 [[TMP12]], splat (i1 true)) +; CHECK-NEXT: [[TMP10:%.*]] = extractelement [[VEC_IND]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP10]] +; CHECK-NEXT: store i32 [[Z:%.*]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP13]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { , } @llvm.vector.deinterleave2.nxv8i32( [[WIDE_VEC]]) ; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { , } [[STRIDED_VEC]], 0 @@ -1104,8 +1101,6 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP7]] ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], [[DOTNOT]] ; CHECK-NEXT: [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 @@ -1119,9 +1114,9 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP9]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], [[VEC_IND]] -; CHECK-NEXT: [[P:%.*]] = extractelement [[TMP13]], i64 0 -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], align 4 [[TMP13]], splat (i1 true)) +; CHECK-NEXT: [[TMP12:%.*]] = extractelement [[VEC_IND]], i64 0 +; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP12]] +; CHECK-NEXT: store i32 [[X:%.*]], ptr [[P]], align 4 ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call @llvm.vector.interleave2.nxv8i32( [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT4]]) ; CHECK-NEXT: store [[INTERLEAVED_VEC]], ptr [[P]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] @@ -1181,12 +1176,6 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], [[DOTNOT]] ; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[N_VEC]], 1 ; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i64 [[TMP11]], 3 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Y:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i32 [[Z:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.stepvector.nxv4i64() ; CHECK-NEXT: [[TMP21:%.*]] = shl [[TMP10]], splat (i64 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add [[TMP21]], splat (i64 3) @@ -1197,14 +1186,17 @@ define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP13:%.*]] = add [[VEC_IND]], splat (i64 -1) -; CHECK-NEXT: [[TMP14:%.*]] = add [[VEC_IND]], splat (i64 -3) -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], [[VEC_IND]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], [[TMP13]] -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], [[TMP14]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT]], align 4 [[TMP16]], splat (i1 true)) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT2]], align 4 [[TMP17]], splat (i1 true)) -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[BROADCAST_SPLAT4]], align 4 [[TMP15]], splat (i1 true)) +; CHECK-NEXT: [[TMP20:%.*]] = extractelement [[VEC_IND]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP20]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP13]], i64 -4 +; CHECK-NEXT: store i32 [[X:%.*]], ptr [[TMP14]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[VEC_IND]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP16]], i64 -12 +; CHECK-NEXT: store i32 [[Y:%.*]], ptr [[TMP17]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement [[VEC_IND]], i64 0 +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP22]] +; CHECK-NEXT: store i32 [[Z:%.*]], ptr [[TMP19]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll index 02cc499f18827..ecfead93eefe2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-masked-accesses.ll @@ -189,18 +189,19 @@ define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr no ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP3]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; SCALAR_TAIL_FOLDING-NEXT: [[TMP4:%.*]] = shl nuw nsw [[VEC_IND]], splat (i32 1) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = zext nneg [[TMP4]] to -; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP5]] -; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 1), align 1 [[TMP6]], splat (i1 true)) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = or disjoint [[TMP4]], splat (i32 1) -; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = zext nneg [[TMP8]] to -; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP9]] -; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), align 1 [[TMP10]], [[TMP7]]) +; SCALAR_TAIL_FOLDING-NEXT: [[TMP5:%.*]] = extractelement [[TMP4]], i64 0 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP6:%.*]] = zext i32 [[TMP5]] to i64 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[Q]], i64 [[TMP6]] +; SCALAR_TAIL_FOLDING-NEXT: store i8 1, ptr [[TMP7]], align 1 +; SCALAR_TAIL_FOLDING-NEXT: [[TMP8:%.*]] = icmp ugt [[VEC_IND]], [[BROADCAST_SPLAT]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP9:%.*]] = or disjoint [[TMP4]], splat (i32 1) +; SCALAR_TAIL_FOLDING-NEXT: [[TMP10:%.*]] = zext nneg [[TMP9]] to +; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[Q]], [[TMP10]] +; SCALAR_TAIL_FOLDING-NEXT: call void @llvm.masked.scatter.nxv16i8.nxv16p0( splat (i8 2), align 1 [[TMP11]], [[TMP8]]) ; SCALAR_TAIL_FOLDING-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP2]] ; SCALAR_TAIL_FOLDING-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] -; SCALAR_TAIL_FOLDING-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; SCALAR_TAIL_FOLDING-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; SCALAR_TAIL_FOLDING-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALAR_TAIL_FOLDING: middle.block: ; SCALAR_TAIL_FOLDING-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N_MOD_VF]], 0 ; SCALAR_TAIL_FOLDING-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll index 9312306ce519a..f8d69158238a2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll @@ -10,14 +10,10 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) { ; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START_22]] ; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 -; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i64 [[TMP4]], 2 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], [[TMP7]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[N_VEC]], 8 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1:%.*]], i64 [[TMP8]] @@ -27,13 +23,12 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 8 ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP34:%.*]] = shl nuw i64 [[TMP33]], 1 -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP30]], i64 [[TMP34]] -; CHECK-NEXT: store zeroinitializer, ptr [[TMP30]], align 8 -; CHECK-NEXT: store zeroinitializer, ptr [[TMP35]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] +; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START_2]], i64 [[TMP10]] +; CHECK-NEXT: store i64 0, ptr [[TMP30]], align 8 +; CHECK-NEXT: store i64 0, ptr [[NEXT_GEP3]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index 945d808d3fa3f..0e72cdcaea3d1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -552,13 +552,11 @@ define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]] -; CHECK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP7]], align 4 +; CHECK-NEXT: store i32 [[VAL:%.*]], ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll index 0c6a490ddf4ba..06ea091dfe246 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll @@ -17,17 +17,13 @@ define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 { ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1000, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1000, [[N_MOD_VF]] ; CHECK-NEXT: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV]], 0 -; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement poison, i64 [[EXTRACT0]], i64 0 -; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector [[DOTSPLATINSERT1]], poison, zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { i64, i64 } [[SV]], 1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = add [[DOTSPLAT2]], [[BROADCAST_SPLAT2]] +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[EXTRACT0]], [[TMP10]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]] -; CHECK-NEXT: store [[TMP7]], ptr [[TMP8]], align 8 +; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll index de70da6d2558b..c2e5c44079d89 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/tail-folding-styles.ll @@ -22,13 +22,11 @@ define void @simple_memset_tailfold(i32 %val, ptr %ptr, i64 %n) "target-features ; NONE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 ; NONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], [[TMP3]] ; NONE-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]] -; NONE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 -; NONE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; NONE-NEXT: br label [[VECTOR_BODY:%.*]] ; NONE: vector.body: ; NONE-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] ; NONE-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX1]] -; NONE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP5]], align 4 +; NONE-NEXT: store i32 [[VAL:%.*]], ptr [[TMP5]], align 4 ; NONE-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP3]] ; NONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]] ; NONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll index 46b0ebdd2fa62..7b29a72b97ec2 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll @@ -33,7 +33,7 @@ define void @load_store_interleave_group_block_invar_cond(ptr noalias %data, ptr ; VF2IC1-NEXT: br label %[[PRED_STORE_CONTINUE3]] ; VF2IC1: [[PRED_STORE_CONTINUE3]]: ; VF2IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[DST_1]], i64 [[INDEX]] -; VF2IC1-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP2]], align 1 +; VF2IC1-NEXT: store i8 0, ptr [[TMP2]], align 1 ; VF2IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2IC1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; VF2IC1-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -89,8 +89,8 @@ define void @load_store_interleave_group_block_invar_cond(ptr noalias %data, ptr ; VF2IC2: [[PRED_STORE_CONTINUE11]]: ; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[DST_1]], i64 [[INDEX]] ; VF2IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 2 -; VF2IC2-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP5]], align 1 -; VF2IC2-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP6]], align 1 +; VF2IC2-NEXT: store i8 0, ptr [[TMP5]], align 1 +; VF2IC2-NEXT: store i8 0, ptr [[TMP6]], align 1 ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF2IC2-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; VF2IC2-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll index 44b4e5a8c2bc7..b5b78f5294b30 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll @@ -6,50 +6,43 @@ target triple = "arm64-apple-macosx" define void @test_remove_vector_loop_region_epilogue(ptr %dst, i1 %c) { ; CHECK-LABEL: define void @test_remove_vector_loop_region_epilogue( ; CHECK-SAME: ptr [[DST:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[ITER_CHECK:.*]]: +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TC:%.*]] = select i1 [[C]], i64 8, i64 0 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TC]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] -; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: -; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TC]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], 64 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TC]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i32 16 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i32 32 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i32 48 -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[DST]], align 4 -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP2]], align 4 -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP3]], align 4 -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP4]], align 4 -; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[IV:%.*]] = add i64 [[VEC_EPILOG_RESUME_VAL]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[VEC_EPILOG_RESUME_VAL]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[VEC_EPILOG_RESUME_VAL]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[VEC_EPILOG_RESUME_VAL]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP2]] +; CHECK-NEXT: store i8 0, ptr [[TMP5]], align 4 +; CHECK-NEXT: store i8 0, ptr [[GEP]], align 4 +; CHECK-NEXT: store i8 0, ptr [[TMP8]], align 4 +; CHECK-NEXT: store i8 0, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[VEC_EPILOG_RESUME_VAL]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TC]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] -; CHECK: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] -; CHECK: [[VEC_EPILOG_PH]]: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] -; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[VEC_EPILOG_RESUME_VAL]] -; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr [[TMP5]], align 4 -; CHECK-NEXT: br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]] -; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-NEXT: br i1 true, label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] -; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TC]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store i8 0, ptr [[GEP]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV1]] +; CHECK-NEXT: store i8 0, ptr [[GEP1]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV1]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[TC]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -70,6 +63,7 @@ exit: } ;. ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[META2]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll index f25b86d3b20c2..d69a43220cf91 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll @@ -102,7 +102,8 @@ define i8 @dead_live_out_due_to_scalar_epilogue_required(ptr %src, ptr %dst) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP15:%.*]] = sext [[VEC_IND]] to ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST]], [[TMP15]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i8.nxv4p0( zeroinitializer, align 1 [[TMP16]], splat (i1 true)), !alias.scope [[META3:![0-9]+]], !noalias [[META6:![0-9]+]] +; CHECK-NEXT: [[TMP12:%.*]] = extractelement [[TMP16]], i32 0 +; CHECK-NEXT: store i8 0, ptr [[TMP12]], align 1, !alias.scope [[META3:![0-9]+]], !noalias [[META6:![0-9]+]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll index 8e562a97d51cf..55f840cbdf3b1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll @@ -203,7 +203,8 @@ define void @safe_load_store_distance_not_pow_of_2(i64 %N) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr [10 x [12 x i16]], ptr @a, i64 0, i64 8, <8 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> zeroinitializer, <8 x ptr> align 2 [[TMP7]], <8 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x ptr> [[TMP7]], i32 0 +; CHECK-NEXT: store i16 0, ptr [[TMP10]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 24) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll index 3c90908b0a08f..7458d36ded4fc 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll @@ -71,7 +71,7 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP12]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.end: @@ -115,7 +115,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP15]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP11]]) ; CHECK-NEXT: br label [[FOR_BODY:%.*]] @@ -159,7 +159,7 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.end: @@ -199,7 +199,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]] ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.end: @@ -224,42 +224,17 @@ for.end: define i64 @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) { ; CHECK-LABEL: @uniform_load( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[B:%.*]], align 8 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]] -; CHECK-NEXT: store [[BROADCAST_SPLAT]], ptr [[ARRAYIDX]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], [[TMP3]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] ; CHECK-NEXT: br label [[FOR_BODY1:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] -; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[B]], align 8 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV1]] +; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] +; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[B:%.*]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV1]] ; CHECK-NEXT: store i64 [[V1]], ptr [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], 1025 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY1]] ; CHECK: for.end: -; CHECK-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V1]], [[FOR_BODY1]] ], [ [[V]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V1]], [[FOR_BODY1]] ] ; CHECK-NEXT: ret i64 [[V_LCSSA]] ; entry: @@ -299,7 +274,7 @@ define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[EVL_BASED_IV]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.end: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 1e21c753840e9..ca9b85203817d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -34,34 +34,16 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; ; FIXEDLEN-LABEL: define void @uniform_load( ; FIXEDLEN-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; FIXEDLEN-NEXT: [[ENTRY:.*:]] -; FIXEDLEN-NEXT: br label %[[VECTOR_PH:.*]] -; FIXEDLEN: [[VECTOR_PH]]: -; FIXEDLEN-NEXT: br label %[[VECTOR_BODY:.*]] -; FIXEDLEN: [[VECTOR_BODY]]: -; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B]], align 8 -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 -; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 -; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXEDLEN-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; FIXEDLEN: [[MIDDLE_BLOCK]]: -; FIXEDLEN-NEXT: br label %[[SCALAR_PH:.*]] -; FIXEDLEN: [[SCALAR_PH]]: +; FIXEDLEN-NEXT: [[ENTRY:.*]]: ; FIXEDLEN-NEXT: br label %[[FOR_BODY:.*]] ; FIXEDLEN: [[FOR_BODY]]: -; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ 1024, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] ; FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8 ; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]] ; FIXEDLEN: [[FOR_END]]: ; FIXEDLEN-NEXT: ret void ; @@ -110,74 +92,31 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-LABEL: define i64 @uniform_load_outside_use( ; SCALABLE-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SCALABLE-NEXT: [[ENTRY:.*]]: -; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] -; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; SCALABLE: [[VECTOR_PH]]: -; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]] -; SCALABLE: [[VECTOR_BODY]]: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP6:%.*]] = load i64, ptr [[B]], align 8 -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP8]], align 8 -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] -; SCALABLE: [[MIDDLE_BLOCK]]: -; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] -; SCALABLE-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] -; SCALABLE: [[SCALAR_PH]]: -; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; SCALABLE-NEXT: br label %[[FOR_BODY:.*]] ; SCALABLE: [[FOR_BODY]]: -; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] ; SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8 ; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]] ; SCALABLE: [[FOR_END]]: -; SCALABLE-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], %[[FOR_BODY]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], %[[FOR_BODY]] ] ; SCALABLE-NEXT: ret i64 [[V_LCSSA]] ; ; FIXEDLEN-LABEL: define i64 @uniform_load_outside_use( ; FIXEDLEN-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; FIXEDLEN-NEXT: [[ENTRY:.*:]] -; FIXEDLEN-NEXT: br label %[[VECTOR_PH:.*]] -; FIXEDLEN: [[VECTOR_PH]]: -; FIXEDLEN-NEXT: br label %[[VECTOR_BODY:.*]] -; FIXEDLEN: [[VECTOR_BODY]]: -; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B]], align 8 -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 -; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 -; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXEDLEN-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; FIXEDLEN: [[MIDDLE_BLOCK]]: -; FIXEDLEN-NEXT: br label %[[SCALAR_PH:.*]] -; FIXEDLEN: [[SCALAR_PH]]: +; FIXEDLEN-NEXT: [[ENTRY:.*]]: ; FIXEDLEN-NEXT: br label %[[FOR_BODY:.*]] ; FIXEDLEN: [[FOR_BODY]]: -; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ 1024, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] ; FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8 ; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]] ; FIXEDLEN: [[FOR_END]]: ; FIXEDLEN-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], %[[FOR_BODY]] ] ; FIXEDLEN-NEXT: ret i64 [[V_LCSSA]] @@ -185,42 +124,17 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE-LABEL: define i64 @uniform_load_outside_use( ; TF-SCALABLE-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; TF-SCALABLE-NEXT: [[ENTRY:.*]]: -; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 1 -; TF-SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] -; TF-SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; TF-SCALABLE: [[VECTOR_PH]]: -; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 2 -; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] -; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] -; TF-SCALABLE-NEXT: br label %[[VECTOR_BODY:.*]] -; TF-SCALABLE: [[VECTOR_BODY]]: -; TF-SCALABLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] -; TF-SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[ARRAYIDX]], align 8 -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], [[TMP3]] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] -; TF-SCALABLE: [[MIDDLE_BLOCK]]: -; TF-SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] -; TF-SCALABLE: [[SCALAR_PH]]: -; TF-SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] ; TF-SCALABLE-NEXT: br label %[[FOR_BODY:.*]] ; TF-SCALABLE: [[FOR_BODY]]: -; TF-SCALABLE-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; TF-SCALABLE-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] ; TF-SCALABLE-NEXT: [[V1:%.*]] = load i64, ptr [[B]], align 8 ; TF-SCALABLE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV1]] ; TF-SCALABLE-NEXT: store i64 [[V1]], ptr [[ARRAYIDX1]], align 8 ; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1 ; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]] ; TF-SCALABLE: [[FOR_END]]: -; TF-SCALABLE-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V1]], %[[FOR_BODY]] ], [ [[V]], %[[MIDDLE_BLOCK]] ] +; TF-SCALABLE-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V1]], %[[FOR_BODY]] ] ; TF-SCALABLE-NEXT: ret i64 [[V_LCSSA]] ; entry: @@ -269,7 +183,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; SCALABLE: [[MIDDLE_BLOCK]]: ; SCALABLE-NEXT: br label %[[FOR_END:.*]] ; SCALABLE: [[FOR_END]]: @@ -300,7 +214,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXEDLEN-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; FIXEDLEN-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; FIXEDLEN: [[MIDDLE_BLOCK]]: ; FIXEDLEN-NEXT: br label %[[SCALAR_PH:.*]] ; FIXEDLEN: [[SCALAR_PH]]: @@ -318,7 +232,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; FIXEDLEN-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8 ; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; FIXEDLEN: [[FOR_END]]: ; FIXEDLEN-NEXT: ret void ; @@ -350,7 +264,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; TF-SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; TF-SCALABLE-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] ; TF-SCALABLE: [[FOR_END]]: @@ -399,7 +313,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALABLE: [[MIDDLE_BLOCK]]: ; SCALABLE-NEXT: br label %[[FOR_END:.*]] ; SCALABLE: [[FOR_END]]: @@ -407,34 +321,16 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; ; FIXEDLEN-LABEL: define void @uniform_load_unaligned( ; FIXEDLEN-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; FIXEDLEN-NEXT: [[ENTRY:.*:]] -; FIXEDLEN-NEXT: br label %[[VECTOR_PH:.*]] -; FIXEDLEN: [[VECTOR_PH]]: -; FIXEDLEN-NEXT: br label %[[VECTOR_BODY:.*]] -; FIXEDLEN: [[VECTOR_BODY]]: -; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B]], align 1 -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 -; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 -; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXEDLEN-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; FIXEDLEN: [[MIDDLE_BLOCK]]: -; FIXEDLEN-NEXT: br label %[[SCALAR_PH:.*]] -; FIXEDLEN: [[SCALAR_PH]]: +; FIXEDLEN-NEXT: [[ENTRY:.*]]: ; FIXEDLEN-NEXT: br label %[[FOR_BODY:.*]] ; FIXEDLEN: [[FOR_BODY]]: -; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ 1024, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] ; FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 1 ; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]] ; FIXEDLEN: [[FOR_END]]: ; FIXEDLEN-NEXT: ret void ; @@ -457,7 +353,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; TF-SCALABLE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] ; TF-SCALABLE: [[FOR_END]]: @@ -499,7 +395,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP10]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; SCALABLE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; SCALABLE: [[MIDDLE_BLOCK]]: ; SCALABLE-NEXT: br label %[[FOR_END:.*]] ; SCALABLE: [[FOR_END]]: @@ -507,34 +403,16 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; ; FIXEDLEN-LABEL: define void @uniform_store( ; FIXEDLEN-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; FIXEDLEN-NEXT: [[ENTRY:.*:]] -; FIXEDLEN-NEXT: br label %[[VECTOR_PH:.*]] -; FIXEDLEN: [[VECTOR_PH]]: -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; FIXEDLEN-NEXT: br label %[[VECTOR_BODY:.*]] -; FIXEDLEN: [[VECTOR_BODY]]: -; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 8 -; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXEDLEN-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] -; FIXEDLEN: [[MIDDLE_BLOCK]]: -; FIXEDLEN-NEXT: br label %[[SCALAR_PH:.*]] -; FIXEDLEN: [[SCALAR_PH]]: +; FIXEDLEN-NEXT: [[ENTRY:.*]]: ; FIXEDLEN-NEXT: br label %[[FOR_BODY:.*]] ; FIXEDLEN: [[FOR_BODY]]: -; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ 1024, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8 ; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]] ; FIXEDLEN: [[FOR_END]]: ; FIXEDLEN-NEXT: ret void ; @@ -557,7 +435,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[INDEX]] ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] ; TF-SCALABLE: [[FOR_END]]: @@ -608,7 +486,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SCALABLE: [[MIDDLE_BLOCK]]: ; SCALABLE-NEXT: br label %[[FOR_END:.*]] ; SCALABLE: [[FOR_END]]: @@ -619,23 +497,23 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; FIXEDLEN-NEXT: [[ENTRY:.*:]] ; FIXEDLEN-NEXT: br label %[[VECTOR_PH:.*]] ; FIXEDLEN: [[VECTOR_PH]]: -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer ; FIXEDLEN-NEXT: br label %[[VECTOR_BODY:.*]] ; FIXEDLEN: [[VECTOR_BODY]]: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 -; FIXEDLEN-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 5 -; FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 6 -; FIXEDLEN-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 7 -; FIXEDLEN-NEXT: store i64 [[TMP4]], ptr [[B]], align 8 +; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[VEC_IND]], <2 x ptr> align 8 [[BROADCAST_SPLAT]], <2 x i1> splat (i1 true)) +; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[STEP_ADD]], <2 x ptr> align 8 [[BROADCAST_SPLAT]], <2 x i1> splat (i1 true)) ; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 4 -; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 -; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 2 +; FIXEDLEN-NEXT: store i64 [[V]], ptr [[TMP5]], align 8 +; FIXEDLEN-NEXT: store i64 [[V]], ptr [[TMP1]], align 8 +; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) ; FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXEDLEN-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; FIXEDLEN-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; FIXEDLEN: [[MIDDLE_BLOCK]]: ; FIXEDLEN-NEXT: br label %[[SCALAR_PH:.*]] ; FIXEDLEN: [[SCALAR_PH]]: @@ -647,7 +525,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; FIXEDLEN: [[FOR_END]]: ; FIXEDLEN-NEXT: ret void ; @@ -679,7 +557,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP13]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; TF-SCALABLE-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] ; TF-SCALABLE: [[FOR_END]]: @@ -731,7 +609,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]] ; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; SCALABLE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; SCALABLE: [[MIDDLE_BLOCK]]: ; SCALABLE-NEXT: br label %[[FOR_END:.*]] ; SCALABLE: [[FOR_END]]: @@ -757,12 +635,12 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> align 8 [[BROADCAST_SPLAT2]], <4 x i1> [[TMP2]]) ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 4 -; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 -; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 +; FIXEDLEN-NEXT: store i64 [[V]], ptr [[TMP3]], align 8 +; FIXEDLEN-NEXT: store i64 [[V]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXEDLEN-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; FIXEDLEN-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; FIXEDLEN: [[MIDDLE_BLOCK]]: ; FIXEDLEN-NEXT: br label %[[SCALAR_PH:.*]] ; FIXEDLEN: [[SCALAR_PH]]: @@ -779,7 +657,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; FIXEDLEN: [[FOR_END]]: ; FIXEDLEN-NEXT: ret void ; @@ -812,7 +690,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]] ; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] ; TF-SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; TF-SCALABLE-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] ; TF-SCALABLE: [[FOR_END]]: @@ -860,7 +738,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP10]], [[INDEX]] ; SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]] ; SCALABLE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; SCALABLE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; SCALABLE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SCALABLE: [[MIDDLE_BLOCK]]: ; SCALABLE-NEXT: br label %[[FOR_END:.*]] ; SCALABLE: [[FOR_END]]: @@ -868,34 +746,16 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; ; FIXEDLEN-LABEL: define void @uniform_store_unaligned( ; FIXEDLEN-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]], i64 [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; FIXEDLEN-NEXT: [[ENTRY:.*:]] -; FIXEDLEN-NEXT: br label %[[VECTOR_PH:.*]] -; FIXEDLEN: [[VECTOR_PH]]: -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; FIXEDLEN-NEXT: br label %[[VECTOR_BODY:.*]] -; FIXEDLEN: [[VECTOR_BODY]]: -; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 1 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 8 -; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXEDLEN-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] -; FIXEDLEN: [[MIDDLE_BLOCK]]: -; FIXEDLEN-NEXT: br label %[[SCALAR_PH:.*]] -; FIXEDLEN: [[SCALAR_PH]]: +; FIXEDLEN-NEXT: [[ENTRY:.*]]: ; FIXEDLEN-NEXT: br label %[[FOR_BODY:.*]] ; FIXEDLEN: [[FOR_BODY]]: -; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ 1024, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 1 ; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 -; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END:.*]], label %[[FOR_BODY]] ; FIXEDLEN: [[FOR_END]]: ; FIXEDLEN-NEXT: ret void ; @@ -918,7 +778,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[INDEX]] ; TF-SCALABLE-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]] ; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; TF-SCALABLE-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TF-SCALABLE: [[MIDDLE_BLOCK]]: ; TF-SCALABLE-NEXT: br label %[[FOR_END:.*]] ; TF-SCALABLE: [[FOR_END]]: diff --git a/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll index f0ea63c498a40..32a2980a977b7 100644 --- a/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/WebAssembly/induction-branch-cost.ll @@ -8,46 +8,15 @@ define void @induction_phi_and_branch_cost(ptr %end, ptr %start.1, ptr %start.2) ; CHECK-LABEL: define void @induction_phi_and_branch_cost( ; CHECK-SAME: ptr [[END:%.*]], ptr [[START_1:%.*]], ptr [[START_2:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: [[END2:%.*]] = ptrtoint ptr [[END]] to i32 -; CHECK-NEXT: [[START_11:%.*]] = ptrtoint ptr [[START_1]] to i32 -; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[START_11]], [[END2]] -; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF]] -; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[N_VEC]], -4 -; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START_1]], i32 [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[N_VEC]], -4 -; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[START_2]], i32 [[TMP4]] -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], -4 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_2]], i32 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 -3 -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP7]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START_1]], %[[ENTRY]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[IND_END3]], %[[MIDDLE_BLOCK]] ], [ [[START_2]], %[[ENTRY]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ], [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START_1]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[START_2]], %[[ENTRY]] ], [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr nusw i8, ptr [[PTR_IV]], i32 -4 ; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr i8, ptr [[PTR_IV_2]], i32 -4 ; CHECK-NEXT: store i32 0, ptr [[PTR_IV_2]], align 4 ; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV]], [[END]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -68,9 +37,3 @@ exit: } attributes #0 = { "target-features"="+simd128" } -;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll index f65a9d7d45ed8..7a538199f632a 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -13,16 +13,17 @@ define void @f1() { ; CHECK-LABEL: @f1( ; CHECK-NEXT: bb1: -; CHECK-NEXT: br label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[TMP0:%.*]] = sext i16 0 to i64 +; CHECK: bb2: +; CHECK-NEXT: [[C_1_0:%.*]] = phi i16 [ 0, [[BB1:%.*]] ], [ [[_TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[_TMP1:%.*]] = zext i16 0 to i64 +; CHECK-NEXT: [[_TMP2:%.*]] = getelementptr [1 x [[REC8:%.*]]], ptr @a, i16 0, i64 [[_TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = sext i16 [[C_1_0]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [2 x ptr], ptr @b, i16 0, i64 [[TMP0]] -; CHECK-NEXT: store <2 x ptr> , ptr [[TMP1]], align 8 -; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] -; CHECK: middle.block: -; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK-NEXT: store ptr [[_TMP2]], ptr [[TMP1]], align 8 +; CHECK-NEXT: [[_TMP9]] = add nsw i16 [[C_1_0]], 1 +; CHECK-NEXT: [[_TMP11:%.*]] = icmp slt i16 [[_TMP9]], 2 +; CHECK-NEXT: br i1 [[_TMP11]], label [[VECTOR_BODY]], label [[BB3:%.*]] ; CHECK: bb3: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll index 7e5964ac30cba..1e1f9e9669bd2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll @@ -29,13 +29,11 @@ define i32 @inv_load_conditional(ptr %a, i64 %n, ptr %b, i32 %k) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x ptr> poison, ptr [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x ptr> [[BROADCAST_SPLATINSERT]], <16 x ptr> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <16 x ptr> [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT4]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT5]], ptr [[TMP2]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] +; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[TMP2]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> align 4 [[BROADCAST_SPLAT]], <16 x i1> [[TMP1]], <16 x i32> poison), !alias.scope [[META3]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP1]], i32 0 ; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP9]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> splat (i32 1) @@ -56,13 +54,11 @@ define i32 @inv_load_conditional(ptr %a, i64 %n, ptr %b, i32 %k) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x ptr> poison, ptr [[A]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x ptr> [[BROADCAST_SPLATINSERT8]], <8 x ptr> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x ptr> [[BROADCAST_SPLAT9]], zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT12:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT11]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX10]] -; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT12]], ptr [[TMP6]], align 4, !alias.scope [[META0]], !noalias [[META3]] +; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[TMP6]], align 4, !alias.scope [[META0]], !noalias [[META3]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER13:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> align 4 [[BROADCAST_SPLAT9]], <8 x i1> [[TMP5]], <8 x i32> poison), !alias.scope [[META3]] ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP5]], i32 0 ; CHECK-NEXT: [[PREDPHI14:%.*]] = select i1 [[TMP10]], <8 x i32> [[WIDE_MASKED_GATHER13]], <8 x i32> splat (i32 1) diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll index 7b9166de3ed05..de92d2bdc329f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll @@ -159,7 +159,7 @@ define void @inv_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b, i3 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP1]], align 8, !alias.scope [[META11:![0-9]+]], !noalias [[META14:![0-9]+]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT5]] -; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 4, !alias.scope [[META11]], !noalias [[META14]] +; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[TMP1]], align 4, !alias.scope [[META11]], !noalias [[META14]] ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> [[BROADCAST_SPLAT]], <16 x ptr> align 4 [[BROADCAST_SPLAT7]], <16 x i1> [[TMP2]]), !alias.scope [[META14]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -174,17 +174,15 @@ define void @inv_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b, i3 ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF8:%.*]] = urem i64 [[SMAX2]], 4 ; CHECK-NEXT: [[N_VEC9:%.*]] = sub i64 [[SMAX2]], [[N_MOD_VF8]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT12]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x i32> poison, i32 [[K]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT14]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT22:%.*]], %[[PRED_STORE_CONTINUE21:.*]] ] +; CHECK-NEXT: [[INDEX10:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT22:%.*]], %[[PRED_STORE_CONTINUE19:.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX10]] ; CHECK-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x i32>, ptr [[TMP4]], align 8, !alias.scope [[META11]], !noalias [[META14]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD15]], [[BROADCAST_SPLAT15]] -; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT13]], ptr [[TMP4]], align 4, !alias.scope [[META11]], !noalias [[META14]] +; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[TMP4]], align 4, !alias.scope [[META11]], !noalias [[META14]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 ; CHECK-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] ; CHECK: [[PRED_STORE_IF]]: @@ -192,23 +190,23 @@ define void @inv_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b, i3 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP5]], i32 1 -; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF14:.*]], label %[[PRED_STORE_CONTINUE15:.*]] +; CHECK: [[PRED_STORE_IF14]]: +; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[A]], align 4, !alias.scope [[META14]] +; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE15]] +; CHECK: [[PRED_STORE_CONTINUE15]]: +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 +; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF16:.*]], label %[[PRED_STORE_CONTINUE17:.*]] ; CHECK: [[PRED_STORE_IF16]]: ; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[A]], align 4, !alias.scope [[META14]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE17]] ; CHECK: [[PRED_STORE_CONTINUE17]]: -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP5]], i32 2 -; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19:.*]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 +; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF18:.*]], label %[[PRED_STORE_CONTINUE19]] ; CHECK: [[PRED_STORE_IF18]]: ; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[A]], align 4, !alias.scope [[META14]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE19]] ; CHECK: [[PRED_STORE_CONTINUE19]]: -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP5]], i32 3 -; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_STORE_IF20:.*]], label %[[PRED_STORE_CONTINUE21]] -; CHECK: [[PRED_STORE_IF20]]: -; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[A]], align 4, !alias.scope [[META14]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE21]] -; CHECK: [[PRED_STORE_CONTINUE21]]: ; CHECK-NEXT: [[INDEX_NEXT22]] = add nuw i64 [[INDEX10]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT22]], [[N_VEC9]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] @@ -292,8 +290,6 @@ define void @variant_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX10]], 16 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX10]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[NTRUNC]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <16 x i32> poison, i32 [[K]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT12]], <16 x i32> poison, <16 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <16 x ptr> poison, ptr [[A]], i64 0 @@ -304,7 +300,7 @@ define void @variant_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP1]], align 8, !alias.scope [[META20:![0-9]+]], !noalias [[META23:![0-9]+]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT13]] -; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 4, !alias.scope [[META20]], !noalias [[META23]] +; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[TMP1]], align 4, !alias.scope [[META20]], !noalias [[META23]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr align 8 [[TMP3]], <16 x i1> [[TMP2]], <16 x i32> poison), !alias.scope [[META26:![0-9]+]] ; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> [[WIDE_MASKED_LOAD]], <16 x ptr> align 4 [[BROADCAST_SPLAT15]], <16 x i1> [[TMP2]]), !alias.scope [[META27:![0-9]+]], !noalias [[META26]] @@ -321,8 +317,6 @@ define void @variant_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF16:%.*]] = urem i64 [[SMAX10]], 8 ; CHECK-NEXT: [[N_VEC17:%.*]] = sub i64 [[SMAX10]], [[N_MOD_VF16]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT20:%.*]] = insertelement <8 x i32> poison, i32 [[NTRUNC]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT21:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT20]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT22:%.*]] = insertelement <8 x i32> poison, i32 [[K]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT23:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT22]], <8 x i32> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT25:%.*]] = insertelement <8 x ptr> poison, ptr [[A]], i64 0 @@ -333,10 +327,10 @@ define void @variant_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX18]] ; CHECK-NEXT: [[WIDE_LOAD25:%.*]] = load <8 x i32>, ptr [[TMP5]], align 8, !alias.scope [[META20]], !noalias [[META23]] ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <8 x i32> [[WIDE_LOAD25]], [[BROADCAST_SPLAT23]] -; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT21]], ptr [[TMP5]], align 4, !alias.scope [[META20]], !noalias [[META23]] +; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[TMP5]], align 4, !alias.scope [[META20]], !noalias [[META23]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDEX18]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD26:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr align 8 [[TMP7]], <8 x i1> [[TMP6]], <8 x i32> poison), !alias.scope [[META26]] -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[WIDE_MASKED_LOAD26]], <8 x ptr> align 4 [[BROADCAST_SPLAT26]], <8 x i1> [[TMP6]]), !alias.scope [[META27]], !noalias [[META26]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD22:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr align 8 [[TMP7]], <8 x i1> [[TMP6]], <8 x i32> poison), !alias.scope [[META26]] +; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[WIDE_MASKED_LOAD22]], <8 x ptr> align 4 [[BROADCAST_SPLAT26]], <8 x i1> [[TMP6]]), !alias.scope [[META27]], !noalias [[META26]] ; CHECK-NEXT: [[INDEX_NEXT27]] = add nuw i64 [[INDEX18]], 8 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT27]], [[N_VEC17]] ; CHECK-NEXT: br i1 [[TMP8]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll index 3c618d71fc974..39cf0929e4752 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll @@ -33,65 +33,19 @@ define i32 @main(ptr %ptr) { ; CHECK: for.body8.lr.ph: ; CHECK-NEXT: [[CONV3:%.*]] = trunc i32 [[STOREMERGE_IN9]] to i8 ; CHECK-NEXT: [[DOTPROMOTED:%.*]] = load i32, ptr @a, align 16 -; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[CONV3]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[TMP1]] to i32 -; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1 -; CHECK-NEXT: [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP2]]) -; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], [[UMIN1]] -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP4]], 24 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] -; CHECK: vector.scevcheck: -; CHECK-NEXT: [[TMP5:%.*]] = add i8 [[CONV3]], -1 -; CHECK-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 -; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP6]]) -; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP6]], [[UMIN]] -; CHECK-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 -; CHECK-NEXT: [[TMP9:%.*]] = sub i8 [[TMP5]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i8 [[TMP9]], [[TMP5]] -; CHECK-NEXT: [[TMP12:%.*]] = icmp ugt i32 [[TMP7]], 255 -; CHECK-NEXT: [[TMP13:%.*]] = or i1 [[TMP10]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[DOTPROMOTED]], 1 -; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP7]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP15]], [[TMP14]] -; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP13]], [[TMP16]] -; CHECK-NEXT: br i1 [[TMP17]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP4]], 8 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP4]], [[N_MOD_VF]] -; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[DOTPROMOTED]], [[N_VEC]] -; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i8 -; CHECK-NEXT: [[IND_END2:%.*]] = sub i8 [[CONV3]], [[DOTCAST]] -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[DOTPROMOTED]], [[INDEX]] -; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], 1 -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP20]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 4 -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP22]], align 4 -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP25]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 -; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP4]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i8 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY8:%.*]] ; CHECK: for.body8: -; CHECK-NEXT: [[INC5:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ] -; CHECK-NEXT: [[C_04:%.*]] = phi i8 [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ], [ [[DEC:%.*]], [[FOR_BODY8]] ] +; CHECK-NEXT: [[INC5:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ] +; CHECK-NEXT: [[C_04:%.*]] = phi i8 [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[DEC:%.*]], [[FOR_BODY8]] ] ; CHECK-NEXT: [[INC]] = add i32 [[INC5]], 1 ; CHECK-NEXT: [[DEC]] = add i8 [[C_04]], -1 -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i32 [[INC]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[INC]] ; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4 ; CHECK-NEXT: [[CONV5:%.*]] = zext i8 [[DEC]] to i32 ; CHECK-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP0]], [[CONV5]] -; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]] ; CHECK: for.cond4.for.inc9_crit_edge: -; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ] ; CHECK-NEXT: store i32 [[INC_LCSSA]], ptr @a, align 16 ; CHECK-NEXT: br label [[FOR_INC9]] ; CHECK: for.inc9: diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll index 785a248344825..1deb58dfd3da3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll @@ -29,10 +29,6 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP8]], 16 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP8]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr addrspace(10)> poison, ptr addrspace(10) [[DOTUNPACK]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr addrspace(10)> [[BROADCAST_SPLATINSERT]], <4 x ptr addrspace(10)> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i64> poison, i64 [[DOTUNPACK2]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT7]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -44,18 +40,26 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD4]], i32 0 ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD5]], i32 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> align 8 [[TMP18]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10:![0-9]+]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> align 8 [[TMP19]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> align 8 [[TMP20]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> align 8 [[TMP21]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x ptr addrspace(13)> [[TMP18]], i32 0 +; CHECK-NEXT: store ptr addrspace(10) [[DOTUNPACK]], ptr addrspace(13) [[TMP13]], align 8, !tbaa [[JTBAA_ARRAYBUF_TBAA10:![0-9]+]] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x ptr addrspace(13)> [[TMP19]], i32 0 +; CHECK-NEXT: store ptr addrspace(10) [[DOTUNPACK]], ptr addrspace(13) [[TMP14]], align 8, !tbaa [[JTBAA_ARRAYBUF_TBAA10]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x ptr addrspace(13)> [[TMP20]], i32 0 +; CHECK-NEXT: store ptr addrspace(10) [[DOTUNPACK]], ptr addrspace(13) [[TMP15]], align 8, !tbaa [[JTBAA_ARRAYBUF_TBAA10]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x ptr addrspace(13)> [[TMP21]], i32 0 +; CHECK-NEXT: store ptr addrspace(10) [[DOTUNPACK]], ptr addrspace(13) [[TMP16]], align 8, !tbaa [[JTBAA_ARRAYBUF_TBAA10]] ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND]], i32 1 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD]], i32 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD4]], i32 1 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD5]], i32 1 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> align 8 [[TMP22]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> align 8 [[TMP23]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> align 8 [[TMP24]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> align 8 [[TMP25]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x ptr addrspace(13)> [[TMP22]], i32 0 +; CHECK-NEXT: store i64 [[DOTUNPACK2]], ptr addrspace(13) [[TMP31]], align 8, !tbaa [[JTBAA_ARRAYBUF_TBAA10]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x ptr addrspace(13)> [[TMP23]], i32 0 +; CHECK-NEXT: store i64 [[DOTUNPACK2]], ptr addrspace(13) [[TMP32]], align 8, !tbaa [[JTBAA_ARRAYBUF_TBAA10]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x ptr addrspace(13)> [[TMP24]], i32 0 +; CHECK-NEXT: store i64 [[DOTUNPACK2]], ptr addrspace(13) [[TMP33]], align 8, !tbaa [[JTBAA_ARRAYBUF_TBAA10]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x ptr addrspace(13)> [[TMP25]], i32 0 +; CHECK-NEXT: store i64 [[DOTUNPACK2]], ptr addrspace(13) [[TMP34]], align 8, !tbaa [[JTBAA_ARRAYBUF_TBAA10]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD5]], splat (i64 4) ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -70,10 +74,6 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP8]], 4 ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP8]], [[N_MOD_VF4]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x ptr addrspace(10)> poison, ptr addrspace(10) [[DOTUNPACK]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x ptr addrspace(10)> [[BROADCAST_SPLATINSERT10]], <4 x ptr addrspace(10)> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x i64> poison, i64 [[DOTUNPACK2]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT12]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], @@ -82,9 +82,11 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo ; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND8:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND8]], i32 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT11]], <4 x ptr addrspace(13)> align 8 [[TMP28]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x ptr addrspace(13)> [[TMP28]], i32 0 +; CHECK-NEXT: store ptr addrspace(10) [[DOTUNPACK]], ptr addrspace(13) [[TMP35]], align 8, !tbaa [[JTBAA_ARRAYBUF_TBAA10]] ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND8]], i32 1 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT13]], <4 x ptr addrspace(13)> align 8 [[TMP29]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x ptr addrspace(13)> [[TMP29]], i32 0 +; CHECK-NEXT: store i64 [[DOTUNPACK2]], ptr addrspace(13) [[TMP36]], align 8, !tbaa [[JTBAA_ARRAYBUF_TBAA10]] ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX7]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <4 x i64> [[VEC_IND8]], splat (i64 4) ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC5]] @@ -93,10 +95,10 @@ define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) lo ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC5]] ; CHECK-NEXT: br i1 [[CMP_N15]], label %[[L44]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i64 [ [[N_VEC5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL9:%.*]] = phi i64 [ [[N_VEC5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[L26:.*]] ; CHECK: [[L26]]: -; CHECK-NEXT: [[VALUE_PHI5:%.*]] = phi i64 [ [[BC_RESUME_VAL17]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[TMP27:%.*]], %[[L26]] ] +; CHECK-NEXT: [[VALUE_PHI5:%.*]] = phi i64 [ [[BC_RESUME_VAL9]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[TMP27:%.*]], %[[L26]] ] ; CHECK-NEXT: [[DOTREPACK:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], i64 [[VALUE_PHI5]], i32 0 ; CHECK-NEXT: store ptr addrspace(10) [[DOTUNPACK]], ptr addrspace(13) [[DOTREPACK]], align 8, !tbaa [[JTBAA_ARRAYBUF_TBAA10]] ; CHECK-NEXT: [[DOTREPACK4:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], i64 [[VALUE_PHI5]], i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-crash.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-crash.ll index 3a2d7e7ff59ec..dd1ac1bf847e7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/reduction-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-crash.ll @@ -35,8 +35,8 @@ define void @pr15344(ptr noalias %ar, ptr noalias %ar2, i32 %exit.limit, i1 %con ; CHECK-NEXT: [[TMP3]] = fadd fast <2 x double> [[VEC_PHI2]], splat (double 1.000000e+00) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[AR2]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 2 -; CHECK-NEXT: store <2 x float> splat (float 2.000000e+00), ptr [[TMP4]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] -; CHECK-NEXT: store <2 x float> splat (float 2.000000e+00), ptr [[TMP6]], align 4, !alias.scope [[META0]], !noalias [[META3]] +; CHECK-NEXT: store float 2.000000e+00, ptr [[TMP4]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] +; CHECK-NEXT: store float 2.000000e+00, ptr [[TMP6]], align 4, !alias.scope [[META0]], !noalias [[META3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll index e405fe7c6f764..7b03e9e9bf991 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll @@ -28,7 +28,7 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[MUL:%.*]] = mul i64 [[TMP2]], 4063299859190 ; CHECK-NEXT: [[TOBOOL6:%.*]] = icmp eq i64 [[MUL]], 0 ; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[TMP0]] to i64 -; CHECK-NEXT: br i1 [[TOBOOL]], label %[[ITER_CHECK22:.*]], label %[[ITER_CHECK:.*]] +; CHECK-NEXT: br i1 [[TOBOOL]], label %[[ITER_CHECK23:.*]], label %[[ITER_CHECK:.*]] ; CHECK: [[ITER_CHECK]]: ; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], -9 ; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 1 @@ -36,10 +36,10 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP6]], 8 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: -; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP6]], 16 +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP6]], 32 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP6]], 16 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP6]], 32 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP6]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[N_VEC]], 2 ; CHECK-NEXT: [[IND_END:%.*]] = add i64 8, [[TMP7]] @@ -48,19 +48,34 @@ define void @_Z3fn1v() #0 { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <16 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <16 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i64> [[VEC_IND]], splat (i64 32) +; CHECK-NEXT: [[STEP_ADD3:%.*]] = add <16 x i64> [[VEC_IND3]], splat (i64 32) ; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <16 x i64> splat (i64 8), [[VEC_IND]] +; CHECK-NEXT: [[TMP35:%.*]] = sub nsw <16 x i64> splat (i64 8), [[STEP_ADD]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <16 x i64> [[VEC_IND]] +; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <16 x i64> [[STEP_ADD]] ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]] +; CHECK-NEXT: [[TMP39:%.*]] = add nsw <16 x i64> [[TMP35]], [[STEP_ADD3]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP12]], i64 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> align 16 [[TMP13]], <16 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP38]], <16 x i64> [[TMP39]], i64 0 +; CHECK-NEXT: [[TMP52:%.*]] = extractelement <16 x ptr> [[TMP13]], i32 0 +; CHECK-NEXT: store i32 8, ptr [[TMP52]], align 16 +; CHECK-NEXT: [[TMP53:%.*]] = extractelement <16 x ptr> [[TMP40]], i32 0 +; CHECK-NEXT: store i32 8, ptr [[TMP53]], align 16 ; CHECK-NEXT: [[TMP14:%.*]] = or disjoint <16 x i64> [[VEC_IND3]], splat (i64 1) +; CHECK-NEXT: [[TMP65:%.*]] = or disjoint <16 x i64> [[STEP_ADD3]], splat (i64 1) ; CHECK-NEXT: [[TMP15:%.*]] = add nsw <16 x i64> [[TMP10]], [[TMP14]] +; CHECK-NEXT: [[TMP66:%.*]] = add nsw <16 x i64> [[TMP35]], [[TMP65]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP11]], <16 x i64> [[TMP15]], i64 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> splat (i32 8), <16 x ptr> align 8 [[TMP16]], <16 x i1> splat (i1 true)) -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 32) -; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <16 x i64> [[VEC_IND3]], splat (i64 32) +; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds [10 x i32], <16 x ptr> [[TMP38]], <16 x i64> [[TMP66]], i64 0 +; CHECK-NEXT: [[TMP68:%.*]] = extractelement <16 x ptr> [[TMP16]], i32 0 +; CHECK-NEXT: store i32 8, ptr [[TMP68]], align 8 +; CHECK-NEXT: [[TMP69:%.*]] = extractelement <16 x ptr> [[TMP67]], i32 0 +; CHECK-NEXT: store i32 8, ptr [[TMP69]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[STEP_ADD]], splat (i64 32) +; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <16 x i64> [[STEP_ADD3]], splat (i64 32) ; CHECK-NEXT: [[TMP63:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP63]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -96,11 +111,13 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <8 x i64> [[VEC_IND15]] ; CHECK-NEXT: [[TMP20:%.*]] = add nsw <8 x i64> [[TMP18]], [[VEC_IND20]] ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP19]], <8 x i64> [[TMP20]], i64 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> align 16 [[TMP21]], <8 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP70:%.*]] = extractelement <8 x ptr> [[TMP21]], i32 0 +; CHECK-NEXT: store i32 8, ptr [[TMP70]], align 16 ; CHECK-NEXT: [[TMP22:%.*]] = or disjoint <8 x i64> [[VEC_IND20]], splat (i64 1) ; CHECK-NEXT: [[TMP23:%.*]] = add nsw <8 x i64> [[TMP18]], [[TMP22]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [10 x i32], <8 x ptr> [[TMP19]], <8 x i64> [[TMP23]], i64 0 -; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> splat (i32 8), <8 x ptr> align 8 [[TMP24]], <8 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP71:%.*]] = extractelement <8 x ptr> [[TMP24]], i32 0 +; CHECK-NEXT: store i32 8, ptr [[TMP71]], align 8 ; CHECK-NEXT: [[INDEX_NEXT22]] = add nuw i64 [[INDEX14]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <8 x i64> [[VEC_IND15]], splat (i64 16) ; CHECK-NEXT: [[VEC_IND_NEXT21]] = add <8 x i64> [[VEC_IND20]], splat (i64 16) @@ -110,19 +127,19 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[CMP_N23:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC7]] ; CHECK-NEXT: br i1 [[CMP_N23]], label %[[FOR_COND_CLEANUP_LOOPEXIT99]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i64 [ [[IND_END8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END9]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 8, %[[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL18:%.*]] = phi i64 [ [[IND_END11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END12]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL18:%.*]] = phi i64 [ [[IND_END8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END9]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 8, %[[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL19:%.*]] = phi i64 [ [[IND_END11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END12]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] -; CHECK: [[ITER_CHECK22]]: +; CHECK: [[ITER_CHECK23]]: ; CHECK-NEXT: [[TMP26:%.*]] = add nsw i64 [[TMP3]], -9 ; CHECK-NEXT: [[TMP27:%.*]] = lshr i64 [[TMP26]], 1 ; CHECK-NEXT: [[TMP28:%.*]] = add nuw i64 [[TMP27]], 1 ; CHECK-NEXT: [[MIN_ITERS_CHECK25:%.*]] = icmp ult i64 [[TMP28]], 8 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK25]], label %[[VEC_EPILOG_SCALAR_PH42:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK24:.*]] -; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK24]]: +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK25]], label %[[VEC_EPILOG_SCALAR_PH43:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK25:.*]] +; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK25]]: ; CHECK-NEXT: [[MIN_ITERS_CHECK28:%.*]] = icmp ult i64 [[TMP28]], 16 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK28]], label %[[VEC_EPILOG_PH45:.*]], label %[[VECTOR_PH25:.*]] -; CHECK: [[VECTOR_PH25]]: +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK28]], label %[[VEC_EPILOG_PH46:.*]], label %[[VECTOR_PH26:.*]] +; CHECK: [[VECTOR_PH26]]: ; CHECK-NEXT: [[N_MOD_VF31:%.*]] = urem i64 [[TMP28]], 16 ; CHECK-NEXT: [[N_VEC32:%.*]] = sub i64 [[TMP28]], [[N_MOD_VF31]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, i1 [[TOBOOL6]], i64 0 @@ -131,11 +148,11 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[IND_END41:%.*]] = add i64 8, [[TMP29]] ; CHECK-NEXT: [[IND_END43:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[TMP34:%.*]] = xor <16 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: br label %[[VECTOR_BODY30:.*]] -; CHECK: [[VECTOR_BODY30]]: -; CHECK-NEXT: [[INDEX34:%.*]] = phi i64 [ 0, %[[VECTOR_PH25]] ], [ [[INDEX_NEXT39:%.*]], %[[VECTOR_BODY30]] ] -; CHECK-NEXT: [[VEC_IND35:%.*]] = phi <16 x i64> [ , %[[VECTOR_PH25]] ], [ [[VEC_IND_NEXT36:%.*]], %[[VECTOR_BODY30]] ] -; CHECK-NEXT: [[VEC_IND37:%.*]] = phi <16 x i64> [ , %[[VECTOR_PH25]] ], [ [[VEC_IND_NEXT38:%.*]], %[[VECTOR_BODY30]] ] +; CHECK-NEXT: br label %[[VECTOR_BODY31:.*]] +; CHECK: [[VECTOR_BODY31]]: +; CHECK-NEXT: [[INDEX34:%.*]] = phi i64 [ 0, %[[VECTOR_PH26]] ], [ [[INDEX_NEXT39:%.*]], %[[VECTOR_BODY31]] ] +; CHECK-NEXT: [[VEC_IND35:%.*]] = phi <16 x i64> [ , %[[VECTOR_PH26]] ], [ [[VEC_IND_NEXT36:%.*]], %[[VECTOR_BODY31]] ] +; CHECK-NEXT: [[VEC_IND37:%.*]] = phi <16 x i64> [ , %[[VECTOR_PH26]] ], [ [[VEC_IND_NEXT38:%.*]], %[[VECTOR_BODY31]] ] ; CHECK-NEXT: [[TMP30:%.*]] = sub nsw <16 x i64> splat (i64 8), [[VEC_IND35]] ; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <16 x i64> [[VEC_IND35]] ; CHECK-NEXT: [[TMP32:%.*]] = add nsw <16 x i64> [[TMP30]], [[VEC_IND37]] @@ -151,20 +168,20 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[VEC_IND_NEXT36]] = add <16 x i64> [[VEC_IND35]], splat (i64 32) ; CHECK-NEXT: [[VEC_IND_NEXT38]] = add <16 x i64> [[VEC_IND37]], splat (i64 32) ; CHECK-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT39]], [[N_VEC32]] -; CHECK-NEXT: br i1 [[TMP41]], label %[[MIDDLE_BLOCK37:.*]], label %[[VECTOR_BODY30]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK37]]: +; CHECK-NEXT: br i1 [[TMP41]], label %[[MIDDLE_BLOCK38:.*]], label %[[VECTOR_BODY31]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK38]]: ; CHECK-NEXT: [[CMP_N40:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC32]] -; CHECK-NEXT: br i1 [[CMP_N40]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK43:.*]] -; CHECK: [[VEC_EPILOG_ITER_CHECK43]]: +; CHECK-NEXT: br i1 [[CMP_N40]], label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK44:.*]] +; CHECK: [[VEC_EPILOG_ITER_CHECK44]]: ; CHECK-NEXT: [[TMP42:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[IND_END55:%.*]] = add i64 8, [[TMP42]] ; CHECK-NEXT: [[IND_END58:%.*]] = mul i64 [[N_VEC32]], 2 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK50:%.*]] = icmp ult i64 [[N_MOD_VF31]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK50]], label %[[VEC_EPILOG_SCALAR_PH42]], label %[[VEC_EPILOG_PH45]], !prof [[PROF3]] -; CHECK: [[VEC_EPILOG_PH45]]: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL39:%.*]] = phi i64 [ [[N_VEC32]], %[[VEC_EPILOG_ITER_CHECK43]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK24]] ] -; CHECK-NEXT: [[BC_RESUME_VAL42:%.*]] = phi i64 [ [[IND_END41]], %[[VEC_EPILOG_ITER_CHECK43]] ], [ 8, %[[VECTOR_MAIN_LOOP_ITER_CHECK24]] ] -; CHECK-NEXT: [[BC_RESUME_VAL44:%.*]] = phi i64 [ [[IND_END43]], %[[VEC_EPILOG_ITER_CHECK43]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK24]] ] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK50]], label %[[VEC_EPILOG_SCALAR_PH43]], label %[[VEC_EPILOG_PH46]], !prof [[PROF6:![0-9]+]] +; CHECK: [[VEC_EPILOG_PH46]]: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL40:%.*]] = phi i64 [ [[N_VEC32]], %[[VEC_EPILOG_ITER_CHECK44]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK25]] ] +; CHECK-NEXT: [[BC_RESUME_VAL42:%.*]] = phi i64 [ [[IND_END41]], %[[VEC_EPILOG_ITER_CHECK44]] ], [ 8, %[[VECTOR_MAIN_LOOP_ITER_CHECK25]] ] +; CHECK-NEXT: [[BC_RESUME_VAL44:%.*]] = phi i64 [ [[IND_END43]], %[[VEC_EPILOG_ITER_CHECK44]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK25]] ] ; CHECK-NEXT: [[N_MOD_VF52:%.*]] = urem i64 [[TMP28]], 8 ; CHECK-NEXT: [[N_VEC53:%.*]] = sub i64 [[TMP28]], [[N_MOD_VF52]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT49:%.*]] = insertelement <8 x i1> poison, i1 [[TOBOOL6]], i64 0 @@ -179,11 +196,11 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[DOTSPLATINSERT67:%.*]] = insertelement <8 x i64> poison, i64 [[BC_RESUME_VAL44]], i64 0 ; CHECK-NEXT: [[DOTSPLAT68:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT67]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[INDUCTION69:%.*]] = add <8 x i64> [[DOTSPLAT68]], -; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY56:.*]] -; CHECK: [[VEC_EPILOG_VECTOR_BODY56]]: -; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL39]], %[[VEC_EPILOG_PH45]] ], [ [[INDEX_NEXT74:%.*]], %[[VEC_EPILOG_VECTOR_BODY56]] ] -; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], %[[VEC_EPILOG_PH45]] ], [ [[VEC_IND_NEXT66:%.*]], %[[VEC_EPILOG_VECTOR_BODY56]] ] -; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], %[[VEC_EPILOG_PH45]] ], [ [[VEC_IND_NEXT71:%.*]], %[[VEC_EPILOG_VECTOR_BODY56]] ] +; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY57:.*]] +; CHECK: [[VEC_EPILOG_VECTOR_BODY57]]: +; CHECK-NEXT: [[INDEX61:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL40]], %[[VEC_EPILOG_PH46]] ], [ [[INDEX_NEXT74:%.*]], %[[VEC_EPILOG_VECTOR_BODY57]] ] +; CHECK-NEXT: [[VEC_IND65:%.*]] = phi <8 x i64> [ [[INDUCTION64]], %[[VEC_EPILOG_PH46]] ], [ [[VEC_IND_NEXT66:%.*]], %[[VEC_EPILOG_VECTOR_BODY57]] ] +; CHECK-NEXT: [[VEC_IND70:%.*]] = phi <8 x i64> [ [[INDUCTION69]], %[[VEC_EPILOG_PH46]] ], [ [[VEC_IND_NEXT71:%.*]], %[[VEC_EPILOG_VECTOR_BODY57]] ] ; CHECK-NEXT: [[TMP44:%.*]] = sub nsw <8 x i64> splat (i64 8), [[VEC_IND65]] ; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, <8 x i64> [[VEC_IND65]] ; CHECK-NEXT: [[TMP46:%.*]] = add nsw <8 x i64> [[TMP44]], [[VEC_IND70]] @@ -199,17 +216,17 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[VEC_IND_NEXT66]] = add <8 x i64> [[VEC_IND65]], splat (i64 16) ; CHECK-NEXT: [[VEC_IND_NEXT71]] = add <8 x i64> [[VEC_IND70]], splat (i64 16) ; CHECK-NEXT: [[TMP55:%.*]] = icmp eq i64 [[INDEX_NEXT74]], [[N_VEC53]] -; CHECK-NEXT: br i1 [[TMP55]], label %[[VEC_EPILOG_MIDDLE_BLOCK63:.*]], label %[[VEC_EPILOG_VECTOR_BODY56]], !llvm.loop [[LOOP6:![0-9]+]] -; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK63]]: +; CHECK-NEXT: br i1 [[TMP55]], label %[[VEC_EPILOG_MIDDLE_BLOCK64:.*]], label %[[VEC_EPILOG_VECTOR_BODY57]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK64]]: ; CHECK-NEXT: [[CMP_N65:%.*]] = icmp eq i64 [[TMP28]], [[N_VEC53]] -; CHECK-NEXT: br i1 [[CMP_N65]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH42]] -; CHECK: [[VEC_EPILOG_SCALAR_PH42]]: -; CHECK-NEXT: [[BC_RESUME_VAL65:%.*]] = phi i64 [ [[IND_END54]], %[[VEC_EPILOG_MIDDLE_BLOCK63]] ], [ [[IND_END55]], %[[VEC_EPILOG_ITER_CHECK43]] ], [ 8, %[[ITER_CHECK22]] ] -; CHECK-NEXT: [[BC_RESUME_VAL66:%.*]] = phi i64 [ [[IND_END57]], %[[VEC_EPILOG_MIDDLE_BLOCK63]] ], [ [[IND_END58]], %[[VEC_EPILOG_ITER_CHECK43]] ], [ 0, %[[ITER_CHECK22]] ] +; CHECK-NEXT: br i1 [[CMP_N65]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH43]] +; CHECK: [[VEC_EPILOG_SCALAR_PH43]]: +; CHECK-NEXT: [[BC_RESUME_VAL66:%.*]] = phi i64 [ [[IND_END54]], %[[VEC_EPILOG_MIDDLE_BLOCK64]] ], [ [[IND_END55]], %[[VEC_EPILOG_ITER_CHECK44]] ], [ 8, %[[ITER_CHECK23]] ] +; CHECK-NEXT: [[BC_RESUME_VAL67:%.*]] = phi i64 [ [[IND_END57]], %[[VEC_EPILOG_MIDDLE_BLOCK64]] ], [ [[IND_END58]], %[[VEC_EPILOG_ITER_CHECK44]] ], [ 0, %[[ITER_CHECK23]] ] ; CHECK-NEXT: br label %[[FOR_BODY_US:.*]] ; CHECK: [[FOR_BODY_US]]: -; CHECK-NEXT: [[INDVARS_IV78:%.*]] = phi i64 [ [[INDVARS_IV_NEXT79:%.*]], %[[FOR_COND_CLEANUP4_US_LCSSA_US_US:.*]] ], [ [[BC_RESUME_VAL65]], %[[VEC_EPILOG_SCALAR_PH42]] ] -; CHECK-NEXT: [[INDVARS_IV70:%.*]] = phi i64 [ [[INDVARS_IV_NEXT71:%.*]], %[[FOR_COND_CLEANUP4_US_LCSSA_US_US]] ], [ [[BC_RESUME_VAL66]], %[[VEC_EPILOG_SCALAR_PH42]] ] +; CHECK-NEXT: [[INDVARS_IV78:%.*]] = phi i64 [ [[INDVARS_IV_NEXT79:%.*]], %[[FOR_COND_CLEANUP4_US_LCSSA_US_US:.*]] ], [ [[BC_RESUME_VAL66]], %[[VEC_EPILOG_SCALAR_PH43]] ] +; CHECK-NEXT: [[INDVARS_IV70:%.*]] = phi i64 [ [[INDVARS_IV_NEXT71:%.*]], %[[FOR_COND_CLEANUP4_US_LCSSA_US_US]] ], [ [[BC_RESUME_VAL67]], %[[VEC_EPILOG_SCALAR_PH43]] ] ; CHECK-NEXT: [[TMP56:%.*]] = sub nsw i64 8, [[INDVARS_IV78]] ; CHECK-NEXT: [[ADD_PTR_US:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, i64 [[INDVARS_IV78]] ; CHECK-NEXT: [[TMP57:%.*]] = add nsw i64 [[TMP56]], [[INDVARS_IV70]] @@ -233,7 +250,7 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[INDVARS_IV_NEXT79]] = add nuw nsw i64 [[INDVARS_IV78]], 2 ; CHECK-NEXT: [[CMP_US:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT79]], [[TMP3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT71]] = add nuw nsw i64 [[INDVARS_IV70]], 2 -; CHECK-NEXT: br i1 [[CMP_US]], label %[[FOR_BODY_US]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP_US]], label %[[FOR_BODY_US]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT]]: ; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]] ; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT99]]: @@ -241,8 +258,8 @@ define void @_Z3fn1v() #0 { ; CHECK: [[FOR_COND_CLEANUP]]: ; CHECK-NEXT: ret void ; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[INDVARS_IV95:%.*]] = phi i64 [ [[INDVARS_IV_NEXT96:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL17]], %[[VEC_EPILOG_SCALAR_PH]] ] -; CHECK-NEXT: [[INDVARS_IV87:%.*]] = phi i64 [ [[INDVARS_IV_NEXT88:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL18]], %[[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[INDVARS_IV95:%.*]] = phi i64 [ [[INDVARS_IV_NEXT96:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL18]], %[[VEC_EPILOG_SCALAR_PH]] ] +; CHECK-NEXT: [[INDVARS_IV87:%.*]] = phi i64 [ [[INDVARS_IV_NEXT88:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL19]], %[[VEC_EPILOG_SCALAR_PH]] ] ; CHECK-NEXT: [[TMP60:%.*]] = sub nsw i64 8, [[INDVARS_IV95]] ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr @d, i64 0, i64 [[INDVARS_IV95]] ; CHECK-NEXT: [[TMP61:%.*]] = add nsw i64 [[TMP60]], [[INDVARS_IV87]] @@ -255,7 +272,7 @@ define void @_Z3fn1v() #0 { ; CHECK-NEXT: [[INDVARS_IV_NEXT96]] = add nuw nsw i64 [[INDVARS_IV95]], 2 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT96]], [[TMP3]] ; CHECK-NEXT: [[INDVARS_IV_NEXT88]] = add nuw nsw i64 [[INDVARS_IV87]], 2 -; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT99]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT99]], !llvm.loop [[LOOP9:![0-9]+]] ; entry: %0 = load i32, ptr @c, align 4 diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll index 45405511abe90..0c7f381b7858a 100644 --- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -841,14 +841,12 @@ define void @pointer_iv_uniform(ptr %a, i32 %x, i64 %n) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[N_VEC]], 4 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT]], ptr [[NEXT_GEP]], align 8 +; CHECK-NEXT: store i32 [[X]], ptr [[NEXT_GEP]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -881,14 +879,12 @@ define void @pointer_iv_uniform(ptr %a, i32 %x, i64 %n) { ; INTER-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]] ; INTER-NEXT: [[TMP0:%.*]] = mul i64 [[N_VEC]], 4 ; INTER-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] -; INTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 -; INTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; INTER-NEXT: br label %[[VECTOR_BODY:.*]] ; INTER: [[VECTOR_BODY]]: ; INTER-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTER-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 ; INTER-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] -; INTER-NEXT: store <4 x i32> [[BROADCAST_SPLAT]], ptr [[NEXT_GEP]], align 8 +; INTER-NEXT: store i32 [[X]], ptr [[NEXT_GEP]], align 8 ; INTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; INTER-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTER-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll index 274bd043cd86b..eab2830df0ebc 100644 --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll @@ -17,9 +17,9 @@ define dso_local void @constTC(ptr noalias nocapture %A) optsize { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 2 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 4 -; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP3]], align 1 -; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP7]], align 1 -; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP8]], align 1 +; CHECK-NEXT: store i32 13, ptr [[TMP3]], align 1 +; CHECK-NEXT: store i32 13, ptr [[TMP7]], align 1 +; CHECK-NEXT: store i32 13, ptr [[TMP8]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 6 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1800 ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll index 156c2bdca7b0e..48a42db9f1cb2 100644 --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-divisible-TC.ll @@ -19,7 +19,7 @@ define dso_local void @alignTC(ptr noalias nocapture %A, i32 %n) optsize { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] -; CHECK-NEXT: store <4 x i32> splat (i32 13), ptr [[TMP1]], align 1 +; CHECK-NEXT: store i32 13, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -82,7 +82,7 @@ define dso_local void @assumeAlignedTC(ptr noalias nocapture %A, i32 %p, i32 %q) ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] -; CHECK-NEXT: store <4 x i32> splat (i32 13), ptr [[TMP1]], align 1 +; CHECK-NEXT: store i32 13, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-scev-expansion.ll index b020e59824f49..34a7ed3f2ac0d 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-scev-expansion.ll @@ -18,7 +18,7 @@ define void @test_epilogue_step_scev_expansion(ptr %dst) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP0]], align 1 +; CHECK-NEXT: store i8 0, ptr [[TMP0]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -37,7 +37,7 @@ define void @test_epilogue_step_scev_expansion(ptr %dst) { ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT4:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX3]] -; CHECK-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP2]], align 1 +; CHECK-NEXT: store i8 0, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[INDEX3]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT4]], [[N_VEC2]] ; CHECK-NEXT: br i1 [[TMP3]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/extract-from-end-vector-constant.ll b/llvm/test/Transforms/LoopVectorize/extract-from-end-vector-constant.ll index 4c6e0dee64d7d..edf720c660ba1 100644 --- a/llvm/test/Transforms/LoopVectorize/extract-from-end-vector-constant.ll +++ b/llvm/test/Transforms/LoopVectorize/extract-from-end-vector-constant.ll @@ -13,7 +13,7 @@ define i64 @exit_value_scalar_live_in(ptr %dst, i64 %in) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP1]], align 2 +; CHECK-NEXT: store i16 0, ptr [[TMP1]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -58,7 +58,7 @@ define <2 x i64> @exit_value_vector_live_in(ptr %dst) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP1]], align 2 +; CHECK-NEXT: store i16 0, ptr [[TMP1]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll index eca39e6f0b6ba..423646d607c3b 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll @@ -99,8 +99,8 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP3]], i32 4 -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP3]], align 4 -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4 +; CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4 +; CHECK-NEXT: store i32 0, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index bd0c098d335a2..52154bffc5a5c 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -1916,8 +1916,8 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP6]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP7]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 4 -; UNROLL-NO-IC-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP8]], align 4 -; UNROLL-NO-IC-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP18]], align 4 +; UNROLL-NO-IC-NEXT: store i32 7, ptr [[TMP8]], align 4 +; UNROLL-NO-IC-NEXT: store i32 7, ptr [[TMP18]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP9]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = load i16, ptr [[TMP10]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = load i16, ptr [[TMP11]], align 2 @@ -2055,7 +2055,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; SINK-AFTER-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP1]], i64 1 ; SINK-AFTER-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP2]], i64 1 ; SINK-AFTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP3]], i64 1 -; SINK-AFTER-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP4]], align 4 +; SINK-AFTER-NEXT: store i32 7, ptr [[TMP4]], align 4 ; SINK-AFTER-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP5]], align 2 ; SINK-AFTER-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP6]], align 2 ; SINK-AFTER-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP7]], align 2 @@ -3219,8 +3219,8 @@ define i32 @sink_after_dead_inst(ptr %A.ptr, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[OFFSET_IDX]] ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 4 -; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP4]], align 4 -; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP6]], align 4 +; UNROLL-NO-IC-NEXT: store i32 0, ptr [[TMP4]], align 4 +; UNROLL-NO-IC-NEXT: store i32 0, ptr [[TMP6]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 @@ -3268,7 +3268,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr, i32 %n) { ; SINK-AFTER-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]] ; SINK-AFTER-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; SINK-AFTER-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[OFFSET_IDX]] -; SINK-AFTER-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP4]], align 4 +; SINK-AFTER-NEXT: store i32 0, ptr [[TMP4]], align 4 ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], splat (i16 4) ; SINK-AFTER-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16 diff --git a/llvm/test/Transforms/LoopVectorize/hints-trans.ll b/llvm/test/Transforms/LoopVectorize/hints-trans.ll index 853be301da40f..68cfb4c98ec46 100644 --- a/llvm/test/Transforms/LoopVectorize/hints-trans.ll +++ b/llvm/test/Transforms/LoopVectorize/hints-trans.ll @@ -12,7 +12,7 @@ define void @scalar_loop_dead_after_vectorization(ptr nocapture %b) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 1, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP1]], label %[[EXIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -42,7 +42,7 @@ define void @scalar_loop_not_dead_after_vectorization(ptr nocapture %b) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP0]], align 4 +; CHECK-NEXT: store i32 1, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll index 8615401af34f8..40b643ce78f2e 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-and-sink-mem-ops-with-invariant-pointers.ll @@ -25,10 +25,8 @@ define void @hoist_invariant_load_noalias_due_to_memchecks(ptr %dst, ptr %invari ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[INVARIANT_PTR]], align 4, !alias.scope [[META0:![0-9]+]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]] -; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] +; CHECK-NEXT: store i32 [[TMP4]], ptr [[TMP5]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index f9dd626e523e8..202b5f0104fbe 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -550,7 +550,7 @@ define void @minimal_bit_widths_with_aliasing_store(i1 %c, ptr %ptr) { ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] ; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[INDEX]] ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP2]], align 1 -; VEC-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP2]], align 1 +; VEC-NEXT: store i8 0, ptr [[TMP2]], align 1 ; VEC-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.if: ; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll index a02fddc4cf72d..b72b774458aca 100644 --- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll +++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization-2.ll @@ -34,8 +34,7 @@ define void @inv_val_store_to_inv_address_conditional_diff_values_ic(ptr %a, i64 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775804 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i64 3 ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[K:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -44,7 +43,7 @@ define void @inv_val_store_to_inv_address_conditional_diff_values_ic(ptr %a, i64 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT4]] -; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 4, !alias.scope [[META0]], !noalias [[META3]] +; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[TMP1]], align 4, !alias.scope [[META0]], !noalias [[META3]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[PREDPHI]], i64 3 ; CHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4, !alias.scope [[META3]] @@ -134,13 +133,11 @@ define void @inv_val_store_to_inv_address_conditional_inv(ptr %a, i64 %n, ptr %b ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775804 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META12:![0-9]+]] +; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[TMP3]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META12:![0-9]+]] ; CHECK-NEXT: store i32 [[K]], ptr [[A]], align 4, !alias.scope [[META12]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll index abed18a57b90e..9815a12b29fd7 100644 --- a/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/invariant-store-vectorization.ll @@ -107,14 +107,12 @@ define void @inv_val_store_to_inv_address(ptr %a, i64 %n, ptr %b) { ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775804 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[A]], align 4, !alias.scope [[META9:![0-9]+]], !noalias [[META12:![0-9]+]] -; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 4, !alias.scope [[META12]] +; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[TMP1]], align 4, !alias.scope [[META12]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -182,8 +180,6 @@ define void @inv_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b, i3 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX2]], 9223372036854775804 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[NTRUNC]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[K:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -192,7 +188,7 @@ define void @inv_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b, i3 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 8, !alias.scope [[META16:![0-9]+]], !noalias [[META19:![0-9]+]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT4]] -; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 4, !alias.scope [[META16]], !noalias [[META19]] +; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[TMP1]], align 4, !alias.scope [[META16]], !noalias [[META19]] ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0 ; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: @@ -201,22 +197,22 @@ define void @inv_val_store_to_inv_address_conditional(ptr %a, i64 %n, ptr %b, i3 ; CHECK: pred.store.continue: ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1 ; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] -; CHECK: pred.store.if5: +; CHECK: pred.store.if3: ; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[A]], align 4, !alias.scope [[META19]] ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] -; CHECK: pred.store.continue6: +; CHECK: pred.store.continue4: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2 ; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] -; CHECK: pred.store.if7: +; CHECK: pred.store.if5: ; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[A]], align 4, !alias.scope [[META19]] ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] -; CHECK: pred.store.continue8: +; CHECK: pred.store.continue6: ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3 ; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]] -; CHECK: pred.store.if9: +; CHECK: pred.store.if7: ; CHECK-NEXT: store i32 [[NTRUNC]], ptr [[A]], align 4, !alias.scope [[META19]] ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]] -; CHECK: pred.store.continue10: +; CHECK: pred.store.continue8: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 86515ebe25637..797f5f757efd3 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -1023,7 +1023,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) { ; VEC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 ; VEC-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 ; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]] -; VEC-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP1]], align 2 +; VEC-NEXT: store i16 0, ptr [[TMP1]], align 2 ; VEC-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP6]] ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; VEC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8 @@ -1165,7 +1165,7 @@ define i32 @iv_ext_used_outside( ptr %dst) { ; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VEC-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 ; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i16 [[OFFSET_IDX]] -; VEC-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP1]], align 4 +; VEC-NEXT: store i32 0, ptr [[TMP1]], align 4 ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; VEC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 ; VEC-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} @@ -1250,7 +1250,7 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; VEC-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[DST]], i64 3 ; VEC-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[TMP0]], i32 0 ; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i32 -1 -; VEC-NEXT: store <2 x i16> splat (i16 1), ptr [[TMP2]], align 2 +; VEC-NEXT: store i16 1, ptr [[TMP2]], align 2 ; VEC-NEXT: [[TMP5:%.*]] = add i64 1, -1 ; VEC-NEXT: [[IV_1_NEXT_LCSSA1:%.*]] = add i64 [[TMP5]], 1 ; VEC-NEXT: br label %[[MIDDLE_BLOCK:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll index 4db3d1eed4771..a929a4c2e1f1d 100644 --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -19,7 +19,7 @@ define void @bottom_tested(ptr %p, i32 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP2]] -; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP3]], align 4 +; CHECK-NEXT: store i16 0, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -116,7 +116,7 @@ define void @early_exit(ptr %p, i32 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]] -; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP5]], align 4 +; CHECK-NEXT: store i16 0, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -182,7 +182,7 @@ define i32 @early_exit_with_live_out(ptr %ptr) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store <2 x i32> splat (i32 10), ptr [[TMP1]], align 4 +; CHECK-NEXT: store i32 10, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -315,7 +315,7 @@ define void @multiple_unique_exit(ptr %p, i32 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]] -; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP5]], align 4 +; CHECK-NEXT: store i16 0, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -394,7 +394,7 @@ define i32 @multiple_unique_exit2(ptr %p, i32 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]] -; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP5]], align 4 +; CHECK-NEXT: store i16 0, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -475,7 +475,7 @@ define i32 @multiple_unique_exit3(ptr %p, i32 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]] -; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP5]], align 4 +; CHECK-NEXT: store i16 0, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -557,7 +557,7 @@ define i32 @multiple_exit_blocks(ptr %p, i32 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]] -; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP5]], align 4 +; CHECK-NEXT: store i16 0, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -643,7 +643,7 @@ define i32 @multiple_exit_blocks2(ptr %p, i32 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]] -; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP5]], align 4 +; CHECK-NEXT: store i16 0, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -733,7 +733,7 @@ define i32 @multiple_exit_blocks3(ptr %p, i32 %n) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[INDEX]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[P:%.*]], i64 [[TMP4]] -; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP5]], align 4 +; CHECK-NEXT: store i16 0, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/min-trip-count-known-via-scev.ll b/llvm/test/Transforms/LoopVectorize/min-trip-count-known-via-scev.ll index 619ea849fa6ad..b7fc5a8b9308c 100644 --- a/llvm/test/Transforms/LoopVectorize/min-trip-count-known-via-scev.ll +++ b/llvm/test/Transforms/LoopVectorize/min-trip-count-known-via-scev.ll @@ -20,7 +20,7 @@ define i32 @loop_with_at_least_2_iterations_via_guards_order_1(ptr %dst, i32 %n) ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[TMP1]], align 4 +; CHECK-NEXT: store i32 1, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -86,7 +86,7 @@ define i32 @loop_with_at_least_2_iterations_via_guards_order_2(ptr %dst, i32 %n) ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[TMP1]], align 4 +; CHECK-NEXT: store i32 1, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll index 3bf5c0d1d13a9..6ec6dbe9b3246 100644 --- a/llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll @@ -351,8 +351,8 @@ define void @test_expand_secv_in_entry_before_gep(ptr %dst) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OUTER_IV]], [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[GEP_M]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 2 -; CHECK-NEXT: store <2 x double> zeroinitializer, ptr [[TMP3]], align 8 -; CHECK-NEXT: store <2 x double> zeroinitializer, ptr [[TMP4]], align 8 +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP3]], align 8 +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll index 404ef096b49b3..3bfb031bb41e8 100644 --- a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll +++ b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll @@ -37,7 +37,7 @@ define i32 @test(ptr %arr, i64 %n) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = add nsw i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP17]] -; CHECK-NEXT: store <4 x i32> splat (i32 65), ptr [[TMP18]], align 4 +; CHECK-NEXT: store i32 65, ptr [[TMP18]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll index bfc7feecafbc4..e347ff9e7788c 100644 --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll @@ -317,7 +317,7 @@ define void @f3(ptr noalias %A, i64 %n) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i8> splat (i8 1), ptr [[TMP1]], align 1 +; CHECK-NEXT: store i8 1, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -335,7 +335,7 @@ define void @f3(ptr noalias %A, i64 %n) { ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX5]] -; CHECK-NEXT: store <4 x i8> splat (i8 1), ptr [[TMP5]], align 1 +; CHECK-NEXT: store i8 1, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -359,54 +359,15 @@ define void @f3(ptr noalias %A, i64 %n) { ; ; CHECK-PROFITABLE-BY-DEFAULT-LABEL: define void @f3( ; CHECK-PROFITABLE-BY-DEFAULT-SAME: ptr noalias [[A:%.*]], i64 [[N:%.*]]) { -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ITER_CHECK:.*]]: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] -; CHECK-PROFITABLE-BY-DEFAULT: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], 4 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK-PROFITABLE-BY-DEFAULT: [[VECTOR_PH]]: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK-PROFITABLE-BY-DEFAULT: [[VECTOR_BODY]]: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <4 x i8> splat (i8 1), ptr [[TMP1]], align 1 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK-PROFITABLE-BY-DEFAULT: [[MIDDLE_BLOCK]]: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] -; CHECK-PROFITABLE-BY-DEFAULT: [[VEC_EPILOG_ITER_CHECK]]: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] -; CHECK-PROFITABLE-BY-DEFAULT: [[VEC_EPILOG_PH]]: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 2 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[N_VEC3:%.*]] = sub i64 [[N]], [[N_MOD_VF2]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] -; CHECK-PROFITABLE-BY-DEFAULT: [[VEC_EPILOG_VECTOR_BODY]]: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX5]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store <2 x i8> splat (i8 1), ptr [[TMP5]], align 1 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT6]] = add nuw i64 [[INDEX5]], 2 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT6]], [[N_VEC3]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP7]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK-PROFITABLE-BY-DEFAULT: [[VEC_EPILOG_MIDDLE_BLOCK]]: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[N]], [[N_VEC3]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP_N4]], label %[[FOR_END_LOOPEXIT]], label %[[VEC_EPILOG_SCALAR_PH]] -; CHECK-PROFITABLE-BY-DEFAULT: [[VEC_EPILOG_SCALAR_PH]]: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ENTRY:.*]]: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-PROFITABLE-BY-DEFAULT: [[FOR_BODY]]: -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store i8 1, ptr [[ARRAYIDX]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[N]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END_LOOPEXIT:.*]] ; CHECK-PROFITABLE-BY-DEFAULT: [[FOR_END_LOOPEXIT]]: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br label %[[FOR_END:.*]] ; CHECK-PROFITABLE-BY-DEFAULT: [[FOR_END]]: @@ -555,12 +516,12 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], [[DOTSPLAT2]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 84 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: [[MIDDLE_BLOCK]]: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 true, label %[[OUTER_LATCH]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK-PROFITABLE-BY-DEFAULT: [[VEC_EPILOG_ITER_CHECK]]: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IND_END5:%.*]] = mul i8 84, [[INDUCTION_IV]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 true, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 true, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: [[VEC_EPILOG_PH]]: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 84, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -584,7 +545,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT18]] = add nuw i64 [[INDEX7]], 2 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT16]] = add <2 x i8> [[VEC_IND15]], [[DOTSPLAT14]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT18]], 84 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP13]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 true, label %[[OUTER_LATCH]], label %[[VEC_EPILOG_SCALAR_PH]] ; CHECK-PROFITABLE-BY-DEFAULT: [[VEC_EPILOG_SCALAR_PH]]: @@ -599,7 +560,7 @@ define void @induction_resume_value_requires_non_trivial_scev_expansion(ptr %dst ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store i8 [[IV_2]], ptr [[GEP_DST]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[EC:%.*]] = icmp ugt i64 [[IV]], 83 -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EC]], label %[[OUTER_LATCH]], label %[[INNER]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EC]], label %[[OUTER_LATCH]], label %[[INNER]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: [[OUTER_LATCH]]: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDVAR_NEXT]] = add i8 [[INDVAR]], 1 @@ -722,7 +683,7 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: [[MIDDLE_BLOCK]]: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] @@ -747,7 +708,7 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], splat (i32 2) ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC3]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP10]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[TMP10]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: [[VEC_EPILOG_MIDDLE_BLOCK]]: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[CMP_N5:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC3]] ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[CMP_N5]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] @@ -762,7 +723,7 @@ define void @f4(ptr noalias %A, i32 signext %n) { ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: store i8 [[CONV]], ptr [[ARRAYIDX]], align 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-PROFITABLE-BY-DEFAULT-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK-PROFITABLE-BY-DEFAULT: [[EXIT]]: ; CHECK-PROFITABLE-BY-DEFAULT-NEXT: ret void ; @@ -936,7 +897,4 @@ exit: ; preds = %loop ; CHECK-PROFITABLE-BY-DEFAULT: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} ; CHECK-PROFITABLE-BY-DEFAULT: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]} ; CHECK-PROFITABLE-BY-DEFAULT: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]} -; CHECK-PROFITABLE-BY-DEFAULT: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} -; CHECK-PROFITABLE-BY-DEFAULT: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} -; CHECK-PROFITABLE-BY-DEFAULT: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index f9f7feb7bdfbc..087c2308a4e6a 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -415,39 +415,19 @@ define void @pr43371_pgso(i16 %val) !prof !14 { ; ; NPGSO-LABEL: define void @pr43371_pgso( ; NPGSO-SAME: i16 [[VAL:%.*]]) !prof [[PROF14]] { -; NPGSO-NEXT: [[ENTRY:.*:]] -; NPGSO-NEXT: br label %[[VECTOR_SCEVCHECK:.*]] -; NPGSO: [[VECTOR_SCEVCHECK]]: -; NPGSO-NEXT: [[TMP0:%.*]] = add i16 [[VAL]], 755 -; NPGSO-NEXT: [[TMP4:%.*]] = icmp ult i16 [[TMP0]], [[VAL]] -; NPGSO-NEXT: br i1 [[TMP4]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; NPGSO: [[VECTOR_PH]]: -; NPGSO-NEXT: br label %[[VECTOR_BODY:.*]] -; NPGSO: [[VECTOR_BODY]]: -; NPGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; NPGSO-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 -; NPGSO-NEXT: [[TMP1:%.*]] = add i16 [[VAL]], [[OFFSET_IDX]] -; NPGSO-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 -; NPGSO-NEXT: [[TMP3:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[TMP2]] -; NPGSO-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP3]], align 1 -; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; NPGSO-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 756 -; NPGSO-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] -; NPGSO: [[MIDDLE_BLOCK]]: -; NPGSO-NEXT: br label %[[FOR_COND_CLEANUP28:.*]] -; NPGSO: [[SCALAR_PH]]: +; NPGSO-NEXT: [[ENTRY:.*]]: ; NPGSO-NEXT: br label %[[FOR_BODY29:.*]] -; NPGSO: [[FOR_COND_CLEANUP28]]: +; NPGSO: [[FOR_COND_CLEANUP28:.*]]: ; NPGSO-NEXT: unreachable ; NPGSO: [[FOR_BODY29]]: -; NPGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ 0, %[[SCALAR_PH]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] +; NPGSO-NEXT: [[I24_0170:%.*]] = phi i16 [ 0, %[[ENTRY]] ], [ [[INC37:%.*]], %[[FOR_BODY29]] ] ; NPGSO-NEXT: [[ADD33:%.*]] = add i16 [[VAL]], [[I24_0170]] ; NPGSO-NEXT: [[IDXPROM34:%.*]] = zext i16 [[ADD33]] to i32 ; NPGSO-NEXT: [[ARRAYIDX35:%.*]] = getelementptr [2592 x i16], ptr @cm_array, i32 0, i32 [[IDXPROM34]] ; NPGSO-NEXT: store i16 0, ptr [[ARRAYIDX35]], align 1 ; NPGSO-NEXT: [[INC37]] = add i16 [[I24_0170]], 1 ; NPGSO-NEXT: [[CMP26:%.*]] = icmp ult i16 [[INC37]], 756 -; NPGSO-NEXT: br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]], !llvm.loop [[LOOP21:![0-9]+]] +; NPGSO-NEXT: br i1 [[CMP26]], label %[[FOR_BODY29]], label %[[FOR_COND_CLEANUP28]] ; ; We do not want to generate SCEV predicates when optimising for size, because ; that will lead to extra code generation such as the SCEV overflow runtime @@ -573,7 +553,7 @@ define i32 @pr45526_pgso() !prof !14 { ; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; NPGSO-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 508 -; NPGSO-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; NPGSO-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; NPGSO: [[MIDDLE_BLOCK]]: ; NPGSO-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 ; NPGSO-NEXT: br label %[[SCALAR_PH:.*]] @@ -584,7 +564,7 @@ define i32 @pr45526_pgso() !prof !14 { ; NPGSO-NEXT: [[FOR:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[PIVPLUS1]], %[[LOOP]] ] ; NPGSO-NEXT: [[PIVPLUS1]] = add nuw nsw i32 [[PIV]], 1 ; NPGSO-NEXT: [[COND:%.*]] = icmp ult i32 [[PIV]], 510 -; NPGSO-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP23:![0-9]+]] +; NPGSO-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP21:![0-9]+]] ; NPGSO: [[EXIT]]: ; NPGSO-NEXT: [[FOR_LCSSA:%.*]] = phi i32 [ [[FOR]], %[[LOOP]] ] ; NPGSO-NEXT: ret i32 [[FOR_LCSSA]] @@ -716,7 +696,7 @@ define void @stride1(ptr noalias %B, i32 %BStride) optsize { ; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; NPGSO-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026 -; NPGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; NPGSO-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; NPGSO: [[MIDDLE_BLOCK]]: ; NPGSO-NEXT: br label %[[FOR_END:.*]] ; NPGSO: [[FOR_END]]: @@ -754,7 +734,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP0]] -; CHECK-NEXT: store <2 x i16> splat (i16 42), ptr [[TMP1]], align 4 +; CHECK-NEXT: store i16 42, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] @@ -786,7 +766,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 { ; PGSO: [[VECTOR_BODY]]: ; PGSO-NEXT: [[TMP0:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; PGSO-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP0]] -; PGSO-NEXT: store <2 x i16> splat (i16 42), ptr [[TMP1]], align 4 +; PGSO-NEXT: store i16 42, ptr [[TMP1]], align 4 ; PGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2 ; PGSO-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; PGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] @@ -818,10 +798,10 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 { ; NPGSO: [[VECTOR_BODY]]: ; NPGSO-NEXT: [[TMP0:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; NPGSO-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[TMP0]] -; NPGSO-NEXT: store <2 x i16> splat (i16 42), ptr [[TMP1]], align 4 +; NPGSO-NEXT: store i16 42, ptr [[TMP1]], align 4 ; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2 ; NPGSO-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 -; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; NPGSO-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; NPGSO: [[MIDDLE_BLOCK]]: ; NPGSO-NEXT: br label %[[SCALAR_PH]] ; NPGSO: [[SCALAR_PH]]: @@ -834,7 +814,7 @@ define void @stride1_pgso(ptr noalias %B, i32 %BStride) !prof !14 { ; NPGSO-NEXT: store i16 42, ptr [[GEPOFB]], align 4 ; NPGSO-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; NPGSO-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 1025 -; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; NPGSO-NEXT: br i1 [[EXITCOND]], label %[[FOR_END:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] ; NPGSO: [[FOR_END]]: ; NPGSO-NEXT: ret void ; @@ -1038,10 +1018,8 @@ exit: ; NPGSO: [[LOOP18]] = distinct !{[[LOOP18]], [[META17]], [[META16]]} ; NPGSO: [[LOOP19]] = distinct !{[[LOOP19]], [[META16]], [[META17]]} ; NPGSO: [[LOOP20]] = distinct !{[[LOOP20]], [[META16]], [[META17]]} -; NPGSO: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]]} +; NPGSO: [[LOOP21]] = distinct !{[[LOOP21]], [[META17]], [[META16]]} ; NPGSO: [[LOOP22]] = distinct !{[[LOOP22]], [[META16]], [[META17]]} -; NPGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]], [[META16]]} -; NPGSO: [[LOOP24]] = distinct !{[[LOOP24]], [[META16]], [[META17]]} -; NPGSO: [[LOOP25]] = distinct !{[[LOOP25]], [[META16]], [[META17]]} -; NPGSO: [[LOOP26]] = distinct !{[[LOOP26]], [[META16]]} +; NPGSO: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]], [[META17]]} +; NPGSO: [[LOOP24]] = distinct !{[[LOOP24]], [[META16]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll index 8a77d14b9d3ba..6329814f4d799 100644 --- a/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll +++ b/llvm/test/Transforms/LoopVectorize/outer-loop-inner-latch-successors.ll @@ -13,8 +13,6 @@ define void @inner_latch_header_first_successor(i64 %N, i32 %c, i64 %M) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[M]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[M]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -22,12 +20,13 @@ define void @inner_latch_header_first_successor(i64 %N, i32 %c, i64 %M) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_LATCH:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_LATCH]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1024 x i64], ptr @A, i64 0, <4 x i64> [[VEC_IND]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> align 4 [[TMP0]], <4 x i1> splat (i1 true), <4 x i64> poison) -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[BROADCAST_SPLAT]], <4 x ptr> align 4 [[TMP0]], <4 x i1> splat (i1 true)) -; CHECK-NEXT: br label %[[INNER3:.*]] -; CHECK: [[INNER3]]: -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[INNER3]] ] -; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ [[WIDE_MASKED_GATHER]], %[[VECTOR_BODY]] ], [ [[TMP3:%.*]], %[[INNER3]] ] +; CHECK-NEXT: store i32 [[C]], ptr [[TMP5]], align 4 +; CHECK-NEXT: br label %[[INNER1:.*]] +; CHECK: [[INNER1]]: +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[INNER1]] ] +; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ [[WIDE_MASKED_GATHER]], %[[VECTOR_BODY]] ], [ [[TMP3:%.*]], %[[INNER1]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i64], ptr @B, i64 0, <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> align 4 [[TMP1]], <4 x i1> splat (i1 true), <4 x i64> poison) ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i64> [[WIDE_MASKED_GATHER5]], [[VEC_PHI4]] @@ -35,9 +34,9 @@ define void @inner_latch_header_first_successor(i64 %N, i32 %c, i64 %M) { ; CHECK-NEXT: [[TMP4]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1) ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP4]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 -; CHECK-NEXT: br i1 [[TMP9]], label %[[VECTOR_LATCH]], label %[[INNER3]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[VECTOR_LATCH]], label %[[INNER1]] ; CHECK: [[VECTOR_LATCH]]: -; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP3]], %[[INNER3]] ] +; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i64> [ [[TMP3]], %[[INNER1]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP10]], <4 x ptr> align 4 [[TMP0]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) @@ -115,8 +114,6 @@ define void @inner_latch_header_second_successor(i64 %N, i32 %c, i64 %M) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[M]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[M]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -124,12 +121,13 @@ define void @inner_latch_header_second_successor(i64 %N, i32 %c, i64 %M) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_LATCH:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_LATCH]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1024 x i64], ptr @A, i64 0, <4 x i64> [[VEC_IND]] +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> align 4 [[TMP0]], <4 x i1> splat (i1 true), <4 x i64> poison) -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[BROADCAST_SPLAT]], <4 x ptr> align 4 [[TMP0]], <4 x i1> splat (i1 true)) -; CHECK-NEXT: br label %[[INNER3:.*]] -; CHECK: [[INNER3]]: -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[INNER3]] ] -; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ [[WIDE_MASKED_GATHER]], %[[VECTOR_BODY]] ], [ [[TMP3:%.*]], %[[INNER3]] ] +; CHECK-NEXT: store i32 [[C]], ptr [[TMP10]], align 4 +; CHECK-NEXT: br label %[[INNER1:.*]] +; CHECK: [[INNER1]]: +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[INNER1]] ] +; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ [[WIDE_MASKED_GATHER]], %[[VECTOR_BODY]] ], [ [[TMP3:%.*]], %[[INNER1]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i64], ptr @B, i64 0, <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> align 4 [[TMP1]], <4 x i1> splat (i1 true), <4 x i64> poison) ; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i64> [[WIDE_MASKED_GATHER5]], [[VEC_PHI4]] @@ -137,9 +135,9 @@ define void @inner_latch_header_second_successor(i64 %N, i32 %c, i64 %M) { ; CHECK-NEXT: [[TMP4]] = add nuw nsw <4 x i64> [[VEC_PHI]], splat (i64 1) ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i64> [[TMP4]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0 -; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[INNER3]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_LATCH]], label %[[INNER1]] ; CHECK: [[VECTOR_LATCH]]: -; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i64> [ [[TMP3]], %[[INNER3]] ] +; CHECK-NEXT: [[TMP9:%.*]] = phi <4 x i64> [ [[TMP3]], %[[INNER1]] ] ; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[TMP9]], <4 x ptr> align 4 [[TMP0]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) diff --git a/llvm/test/Transforms/LoopVectorize/outer-loop-wide-phis.ll b/llvm/test/Transforms/LoopVectorize/outer-loop-wide-phis.ll index 59e3d71f27a38..600b14530eb90 100644 --- a/llvm/test/Transforms/LoopVectorize/outer-loop-wide-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/outer-loop-wide-phis.ll @@ -7,14 +7,13 @@ define void @wide_phi_2_predecessors(ptr noalias %A, ptr noalias %B, i32 %c, i1 ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_LATCH:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_LATCH]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], <4 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[BROADCAST_SPLAT]], <4 x ptr> align 4 [[TMP0]], <4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 0 +; CHECK-NEXT: store i32 [[C]], ptr [[TMP8]], align 4 ; CHECK-NEXT: br label %[[INNER_HEADER1:.*]] ; CHECK: [[INNER_HEADER1]]: ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[INNER_LATCH3:.*]] ] @@ -115,14 +114,13 @@ define void @wide_phi_2_predecessors_phi_ops_swapped(ptr noalias %A, ptr noalias ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[C]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_LATCH:.*]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_LATCH]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], <4 x i64> [[VEC_IND]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[BROADCAST_SPLAT]], <4 x ptr> align 4 [[TMP0]], <4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 0 +; CHECK-NEXT: store i32 [[C]], ptr [[TMP8]], align 4 ; CHECK-NEXT: br label %[[INNER_HEADER1:.*]] ; CHECK: [[INNER_HEADER1]]: ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[INNER_LATCH3:.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll index 28d5ef552482b..229668df218db 100644 --- a/llvm/test/Transforms/LoopVectorize/pr37248.ll +++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll @@ -58,7 +58,7 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP12]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 0 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP16]], i32 -1 -; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP17]], align 1 +; CHECK-NEXT: store i16 0, ptr [[TMP17]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -121,7 +121,7 @@ define void @f2(ptr noalias %b, i1 %c, i32 %start) { ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP11]] ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[TMP12]], i32 0 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP13]], i32 -1 -; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP14]], align 1 +; CHECK-NEXT: store i16 0, ptr [[TMP14]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/pr50686.ll b/llvm/test/Transforms/LoopVectorize/pr50686.ll index 878fbec452220..acc50a4c2b127 100644 --- a/llvm/test/Transforms/LoopVectorize/pr50686.ll +++ b/llvm/test/Transforms/LoopVectorize/pr50686.ll @@ -18,20 +18,14 @@ define void @m(ptr nocapture %p, ptr nocapture %p2, i32 %q) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[P2]], align 4, !alias.scope [[META0:![0-9]+]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> zeroinitializer, [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX9_1]], align 4, !alias.scope [[META0]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], [[BROADCAST_SPLAT3]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX9_2]], align 4, !alias.scope [[META0]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> [[TMP4]], [[BROADCAST_SPLAT5]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[P2]], align 4, !alias.scope [[META0:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i32 0, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX9_1]], align 4, !alias.scope [[META0]] +; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX9_2]], align 4, !alias.scope [[META0]] +; CHECK-NEXT: [[TMP5:%.*]] = sub nsw i32 [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP7]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] +; CHECK-NEXT: store i32 [[TMP5]], ptr [[TMP7]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 60 ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll index 565e203e68f72..fd5e2b9acfd8f 100644 --- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll @@ -344,21 +344,21 @@ define void @switch_to_header(ptr %start) { ; IC1-NEXT: [[ENTRY:.*]]: ; IC1-NEXT: br label %[[LOOP_HEADER:.*]] ; IC1: [[LOOP_HEADER]]: -; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ] +; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ] ; IC1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; IC1-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [ -; IC1-NEXT: i64 120, label %[[IF_THEN1]] +; IC1-NEXT: i64 120, label %[[IF_THEN]] ; IC1-NEXT: i64 100, label %[[LOOP_LATCH]] ; IC1-NEXT: ] -; IC1: [[IF_THEN1]]: +; IC1: [[IF_THEN]]: ; IC1-NEXT: br label %[[LOOP_HEADER]] -; IC1: [[IF_THEN:.*:]] +; IC1: [[IF_THEN1:.*:]] ; IC1-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison ; IC1-NEXT: store i64 42, ptr [[GEP]], align 1 ; IC1-NEXT: unreachable ; IC1: [[LOOP_LATCH]]: ; IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]] +; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]] ; IC1: [[EXIT]]: ; IC1-NEXT: ret void ; @@ -367,21 +367,21 @@ define void @switch_to_header(ptr %start) { ; IC2-NEXT: [[ENTRY:.*]]: ; IC2-NEXT: br label %[[LOOP_HEADER:.*]] ; IC2: [[LOOP_HEADER]]: -; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ] +; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ] ; IC2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; IC2-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [ -; IC2-NEXT: i64 120, label %[[IF_THEN1]] +; IC2-NEXT: i64 120, label %[[IF_THEN]] ; IC2-NEXT: i64 100, label %[[LOOP_LATCH]] ; IC2-NEXT: ] -; IC2: [[IF_THEN1]]: +; IC2: [[IF_THEN]]: ; IC2-NEXT: br label %[[LOOP_HEADER]] -; IC2: [[IF_THEN:.*:]] +; IC2: [[IF_THEN1:.*:]] ; IC2-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison ; IC2-NEXT: store i64 42, ptr [[GEP]], align 1 ; IC2-NEXT: unreachable ; IC2: [[LOOP_LATCH]]: ; IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 -; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]] +; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]] ; IC2: [[EXIT]]: ; IC2-NEXT: ret void ; @@ -419,7 +419,7 @@ define void @switch_all_to_default(ptr %start) { ; IC1: [[VECTOR_BODY]]: ; IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 [[INDEX]] -; IC1-NEXT: store <2 x i64> splat (i64 42), ptr [[TMP1]], align 1 +; IC1-NEXT: store i64 42, ptr [[TMP1]], align 1 ; IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; IC1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; IC1-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -438,8 +438,8 @@ define void @switch_all_to_default(ptr %start) { ; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 [[INDEX]] ; IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; IC2-NEXT: store <2 x i64> splat (i64 42), ptr [[TMP2]], align 1 -; IC2-NEXT: store <2 x i64> splat (i64 42), ptr [[TMP5]], align 1 +; IC2-NEXT: store i64 42, ptr [[TMP2]], align 1 +; IC2-NEXT: store i64 42, ptr [[TMP5]], align 1 ; IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; IC2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; IC2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -479,7 +479,7 @@ define void @switch_unconditional(ptr %start) { ; IC1: [[VECTOR_BODY]]: ; IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; IC1-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[START]], i64 [[INDEX]] -; IC1-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP1]], align 4 +; IC1-NEXT: store i32 0, ptr [[TMP1]], align 4 ; IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; IC1-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; IC1-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -498,8 +498,8 @@ define void @switch_unconditional(ptr %start) { ; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; IC2-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[START]], i64 [[INDEX]] ; IC2-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP2]], i32 2 -; IC2-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP2]], align 4 -; IC2-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP1]], align 4 +; IC2-NEXT: store i32 0, ptr [[TMP2]], align 4 +; IC2-NEXT: store i32 0, ptr [[TMP1]], align 4 ; IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; IC2-NEXT: [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; IC2-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll index faca86a41b023..47e3b9fe60552 100644 --- a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll @@ -237,7 +237,7 @@ define void @expand_diff_scev_unknown(ptr %dst, i1 %invar.c, i32 %step) mustprog ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[IV_1_LCSSA]], [[INDEX]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[OFFSET_IDX]] -; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP11]], align 4 +; CHECK-NEXT: store i32 0, ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll index 1c16ef1114371..03b13965e7f5c 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll @@ -101,7 +101,7 @@ define void @diff_memcheck_known_false_for_vf_4(ptr %B, ptr %A, ptr %end) { ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3 -; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr [[TMP8]], align 8 +; CHECK-NEXT: store i64 0, ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check.ll b/llvm/test/Transforms/LoopVectorize/runtime-check.ll index 6ea227f7492ee..0f72b0350d5ad 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-check.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-check.ll @@ -505,7 +505,7 @@ define void @test_scev_check_mul_add_expansion(ptr %out, ptr %in, i32 %len, i32 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[INDEX]], 6 ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[OFFSET_IDX]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[OUT]], i64 [[TMP6]] -; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP7]], align 2, !alias.scope [[META36:![0-9]+]], !noalias [[META39:![0-9]+]] +; CHECK-NEXT: store i16 0, ptr [[TMP7]], align 2, !alias.scope [[META36:![0-9]+]], !noalias [[META39:![0-9]+]] ; CHECK-NEXT: store i32 0, ptr [[IN]], align 4, !alias.scope [[META39]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll index e6624da7516b1..c62befba30eec 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll @@ -61,40 +61,40 @@ define void @test_large_number_of_group(ptr %dst, i64 %off, i64 %N) { ; CHECK-NEXT: [[TMP11:%.*]] = add nsw i64 [[INDEX]], -5 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], [[OFF]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[DST:%.*]], i64 [[TMP12]] -; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP14]], align 8 +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP14]], align 8 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP11]], [[OFF_MUL_2]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP15]] -; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP17]], align 8 +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP17]], align 8 ; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP11]], [[OFF_MUL_3]] ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP18]] -; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP20]], align 8 +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP20]], align 8 ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP11]], [[OFF_MUL_4]] ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP21]] -; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP23]], align 8 +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP23]], align 8 ; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[TMP11]], [[OFF_MUL_5]] ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP24]] -; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP26]], align 8 +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP26]], align 8 ; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[TMP11]], [[OFF_MUL_6]] ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP27]] -; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP29]], align 8 +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP29]], align 8 ; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[TMP11]], [[OFF_MUL_7]] ; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP30]] -; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP32]], align 8 +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP32]], align 8 ; CHECK-NEXT: [[TMP33:%.*]] = add i64 [[TMP11]], [[OFF_MUL_8]] ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP33]] -; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP35]], align 8 +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP35]], align 8 ; CHECK-NEXT: [[TMP36:%.*]] = add i64 [[TMP11]], [[OFF_MUL_9]] ; CHECK-NEXT: [[TMP38:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP36]] -; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP38]], align 8 +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP38]], align 8 ; CHECK-NEXT: [[TMP39:%.*]] = add i64 [[TMP11]], [[OFF_MUL_10]] ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP39]] -; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP41]], align 8 +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP41]], align 8 ; CHECK-NEXT: [[TMP42:%.*]] = add i64 [[TMP11]], [[OFF_MUL_11]] ; CHECK-NEXT: [[TMP44:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP42]] -; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP44]], align 8 +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP44]], align 8 ; CHECK-NEXT: [[TMP45:%.*]] = add i64 [[TMP11]], [[OFF_MUL_12]] ; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP45]] -; CHECK-NEXT: store <4 x double> zeroinitializer, ptr [[TMP47]], align 8 +; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP47]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll index a1329598529fd..5f86706a4b5b8 100644 --- a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll +++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll @@ -1516,9 +1516,7 @@ define void @stride_check_known_via_loop_guard(ptr %C, ptr %A, i32 %Acols) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, ptr [[C]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX_US]], align 8, !alias.scope [[META69:![0-9]+]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP1]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: store <4 x double> [[BROADCAST_SPLAT]], ptr [[TMP0]], align 8, !alias.scope [[META72:![0-9]+]], !noalias [[META69]] +; CHECK-NEXT: store double [[TMP1]], ptr [[TMP0]], align 8, !alias.scope [[META72:![0-9]+]], !noalias [[META69]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP74:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll index cb61fc6e0a046..3ccc25546c95f 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll @@ -33,8 +33,8 @@ define i32 @iv_live_out_wide(ptr %dst) { ; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP13:%.*]] = shl nuw i64 [[TMP12]], 1 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]] -; CHECK-NEXT: store zeroinitializer, ptr [[TMP10]], align 2 -; CHECK-NEXT: store zeroinitializer, ptr [[TMP14]], align 2 +; CHECK-NEXT: store i16 0, ptr [[TMP10]], align 2 +; CHECK-NEXT: store i16 0, ptr [[TMP14]], align 2 ; CHECK-NEXT: [[TMP15:%.*]] = add [[BROADCAST_SPLAT]], [[STEP_ADD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP6]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[BROADCAST_SPLAT2]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll index 850d6b299ca23..0ebb0ca6e589d 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll @@ -25,7 +25,7 @@ define void @test(ptr %d) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[ARR]]) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[INDEX]] -; CHECK-NEXT: store splat (i32 100), ptr [[TMP4]], align 8 +; CHECK-NEXT: store i32 100, ptr [[TMP4]], align 8 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[ARR]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -93,7 +93,7 @@ define void @testloopvariant(ptr %d) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[ARR]]) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[D]], i64 [[INDEX]] -; CHECK-NEXT: store splat (i32 100), ptr [[TMP4]], align 8 +; CHECK-NEXT: store i32 100, ptr [[TMP4]], align 8 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[ARR]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll b/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll index 479d859a9287c..522b6417094fa 100644 --- a/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll +++ b/llvm/test/Transforms/LoopVectorize/scev-exit-phi-invalidation.ll @@ -44,7 +44,7 @@ define void @test_pr63368(i1 %c, ptr %A) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX4]] to i8 ; CHECK-NEXT: [[TMP10:%.*]] = add i8 [[OFFSET_IDX]], 1 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[A]], i8 [[TMP10]] -; CHECK-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP11]], align 1 +; CHECK-NEXT: store i8 0, ptr [[TMP11]], align 1 ; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i32 [[INDEX4]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT5]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK6:%.*]], label [[VECTOR_BODY3]], !llvm.loop [[LOOP3:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll index 163faa2254700..3117923ab0c34 100644 --- a/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll +++ b/llvm/test/Transforms/LoopVectorize/scev-predicate-reasoning.ll @@ -160,9 +160,9 @@ define void @implied_wrap_predicate(ptr %A, ptr %B, ptr %C) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[A]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr [[TMP17]], align 4 +; CHECK-NEXT: store i64 0, ptr [[TMP17]], align 4 ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i64, ptr [[C]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr [[TMP18]], align 4 +; CHECK-NEXT: store i64 0, ptr [[TMP18]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] @@ -223,7 +223,7 @@ define void @no_signed_wrap_iv_via_btc(ptr %dst, i32 %N) mustprogress { ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SUB4]], [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = sext i32 [[TMP3]] to i64 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP4]] -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4 +; CHECK-NEXT: store i32 0, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll index 1e4598e756645..39133c39e0731 100644 --- a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll +++ b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll @@ -155,7 +155,7 @@ define void @test2(ptr %dst) { ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP8]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -1 -; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP11]], align 4 +; CHECK-NEXT: store i32 0, ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll index 586804bac9429..4cb1924723906 100644 --- a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll +++ b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll @@ -23,7 +23,7 @@ define i64 @multi_exit_1_exit_count_with_udiv_by_value_in_header(ptr %dst, i64 % ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP5]], align 4 +; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -87,7 +87,7 @@ define i64 @multi_exit_1_exit_count_with_udiv_by_constant_in_header(ptr %dst, i6 ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP5]], align 4 +; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -474,7 +474,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch(ptr %dst, i64 %N ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP5]], align 4 +; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] @@ -544,7 +544,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_call_before_loop ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP8]], align 4 +; CHECK-NEXT: store i32 1, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] @@ -614,7 +614,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_loop_may_not_exe ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP8]], align 4 +; CHECK-NEXT: store i32 1, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -684,7 +684,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch_different_bounds ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP8]], align 4 +; CHECK-NEXT: store i32 1, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -751,7 +751,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_frozen_value_in_latch(ptr %dst, ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP7]], align 4 +; CHECK-NEXT: store i32 1, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] @@ -817,7 +817,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_constant_in_latch(ptr %dst, i64 ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP_HEADER]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[GEP]], align 4 +; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP20:![0-9]+]] @@ -877,7 +877,7 @@ define void @single_exit_tc_with_udiv(ptr %dst, i64 %N) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP3]], align 4 +; CHECK-NEXT: store i32 1, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] @@ -938,7 +938,7 @@ define i64 @multi_exit_4_exit_count_with_urem_by_value_in_latch(ptr %dst, i64 %N ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP7]], align 4 +; CHECK-NEXT: store i32 1, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] @@ -1003,7 +1003,7 @@ define i64 @multi_exit_4_exit_count_with_urem_by_constant_in_latch(ptr %dst, i64 ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP_HEADER]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[GEP]], align 4 +; CHECK-NEXT: store i32 1, ptr [[GEP]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP26:![0-9]+]] @@ -1151,7 +1151,7 @@ define i64 @multi_exit_4_exit_count_with_udiv_by_value_in_latch1(ptr %dst, i64 % ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP5]], align 4 +; CHECK-NEXT: store i32 1, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] @@ -1262,7 +1262,7 @@ define i64 @multi_exit_count_with_udiv_by_value_in_latch_different_bounds_diviso ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[TMP6]], align 4 +; CHECK-NEXT: store i32 1, ptr [[TMP6]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll index 8a162930ffd99..a97a322cdfc01 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll @@ -15,7 +15,7 @@ define i32 @test_icmp_constant_op_zext(ptr %dst) { ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i16 1, [[DOTCAST]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i16 [[OFFSET_IDX]] -; CHECK-NEXT: store <4 x i8> splat (i8 109), ptr [[TMP1]], align 1 +; CHECK-NEXT: store i8 109, ptr [[TMP1]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 996 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -128,7 +128,7 @@ define void @ext_cmp(ptr %src.1, ptr %src.2, ptr noalias %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP0]], align 2 +; CHECK-NEXT: store i16 0, ptr [[TMP0]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll index 3b515a2acb1a7..a201ea77e40e3 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -15,7 +15,7 @@ define void @blend_uniform_iv_trunc(i1 %c) { ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i16 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[C]], i16 [[TMP0]], i16 poison ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i16 [[TMP6]] -; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP7]], align 2 +; CHECK-NEXT: store i16 0, ptr [[TMP7]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -58,7 +58,7 @@ define void @blend_uniform_iv(i1 %c) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[C]], i64 [[INDEX]], i64 poison ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [32 x i16], ptr @dst, i16 0, i64 [[TMP6]] -; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP7]], align 2 +; CHECK-NEXT: store i16 0, ptr [[TMP7]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll index 927fefc73ceea..fe8653ebe54fd 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1.ll @@ -59,10 +59,8 @@ define void @ld_div2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 +; CHECK-NEXT: store i64 [[TMP3]], ptr [[TMP4]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll index d6277d657ea7e..f6ca7caba4a0c 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_and.ll @@ -59,10 +59,8 @@ define void @ld_and_neg2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 +; CHECK-NEXT: store i64 [[TMP3]], ptr [[TMP4]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll index edf04bbcbcdff..27ef8adae2ea0 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_div_urem.ll @@ -244,10 +244,8 @@ define void @ld_div8_urem3(ptr noalias %A, ptr noalias %B) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], 42 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[TMP4]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: store <8 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 +; CHECK-NEXT: store i64 [[TMP4]], ptr [[TMP5]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll index 32873a4e90e81..ea2b67648b84a 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction1_lshr.ll @@ -82,10 +82,8 @@ define void @ld_lshr1_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] ; VF2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 ; VF2-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42 -; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0 -; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 +; VF2-NEXT: store i64 [[TMP3]], ptr [[TMP4]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] @@ -164,10 +162,8 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] ; VF2-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 ; VF2-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42 -; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i64 0 -; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; VF2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 +; VF2-NEXT: store i64 [[TMP3]], ptr [[TMP4]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF2-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -188,10 +184,8 @@ define void @ld_lshr2_step1_start0_ind1(ptr noalias %A, ptr noalias %B) { ; VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] ; VF4-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 ; VF4-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP2]], 42 -; VF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP3]], i64 0 -; VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; VF4-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 +; VF4-NEXT: store i64 [[TMP3]], ptr [[TMP4]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF4-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll index 607d1365098f2..4b9241151db2e 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform_across_vf_induction2.ll @@ -117,10 +117,8 @@ define void @ld_div2_step1_start0_ind2(ptr noalias %A, ptr noalias %B) { ; VF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; VF2-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 ; VF2-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], 42 -; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0 -; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; VF2-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 +; VF2-NEXT: store i64 [[TMP4]], ptr [[TMP5]], align 8 ; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; VF2-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll index 5d07341263bc2..7492c9e5adf0b 100644 --- a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll +++ b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll @@ -16,9 +16,7 @@ define void @test_not_first_lane_only_constant(ptr %A, ptr noalias %B) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[OFFSET_IDX]] ; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr [[B]], align 2 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i16> poison, i16 [[TMP13]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT5]], <4 x i16> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: store <4 x i16> [[BROADCAST_SPLAT6]], ptr [[TMP1]], align 2 +; CHECK-NEXT: store i16 [[TMP13]], ptr [[TMP1]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -72,9 +70,7 @@ define void @test_not_first_lane_only_wide_compare(ptr %A, ptr noalias %B, i16 % ; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i16 [[TMP3]], [[X]] ; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP4]], ptr poison, ptr [[B]] ; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 2 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i16> poison, i16 [[TMP13]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT5]], <4 x i16> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: store <4 x i16> [[BROADCAST_SPLAT6]], ptr [[TMP1]], align 2 +; CHECK-NEXT: store i16 [[TMP13]], ptr [[TMP1]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] @@ -131,9 +127,7 @@ define void @test_not_first_lane_only_wide_compare_incoming_order_swapped(ptr %A ; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i16 [[TMP3]], [[X]] ; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP4]], ptr poison, ptr [[B]] ; CHECK-NEXT: [[TMP12:%.*]] = load i16, ptr [[PREDPHI]], align 2 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP12]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT3]], <4 x i16> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: store <4 x i16> [[BROADCAST_SPLAT4]], ptr [[TMP1]], align 2 +; CHECK-NEXT: store i16 [[TMP12]], ptr [[TMP1]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/vector-geps.ll b/llvm/test/Transforms/LoopVectorize/vector-geps.ll index 94bc32205ec11..c7014b3603d6a 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-geps.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-geps.ll @@ -66,13 +66,11 @@ define void @uniform_vector_gep_stored(ptr %a, ptr %b, i64 %n) { ; CHECK: vector.ph: ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[B:%.*]], i64 4 -; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0 -; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x ptr> [[DOTSPLAT]], ptr [[TMP1]], align 8 +; CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll index 2317af5619749..8f543f2cd4107 100644 --- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll +++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll @@ -12,8 +12,8 @@ define i64 @remove_loop_region_int_iv_used_outside(ptr %dst) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8 -; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[DST]], align 8 -; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8 +; CHECK-NEXT: store ptr null, ptr [[DST]], align 8 +; CHECK-NEXT: store ptr null, ptr [[TMP2]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] @@ -45,8 +45,8 @@ define i64 @remove_loop_region_int_iv_inc_used_outside(ptr %dst) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8 -; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[DST]], align 8 -; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8 +; CHECK-NEXT: store ptr null, ptr [[DST]], align 8 +; CHECK-NEXT: store ptr null, ptr [[TMP2]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] @@ -79,8 +79,8 @@ define ptr @remove_loop_region_ptr_iv_used_outside(ptr %dst) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8 -; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[DST]], align 8 -; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8 +; CHECK-NEXT: store ptr null, ptr [[DST]], align 8 +; CHECK-NEXT: store ptr null, ptr [[TMP2]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -8 @@ -115,8 +115,8 @@ define ptr @remove_loop_region_ptr_iv_inc_used_outside(ptr %dst) { ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8 -; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[DST]], align 8 -; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8 +; CHECK-NEXT: store ptr null, ptr [[DST]], align 8 +; CHECK-NEXT: store ptr null, ptr [[TMP2]], align 8 ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll index 6a6ae316f4e52..5c2fa947ad4bd 100644 --- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll @@ -28,7 +28,7 @@ define void @test_versioned_with_sext_use(i32 %offset, ptr %dst) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]] -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP4]], align 8 +; CHECK-NEXT: store i32 0, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -98,7 +98,7 @@ define void @test_versioned_with_zext_use(i32 %offset, ptr %dst) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[INDEX]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]] -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP4]], align 8 +; CHECK-NEXT: store i32 0, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -248,7 +248,7 @@ define void @test_versioned_with_different_uses(i32 %offset, ptr noalias %dst.1, ; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 8 ; CHECK-NEXT: store i32 0, ptr [[TMP11]], align 8 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[DST_2]], i64 [[TMP3]] -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP12]], align 8 +; CHECK-NEXT: store i32 0, ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] @@ -342,7 +342,7 @@ define void @test_versioned_with_non_ex_use(i32 %offset, ptr noalias %dst.1, ptr ; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 8 ; CHECK-NEXT: store i32 0, ptr [[TMP18]], align 8 ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DST_2]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP20]], align 8 +; CHECK-NEXT: store i32 0, ptr [[TMP20]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 @@ -412,7 +412,7 @@ define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i16> splat (i16 1), ptr [[TMP4]], align 2 +; CHECK-NEXT: store i16 1, ptr [[TMP4]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll index d08ca8c99e8ba..9ee0f79ef5c2a 100644 --- a/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll +++ b/llvm/test/Transforms/LoopVectorize/widen-gep-all-indices-invariant.ll @@ -10,15 +10,16 @@ define void @pr63340(ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 1 ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[TMP0]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x ptr> [[DOTSPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x ptr> [[DOTSPLAT]], i32 3 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i8 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[B]], i8 [[OFFSET_IDX]] -; CHECK-NEXT: store <4 x ptr> [[DOTSPLAT]], ptr [[TMP1]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[B]], i8 [[OFFSET_IDX]] +; CHECK-NEXT: store ptr [[TMP1]], ptr [[TMP2]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 -; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: @@ -59,10 +60,11 @@ define void @wide_gep_index_invariant(ptr noalias %dst, ptr noalias %src, i64 %n ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, <4 x ptr> [[BROADCAST_SPLAT]], i64 [[N]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x ptr> [[TMP1]], ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x ptr> [[TMP1]], i32 3 +; CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP2]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 -; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: