diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index e41f67103e096..1c88b56ca89dc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2593,22 +2593,11 @@ void VPWidenGEPRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
-static Type *getGEPIndexTy(bool IsScalable, bool IsReverse, bool IsUnitStride,
-                           unsigned CurrentPart, IRBuilderBase &Builder) {
-  // Use i32 for the gep index type when the value is constant,
-  // or query DataLayout for a more suitable index type otherwise.
-  const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
-  return !IsUnitStride || (IsScalable && (IsReverse || CurrentPart > 0))
-             ? DL.getIndexType(Builder.getPtrTy(0))
-             : Builder.getInt32Ty();
-}
-
 void VPVectorEndPointerRecipe::execute(VPTransformState &State) {
   auto &Builder = State.Builder;
   unsigned CurrentPart = getUnrollPart(*this);
-  bool IsUnitStride = Stride == 1 || Stride == -1;
-  Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ true,
-                                IsUnitStride, CurrentPart, Builder);
+  const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
+  Type *IndexTy = DL.getIndexType(State.TypeAnalysis.inferScalarType(this));
 
   // The wide store needs to start at the last vector element.
   Value *RunTimeVF = State.get(getVFValue(), VPLane(0));
@@ -2644,8 +2633,8 @@ void VPVectorEndPointerRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
 void VPVectorPointerRecipe::execute(VPTransformState &State) {
   auto &Builder = State.Builder;
   unsigned CurrentPart = getUnrollPart(*this);
-  Type *IndexTy = getGEPIndexTy(State.VF.isScalable(), /*IsReverse*/ false,
-                                /*IsUnitStride*/ true, CurrentPart, Builder);
+  const DataLayout &DL = Builder.GetInsertBlock()->getDataLayout();
+  Type *IndexTy = DL.getIndexType(State.TypeAnalysis.inferScalarType(this));
 
   Value *Ptr = State.get(getOperand(0), VPLane(0));
   Value *Increment = createStepForVF(Builder, IndexTy, State.VF, CurrentPart);
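The functional change above drops the special-case i32 index type for constant unit-stride offsets and instead asks DataLayout for the index type of the pointer being stepped, which is why the regression tests below now expect i64 GEP offsets on AArch64. A minimal standalone sketch of that DataLayout query (illustrative only, not part of the patch; the simplified layout string and main() harness are assumptions):

// Illustrative sketch: DataLayout::getIndexType returns the pointer-width
// integer type, e.g. i64 for 64-bit pointers. The layout string below is a
// simplified stand-in, not a real target's full data layout.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::LLVMContext Ctx;
  llvm::DataLayout DL("e-p:64:64"); // little endian, 64-bit pointers
  llvm::Type *PtrTy = llvm::PointerType::get(Ctx, /*AddressSpace=*/0);
  llvm::Type *IdxTy = DL.getIndexType(PtrTy);
  IdxTy->print(llvm::outs()); // prints "i64"
  llvm::outs() << "\n";
  return 0;
}

In the recipes themselves, the pointer type is recovered via State.TypeAnalysis.inferScalarType(this), so vector-pointer GEPs consistently use the target's index width rather than a hard-coded i32.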
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
index 23918427e7003..95b4dcb23dd47 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/call-costs.ll
@@ -14,7 +14,7 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i64> [ , %[[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8
 ; CHECK-NEXT: [[WIDE_LOAD1]] = load <2 x i64>, ptr [[TMP5]], align 8
 ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[VECTOR_RECUR]], <2 x i64> [[WIDE_LOAD]], <2 x i32>
@@ -22,7 +22,7 @@ define void @fshl_operand_first_order_recurrence(ptr %dst, ptr noalias %src) {
 ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP6]], <2 x i64> splat (i64 1))
 ; CHECK-NEXT: [[TMP9:%.*]] = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> splat (i64 1), <2 x i64> [[TMP7]], <2 x i64> splat (i64 1))
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 2
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i64 2
 ; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP10]], align 8
 ; CHECK-NEXT: store <2 x i64> [[TMP9]], ptr [[TMP13]], align 8
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 9609982b2c68f..2e3ddc8899ec7 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -63,7 +63,7 @@ define void @loop_dependent_cond(ptr %src, ptr noalias %dst, i64 %N) {
 ; DEFAULT: [[VECTOR_BODY]]:
 ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
 ; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
-; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[TMP3]], i32 2
+; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[TMP3]], i64 2
 ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP3]], align 8
 ; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP6]], align 8
 ; DEFAULT-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD]])
@@ -259,7 +259,7 @@ define void @latch_branch_cost(ptr %dst) {
 ; DEFAULT: [[VECTOR_BODY]]:
 ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
-; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i32 16
+; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 16
 ; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP2]], align 1
 ; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP5]], align 1
 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
index 0a433ec76acf4..f0664197dcb94 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
@@ -137,7 +137,7 @@ define void @test_shrink_zext_in_preheader(ptr noalias %src, ptr noalias %dst, i
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 16
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16
 ; CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1
 ; CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP5]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
@@ -210,7 +210,7 @@ define void @test_shrink_select(ptr noalias %src, ptr noalias %dst, i32 %A, i1 %
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[DST]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 16
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 16
 ; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP5]], align 1
 ; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[TMP6]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32
@@ -279,7 +279,7 @@ define void @trunc_invariant_sdiv_result(i32 %a, i32 %b, ptr noalias %src, ptr %
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
 ; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i16>
@@ -287,7 +287,7 @@ define void @trunc_invariant_sdiv_result(i32 %a, i32 %b, ptr noalias %src, ptr %
 ; CHECK-NEXT: [[TMP5:%.*]] = mul <16 x i16> [[TMP0]], [[TMP3]]
 ; CHECK-NEXT: [[TMP6:%.*]] = mul <16 x i16> [[TMP0]], [[TMP4]]
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 16
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i64 16
 ; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP7]], align 2
 ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[TMP8]], align 2
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
@@ -413,7 +413,7 @@ define void @old_and_new_size_equalko(ptr noalias %src, ptr noalias %dst) {
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
 ; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[WIDE_LOAD]] to <4 x i1>
@@ -427,15 +427,16 @@ define void @old_and_new_size_equalko(ptr noalias %src, ptr noalias %dst) {
 ; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[TMP8]] to <4 x i32>
 ; CHECK-NEXT: [[TMP11:%.*]] = trunc <4 x i64> [[TMP9]] to <4 x i32>
 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 4
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 4
 ; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr [[TMP12]], align 4
 ; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP13]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
 ; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br [[EXIT:label %.*]]
-; CHECK: [[SCALAR_PH:.*:]]
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
 ;
 entry:
   br label %loop
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
index 2180f18750bf2..580c568c373f1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll
@@ -27,7 +27,7 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i8> [[VEC_IND]], splat (i8 16)
 ; CHECK-NEXT: [[INDEX4:%.*]] = trunc i32 [[INDEX]] to i8
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[INDEX4]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 16
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP8]], align 8
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP7]], align 8
 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], splat (i8 3)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
index a3b7392dd280f..549df337e6907 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
@@ -19,17 +19,17 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 32
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 48
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 48
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[C:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 16
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 32
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 48
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 16
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 32
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 48
 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i8>, ptr [[TMP9]], align 1
@@ -39,9 +39,9 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no
 ; CHECK-NEXT: [[TMP13:%.*]] = add <16 x i8> [[WIDE_LOAD7]], [[WIDE_LOAD3]]
 ; CHECK-NEXT: [[TMP14:%.*]] = add <16 x i8> [[WIDE_LOAD8]], [[WIDE_LOAD4]]
 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i32 16
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i32 32
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i32 48
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 16
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 32
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP15]], i64 48
 ; CHECK-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP15]], align 1
 ; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP17]], align 1
 ; CHECK-NEXT: store <16 x i8> [[TMP13]], ptr [[TMP18]], align 1
@@ -54,7 +54,7 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK: vec.epilog.iter.check:
 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
 ; CHECK: vec.epilog.ph:
 ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[ITERATIONS]], 8
@@ -71,7 +71,7 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no
 ; CHECK-NEXT: store <8 x i8> [[TMP26]], ptr [[TMP27]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 8
 ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC10]]
-; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: vec.epilog.middle.block:
 ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]]
 ; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -89,7 +89,7 @@ define void @add_i8(ptr noalias nocapture noundef writeonly %A, ptr nocapture no
 ; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX6]], align 1
 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[ITERATIONS]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK: exit:
 ; CHECK-NEXT: ret void
 ;
@@ -128,17 +128,17 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 8
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 16
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 24
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 8
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 16
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 24
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i16>, ptr [[TMP4]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP5]], align 1
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[C:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 8
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 16
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 24
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i64 16
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i64 24
 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i16>, ptr [[TMP6]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i16>, ptr [[TMP8]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i16>, ptr [[TMP9]], align 1
@@ -148,22 +148,22 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n
 ; CHECK-NEXT: [[TMP13:%.*]] = add <8 x i16> [[WIDE_LOAD7]], [[WIDE_LOAD3]]
 ; CHECK-NEXT: [[TMP14:%.*]] = add <8 x i16> [[WIDE_LOAD8]], [[WIDE_LOAD4]]
 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i16, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 8
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 16
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 24
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 8
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 16
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 24
 ; CHECK-NEXT: store <8 x i16> [[TMP11]], ptr [[TMP15]], align 1
 ; CHECK-NEXT: store <8 x i16> [[TMP12]], ptr [[TMP17]], align 1
 ; CHECK-NEXT: store <8 x i16> [[TMP13]], ptr [[TMP18]], align 1
 ; CHECK-NEXT: store <8 x i16> [[TMP14]], ptr [[TMP19]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK: vec.epilog.iter.check:
 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]]
 ; CHECK: vec.epilog.ph:
 ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[ITERATIONS]], 4
@@ -180,7 +180,7 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n
 ; CHECK-NEXT: store <4 x i16> [[TMP26]], ptr [[TMP27]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 4
 ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC10]]
-; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK: vec.epilog.middle.block:
 ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]]
 ; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -198,7 +198,7 @@ define void @add_i16(ptr noalias nocapture noundef writeonly %A, ptr nocapture n
 ; CHECK-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX6]], align 1
 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[ITERATIONS]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK: exit:
 ; CHECK-NEXT: ret void
 ;
@@ -237,17 +237,17 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP3]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP4]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP5]], align 1
 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 4
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 8
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 12
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 4
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 12
 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP6]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP8]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP9]], align 1
@@ -257,22 +257,22 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n
 ; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD3]]
 ; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[WIDE_LOAD8]], [[WIDE_LOAD4]]
 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 4
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 8
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 12
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 4
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 8
+; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 12
 ; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP15]], align 1
 ; CHECK-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP17]], align 1
 ; CHECK-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP18]], align 1
 ; CHECK-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP19]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK: vec.epilog.iter.check:
 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]]
 ; CHECK: vec.epilog.ph:
 ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[ITERATIONS]], 4
@@ -289,7 +289,7 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n
 ; CHECK-NEXT: store <4 x i32> [[TMP26]], ptr [[TMP27]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX11]], 4
 ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC10]]
-; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP29]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK: vec.epilog.middle.block:
 ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[ITERATIONS]], [[N_VEC10]]
 ; CHECK-NEXT: br i1 [[CMP_N15]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -307,7 +307,7 @@ define void @add_i32(ptr noalias nocapture noundef writeonly %A, ptr nocapture n
 ; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX6]], align 1
 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[ITERATIONS]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK: exit:
 ; CHECK-NEXT: ret void
 ;
@@ -347,9 +347,9 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) {
 ; CHECK: vector.ph:
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 16
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 32
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 48
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 16
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 32
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 48
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[ARG]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
@@ -358,9 +358,9 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) {
 ; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[WIDE_LOAD4]], splat (i8 10)
 ; CHECK-NEXT: [[TMP6:%.*]] = add <16 x i8> [[WIDE_LOAD5]], splat (i8 10)
 ; CHECK-NEXT: [[TMP7:%.*]] = add <16 x i8> [[WIDE_LOAD6]], splat (i8 10)
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 16
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 32
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 48
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 16
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 32
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i64 48
 ; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[ARG2]], align 1
 ; CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP8]], align 1
 ; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP9]], align 1
@@ -369,7 +369,7 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) {
 ; CHECK: middle.block:
 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
 ; CHECK: vec.epilog.iter.check:
-; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF14:![0-9]+]]
 ; CHECK: vec.epilog.ph:
 ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
@@ -382,7 +382,7 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) {
 ; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP13]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
-; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
 ; CHECK: vec.epilog.middle.block:
 ; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
 ; CHECK: vec.epilog.scalar.ph:
@@ -397,7 +397,7 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) {
 ; CHECK-NEXT: store i8 [[SELECT]], ptr [[GEP_B]], align 1
 ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 20
-; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK: exit:
 ; CHECK-NEXT: ret void
 ;
@@ -447,16 +447,16 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[INDEX]], 4
 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 8
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 12
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 4
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 8
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 12
 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[NEXT_GEP]], align 4
 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP8]], align 4
 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP9]], align 4
 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP10]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
@@ -464,7 +464,7 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" {
 ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[N_VEC]], 4
 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[TMP12]]
 ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11]]
 ; CHECK: vec.epilog.ph:
 ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
 ; CHECK-NEXT: [[N_MOD_VF1:%.*]] = urem i64 [[TMP2]], 4
@@ -479,7 +479,7 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" {
 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[NEXT_GEP4]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX3]], 4
 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC2]]
-; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; CHECK: vec.epilog.middle.block:
 ; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC2]]
 ; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -491,7 +491,7 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" {
 ; CHECK-NEXT: store i32 0, ptr [[IV]], align 4
 ; CHECK-NEXT: [[IV_NEXT]] = getelementptr i8, ptr [[IV]], i64 4
 ; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[IV]], [[PTR_END]]
-; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
 ; CHECK: exit:
 ; CHECK-NEXT: ret void
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
index 5e92123891b31..85726c161cc54 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll
@@ -29,7 +29,7 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) {
 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP12]])
 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP13]])
 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP14]])
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 2
 ; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1
 ; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -117,7 +117,7 @@ define void @test_widen_induction(ptr %A, i64 %N) {
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2
 ; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP1]], align 4
 ; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP3]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -201,7 +201,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) {
 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[START]], [[INDEX]]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2
 ; CHECK-NEXT: store <2 x i64> [[VEC_IND]], ptr [[TMP2]], align 4
 ; CHECK-NEXT: store <2 x i64> [[STEP_ADD]], ptr [[TMP4]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -285,7 +285,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) {
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 10)
 ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i64> [[STEP_ADD]], splat (i64 10)
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2
 ; CHECK-NEXT: store <2 x i64> [[TMP2]], ptr [[TMP1]], align 4
 ; CHECK-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -399,7 +399,7 @@ define void @test_widen_truncated_induction(ptr %A) {
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i8> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], splat (i8 2)
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 2
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 2
 ; CHECK-NEXT: store <2 x i8> [[VEC_IND]], ptr [[TMP1]], align 1
 ; CHECK-NEXT: store <2 x i8> [[STEP_ADD]], ptr [[TMP3]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll
index cb4e99332c04b..4eacc55a99f72 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilogue-vectorization-fix-scalar-resume-values.ll
@@ -20,7 +20,7 @@ define void @epilogue_vectorization_fix_scalar_resume_values(ptr %dst, i64 %n) {
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16
 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP0]], align 1
 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP1]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll
index 35d7e2cc8c586..feb0175e75542 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/f128-fmuladd-reduction.ll
@@ -21,16 +21,16 @@ define double @fp128_fmuladd_reduction(ptr %start0, ptr %start1, ptr %end0, ptr
 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[START0]], i64 [[TMP0]]
 ; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[INDEX]], 8
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[START1]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr fp128, ptr [[TMP1]], i32 2
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr fp128, ptr [[TMP1]], i32 4
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr fp128, ptr [[TMP1]], i32 6
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr fp128, ptr [[TMP1]], i64 2
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr fp128, ptr [[TMP1]], i64 4
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr fp128, ptr [[TMP1]], i64 6
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x fp128>, ptr [[TMP1]], align 16
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x fp128>, ptr [[TMP24]], align 16
 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x fp128>, ptr [[TMP4]], align 16
 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x fp128>, ptr [[TMP5]], align 16
-; CHECK-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP3]], i32 2
-; CHECK-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP3]], i32 4
-; CHECK-NEXT: [[TMP36:%.*]] = getelementptr double, ptr [[TMP3]], i32 6
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP3]], i64 2
+; CHECK-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP3]], i64 4
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr double, ptr [[TMP3]], i64 6
 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP3]], align 16
 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP28]], align 16
 ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x double>, ptr [[TMP35]], align 16
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll
index c94b3a4c49555..c692ba5b06690 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll
@@ -26,7 +26,7 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
 ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i8> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD1]] = load <16 x i8>, ptr [[TMP6]], align 1
 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[VECTOR_RECUR]], <16 x i8> [[WIDE_LOAD]], <16 x i32>
@@ -34,7 +34,7 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
 ; CHECK-NEXT: [[TMP9:%.*]] = add <16 x i8> [[WIDE_LOAD]], [[TMP7]]
 ; CHECK-NEXT: [[TMP10:%.*]] = add <16 x i8> [[WIDE_LOAD1]], [[TMP8]]
 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 16
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16
 ; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP11]], align 1
 ; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP14]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
@@ -119,7 +119,7 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
 ; CHECK-NEXT: [[VECTOR_RECUR4:%.*]] = phi <16 x i8> [ [[VECTOR_RECUR_INIT3]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]]
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
 ; CHECK-NEXT: [[WIDE_LOAD5]] = load <16 x i8>, ptr [[TMP6]], align 1
 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[VECTOR_RECUR]], <16 x i8> [[WIDE_LOAD]], <16 x i32>
@@ -135,7 +135,7 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
 ; CHECK-NEXT: [[TMP17:%.*]] = add <16 x i8> [[TMP15]], [[WIDE_LOAD]]
 ; CHECK-NEXT: [[TMP18:%.*]] = add <16 x i8> [[TMP16]], [[WIDE_LOAD5]]
 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i32 16
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 16
 ; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr [[TMP19]], align 1
 ; CHECK-NEXT: store <16 x i8> [[TMP18]], ptr [[TMP22]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll
index faee4c1194018..591bdabca65e7 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-wide-lane-mask.ll
@@ -56,9 +56,9 @@ define void @fixed_wide_active_lane_mask(ptr noalias %dst, ptr noalias readonly
 ; CHECK-UF4-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
 ; CHECK-UF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
 ; CHECK-UF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]]
-; CHECK-UF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 4
-; CHECK-UF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 8
-; CHECK-UF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 12
+; CHECK-UF4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 4
+; CHECK-UF4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 8
+; CHECK-UF4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 12
 ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP8]], <4 x i1> [[ACTIVE_LANE_MASK]])
 ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP17]], <4 x i1> [[ACTIVE_LANE_MASK4]])
 ; CHECK-UF4-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[BROADCAST_SPLAT]], ptr align 4 [[TMP18]], <4 x i1> [[ACTIVE_LANE_MASK5]])
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll
index 6902dd990509e..a04367f32dd01 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll
@@ -53,7 +53,7 @@ define float @fmaxnum(ptr %src, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]])
@@ -128,10 +128,10 @@ define float @test_fmax_and_fmin(ptr %src.0, ptr %src.1, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_0]], i64 [[IV]]
 ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_1]], i64 [[IV]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_0]], i32 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_0]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC_0]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_1]], i32 4
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_1]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[GEP_SRC_1]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
 ; CHECK-NEXT: [[TMP4]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI2]], <4 x float> [[WIDE_LOAD]])
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll
index 193424d3eb70a..0bddc498f9e83 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll
@@ -53,7 +53,7 @@ define float @fminnum(ptr %src, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]])
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll
index f15f04fe5f6f2..3a9d5c34bacab 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/fminimumnum.ll
@@ -20,17 +20,17 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i32 4
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i32 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP7]], align 4
 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD6]])
 ; CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[WIDE_LOAD5]], <4 x float> [[WIDE_LOAD7]])
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i32 4
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 4
 ; CHECK-NEXT: store <4 x float> [[TMP8]], ptr [[TMP10]], align 4
 ; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP12]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -97,17 +97,17 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i32 4
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i32 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP7]], align 4
 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD6]])
 ; CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> [[WIDE_LOAD5]], <4 x float> [[WIDE_LOAD7]])
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i32 4
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 4
 ; CHECK-NEXT: store <4 x float> [[TMP8]], ptr [[TMP10]], align 4
 ; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP12]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -174,17 +174,17 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i64 2
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8
 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP4]], align 8
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 2
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i64 2
 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP5]], align 8
 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP7]], align 8
 ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD6]])
 ; CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[WIDE_LOAD5]], <2 x double> [[WIDE_LOAD7]])
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i32 2
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i64 2
 ; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[TMP10]], align 8
 ; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[TMP12]], align 8
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -251,17 +251,17 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 2
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i64 2
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8
 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP4]], align 8
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 2
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i64 2
 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP5]], align 8
 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP7]], align 8
 ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD6]])
 ; CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[WIDE_LOAD5]], <2 x double> [[WIDE_LOAD7]])
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i32 2
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i64 2
 ; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[TMP10]], align 8
 ; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[TMP12]], align 8
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -328,17 +328,17 @@ define void @fmin16(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT1]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw half, ptr [[TMP2]], i32 8
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw half, ptr [[TMP2]], i64 8
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x half>, ptr [[TMP2]], align 2
 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x half>, ptr [[TMP6]], align 2
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT2]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw half, ptr [[TMP4]], i32 8
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw half, ptr [[TMP4]], i64 8
 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x half>, ptr [[TMP4]], align 2
 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x half>, ptr [[TMP10]], align 2
 ; CHECK-NEXT: [[TMP11:%.*]] = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> [[WIDE_LOAD]], <8 x half> [[WIDE_LOAD6]])
 ; CHECK-NEXT: [[TMP13:%.*]] = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> [[WIDE_LOAD5]], <8 x half> [[WIDE_LOAD7]])
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[OUTPUT]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw half, ptr [[TMP7]], i32 8
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw half, ptr [[TMP7]], i64 8
 ; CHECK-NEXT: store <8 x half> [[TMP11]], ptr [[TMP7]], align 2
 ; CHECK-NEXT: store <8 x half> [[TMP13]], ptr [[TMP12]], align 2
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
@@ -405,17 +405,17 @@ define void @fmax16(ptr noundef readonly captures(none) %input1, ptr noundef rea
 ; CHECK: [[VECTOR_BODY]]:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT1]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw half, ptr [[TMP2]], i32 8
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw half, ptr [[TMP2]], i64 8
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x half>, ptr [[TMP2]], align 2
 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x half>, ptr [[TMP6]], align 2
 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[INPUT2]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw half, ptr [[TMP4]], i32 8
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw half, ptr [[TMP4]], i64 8
 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x half>, ptr [[TMP4]], align 2
 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x half>, ptr [[TMP10]], align 2
 ; CHECK-NEXT: [[TMP11:%.*]] = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> [[WIDE_LOAD]], <8 x half> [[WIDE_LOAD6]])
 ; CHECK-NEXT: [[TMP13:%.*]] = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> [[WIDE_LOAD5]], <8 x half> [[WIDE_LOAD7]])
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [4096 x half], ptr [[OUTPUT]], i64 0, i64 [[INDEX]]
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw half, ptr [[TMP7]], i32 8
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw half, ptr [[TMP7]], i64 8
 ; CHECK-NEXT: store <8 x half> [[TMP11]], ptr [[TMP7]], align 2
 ; CHECK-NEXT: store <8 x half> [[TMP13]], ptr [[TMP12]], align 2
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
index 56edee44fe3b1..21b21774d18cf 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll
@@ -62,7 +62,7 @@ define void @test_iv_cost(ptr %ptr.start, i8 %a, i64 %b) {
 ; COST1: [[VECTOR_BODY]]:
 ; COST1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; COST1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[INDEX]]
-; COST1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16
+; COST1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16
 ; COST1-NEXT: store <16 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1
 ; COST1-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP0]], align 1
 ; COST1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
@@ -328,7 +328,7 @@ define void @invalid_legacy_cost(i64 %N, ptr %x) #0 {
 ; COST1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP1]], i64 0
 ; COST1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
 ; COST1-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[X]], i64 [[INDEX]]
-; COST1-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr [[TMP2]], i32 2
+; COST1-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr [[TMP2]], i64 2
 ; COST1-NEXT: store <2 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8
 ; COST1-NEXT: store <2 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8
 ; COST1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
index 42a1940925968..7b42e565e127d 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
@@ -25,7 +25,7 @@ define i32 @multi_exit_iv_uniform(i32 %a, i64 %N, ptr %dst) {
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP5]], i32 4
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP5]], i64 4
 ; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP5]], align 8
 ; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr [[TMP9]], align 8
 ; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[VEC_PHI]], splat (i32 -1)
@@ -106,7 +106,7 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 2
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 2
 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i32>, ptr [[TMP7]], align 1
 ; CHECK-NEXT: [[TMP9:%.*]] = zext <2 x i32> [[WIDE_LOAD4]] to <2 x i64>
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -161,7 +161,7 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i32> [[WIDE_LOAD3]] to <4 x i64>
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -205,7 +205,7 @@ define void @wide_truncated_iv(ptr %dst) {
 ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i8> [ , [[VECTOR_PH1]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i8> [[VEC_IND]], splat (i8 8)
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i32 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 8
 ; CHECK-NEXT: store <8 x i8> [[VEC_IND]], ptr [[TMP2]], align 1
 ; CHECK-NEXT: store <8 x i8> [[STEP_ADD]], ptr [[TMP5]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
@@ -287,7 +287,7 @@ define i64 @test_ptr_ivs_and_widened_ivs(ptr %src, i32 %N) {
 ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
 ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
 ; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[WIDE_LOAD]], splat (i32 1)
 ; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i32> [[TMP7]] to <4 x i64>
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll
index 9b4151f30d640..f7060ec3512ac 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-load-store.ll
@@ -35,9 +35,9 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8
 ; INTERLEAVE-4: vector.body:
 ; INTERLEAVE-4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; INTERLEAVE-4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
-; INTERLEAVE-4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 16
-; INTERLEAVE-4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 32
-; INTERLEAVE-4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 48
+; INTERLEAVE-4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 16
+; INTERLEAVE-4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 32
+; INTERLEAVE-4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64
48 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1 @@ -55,9 +55,9 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4-NEXT: [[TMP23:%.*]] = select <16 x i1> [[TMP15]], <16 x i8> [[BROADCAST_SPLAT]], <16 x i8> [[TMP19]] ; INTERLEAVE-4-NEXT: [[TMP24:%.*]] = select <16 x i1> [[TMP16]], <16 x i8> [[BROADCAST_SPLAT]], <16 x i8> [[TMP20]] ; INTERLEAVE-4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] -; INTERLEAVE-4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 16 -; INTERLEAVE-4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 32 -; INTERLEAVE-4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 48 +; INTERLEAVE-4-NEXT: [[TMP30:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 16 +; INTERLEAVE-4-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 32 +; INTERLEAVE-4-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 48 ; INTERLEAVE-4-NEXT: store <16 x i8> [[TMP21]], ptr [[TMP25]], align 1 ; INTERLEAVE-4-NEXT: store <16 x i8> [[TMP22]], ptr [[TMP30]], align 1 ; INTERLEAVE-4-NEXT: store <16 x i8> [[TMP23]], ptr [[TMP31]], align 1 @@ -70,7 +70,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; INTERLEAVE-4: vec.epilog.iter.check: ; INTERLEAVE-4-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; INTERLEAVE-4-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; INTERLEAVE-4-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; INTERLEAVE-4: vec.epilog.ph: ; INTERLEAVE-4-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; INTERLEAVE-4-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[N]], 8 @@ -91,7 +91,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4-NEXT: store <8 x i8> [[TMP39]], ptr [[TMP40]], align 1 ; INTERLEAVE-4-NEXT: [[INDEX_NEXT18]] = add nuw i64 [[INDEX12]], 8 ; INTERLEAVE-4-NEXT: [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT18]], [[N_VEC10]] -; INTERLEAVE-4-NEXT: br i1 [[TMP42]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; INTERLEAVE-4-NEXT: br i1 [[TMP42]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; INTERLEAVE-4: vec.epilog.middle.block: ; INTERLEAVE-4-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N]], [[N_VEC10]] ; INTERLEAVE-4-NEXT: br i1 [[CMP_N11]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -109,7 +109,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-4-NEXT: store i8 [[SEL]], ptr [[GEP_DST]], align 1 ; INTERLEAVE-4-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; INTERLEAVE-4-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; INTERLEAVE-4-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; INTERLEAVE-4-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; INTERLEAVE-4: exit: ; INTERLEAVE-4-NEXT: ret void ; @@ -137,7 +137,7 @@ define 
void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-2: vector.body: ; INTERLEAVE-2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]] -; INTERLEAVE-2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16 +; INTERLEAVE-2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; INTERLEAVE-2-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; INTERLEAVE-2-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; INTERLEAVE-2-NEXT: [[TMP7:%.*]] = icmp sgt <16 x i8> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -147,7 +147,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-2-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP7]], <16 x i8> [[BROADCAST_SPLAT]], <16 x i8> [[TMP9]] ; INTERLEAVE-2-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP8]], <16 x i8> [[BROADCAST_SPLAT]], <16 x i8> [[TMP10]] ; INTERLEAVE-2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] -; INTERLEAVE-2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 16 +; INTERLEAVE-2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i64 16 ; INTERLEAVE-2-NEXT: store <16 x i8> [[TMP11]], ptr [[TMP13]], align 1 ; INTERLEAVE-2-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP16]], align 1 ; INTERLEAVE-2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 @@ -158,7 +158,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; INTERLEAVE-2: vec.epilog.iter.check: ; INTERLEAVE-2-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; INTERLEAVE-2-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; INTERLEAVE-2-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; INTERLEAVE-2: vec.epilog.ph: ; INTERLEAVE-2-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; INTERLEAVE-2-NEXT: [[N_MOD_VF7:%.*]] = urem i64 [[N]], 8 @@ -179,7 +179,7 @@ define void @interleave_single_load_store(ptr %src, ptr %dst, i64 %N, i8 %a, i8 ; INTERLEAVE-2-NEXT: store <8 x i8> [[TMP23]], ptr [[TMP24]], align 1 ; INTERLEAVE-2-NEXT: [[INDEX_NEXT16]] = add nuw i64 [[INDEX10]], 8 ; INTERLEAVE-2-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC8]] -; INTERLEAVE-2-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; INTERLEAVE-2-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; INTERLEAVE-2: vec.epilog.middle.block: ; INTERLEAVE-2-NEXT: [[CMP_N9:%.*]] = icmp eq i64 [[N]], [[N_VEC8]] ; INTERLEAVE-2-NEXT: br i1 [[CMP_N9]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll index aa94763b44a30..53cb0653fd241 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaving-reduction.ll @@ -29,9 +29,9 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-4-NEXT: 
[[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] -; INTERLEAVE-4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -; INTERLEAVE-4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -; INTERLEAVE-4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 12 +; INTERLEAVE-4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 +; INTERLEAVE-4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 8 +; INTERLEAVE-4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 12 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 1 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP1]], align 1 ; INTERLEAVE-4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP2]], align 1 @@ -103,7 +103,7 @@ define i32 @interleave_integer_reduction(ptr %src, i64 %N) { ; INTERLEAVE-2-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-2-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; INTERLEAVE-2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[INDEX]] -; INTERLEAVE-2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 +; INTERLEAVE-2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 ; INTERLEAVE-2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 1 ; INTERLEAVE-2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 1 ; INTERLEAVE-2-NEXT: [[TMP2]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll index ee3a4a04566c9..3eb42845bec4a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll @@ -36,12 +36,12 @@ define void @saddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[OFFSET_IDX2]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 8 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 8 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD]], <8 x i16> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[WIDE_LOAD4]], <8 x i16> [[BROADCAST_SPLAT]]) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 8 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 8 ; CHECK-NEXT: store <8 x i16> [[TMP2]], ptr [[NEXT_GEP3]], align 2 ; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr [[TMP4]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 @@ -160,12 +160,12 @@ define void @umin(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK-NEXT: [[INDEX:%.*]] = phi 
i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP1]], align 2 ; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD]], <16 x i8> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.umin.v16i8(<16 x i8> [[WIDE_LOAD3]], <16 x i8> [[BROADCAST_SPLAT]]) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i32 16 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 16 ; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[NEXT_GEP2]], align 2 ; CHECK-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll index 0a9494e4c7ade..c43d62404006d 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll @@ -22,7 +22,7 @@ define void @licm_replicate_call(double %x, ptr %dst) { ; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP3]], [[TMP4]] ; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP3]], [[TMP5]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i32 2 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[TMP8]], i64 2 ; CHECK-NEXT: store <2 x double> [[TMP6]], ptr [[TMP8]], align 8 ; CHECK-NEXT: store <2 x double> [[TMP7]], ptr [[TMP10]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll index c768fec31a497..bdbf08aecf6b3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll @@ -103,7 +103,7 @@ define void @vectorize_without_optsize(ptr %p, i32 %x, i64 %n) { ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 +; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; DEFAULT-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -621,17 +621,17 @@ define void @dont_vectorize_with_minsize() { ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 8 +; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = 
load <8 x i32>, ptr [[TMP1]], align 4 ; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 ; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i32 8 +; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 ; DEFAULT-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 ; DEFAULT-NEXT: [[TMP7:%.*]] = mul nsw <8 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; DEFAULT-NEXT: [[TMP8:%.*]] = mul nsw <8 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD3]] ; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP9]], i32 8 +; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP9]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP9]], align 2 ; DEFAULT-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i16>, ptr [[TMP11]], align 2 ; DEFAULT-NEXT: [[TMP12:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16> @@ -737,17 +737,17 @@ define void @vectorization_forced_minsize_reduce_width() { ; DEFAULT: [[VECTOR_BODY]]: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @B, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 8 +; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4 ; DEFAULT-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 ; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [1000 x i32], ptr @C, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i32 8 +; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP4]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4 ; DEFAULT-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 ; DEFAULT-NEXT: [[TMP7:%.*]] = mul nsw <8 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]] ; DEFAULT-NEXT: [[TMP8:%.*]] = mul nsw <8 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD3]] ; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [1000 x i16], ptr @A, i64 0, i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP9]], i32 8 +; DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP9]], i64 8 ; DEFAULT-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP9]], align 2 ; DEFAULT-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i16>, ptr [[TMP11]], align 2 ; DEFAULT-NEXT: [[TMP12:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll index 0ee6b52a2450b..3142227815383 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-mixed.ll @@ -61,13 +61,13 @@ define i32 @sudot(ptr %a, ptr %b) #0 { ; CHECK-NOI8MM-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ 
[[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP6]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP13]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP13]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP13]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP14:%.*]] = sext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -164,13 +164,13 @@ define i32 @usdot(ptr %a, ptr %b) #0 { ; CHECK-NOI8MM-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP6]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP13]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP13]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP13]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP14:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -223,11 +223,11 @@ define i32 @sudot_neon(ptr %a, ptr %b) #1 { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 16 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = sext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -259,13 +259,13 @@ define i32 @sudot_neon(ptr %a, 
ptr %b) #1 { ; CHECK-NOI8MM-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP8:%.*]] = sext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -318,11 +318,11 @@ define i32 @usdot_neon(ptr %a, ptr %b) #1 { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 16 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -354,13 +354,13 @@ define i32 @usdot_neon(ptr %a, ptr %b) #1 { ; CHECK-NOI8MM-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; CHECK-NOI8MM-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP3:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP4:%.*]] = sext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-NOI8MM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-NOI8MM-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 16 +; CHECK-NOI8MM-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 16 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr 
[[TMP5]], align 1 ; CHECK-NOI8MM-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-NOI8MM-NEXT: [[TMP8:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll index c6c5c5105d540..b2be0e1d7a442 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll @@ -44,11 +44,11 @@ define i32 @dotp(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -249,7 +249,7 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 ; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] -; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP34]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> @@ -513,13 +513,13 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) { ; CHECK-INTERLEAVED: vector.body: ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr 
[[TMP4]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32> @@ -791,10 +791,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = or disjoint i64 [[INDEX]], 3 ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]] ; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]] -; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i8>, ptr [[TMP38]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD9:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i8>, ptr [[TMP16]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = sext <16 x i8> [[WIDE_LOAD9]] to <16 x i32> @@ -805,10 +805,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = sext <16 x i8> [[WIDE_LOAD8]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP21:%.*]] = mul nsw <16 x i32> [[TMP19]], [[TMP17]] ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE11]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI7]], <16 x i32> [[TMP21]]) -; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD12:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD13:%.*]] = load <16 x i8>, ptr [[TMP22]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD14:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD15:%.*]] = load <16 x i8>, ptr [[TMP26]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = sext <16 x i8> [[WIDE_LOAD12]] to <16 x i32> @@ -819,10 +819,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = sext <16 x i8> [[WIDE_LOAD15]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = mul nsw <16 x i32> [[TMP27]], [[TMP25]] ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE17]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI5]], <16 x i32> [[TMP29]]) -; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD18:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD19:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP36:%.*]] = getelementptr inbounds 
i8, ptr [[TMP8]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD20:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD21:%.*]] = load <16 x i8>, ptr [[TMP36]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = sext <16 x i8> [[WIDE_LOAD18]] to <16 x i32> @@ -833,10 +833,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[TMP33:%.*]] = sext <16 x i8> [[WIDE_LOAD21]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP37:%.*]] = mul nsw <16 x i32> [[TMP35]], [[TMP33]] ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP37]]) -; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD24:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD25:%.*]] = load <16 x i8>, ptr [[TMP42]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD26:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD27:%.*]] = load <16 x i8>, ptr [[TMP46]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = sext <16 x i8> [[WIDE_LOAD24]] to <16 x i32> @@ -1811,13 +1811,13 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index ab593f6f8bb6b..71eb5476b7ac5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -44,11 +44,11 @@ define i32 @dotp(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ 
zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX1]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP20]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP20]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP20]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX1]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP28]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP28]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP28]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32> @@ -162,16 +162,16 @@ define i64 @not_dotp_i8_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly %b ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE14:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 32 -; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 48 +; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 32 +; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 48 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1 -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i32 32 -; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i32 48 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i64 32 +; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[NEXT_GEP1]], i64 48 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[NEXT_GEP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i8>, ptr [[TMP18]], align 1 @@ -299,16 +299,16 @@ define i64 @not_dotp_i16_to_i64_has_neon_dotprod(ptr readonly %a, ptr readonly % ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] ; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-INTERLEAVED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX2]] -; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 8 -; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = 
getelementptr i16, ptr [[NEXT_GEP]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 24 +; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 8 +; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 24 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[NEXT_GEP]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i16>, ptr [[TMP0]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i16>, ptr [[TMP10]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i16>, ptr [[TMP11]], align 2 -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 8 -; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 24 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 8 +; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 24 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i16>, ptr [[NEXT_GEP3]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD11:%.*]] = load <8 x i16>, ptr [[TMP18]], align 2 @@ -525,7 +525,7 @@ define i32 @not_dotp_different_types(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30 ; CHECK-INTERLEAVED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31 ; CHECK-INTERLEAVED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] -; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP34:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP32]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP34]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP35:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> @@ -789,13 +789,13 @@ define i32 @not_dotp_not_loop_carried(ptr %a, ptr %b) #0 { ; CHECK-INTERLEAVED: vector.body: ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP10]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP10]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD1]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP17]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP17]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP17]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> 
@@ -1080,10 +1080,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT:    [[TMP9:%.*]] = or disjoint i64 [[INDEX]], 3
 ; CHECK-INTERLEAVED-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP9]]
 ; CHECK-INTERLEAVED-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP9]]
-; CHECK-INTERLEAVED-NEXT:    [[TMP43:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16
+; CHECK-INTERLEAVED-NEXT:    [[TMP43:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD8:%.*]] = load <16 x i8>, ptr [[TMP43]], align 1
-; CHECK-INTERLEAVED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 16
+; CHECK-INTERLEAVED-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 16
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD9:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD10:%.*]] = load <16 x i8>, ptr [[TMP12]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[TMP13:%.*]] = sext <16 x i8> [[WIDE_LOAD9]] to <16 x i32>
@@ -1094,10 +1094,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT:    [[TMP17:%.*]] = sext <16 x i8> [[WIDE_LOAD8]] to <16 x i32>
 ; CHECK-INTERLEAVED-NEXT:    [[TMP18:%.*]] = mul nsw <16 x i32> [[TMP16]], [[TMP17]]
 ; CHECK-INTERLEAVED-NEXT:    [[PARTIAL_REDUCE11]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI7]], <16 x i32> [[TMP18]])
-; CHECK-INTERLEAVED-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 16
+; CHECK-INTERLEAVED-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD12:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD13:%.*]] = load <16 x i8>, ptr [[TMP19]], align 1
-; CHECK-INTERLEAVED-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 16
+; CHECK-INTERLEAVED-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 16
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD14:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD15:%.*]] = load <16 x i8>, ptr [[TMP20]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[TMP21:%.*]] = sext <16 x i8> [[WIDE_LOAD12]] to <16 x i32>
@@ -1108,10 +1108,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT:    [[TMP25:%.*]] = sext <16 x i8> [[WIDE_LOAD15]] to <16 x i32>
 ; CHECK-INTERLEAVED-NEXT:    [[TMP26:%.*]] = mul nsw <16 x i32> [[TMP24]], [[TMP25]]
 ; CHECK-INTERLEAVED-NEXT:    [[PARTIAL_REDUCE17]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI5]], <16 x i32> [[TMP26]])
-; CHECK-INTERLEAVED-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 16
+; CHECK-INTERLEAVED-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 16
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD18:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD19:%.*]] = load <16 x i8>, ptr [[TMP27]], align 1
-; CHECK-INTERLEAVED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 16
+; CHECK-INTERLEAVED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i64 16
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD20:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD21:%.*]] = load <16 x i8>, ptr [[TMP28]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[TMP29:%.*]] = sext <16 x i8> [[WIDE_LOAD18]] to <16 x i32>
@@ -1122,10 +1122,10 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT:    [[TMP33:%.*]] = sext <16 x i8> [[WIDE_LOAD21]] to <16 x i32>
 ; CHECK-INTERLEAVED-NEXT:    [[TMP34:%.*]] = mul nsw <16 x i32> [[TMP48]], [[TMP33]]
 ; CHECK-INTERLEAVED-NEXT:    [[PARTIAL_REDUCE23]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP34]])
-; CHECK-INTERLEAVED-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i32 16
+; CHECK-INTERLEAVED-NEXT:    [[TMP35:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 16
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD24:%.*]] = load <16 x i8>, ptr [[TMP10]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD25:%.*]] = load <16 x i8>, ptr [[TMP35]], align 1
-; CHECK-INTERLEAVED-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 16
+; CHECK-INTERLEAVED-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD26:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD27:%.*]] = load <16 x i8>, ptr [[TMP36]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[TMP37:%.*]] = sext <16 x i8> [[WIDE_LOAD24]] to <16 x i32>
@@ -1450,13 +1450,13 @@ define i32 @not_dotp_extend_user(ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP3]], i32 16
+; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP3]], i64 16
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[TMP2:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
 ; CHECK-INTERLEAVED-NEXT:    [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32>
 ; CHECK-INTERLEAVED-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP8]], i32 16
+; CHECK-INTERLEAVED-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[TMP8]], i64 16
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[TMP6:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32>
@@ -1572,12 +1572,12 @@ define i64 @dotp_cost_disagreement(ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT:    [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i32 16
+; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 16
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[TMP14:%.*]] = add nuw nsw i64 [[INDEX]], 1
 ; CHECK-INTERLEAVED-NEXT:    [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[TMP14]]
-; CHECK-INTERLEAVED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP15]], i32 16
+; CHECK-INTERLEAVED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP15]], i64 16
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP15]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i64>
@@ -1880,7 +1880,7 @@ define i64 @not_dotp_ext_outside_plan(ptr %a, i16 %b, i64 %n) #0 {
 ; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI1:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 8
+; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i64 8
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2
 ; CHECK-INTERLEAVED-NEXT:    [[TMP4:%.*]] = zext <8 x i16> [[WIDE_LOAD]] to <8 x i64>
@@ -2009,7 +2009,7 @@ define i64 @not_dotp_ext_outside_plan2(ptr %a, i16 %b, i64 %n) #0 {
 ; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI1:%.*]] = phi <8 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw i16, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i32 8
+; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds nuw i16, ptr [[TMP1]], i64 8
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2
 ; CHECK-INTERLEAVED-NEXT:    [[TMP4:%.*]] = zext <8 x i16> [[WIDE_LOAD]] to <8 x i64>
@@ -2146,10 +2146,10 @@ define dso_local i32 @not_dotp_vscale1(ptr %a, ptr %b, i32 %n, i64 %cost) #0 {
 ; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE6:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
 ; CHECK-INTERLEAVED-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16
+; CHECK-INTERLEAVED-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
-; CHECK-INTERLEAVED-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i32 16
+; CHECK-INTERLEAVED-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 16
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[NEXT_GEP2]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[TMP13:%.*]] = zext <16 x i8> [[WIDE_LOAD4]] to <16 x i64>
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll
index bd9fae6cd610b..80edfb5f0b6ff 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-interleave.ll
@@ -20,7 +20,7 @@ define i32 @partial_reduce_with_non_constant_start_value(ptr %src, i32 %rdx.star
 ; IC2-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP0]], %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], %[[VECTOR_BODY]] ]
 ; IC2-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], %[[VECTOR_BODY]] ]
 ; IC2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
-; IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16
+; IC2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
 ; IC2-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
 ; IC2-NEXT:    [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
 ; IC2-NEXT:    [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
@@ -73,9 +73,9 @@ define i32 @partial_reduce_with_non_constant_start_value(ptr %src, i32 %rdx.star
 ; IC4-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE8:%.*]], %[[VECTOR_BODY]] ]
 ; IC4-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PARTIAL_REDUCE9:%.*]], %[[VECTOR_BODY]] ]
 ; IC4-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[INDEX]]
-; IC4-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16
-; IC4-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 32
-; IC4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 48
+; IC4-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; IC4-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 32
+; IC4-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 48
 ; IC4-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
 ; IC4-NEXT:    [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
 ; IC4-NEXT:    [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-no-dotprod.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-no-dotprod.ll
index 672d19b1edeba..a439f5189794a 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-no-dotprod.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-no-dotprod.ll
@@ -16,13 +16,13 @@ define i32 @not_dotp(ptr %a, ptr %b) {
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
 ; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i32 16
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i8, ptr [[TMP6]], i64 16
 ; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP6]], align 1
 ; CHECK-NEXT:    [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32>
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
index 6dae09ef97e1c..1f5d5f0ea218e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll
@@ -45,11 +45,11 @@ define i32 @dotp(ptr %a, ptr %b) #0 {
 ; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP7]], i32 16
+; CHECK-INTERLEAVED-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP7]], i64 16
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP14]], i32 16
+; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP14]], i64 16
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD3:%.*]] = load <16 x i8>, ptr [[TMP14]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD3]] to <16 x i32>
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll
index 46ec858d7455c..dd2fe09a9e593 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll
@@ -41,7 +41,7 @@ define i32 @zext_add_reduc_i8_i32_sve(ptr %a) #0 {
 ; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16
+; CHECK-INTERLEAVED-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
 ; CHECK-INTERLEAVED-NEXT:    [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32>
@@ -134,7 +134,7 @@ define i32 @zext_add_reduc_i8_i32_neon(ptr %a) #2 {
 ; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 16
+;
CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> @@ -223,7 +223,7 @@ define i64 @zext_add_reduc_i8_i64(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i64> @@ -317,7 +317,7 @@ define i64 @zext_add_reduc_i16_i64(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP1]], i32 8 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP1]], i64 8 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[TMP1]], align 2 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i16>, ptr [[TMP3]], align 2 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <8 x i16> [[WIDE_LOAD]] to <8 x i64> @@ -413,9 +413,9 @@ define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE8:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE9:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 32 -; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 32 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP1]], i64 48 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 @@ -703,9 +703,9 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] 
; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 32 -; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 32 +; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP1]], i64 48 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 @@ -807,7 +807,7 @@ define i32 @sext_add_reduc_i8_i32(ptr %a) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 16 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sext <16 x i8> [[WIDE_LOAD]] to <16 x i32> @@ -921,7 +921,7 @@ define i32 @add_of_zext_outside_loop(i32 %a, ptr noalias %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[D]], [[VEC_PHI1]] ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; CHECK-INTERLEAVED-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP3]], align 1 ; CHECK-INTERLEAVED-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP5]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP6]] = add <16 x i32> [[VEC_PHI]], [[BROADCAST_SPLAT]] @@ -1042,7 +1042,7 @@ define i32 @add_of_loop_invariant_zext(i32 %a, ptr %b, i8 %c, i32 %d) #0 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE2:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[D]], [[VEC_PHI1]] ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[B]], i32 [[OFFSET_IDX]] -; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 16 ; CHECK-INTERLEAVED-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP4]], align 1 ; CHECK-INTERLEAVED-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP6]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32> @@ -1156,9 +1156,9 @@ define i64 @sext_reduction_i32_to_i64(ptr %arr, i64 %n) #1 { ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE8:%.*]], 
[[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE9:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[INDEX]] -; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 4 -; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 8 -; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 12 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 4 +; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 8 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 12 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP14]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll index 5355a9772ef10..73dbefeb10413 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll @@ -15,7 +15,7 @@ define void @cost_hoisted_vector_code(ptr %p, float %arg) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[P]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP1]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP1]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP1]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP0]], ptr [[TMP2]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll index 68cfc659e1e94..fceab6f823d5a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll @@ -229,9 +229,9 @@ define void @test_load_gep_widen_induction(ptr noalias %dst, ptr noalias %dst2) ; CHECK-NEXT: store ptr null, ptr [[TMP11]], align 8 ; CHECK-NEXT: store ptr null, ptr [[TMP17]], align 8 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr ptr, ptr [[DST2]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr ptr, ptr [[TMP12]], i32 2 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr ptr, ptr [[TMP12]], i32 4 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr ptr, ptr [[TMP12]], i32 6 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr ptr, ptr [[TMP12]], i64 2 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr ptr, ptr [[TMP12]], i64 4 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr ptr, ptr [[TMP12]], i64 6 ; CHECK-NEXT: store <2 x ptr> [[TMP0]], ptr [[TMP12]], align 8 ; CHECK-NEXT: store <2 x ptr> [[TMP1]], ptr [[TMP13]], align 8 ; CHECK-NEXT: store <2 x ptr> [[TMP2]], ptr [[TMP14]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll index 1596b60f48567..bf4ab32fbf9e4 100644 
--- a/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/store-costs-sve.ll @@ -142,7 +142,7 @@ define void @trunc_store(ptr %dst, ptr %src, i16 %x) #1 { ; DEFAULT: vector.body: ; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]] -; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 16 +; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 16 ; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP4]], align 1, !alias.scope [[META9:![0-9]+]], !noalias [[META6]] ; DEFAULT-NEXT: store <16 x i8> [[TMP3]], ptr [[TMP5]], align 1, !alias.scope [[META9]], !noalias [[META6]] ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll index 76a7536501bd6..389f91f878534 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-fixed-width-inorder-core.ll @@ -29,17 +29,17 @@ define void @sve_add(ptr %dst, ptr %a, ptr %b, i64 %n) { ; CHECK-CA510: [[VECTOR_BODY]]: ; CHECK-CA510-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-CA510-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP2]] -; CHECK-CA510-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i32 4 +; CHECK-CA510-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i64 4 ; CHECK-CA510-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-CA510-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-CA510-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP2]] -; CHECK-CA510-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 4 +; CHECK-CA510-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 4 ; CHECK-CA510-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP6]], align 4 ; CHECK-CA510-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 ; CHECK-CA510-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD]] ; CHECK-CA510-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD5]] ; CHECK-CA510-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[TMP2]] -; CHECK-CA510-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i32 4 +; CHECK-CA510-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 4 ; CHECK-CA510-NEXT: store <4 x float> [[TMP9]], ptr [[TMP11]], align 4 ; CHECK-CA510-NEXT: store <4 x float> [[TMP10]], ptr [[TMP13]], align 4 ; CHECK-CA510-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 8 @@ -93,17 +93,17 @@ define void @sve_add(ptr %dst, ptr %a, ptr %b, i64 %n) { ; CHECK-CA520: [[VECTOR_BODY]]: ; CHECK-CA520-NEXT: [[TMP2:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-CA520-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[TMP2]] -; CHECK-CA520-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i32 4 +; CHECK-CA520-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP3]], i64 4 ; CHECK-CA520-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-CA520-NEXT: 
[[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-CA520-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[TMP2]] -; CHECK-CA520-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i32 4 +; CHECK-CA520-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[TMP6]], i64 4 ; CHECK-CA520-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP6]], align 4 ; CHECK-CA520-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 ; CHECK-CA520-NEXT: [[TMP9:%.*]] = fadd fast <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD]] ; CHECK-CA520-NEXT: [[TMP10:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD5]] ; CHECK-CA520-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[TMP2]] -; CHECK-CA520-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i32 4 +; CHECK-CA520-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw float, ptr [[TMP11]], i64 4 ; CHECK-CA520-NEXT: store <4 x float> [[TMP9]], ptr [[TMP11]], align 4 ; CHECK-CA520-NEXT: store <4 x float> [[TMP10]], ptr [[TMP13]], align 4 ; CHECK-CA520-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP2]], 8 @@ -157,17 +157,17 @@ define void @sve_add(ptr %dst, ptr %a, ptr %b, i64 %n) { ; CHECK-CA320: [[VECTOR_BODY]]: ; CHECK-CA320-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-CA320-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]] -; CHECK-CA320-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i32 4 +; CHECK-CA320-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 4 ; CHECK-CA320-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-CA320-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-CA320-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]] -; CHECK-CA320-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i32 4 +; CHECK-CA320-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[TMP4]], i64 4 ; CHECK-CA320-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-CA320-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-CA320-NEXT: [[TMP6:%.*]] = fadd fast <4 x float> [[WIDE_LOAD6]], [[WIDE_LOAD]] ; CHECK-CA320-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD5]] ; CHECK-CA320-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[DST]], i64 [[INDEX]] -; CHECK-CA320-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i32 4 +; CHECK-CA320-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw float, ptr [[TMP8]], i64 4 ; CHECK-CA320-NEXT: store <4 x float> [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-CA320-NEXT: store <4 x float> [[TMP7]], ptr [[TMP9]], align 4 ; CHECK-CA320-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll index 2a19402347e40..6eb8242bf7975 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-cost.ll @@ -178,9 +178,9 @@ define void @test_interleave_store_one_constant(ptr noalias %src, ptr noalias %d ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 6 ; 
CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP13]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP13]], i32 4 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP13]], i32 6 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP13]], i64 2 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP13]], i64 4 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP13]], i64 6 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP13]], align 8 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP15]], align 8 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP16]], align 8 @@ -323,9 +323,9 @@ define void @single_fmul_used_by_each_member(ptr noalias %A, ptr noalias %B, ptr ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[INDEX]], 6 ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i32 2 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i32 4 -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i32 6 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i64 2 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i64 4 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i64 6 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP23]], align 8 ; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <2 x double>, ptr [[TMP25]], align 8 ; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <2 x double>, ptr [[TMP26]], align 8 @@ -456,8 +456,9 @@ define void @test_interleave_after_narrowing(i32 %n, ptr %x, ptr noalias %y) { ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[EXIT:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll index 46b0ebdd2fa62..99c735f777b66 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-multi-block.ll @@ -88,7 +88,7 @@ define void @load_store_interleave_group_block_invar_cond(ptr noalias %data, ptr ; VF2IC2-NEXT: br label %[[PRED_STORE_CONTINUE11]] ; VF2IC2: [[PRED_STORE_CONTINUE11]]: ; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[DST_1]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 2 +; VF2IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 2 ; VF2IC2-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP5]], align 1 ; VF2IC2-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP6]], align 1 ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -199,7 +199,7 @@ define void @load_store_interleave_group_block_var_cond(ptr noalias %data, ptr % ; VF2IC2-NEXT: [[INTERLEAVED_VEC5:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> poison, <4 x i32> ; VF2IC2-NEXT: store <4 x i64> [[INTERLEAVED_VEC5]], ptr [[TMP4]], align 
8 ; VF2IC2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[MASKS]], i64 [[INDEX]] -; VF2IC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 2 +; VF2IC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 2 ; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP7]], align 1 ; VF2IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP8]], align 1 ; VF2IC2-NEXT: [[TMP9:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll index d290f2d4f5bc3..b14b1783c97e3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-unroll.ll @@ -62,7 +62,7 @@ define void @test_2xi64_with_wide_load(ptr noalias %data, ptr noalias %factor) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[FACTOR]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 ; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = shl nsw i64 [[INDEX]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll index 44b4e5a8c2bc7..4ede21040f393 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-loop-backedge-elimination-epilogue.ll @@ -17,9 +17,9 @@ define void @test_remove_vector_loop_region_epilogue(ptr %dst, i1 %c) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TC]], [[N_MOD_VF]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i32 16 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i32 32 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i32 48 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 16 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 48 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[DST]], align 4 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP2]], align 4 ; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP3]], align 4 @@ -30,7 +30,7 @@ define void @test_remove_vector_loop_region_epilogue(ptr %dst, i1 %c) { ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF0:![0-9]+]] ; CHECK: [[VEC_EPILOG_PH]]: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, 
%[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] @@ -49,7 +49,7 @@ define void @test_remove_vector_loop_region_epilogue(ptr %dst, i1 %c) { ; CHECK-NEXT: store i8 0, ptr [[GEP]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[TC]] -; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP1:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; @@ -69,7 +69,8 @@ exit: ret void } ;. -; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; CHECK: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"} -; CHECK: [[META2]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[PROF0]] = !{!"branch_weights", i32 8, i32 56} +; CHECK: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]], [[META3:![0-9]+]]} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1} ;. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll index 2abc787061b53..ec874d0b48030 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll @@ -11,14 +11,14 @@ define void @vector_reverse_f64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-LABEL: vector_reverse_f64 ; CHECK-LABEL: vector.body -; CHECK: %[[GEP:.*]] = getelementptr inbounds double, ptr %{{.*}}, i32 0 -; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds double, ptr %[[GEP]], i32 -7 +; CHECK: %[[GEP:.*]] = getelementptr inbounds double, ptr %{{.*}}, i64 0 +; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds double, ptr %[[GEP]], i64 -7 ; CHECK-NEXT: %[[WIDE:.*]] = load <8 x double>, ptr %[[GEP1]], align 8 ; CHECK-NEXT: %[[REVERSE:.*]] = shufflevector <8 x double> %[[WIDE]], <8 x double> poison, <8 x i32> ; CHECK-NEXT: %[[FADD:.*]] = fadd <8 x double> %[[REVERSE]] ; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds double, ptr {{.*}}, i64 {{.*}} -; CHECK-NEXT: %[[GEP3:.*]] = getelementptr inbounds double, ptr %[[GEP2]], i32 0 -; CHECK-NEXT: %[[GEP4:.*]] = getelementptr inbounds double, ptr %[[GEP3]], i32 -7 +; CHECK-NEXT: %[[GEP3:.*]] = getelementptr inbounds double, ptr %[[GEP2]], i64 0 +; CHECK-NEXT: %[[GEP4:.*]] = getelementptr inbounds double, ptr %[[GEP3]], i64 -7 ; CHECK-NEXT: %[[REVERSE6:.*]] = shufflevector <8 x double> %[[FADD]], <8 x double> poison, <8 x i32> ; CHECK-NEXT: store <8 x double> %[[REVERSE6]], ptr %[[GEP4]], align 8 @@ -44,14 +44,14 @@ for.body: ; preds = %entry, %for.body define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 { ; CHECK-LABEL: vector_reverse_i64 ; CHECK-LABEL: vector.body -; CHECK: %[[GEP:.*]] = getelementptr inbounds i64, ptr %{{.*}}, i32 0 -; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds i64, ptr %[[GEP]], i32 -7 +; CHECK: %[[GEP:.*]] = getelementptr inbounds i64, ptr %{{.*}}, i64 0 +; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds i64, ptr %[[GEP]], i64 -7 ; CHECK-NEXT: %[[WIDE:.*]] = load <8 x i64>, ptr %[[GEP1]], align 8 ; CHECK-NEXT: %[[REVERSE:.*]] = shufflevector <8 x i64> %[[WIDE]], <8 x i64> poison, <8 x i32> ; CHECK-NEXT: %[[FADD:.*]] = add <8 x i64> %[[REVERSE]] ; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds i64, ptr {{.*}}, i64 {{.*}} -; CHECK-NEXT: %[[GEP3:.*]] = getelementptr inbounds i64, ptr %[[GEP2]], i32 0 -; CHECK-NEXT: %[[GEP4:.*]] = getelementptr 
inbounds i64, ptr %[[GEP3]], i32 -7 +; CHECK-NEXT: %[[GEP3:.*]] = getelementptr inbounds i64, ptr %[[GEP2]], i64 0 +; CHECK-NEXT: %[[GEP4:.*]] = getelementptr inbounds i64, ptr %[[GEP3]], i64 -7 ; CHECK-NEXT: %[[REVERSE6:.*]] = shufflevector <8 x i64> %[[FADD]], <8 x i64> poison, <8 x i32> ; CHECK-NEXT: store <8 x i64> %[[REVERSE6]], ptr %[[GEP4]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll index 7afa8ce998121..e05332abcee61 100644 --- a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll +++ b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll @@ -22,7 +22,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll index abbd176a1df6e..478c9c1141949 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll @@ -51,17 +51,17 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[STEP_ADD_10:%.*]] = add <2 x i64> [[STEP_ADD_9]], splat (i64 2) ; CHECK-NEXT: [[STEP_ADD_11:%.*]] = add <2 x i64> [[STEP_ADD_10]], splat (i64 2) ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2 -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 4 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 6 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 10 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 12 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 14 -; CHECK-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 16 -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 18 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 20 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 22 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 2 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 4 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 6 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 8 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 10 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 12 +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 14 +; CHECK-NEXT: [[TMP68:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 16 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 18 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 20 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i8, 
ptr [[NEXT_GEP]], i64 22 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1 ; CHECK-NEXT: [[WIDE_LOAD25:%.*]] = load <2 x i8>, ptr [[TMP12]], align 1 ; CHECK-NEXT: [[WIDE_LOAD26:%.*]] = load <2 x i8>, ptr [[TMP13]], align 1 @@ -193,6 +193,7 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) { ; CHECK-NEXT: [[CMP_I166_I:%.*]] = icmp ult ptr [[PTR_IV]], [[END]] ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i64 [[IV]], [[N]] ; CHECK-NEXT: [[AND:%.*]] = select i1 [[CMP_I166_I]], i1 [[CMP2]], i1 false +; CHECK-NEXT: br i1 [[AND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP52]], %[[MIDDLE_BLOCK]] ], [ [[TMP55]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: [[RES:%.*]] = icmp eq i64 [[RED_NEXT_LCSSA]], 0 @@ -226,4 +227,6 @@ exit: ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} ; CHECK: [[PROF3]] = !{!"branch_weights", i32 2, i32 22} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll index 7677c9666455a..f1fbf1dd5d942 100644 --- a/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/optimal-epilog-vectorization.ll @@ -22,13 +22,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-TWO-CHECK: [[VECTOR_BODY]]: ; VF-TWO-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF-TWO-CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[BB]], i64 [[INDEX]] -; VF-TWO-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 4 -; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 8 -; VF-TWO-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 12 -; VF-TWO-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 16 -; VF-TWO-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 20 -; VF-TWO-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 24 -; VF-TWO-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 28 +; VF-TWO-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 4 +; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 8 +; VF-TWO-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 12 +; VF-TWO-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 16 +; VF-TWO-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 20 +; VF-TWO-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 24 +; VF-TWO-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 28 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP17]], align 4 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP18]], align 4 @@ -38,13 +38,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP22]], 
align 4 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP23]], align 4 ; VF-TWO-CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[CC]], i64 [[INDEX]] -; VF-TWO-CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 4 -; VF-TWO-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 8 -; VF-TWO-CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 12 -; VF-TWO-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 16 -; VF-TWO-CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 20 -; VF-TWO-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 24 -; VF-TWO-CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 28 +; VF-TWO-CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 4 +; VF-TWO-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 8 +; VF-TWO-CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 12 +; VF-TWO-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 16 +; VF-TWO-CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 20 +; VF-TWO-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 24 +; VF-TWO-CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 28 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP24]], align 4 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP33]], align 4 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP34]], align 4 @@ -62,13 +62,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-TWO-CHECK-NEXT: [[TMP46:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD15]] ; VF-TWO-CHECK-NEXT: [[TMP47:%.*]] = fadd fast <4 x float> [[WIDE_LOAD8]], [[WIDE_LOAD16]] ; VF-TWO-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[AA]], i64 [[INDEX]] -; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 4 -; VF-TWO-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 8 -; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 12 -; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 16 -; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 20 -; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 24 -; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 28 +; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 4 +; VF-TWO-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 8 +; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 12 +; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 16 +; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 20 +; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 24 +; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 28 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP40]], ptr [[TMP48]], align 4 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP41]], ptr [[TMP57]], align 4 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP42]], ptr [[TMP58]], align 4 @@ -124,13 
+124,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-FOUR-CHECK: [[VECTOR_BODY]]: ; VF-FOUR-CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF-FOUR-CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[BB]], i64 [[INDEX]] -; VF-FOUR-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 4 -; VF-FOUR-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 8 -; VF-FOUR-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 12 -; VF-FOUR-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 16 -; VF-FOUR-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 20 -; VF-FOUR-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 24 -; VF-FOUR-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i32 28 +; VF-FOUR-CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 4 +; VF-FOUR-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 8 +; VF-FOUR-CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 12 +; VF-FOUR-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 16 +; VF-FOUR-CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 20 +; VF-FOUR-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 24 +; VF-FOUR-CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 28 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP17]], align 4 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP18]], align 4 @@ -140,13 +140,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP22]], align 4 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP23]], align 4 ; VF-FOUR-CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds float, ptr [[CC]], i64 [[INDEX]] -; VF-FOUR-CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 4 -; VF-FOUR-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 8 -; VF-FOUR-CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 12 -; VF-FOUR-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 16 -; VF-FOUR-CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 20 -; VF-FOUR-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 24 -; VF-FOUR-CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i32 28 +; VF-FOUR-CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 4 +; VF-FOUR-CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 8 +; VF-FOUR-CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 12 +; VF-FOUR-CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 16 +; VF-FOUR-CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 20 +; VF-FOUR-CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 24 +; VF-FOUR-CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds float, ptr [[TMP24]], i64 28 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP24]], align 4 ; 
VF-FOUR-CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x float>, ptr [[TMP33]], align 4 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x float>, ptr [[TMP34]], align 4 @@ -164,13 +164,13 @@ define void @f1(ptr noalias %aa, ptr noalias %bb, ptr noalias %cc, i32 %N) { ; VF-FOUR-CHECK-NEXT: [[TMP46:%.*]] = fadd fast <4 x float> [[WIDE_LOAD7]], [[WIDE_LOAD15]] ; VF-FOUR-CHECK-NEXT: [[TMP47:%.*]] = fadd fast <4 x float> [[WIDE_LOAD8]], [[WIDE_LOAD16]] ; VF-FOUR-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[AA]], i64 [[INDEX]] -; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 4 -; VF-FOUR-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 8 -; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 12 -; VF-FOUR-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 16 -; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 20 -; VF-FOUR-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 24 -; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 28 +; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 4 +; VF-FOUR-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 8 +; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 12 +; VF-FOUR-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 16 +; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 20 +; VF-FOUR-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 24 +; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 28 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP40]], ptr [[TMP48]], align 4 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP41]], ptr [[TMP57]], align 4 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP42]], ptr [[TMP58]], align 4 @@ -261,22 +261,22 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-TWO-CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP24]], [[N]] ; VF-TWO-CHECK-NEXT: [[TMP40:%.*]] = sext i32 [[TMP32]] to i64 ; VF-TWO-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP40]] -; VF-TWO-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 0 -; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP56]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -4 -; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP58]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -8 -; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP60]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -12 -; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP62]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -16 -; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP64]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -20 -; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP66]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -24 -; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = 
getelementptr inbounds float, ptr [[TMP68]], i32 -3 -; VF-TWO-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -28 -; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP70]], i32 -3 +; VF-TWO-CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 0 +; VF-TWO-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -4 +; VF-TWO-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -8 +; VF-TWO-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -12 +; VF-TWO-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -16 +; VF-TWO-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -20 +; VF-TWO-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -24 +; VF-TWO-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 -3 +; VF-TWO-CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -28 +; VF-TWO-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 -3 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP57]], align 4 ; VF-TWO-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP59]], align 4 @@ -302,13 +302,13 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) { ; VF-TWO-CHECK-NEXT: [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], splat (float 1.000000e+00) ; VF-TWO-CHECK-NEXT: [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], splat (float 1.000000e+00) ; VF-TWO-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; VF-TWO-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 4 -; VF-TWO-CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 8 -; VF-TWO-CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 12 -; VF-TWO-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 16 -; VF-TWO-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 20 -; VF-TWO-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 24 -; VF-TWO-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 28 +; VF-TWO-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 4 +; VF-TWO-CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 8 +; VF-TWO-CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 12 +; VF-TWO-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 16 +; VF-TWO-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 20 +; VF-TWO-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 24 +; VF-TWO-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds 
float, ptr [[TMP80]], i64 28
 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP72]], ptr [[TMP80]], align 4
 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP73]], ptr [[TMP89]], align 4
 ; VF-TWO-CHECK-NEXT: store <4 x float> [[TMP74]], ptr [[TMP90]], align 4
@@ -340,8 +340,8 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) {
 ; VF-TWO-CHECK-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], [[N]]
 ; VF-TWO-CHECK-NEXT: [[TMP101:%.*]] = sext i32 [[TMP100]] to i64
 ; VF-TWO-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP101]]
-; VF-TWO-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i32 0
-; VF-TWO-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP103]], i32 -1
+; VF-TWO-CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i64 0
+; VF-TWO-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP50]], i64 -1
 ; VF-TWO-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <2 x float>, ptr [[TMP104]], align 4
 ; VF-TWO-CHECK-NEXT: [[REVERSE24:%.*]] = shufflevector <2 x float> [[WIDE_LOAD23]], <2 x float> poison, <2 x i32>
 ; VF-TWO-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <2 x float> [[REVERSE24]], splat (float 1.000000e+00)
@@ -384,22 +384,22 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) {
 ; VF-FOUR-CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP24]], [[N]]
 ; VF-FOUR-CHECK-NEXT: [[TMP40:%.*]] = sext i32 [[TMP32]] to i64
 ; VF-FOUR-CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP40]]
-; VF-FOUR-CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 0
-; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP56]], i32 -3
-; VF-FOUR-CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -4
-; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP58]], i32 -3
-; VF-FOUR-CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -8
-; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP60]], i32 -3
-; VF-FOUR-CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -12
-; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP62]], i32 -3
-; VF-FOUR-CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -16
-; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP64]], i32 -3
-; VF-FOUR-CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -20
-; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP66]], i32 -3
-; VF-FOUR-CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -24
-; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP68]], i32 -3
-; VF-FOUR-CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 -28
-; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP70]], i32 -3
+; VF-FOUR-CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 0
+; VF-FOUR-CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds float, ptr [[TMP12]], i64 -3
+; VF-FOUR-CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -4
+; VF-FOUR-CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 -3
+; VF-FOUR-CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -8
+; VF-FOUR-CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 -3
+; VF-FOUR-CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -12
+; VF-FOUR-CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds float, ptr [[TMP18]], i64 -3
+; VF-FOUR-CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -16
+; VF-FOUR-CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 -3
+; VF-FOUR-CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -20
+; VF-FOUR-CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i64 -3
+; VF-FOUR-CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -24
+; VF-FOUR-CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 -3
+; VF-FOUR-CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 -28
+; VF-FOUR-CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 -3
 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP57]], align 4
 ; VF-FOUR-CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32>
 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP59]], align 4
@@ -425,13 +425,13 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) {
 ; VF-FOUR-CHECK-NEXT: [[TMP78:%.*]] = fadd fast <4 x float> [[REVERSE13]], splat (float 1.000000e+00)
 ; VF-FOUR-CHECK-NEXT: [[TMP79:%.*]] = fadd fast <4 x float> [[REVERSE15]], splat (float 1.000000e+00)
 ; VF-FOUR-CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; VF-FOUR-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 4
-; VF-FOUR-CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 8
-; VF-FOUR-CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 12
-; VF-FOUR-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 16
-; VF-FOUR-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 20
-; VF-FOUR-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 24
-; VF-FOUR-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i32 28
+; VF-FOUR-CHECK-NEXT: [[TMP89:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 4
+; VF-FOUR-CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 8
+; VF-FOUR-CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 12
+; VF-FOUR-CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 16
+; VF-FOUR-CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 20
+; VF-FOUR-CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 24
+; VF-FOUR-CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds float, ptr [[TMP80]], i64 28
 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP72]], ptr [[TMP80]], align 4
 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP73]], ptr [[TMP89]], align 4
 ; VF-FOUR-CHECK-NEXT: store <4 x float> [[TMP74]], ptr [[TMP90]], align 4
@@ -463,8 +463,8 @@ define void @f2(ptr noalias %A, ptr noalias %B, i32 %n) {
 ; VF-FOUR-CHECK-NEXT: [[TMP100:%.*]] = add i32 [[TMP99]], [[N]]
 ; VF-FOUR-CHECK-NEXT: [[TMP101:%.*]] = sext i32 [[TMP100]] to i64
 ; VF-FOUR-CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP101]]
-; VF-FOUR-CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i32 0
-; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP103]], i32 -3
+; VF-FOUR-CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[TMP102]], i64 0
+; VF-FOUR-CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds float, ptr [[TMP50]], i64 -3
 ; VF-FOUR-CHECK-NEXT: [[WIDE_LOAD23:%.*]] = load <4 x float>, ptr [[TMP104]], align 4
 ; VF-FOUR-CHECK-NEXT: [[REVERSE24:%.*]] = shufflevector <4 x float> [[WIDE_LOAD23]], <4 x float> poison, <4 x i32>
 ; VF-FOUR-CHECK-NEXT: [[TMP105:%.*]] = fadd fast <4 x float> [[REVERSE24]], splat (float 1.000000e+00)
diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
index d82a3cde4639a..dc9c154b3fe05 100644
--- a/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
+++ b/llvm/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
@@ -34,13 +34,13 @@ define void @test(ptr %arr, i32 %len) {
 ; CHECK-NEXT: [[VEC_PHI7:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP18:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi <2 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[ARR]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 2
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 4
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 6
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 8
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 10
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 12
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 14
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 2
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 6
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 10
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 12
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 14
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP3]], align 8
 ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <2 x double>, ptr [[TMP5]], align 8
 ; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <2 x double>, ptr [[TMP6]], align 8
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll
index 8d3026e63748a..1ae1ba6795c01 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/partial-reduce-dot-product.ll
@@ -87,13 +87,13 @@ define i32 @vqdot(ptr %a, ptr %b) #0 {
 ; FIXED-V-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-V-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-V-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8
+; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
 ; FIXED-V-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
 ; FIXED-V-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
 ; FIXED-V-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[WIDE_LOAD]] to <8 x i32>
 ; FIXED-V-NEXT: [[TMP4:%.*]] = sext <8 x i8> [[WIDE_LOAD2]] to <8 x i32>
 ; FIXED-V-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
-; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8
+; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8
 ; FIXED-V-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1
 ; FIXED-V-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1
 ; FIXED-V-NEXT: [[TMP8:%.*]] = sext <8 x i8> [[WIDE_LOAD3]] to <8 x i32>
@@ -123,11 +123,11 @@ define i32 @vqdot(ptr %a, ptr %b) #0 {
 ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-ZVQDOTQ-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8
+; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
 ; FIXED-ZVQDOTQ-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
-; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8
+; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8
 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1
 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1
 ; FIXED-ZVQDOTQ-NEXT: [[TMP8:%.*]] = sext <8 x i8> [[WIDE_LOAD3]] to <8 x i32>
@@ -280,13 +280,13 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 {
 ; FIXED-V-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-V-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-V-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8
+; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
 ; FIXED-V-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
 ; FIXED-V-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
 ; FIXED-V-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i32>
 ; FIXED-V-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[WIDE_LOAD2]] to <8 x i32>
 ; FIXED-V-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
-; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8
+; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8
 ; FIXED-V-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1
 ; FIXED-V-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1
 ; FIXED-V-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[WIDE_LOAD3]] to <8 x i32>
@@ -316,11 +316,11 @@ define i32 @vqdotu(ptr %a, ptr %b) #0 {
 ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-ZVQDOTQ-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8
+; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
 ; FIXED-ZVQDOTQ-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
-; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8
+; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8
 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1
 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1
 ; FIXED-ZVQDOTQ-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[WIDE_LOAD3]] to <8 x i32>
@@ -473,13 +473,13 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 {
 ; FIXED-V-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-V-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-V-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8
+; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
 ; FIXED-V-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
 ; FIXED-V-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
 ; FIXED-V-NEXT: [[TMP3:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i32>
 ; FIXED-V-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[WIDE_LOAD2]] to <8 x i32>
 ; FIXED-V-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
-; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8
+; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8
 ; FIXED-V-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1
 ; FIXED-V-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1
 ; FIXED-V-NEXT: [[TMP8:%.*]] = sext <8 x i8> [[WIDE_LOAD3]] to <8 x i32>
@@ -509,11 +509,11 @@ define i32 @vqdotsu(ptr %a, ptr %b) #0 {
 ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-ZVQDOTQ-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8
+; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
 ; FIXED-ZVQDOTQ-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
-; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8
+; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8
 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1
 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1
 ; FIXED-ZVQDOTQ-NEXT: [[TMP9:%.*]] = sext <8 x i8> [[WIDE_LOAD3]] to <8 x i32>
@@ -665,13 +665,13 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 {
 ; FIXED-V-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-V-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-V-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8
+; FIXED-V-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
 ; FIXED-V-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
 ; FIXED-V-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
 ; FIXED-V-NEXT: [[TMP3:%.*]] = sext <8 x i8> [[WIDE_LOAD]] to <8 x i32>
 ; FIXED-V-NEXT: [[TMP4:%.*]] = sext <8 x i8> [[WIDE_LOAD2]] to <8 x i32>
 ; FIXED-V-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
-; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8
+; FIXED-V-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8
 ; FIXED-V-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1
 ; FIXED-V-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1
 ; FIXED-V-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[WIDE_LOAD3]] to <8 x i32>
@@ -701,11 +701,11 @@ define i32 @vqdotsu2(ptr %a, ptr %b) #0 {
 ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-ZVQDOTQ-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE5:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-ZVQDOTQ-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8
+; FIXED-ZVQDOTQ-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
 ; FIXED-ZVQDOTQ-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
-; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i32 8
+; FIXED-ZVQDOTQ-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP5]], i64 8
 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x i8>, ptr [[TMP5]], align 1
 ; FIXED-ZVQDOTQ-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i8>, ptr [[TMP7]], align 1
 ; FIXED-ZVQDOTQ-NEXT: [[TMP9:%.*]] = zext <8 x i8> [[WIDE_LOAD3]] to <8 x i32>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll
index 735fb769de8b9..671a929e6fa35 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll
@@ -69,7 +69,7 @@ define i32 @sub(ptr %a, i64 %n) {
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP4]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP4]]
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP3]])
 ; CHECK-NEXT: br label %[[EXIT:.*]]
@@ -116,7 +116,7 @@ define i32 @addsub(ptr %a, ptr %b, i64 %n) {
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP6]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP6]]
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP5]])
 ; CHECK-NEXT: br label %[[EXIT:.*]]
@@ -166,7 +166,7 @@ define i32 @or(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( [[TMP8]])
 ; CHECK-NEXT: br label %[[FOR_END:.*]]
@@ -212,7 +212,7 @@ define i32 @and(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32( [[TMP8]])
 ; CHECK-NEXT: br label %[[FOR_END:.*]]
@@ -258,7 +258,7 @@ define i32 @xor(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP8]])
 ; CHECK-NEXT: br label %[[FOR_END:.*]]
@@ -305,7 +305,7 @@ define i32 @smin(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]]
 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32( [[TMP9]])
 ; CHECK-NEXT: br label %[[FOR_END:.*]]
@@ -353,7 +353,7 @@ define i32 @umax(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]]
 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP9]])
 ; CHECK-NEXT: br label %[[FOR_END:.*]]
@@ -400,7 +400,7 @@ define float @fadd_fast(ptr noalias nocapture readonly %a, i64 %n) {
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP11:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, [[TMP8]])
 ; CHECK-NEXT: br label %[[FOR_END:.*]]
@@ -444,7 +444,7 @@ define half @fadd_fast_half_zvfh(ptr noalias nocapture readonly %a, i64 %n) "tar
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP11:%.*]] = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, [[TMP8]])
 ; CHECK-NEXT: br label %[[FOR_END:.*]]
@@ -483,14 +483,14 @@ define half @fadd_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) "
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x half> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x half> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i32 16
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i64 16
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x half>, ptr [[TMP0]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x half>, ptr [[TMP1]], align 4
 ; CHECK-NEXT: [[TMP2]] = fadd fast <16 x half> [[WIDE_LOAD]], [[VEC_PHI]]
 ; CHECK-NEXT: [[TMP3]] = fadd fast <16 x half> [[WIDE_LOAD2]], [[VEC_PHI1]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x half> [[TMP3]], [[TMP2]]
 ; CHECK-NEXT: [[TMP5:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> [[BIN_RDX]])
@@ -508,7 +508,7 @@ define half @fadd_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) "
 ; CHECK-NEXT: [[ADD]] = fadd fast half [[TMP6]], [[SUM_07]]
 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; CHECK: [[FOR_END]]:
 ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi half [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: ret half [[ADD_LCSSA]]
@@ -545,14 +545,14 @@ define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) "targ
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x bfloat> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x bfloat> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds bfloat, ptr [[TMP0]], i32 16
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds bfloat, ptr [[TMP0]], i64 16
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x bfloat>, ptr [[TMP0]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x bfloat>, ptr [[TMP1]], align 4
 ; CHECK-NEXT: [[TMP2]] = fadd fast <16 x bfloat> [[WIDE_LOAD]], [[VEC_PHI]]
 ; CHECK-NEXT: [[TMP3]] = fadd fast <16 x bfloat> [[WIDE_LOAD2]], [[VEC_PHI1]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x bfloat> [[TMP3]], [[TMP2]]
 ; CHECK-NEXT: [[TMP5:%.*]] = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> [[BIN_RDX]])
@@ -570,7 +570,7 @@ define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) "targ
 ; CHECK-NEXT: [[ADD]] = fadd fast bfloat [[TMP6]], [[SUM_07]]
 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
 ; CHECK: [[FOR_END]]:
 ; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi bfloat [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: ret bfloat [[ADD_LCSSA]]
@@ -615,7 +615,7 @@ define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]]
 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP12:%.*]] = call float @llvm.vector.reduce.fmin.nxv4f32( [[TMP9]])
 ; CHECK-NEXT: br label %[[FOR_END:.*]]
@@ -661,7 +661,7 @@ define half @fmin_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]]
 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP12:%.*]] = call half @llvm.vector.reduce.fmin.nxv8f16( [[TMP9]])
 ; CHECK-NEXT: br label %[[FOR_END:.*]]
@@ -707,7 +707,7 @@ define bfloat @fmin_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]]
 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP12:%.*]] = call bfloat @llvm.vector.reduce.fmin.nxv8bf16( [[TMP9]])
 ; CHECK-NEXT: br label %[[FOR_END:.*]]
@@ -755,7 +755,7 @@ define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]]
 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP12:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32( [[TMP9]])
 ; CHECK-NEXT: br label %[[FOR_END:.*]]
@@ -801,7 +801,7 @@ define half @fmax_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]]
 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP12:%.*]] = call fast half @llvm.vector.reduce.fmax.nxv8f16( [[TMP9]])
 ; CHECK-NEXT: br label %[[FOR_END:.*]]
@@ -847,7 +847,7 @@ define bfloat @fmax_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]]
 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP12:%.*]] = call fast bfloat @llvm.vector.reduce.fmax.nxv8bf16( [[TMP9]])
 ; CHECK-NEXT: br label %[[FOR_END:.*]]
@@ -891,14 +891,14 @@ define i32 @mul(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ , %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ splat (i32 1), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 8
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4
 ; CHECK-NEXT: [[TMP2]] = mul <8 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
 ; CHECK-NEXT: [[TMP3]] = mul <8 x i32> [[WIDE_LOAD2]], [[VEC_PHI1]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = mul <8 x i32> [[TMP3]], [[TMP2]]
 ; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[BIN_RDX]])
@@ -916,7 +916,7 @@ define i32 @mul(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
 ; CHECK-NEXT: [[MUL]] = mul nsw i32 [[TMP6]], [[SUM_07]]
 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
 ; CHECK: [[FOR_END]]:
 ; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], %[[FOR_BODY]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: ret i32 [[MUL_LCSSA]]
@@ -963,7 +963,7 @@ define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture re
 ; CHECK-NEXT: [[TMP5]] = mul <8 x i32> [[WIDE_LOAD1]], [[VEC_PHI]]
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP5]])
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
@@ -986,7 +986,7 @@ define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture re
 ; CHECK-NEXT: [[MUL]] = mul nsw i32 [[TMP9]], [[SUM]]
 ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
 ; CHECK: [[FOR_END]]:
 ; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: ret i32 [[MUL_LCSSA]]
@@ -1036,7 +1036,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n) {
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]]
 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP16:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP9]])
 ; CHECK-NEXT: br label %[[FOR_END:.*]]
@@ -1084,7 +1084,7 @@ define half @fmuladd_f16_zvfh(ptr %a, ptr %b, i64 %n) "target-features"="+zvfh"
 ; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]]
 ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP10]]
 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[TMP16:%.*]] = call reassoc half @llvm.vector.reduce.fadd.nxv8f16(half 0xH8000, [[TMP9]])
 ; CHECK-NEXT: br label %[[FOR_END:.*]]
@@ -1128,18 +1128,18 @@ define half @fmuladd_f16_zvfhmin(ptr %a, ptr %b, i64 %n) "target-features"="+zvf
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x half> [ , %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x half> [ splat (half 0xH8000), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i32 16
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i64 16
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x half>, ptr [[TMP0]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x half>, ptr [[TMP1]], align 4
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds half, ptr [[TMP2]], i32 16
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds half, ptr [[TMP2]], i64 16
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x half>, ptr [[TMP2]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x half>, ptr [[TMP3]], align 4
 ; CHECK-NEXT: [[TMP4]] = call reassoc <16 x half> @llvm.fmuladd.v16f16(<16 x half> [[WIDE_LOAD]], <16 x half> [[WIDE_LOAD3]], <16 x half> [[VEC_PHI]])
 ; CHECK-NEXT: [[TMP5]] = call reassoc <16 x half> @llvm.fmuladd.v16f16(<16 x half> [[WIDE_LOAD2]], <16 x half> [[WIDE_LOAD4]], <16 x half> [[VEC_PHI1]])
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc <16 x half> [[TMP5]], [[TMP4]]
 ; CHECK-NEXT: [[TMP7:%.*]] = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> [[BIN_RDX]])
@@ -1159,7 +1159,7 @@ define half @fmuladd_f16_zvfhmin(ptr %a, ptr %b, i64 %n) "target-features"="+zvf
 ; CHECK-NEXT: [[MULADD]] = tail call reassoc half @llvm.fmuladd.f16(half [[TMP8]], half [[TMP9]], half [[SUM_07]])
 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
 ; CHECK: [[FOR_END]]:
 ; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi half [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: ret half [[MULADD_LCSSA]]
@@ -1198,18 +1198,18 @@ define bfloat @fmuladd_bf16(ptr %a, ptr %b, i64 %n) "target-features"="+zvfbfmin
 ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x bfloat> [ , %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x bfloat> [ splat (bfloat 0xR8000), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds bfloat, ptr [[TMP0]], i32 16
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds bfloat, ptr [[TMP0]], i64 16
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x bfloat>, ptr [[TMP0]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x bfloat>, ptr [[TMP1]], align 4
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds bfloat, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds bfloat, ptr [[TMP2]], i32 16
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds bfloat, ptr [[TMP2]], i64 16
 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x bfloat>, ptr [[TMP2]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x bfloat>, ptr [[TMP3]], align 4
 ; CHECK-NEXT: [[TMP4]] = call reassoc <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> [[WIDE_LOAD]], <16 x bfloat> [[WIDE_LOAD3]], <16 x bfloat> [[VEC_PHI]])
 ; CHECK-NEXT: [[TMP5]] = call reassoc <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> [[WIDE_LOAD2]], <16 x bfloat> [[WIDE_LOAD4]], <16 x bfloat> [[VEC_PHI1]])
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
 ; CHECK: [[MIDDLE_BLOCK]]:
 ; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc <16 x bfloat> [[TMP5]], [[TMP4]]
 ; CHECK-NEXT: [[TMP7:%.*]] = call reassoc bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR8000, <16 x bfloat> [[BIN_RDX]])
@@ -1229,7 +1229,7 @@ define bfloat @fmuladd_bf16(ptr %a, ptr %b, i64 %n) "target-features"="+zvfbfmin
 ; CHECK-NEXT: [[MULADD]] = tail call reassoc bfloat @llvm.fmuladd.bf16(bfloat [[TMP8]], bfloat [[TMP9]], bfloat [[SUM_07]])
 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
 ; CHECK: [[FOR_END]]:
 ; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi bfloat [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT: ret bfloat [[MULADD_LCSSA]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
index b9a4e97cd9f24..cc1b2380bc532 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
@@ -108,7 +108,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[RDX:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
 ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]]
-; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
+; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4
 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; IF-EVL-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; IF-EVL-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD2]])
@@ -117,7 +117,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[TMP5]] = mul i32 [[VEC_PHI1]], [[TMP4]]
 ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 8
 ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: [[BIN_RDX:%.*]] = mul i32 [[TMP5]], [[MUL]]
 ; IF-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_RND_UP]], [[N_VEC]]
@@ -134,7 +134,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[MUL1]] = mul nsw i32 [[TMP0]], [[RDX1]]
 ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]]
-; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP5:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP4:![0-9]+]]
 ; IF-EVL: for.end:
 ; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL1]], [[FOR_BODY1]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
 ; IF-EVL-NEXT: ret i32 [[MUL_LCSSA]]
@@ -152,7 +152,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) {
 ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
 ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 1, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
-; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
+; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4
 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; NO-VP-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]])
@@ -219,7 +219,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
 ; IF-EVL: for.end:
@@ -303,7 +303,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
 ; IF-EVL: for.end:
@@ -387,7 +387,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
 ; IF-EVL: for.end:
@@ -471,7 +471,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
 ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
 ; IF-EVL: for.end:
@@ -557,7 +557,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
 ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
 ; IF-EVL: for.end:
@@ -643,7 +643,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
 ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
 ; IF-EVL: for.end:
@@ -729,7 +729,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
 ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
 ; IF-EVL: for.end:
@@ -815,7 +815,7 @@ define float @fadd(ptr %a, i64 %n, float %start) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
 ; IF-EVL: for.end:
@@ -895,7 +895,7 @@ define float @fmul(ptr %a, i64 %n, float %start) {
 ; IF-EVL-NEXT: [[RDX:%.*]] = phi float [ [[START:%.*]], [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
 ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]]
-; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4
+; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4
 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; IF-EVL-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
 ; IF-EVL-NEXT: [[TMP8:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD2]])
@@ -904,7 +904,7 @@ define float @fmul(ptr %a, i64 %n, float %start) {
 ; IF-EVL-NEXT: [[TMP5]] = fmul reassoc float [[VEC_PHI1]], [[TMP4]]
 ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 8
 ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: [[BIN_RDX:%.*]] = fmul reassoc float [[TMP5]], [[MUL]]
 ; IF-EVL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_RND_UP]], [[N_VEC]]
@@ -921,7 +921,7 @@ define float @fmul(ptr %a, i64 %n, float %start) {
 ; IF-EVL-NEXT: [[MUL1]] = fmul reassoc float [[TMP0]], [[RDX1]]
 ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]]
-; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP15:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP14:![0-9]+]]
 ; IF-EVL: for.end:
 ; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi float [ [[MUL1]], [[FOR_BODY1]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
 ; IF-EVL-NEXT: ret float [[MUL_LCSSA]]
@@ -939,7 +939,7 @@ define float @fmul(ptr %a, i64 %n, float %start) {
 ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi float [ [[START:%.*]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
 ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
 ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
-; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4
+; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4
 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
 ; NO-VP-NEXT: [[TMP6:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD]])
@@ -1007,7 +1007,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
 ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
 ; IF-EVL: for.end:
@@ -1095,7 +1095,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP8]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]]
 ; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
 ; IF-EVL: for.end:
@@ -1179,14 +1179,14 @@ define float @fminimum(ptr %a, i64 %n, float %start) {
 ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FOR_BODY]] ]
 ; IF-EVL-NEXT: [[VEC_PHI2:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]]
-; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8
+; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8
 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP2]], align 4
 ; IF-EVL-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x float>, ptr [[TMP1]], align 4
 ; IF-EVL-NEXT: [[TMP3]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]])
 ; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI2]], <8 x float> [[WIDE_LOAD3]])
 ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16
 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[TMP3]], <8 x float> [[TMP4]])
 ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP5]])
@@ -1204,7 +1204,7 @@ define float @fminimum(ptr %a, i64 %n, float %start) {
 ; IF-EVL-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP0]])
 ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]]
-; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP19:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP18:![0-9]+]]
 ; IF-EVL: for.end:
 ; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY1]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
 ; IF-EVL-NEXT: ret float [[MIN_LCSSA]]
@@ -1224,7 +1224,7 @@ define float @fminimum(ptr %a, i64 %n, float %start) {
 ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
 ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
 ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
-; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8
+; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8
 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4
 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP5]], align 4
 ; NO-VP-NEXT: [[TMP6]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]])
@@ -1287,14 +1287,14 @@ define float @fmaximum(ptr %a, i64 %n, float %start) {
 ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FOR_BODY]] ]
 ; IF-EVL-NEXT: [[VEC_PHI2:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]]
-; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8
+; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8
 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP2]], align 4
 ; IF-EVL-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x float>, ptr [[TMP1]], align 4
 ; IF-EVL-NEXT: [[TMP3]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]])
 ; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI2]], <8 x float> [[WIDE_LOAD3]])
 ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16
 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[TMP3]], <8 x float> [[TMP4]])
 ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> [[TMP5]])
@@ -1312,7 +1312,7 @@ define float @fmaximum(ptr %a, i64 %n, float %start) {
 ; IF-EVL-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP0]])
 ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]]
-; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP21:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP20:![0-9]+]]
 ; IF-EVL: for.end:
 ; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY1]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
 ; IF-EVL-NEXT: ret float [[MAX_LCSSA]]
@@ -1332,7 +1332,7 @@ define float @fmaximum(ptr %a, i64 %n, float %start) {
 ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
 ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
 ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
-; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8
+; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8
 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4
 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP5]], align 4
 ; NO-VP-NEXT: [[TMP6]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]])
@@ -1401,7 +1401,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP11]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
 ; IF-EVL: for.end:
@@ -1492,7 +1492,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP16]])
 ; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]]
@@ -1584,7 +1584,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
 ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: [[TMP19:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP16]])
 ; IF-EVL-NEXT: [[TMP20:%.*]] = freeze i1 [[TMP19]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll
index 7179e7dc48c8d..d1a2303e35e68 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll
@@ -110,14 +110,14 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ [[TMP9]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ]
 ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ splat (i32 1), [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ]
 ; IF-EVL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]]
-; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 8
+; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 8
 ; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4
 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4
 ; IF-EVL-NEXT: [[TMP5]] = mul <8 x i32> [[WIDE_MASKED_LOAD]], [[VEC_PHI]]
 ; IF-EVL-NEXT: [[TMP4]] = mul <8 x i32> [[WIDE_LOAD2]], [[VEC_PHI1]]
 ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16
 ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: [[TMP6:%.*]] = mul <8 x i32> [[TMP4]], [[TMP5]]
 ; IF-EVL-NEXT: [[TMP8:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP6]])
@@ -135,7 +135,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[MUL]] = mul nsw i32 [[TMP0]], [[RDX]]
 ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1
 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]]
-; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP5:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP4:![0-9]+]]
 ; IF-EVL: for.end:
 ; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], [[FOR_BODY1]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
 ; IF-EVL-NEXT: ret i32 [[MUL_LCSSA]]
@@ -152,7 +152,7 @@ define i32 @mul(ptr %a, i64 %n, i32 %start) {
 ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
 ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ splat (i32 1), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
 ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]]
-; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 8
+; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 8
 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4
 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4
 ; NO-VP-NEXT: [[TMP5]] = mul <8 x i32> [[WIDE_LOAD]], [[VEC_PHI]]
@@ -221,7 +221,7 @@ define i32 @or(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( [[TMP14]])
 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
@@ -308,7 +308,7 @@ define i32 @and(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32( [[TMP14]])
 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
@@ -395,7 +395,7 @@ define i32 @xor(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]]
 ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32( [[TMP14]])
 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
@@ -484,7 +484,7 @@ define i32 @smin(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32( [[TMP15]])
 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
@@ -577,7 +577,7 @@ define i32 @smax(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.smax.nxv4i32( [[TMP15]])
 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
@@ -670,7 +670,7 @@ define i32 @umin(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
-; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; IF-EVL: middle.block:
 ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.umin.nxv4i32( [[TMP15]])
 ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]]
@@ -763,7 +763,7 @@ define i32 @umax(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
 ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
 ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64
[[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -854,7 +854,7 @@ define float @fadd(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP15]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP15]] ; IF-EVL-NEXT: [[TMP11:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP17:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP14]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -937,14 +937,14 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[TMP9]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ splat (float 1.000000e+00), [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 8 +; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 8 ; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <8 x float>, ptr [[TMP3]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[TMP5]] = fmul reassoc <8 x float> [[WIDE_MASKED_LOAD]], [[VEC_PHI]] ; IF-EVL-NEXT: [[TMP4]] = fmul reassoc <8 x float> [[WIDE_LOAD2]], [[VEC_PHI1]] ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP7:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP6:%.*]] = fmul reassoc <8 x float> [[TMP4]], [[TMP5]] ; IF-EVL-NEXT: [[TMP8:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v8f32(float 1.000000e+00, <8 x float> [[TMP6]]) @@ -962,7 +962,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MUL]] = fmul reassoc float [[TMP0]], [[RDX]] ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP15:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP14:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MUL_LCSSA:%.*]] = phi float [ [[MUL]], [[FOR_BODY1]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MUL_LCSSA]] @@ -981,7 +981,7 @@ define float @fmul(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ splat (float 1.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; 
NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 +; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP4]], align 4 ; NO-VP-NEXT: [[TMP5]] = fmul reassoc <8 x float> [[WIDE_LOAD]], [[VEC_PHI]] @@ -1050,7 +1050,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -1143,7 +1143,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP12:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32( [[TMP15]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -1230,14 +1230,14 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 +; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 ; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_MASKED_LOAD]]) ; IF-EVL-NEXT: [[TMP3]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]]) ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[TMP4]], <8 x float> [[TMP3]]) ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> [[TMP5]]) @@ -1255,7 +1255,7 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MIN]] = tail call float @llvm.minimum.f32(float [[RDX]], float [[TMP0]]) ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 
[[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP19:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP18:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MIN_LCSSA:%.*]] = phi float [ [[MIN]], [[FOR_BODY1]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MIN_LCSSA]] @@ -1275,7 +1275,7 @@ define float @fminimum(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 +; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP3]], align 4 ; NO-VP-NEXT: [[TMP4]] = call <8 x float> @llvm.minimum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]]) @@ -1338,14 +1338,14 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP4:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FOR_BODY]] ] ; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IV]] -; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 +; IF-EVL-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 ; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; IF-EVL-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; IF-EVL-NEXT: [[TMP4]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_MASKED_LOAD]]) ; IF-EVL-NEXT: [[TMP3]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI1]], <8 x float> [[WIDE_LOAD2]]) ; IF-EVL-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 16 ; IF-EVL-NEXT: [[TMP6:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]] -; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP5:%.*]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[TMP4]], <8 x float> [[TMP3]]) ; IF-EVL-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> [[TMP5]]) @@ -1363,7 +1363,7 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; IF-EVL-NEXT: [[MAX]] = tail call float @llvm.maximum.f32(float [[RDX]], float [[TMP0]]) ; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i64 [[IV1]], 1 ; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT1]], [[N_RND_UP]] -; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP21:![0-9]+]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY1]], !llvm.loop [[LOOP20:![0-9]+]] ; IF-EVL: for.end: ; IF-EVL-NEXT: [[MAX_LCSSA:%.*]] = phi float [ [[MAX]], [[FOR_BODY1]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; IF-EVL-NEXT: ret float [[MAX_LCSSA]] @@ -1383,7 +1383,7 @@ define float @fmaximum(ptr %a, i64 %n, float %start) { ; NO-VP-NEXT: [[VEC_PHI:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], 
[[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[VEC_PHI1:%.*]] = phi <8 x float> [ [[BROADCAST_SPLAT]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 +; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP1]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP3]], align 4 ; NO-VP-NEXT: [[TMP4]] = call <8 x float> @llvm.maximum.v8f32(<8 x float> [[VEC_PHI]], <8 x float> [[WIDE_LOAD]]) @@ -1452,7 +1452,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP18]] ; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP20:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[TMP17]]) ; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] @@ -1544,7 +1544,7 @@ define i32 @anyof_icmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP15]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]] @@ -1636,7 +1636,7 @@ define i32 @anyof_fcmp(ptr %a, i64 %n, i32 %start, i32 %inv) { ; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]] ; IF-EVL-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]] ; IF-EVL-NEXT: [[TMP10:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; IF-EVL-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; IF-EVL: middle.block: ; IF-EVL-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1( [[TMP15]]) ; IF-EVL-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll index 7b0ac78fb365c..13990000585ea 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reverse-load-store.ll @@ -331,20 +331,20 @@ define void @multiple_reverse_vector_pointer(ptr noalias %a, ptr noalias %b, ptr ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] ; NO-VP-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1024, [[INDEX]] ; NO-VP-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[OFFSET_IDX]] -; NO-VP-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0 -; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 -15 +; NO-VP-NEXT: 
[[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 0 +; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -15 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 ; NO-VP-NEXT: [[REVERSE:%.*]] = shufflevector <16 x i8> [[WIDE_LOAD]], <16 x i8> poison, <16 x i32> ; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[B:%.*]], <16 x i8> [[REVERSE]] ; NO-VP-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> align 1 [[TMP3]], <16 x i1> splat (i1 true), <16 x i8> poison) ; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[C:%.*]], i64 [[OFFSET_IDX]] -; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i32 0 -; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 -15 +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP4]], i64 0 +; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i64 -15 ; NO-VP-NEXT: [[REVERSE1:%.*]] = shufflevector <16 x i8> [[WIDE_MASKED_GATHER]], <16 x i8> poison, <16 x i32> ; NO-VP-NEXT: store <16 x i8> [[REVERSE1]], ptr [[TMP6]], align 1 ; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[D:%.*]], i64 [[OFFSET_IDX]] -; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i32 0 -; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 -15 +; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i64 0 +; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i64 -15 ; NO-VP-NEXT: store <16 x i8> [[REVERSE1]], ptr [[TMP9]], align 1 ; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; NO-VP-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 0375f0a8fd132..03377f10c2283 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -44,7 +44,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 ; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -160,7 +160,7 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 ; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -294,7 +294,7 @@ 
define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer ; FIXEDLEN-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER1]], <4 x i64> zeroinitializer ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 4 +; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP3]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -417,7 +417,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 ; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -517,7 +517,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 8 ; FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 +; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -630,7 +630,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; FIXEDLEN-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 7 ; FIXEDLEN-NEXT: store i64 [[TMP4]], ptr [[B]], align 8 ; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 4 +; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -756,7 +756,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> align 8 [[BROADCAST_SPLAT2]], <4 x i1> [[TMP1]]) ; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> align 8 [[BROADCAST_SPLAT2]], <4 x i1> [[TMP2]]) ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 4 +; FIXEDLEN-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds i64, ptr [[TMP3]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -878,7 +878,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B]], align 1 ; FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 +; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP1]], align 8 ; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 ; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll index 0287645d9d7f9..94ebf01509ec2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/conversion-cost.ll @@ -126,9 +126,9 @@ define void @conversion_cost2(i32 %n, ptr nocapture %A, ptr nocapture %B) nounwi ; CHECK-NEXT: [[TMP19:%.*]] = sitofp <2 x i64> [[TMP10]] to <2 x float> ; CHECK-NEXT: [[TMP20:%.*]] = sitofp <2 x i64> [[TMP11]] to <2 x float> ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 2 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 4 -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 6 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 2 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 4 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 6 ; CHECK-NEXT: store <2 x float> [[TMP12]], ptr [[TMP13]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP18]], ptr [[TMP15]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP19]], ptr [[TMP16]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index b3c45a565a8fe..c70a3aa249919 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -385,7 +385,7 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i64, ptr [[SRC_3]], i32 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i32 2 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr [[TMP24]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP25]], align 8, !alias.scope [[META9:![0-9]+]] ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq <2 x i64> [[WIDE_LOAD]], zeroinitializer ; CHECK-NEXT: [[TMP27:%.*]] = and <2 x i1> [[TMP23]], [[TMP26]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll index 
3165422dcc539..d19ae728cc913 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/drop-inbounds-flags-for-reverse-vector-pointer.ll @@ -23,8 +23,8 @@ define i1 @fn(ptr %nno) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], splat (i64 1) ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i32, ptr [[NNO]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP23]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 -3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP23]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP6]], <4 x i1> [[REVERSE]], <4 x i32> poison) ; CHECK-NEXT: [[REVERSE1:%.*]] = shufflevector <4 x i32> [[WIDE_MASKED_LOAD]], <4 x i32> poison, <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll index 6e940ee58fabe..a1b92e0658bd3 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll @@ -161,9 +161,9 @@ define void @test_induction_step_needs_expansion(ptr noalias %j, ptr %k, i64 %l, ; CHECK-NEXT: [[TMP6:%.*]] = sub <16 x i16> [[STEP_ADD_2]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = sub <16 x i16> [[STEP_ADD_3]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[K:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 16 -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 32 -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 48 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i64 16 +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i64 32 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i64 48 ; CHECK-NEXT: store <16 x i16> [[TMP4]], ptr [[TMP8]], align 2 ; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP10]], align 2 ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[TMP21]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index 12b8d1e15b523..84579d97b38e2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -26,7 +26,7 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i8> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[WIDE_LOAD1]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x 
i8> [[VECTOR_RECUR]], <16 x i8> [[WIDE_LOAD]], <16 x i32> @@ -34,7 +34,7 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP9:%.*]] = add <16 x i8> [[WIDE_LOAD]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = add <16 x i8> [[WIDE_LOAD1]], [[TMP8]] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 16 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i64 16 ; CHECK-NEXT: store <16 x i8> [[TMP9]], ptr [[TMP11]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP10]], ptr [[TMP14]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 @@ -119,7 +119,7 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[VECTOR_RECUR4:%.*]] = phi <16 x i8> [ [[VECTOR_RECUR_INIT3]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[X]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 16 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 16 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[WIDE_LOAD5]] = load <16 x i8>, ptr [[TMP6]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i8> [[VECTOR_RECUR]], <16 x i8> [[WIDE_LOAD]], <16 x i32> @@ -135,7 +135,7 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP17:%.*]] = add <16 x i8> [[TMP15]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP18:%.*]] = add <16 x i8> [[TMP16]], [[WIDE_LOAD5]] ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[Y:%.*]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i32 16 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 16 ; CHECK-NEXT: store <16 x i8> [[TMP17]], ptr [[TMP19]], align 1 ; CHECK-NEXT: store <16 x i8> [[TMP18]], ptr [[TMP22]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index 39217e51ab117..41249c595f9eb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -41,9 +41,9 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd fast <8 x float> [[STEP_ADD]], splat (float 4.000000e+00) ; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd fast <8 x float> [[STEP_ADD2]], splat (float 4.000000e+00) ; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 16 -; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 24 +; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 16 +; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 24 ; AUTO_VEC-NEXT: store <8 x float> [[VEC_IND]], ptr [[TMP1]], align 4 ; AUTO_VEC-NEXT: store <8 x float> [[STEP_ADD]], ptr [[TMP2]], align 4 ; AUTO_VEC-NEXT: store <8 x float> 
[[STEP_ADD2]], ptr [[TMP3]], align 4 @@ -208,9 +208,9 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) { ; AUTO_VEC-NEXT: [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[STEP_ADD]], splat (double 1.200000e+01) ; AUTO_VEC-NEXT: [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[STEP_ADD_2]], splat (double 1.200000e+01) ; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX]] -; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP1]], i32 4 -; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[TMP1]], i32 8 -; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[TMP1]], i32 12 +; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP1]], i64 4 +; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[TMP1]], i64 8 +; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[TMP1]], i64 12 ; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND]], ptr [[TMP1]], align 8 ; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD]], ptr [[TMP2]], align 8 ; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_2]], ptr [[TMP3]], align 8 @@ -326,9 +326,9 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) { ; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd reassoc <8 x float> [[STEP_ADD]], splat (float 3.360000e+02) ; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd reassoc <8 x float> [[STEP_ADD2]], splat (float 3.360000e+02) ; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 [[INDEX]] -; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 8 -; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 16 -; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 24 +; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 8 +; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 16 +; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 24 ; AUTO_VEC-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[TMP2]], align 4 ; AUTO_VEC-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP3]], align 4 ; AUTO_VEC-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x float>, ptr [[TMP4]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll b/llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll index a0637ceb53cf2..137c09b653f2c 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fminimumnum.ll @@ -22,17 +22,17 @@ define void @fmin32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP7]], align 4 ; 
CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> [[WIDE_LOAD5]], <4 x float> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i32 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP8]], ptr [[TMP10]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP12]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -99,17 +99,17 @@ define void @fmax32(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP2]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[TMP5]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> [[WIDE_LOAD5]], <4 x float> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x float], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i32 4 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[TMP10]], i64 4 ; CHECK-NEXT: store <4 x float> [[TMP8]], ptr [[TMP10]], align 4 ; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP12]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -176,17 +176,17 @@ define void @fmin64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 ; CHECK-NEXT: 
[[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> [[WIDE_LOAD5]], <2 x double> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i64 2 ; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[TMP10]], align 8 ; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -253,17 +253,17 @@ define void @fmax64(ptr noundef readonly captures(none) %input1, ptr noundef rea ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw double, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x double>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[INPUT2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw double, ptr [[TMP5]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x double>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD6]]) ; CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> [[WIDE_LOAD5]], <2 x double> [[WIDE_LOAD7]]) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [4096 x double], ptr [[OUTPUT]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw double, ptr [[TMP10]], i64 2 ; CHECK-NEXT: store <2 x double> [[TMP8]], ptr [[TMP10]], align 8 ; CHECK-NEXT: store <2 x double> [[TMP9]], ptr [[TMP12]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll index 877fcd4d638eb..34a99b07ee93e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll @@ -75,7 +75,7 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; SSE-NEXT: [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] ; SSE-NEXT: [[VEC_PHI1:%.*]] = phi <2 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI3:%.*]], [[VECTOR_BODY]] ] ; SSE-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[ARR:%.*]], i32 [[INDEX]] -; SSE-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[TMP2]], i32 2 +; SSE-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[TMP2]], i64 2 ; SSE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 ; 
SSE-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 ; SSE-NEXT: [[TMP6:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD]], splat (double 4.200000e+01) @@ -106,9 +106,9 @@ define double @sumIfVector(ptr nocapture readonly %arr) { ; AVX-NEXT: [[VEC_PHI2:%.*]] = phi <4 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI8:%.*]], [[VECTOR_BODY]] ] ; AVX-NEXT: [[VEC_PHI3:%.*]] = phi <4 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI9:%.*]], [[VECTOR_BODY]] ] ; AVX-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[ARR:%.*]], i32 [[INDEX]] -; AVX-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[TMP4]], i32 4 -; AVX-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[TMP4]], i32 8 -; AVX-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP4]], i32 12 +; AVX-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[TMP4]], i64 4 +; AVX-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[TMP4]], i64 8 +; AVX-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP4]], i64 12 ; AVX-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP4]], align 8 ; AVX-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x double>, ptr [[TMP9]], align 8 ; AVX-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x double>, ptr [[TMP10]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 4028dd87e34b3..04bff3c393f62 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -125,13 +125,13 @@ define void @multiple_truncated_ivs_with_wide_uses(i1 %c, ptr %A, ptr %B) { ; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[C]], <4 x i16> [[VEC_IND]], <4 x i16> splat (i16 10) ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[C]], <4 x i16> [[STEP_ADD]], <4 x i16> splat (i16 10) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[TMP4]], i64 4 ; CHECK-NEXT: store <4 x i16> [[TMP1]], ptr [[TMP4]], align 2, !alias.scope [[META6:![0-9]+]], !noalias [[META9:![0-9]+]] -; CHECK-NEXT: store <4 x i16> [[TMP2]], ptr [[TMP7]], align 2, !alias.scope [[META6]], !noalias [[META9]] +; CHECK-NEXT: store <4 x i16> [[TMP2]], ptr [[TMP3]], align 2, !alias.scope [[META6]], !noalias [[META9]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP8]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP8]], i64 4 ; CHECK-NEXT: store <4 x i32> [[VEC_IND3]], ptr [[TMP8]], align 4, !alias.scope [[META9]] -; CHECK-NEXT: store <4 x i32> [[STEP_ADD4]], ptr [[TMP11]], align 4, !alias.scope [[META9]] +; CHECK-NEXT: store <4 x i32> [[STEP_ADD4]], ptr [[TMP5]], align 4, !alias.scope [[META9]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], splat (i16 4) ; CHECK-NEXT: [[VEC_IND_NEXT6]] = add <4 x i32> [[STEP_ADD4]], splat (i32 4) @@ -192,7 +192,7 @@ define void @truncated_ivs_with_wide_and_scalar_uses(i1 %c, ptr %dst) { ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[DST]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[C]], <8 x i16> [[VEC_IND]], <8 x i16> splat (i16 10) ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[C]], <8 x i16> [[STEP_ADD]], <8 x i16> splat (i16 10) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[TMP3]], i32 8 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[TMP3]], i64 8 ; 
CHECK-NEXT: store <8 x i16> [[TMP5]], ptr [[TMP3]], align 2 ; CHECK-NEXT: store <8 x i16> [[TMP6]], ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 @@ -477,9 +477,9 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK-NEXT: [[TMP21:%.*]] = trunc <8 x i64> [[TMP13]] to <8 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = trunc <8 x i64> [[TMP14]] to <8 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = trunc <8 x i64> [[TMP15]] to <8 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP16]], i32 8 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP16]], i32 16 -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP16]], i32 24 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP16]], i64 8 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP16]], i64 16 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP16]], i64 24 ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP20]], ptr align 4 [[TMP16]], <8 x i1> [[TMP8]]) ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP21]], ptr align 4 [[TMP25]], <8 x i1> [[TMP9]]) ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP22]], ptr align 4 [[TMP26]], <8 x i1> [[TMP10]]) diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll index 61f07eff768c1..d25d9f81de985 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll @@ -27,7 +27,7 @@ define i16 @wide_add_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i16> [[VEC_IND]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i16> [[STEP_ADD]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[DST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i64 4 ; CHECK-NEXT: store <4 x i16> [[TMP4]], ptr [[TMP5]], align 2 ; CHECK-NEXT: store <4 x i16> [[TMP9]], ptr [[TMP8]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -99,7 +99,7 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[TMP5:%.*]] = sub <4 x i16> [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP10:%.*]] = sub <4 x i16> [[STEP_ADD]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[DST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i32 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP6]], i64 4 ; CHECK-NEXT: store <4 x i16> [[TMP5]], ptr [[TMP6]], align 2 ; CHECK-NEXT: store <4 x i16> [[TMP10]], ptr [[TMP9]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll index d75fd0e0023f7..ad6dfb054b726 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll @@ -32,9 +32,9 @@ define void @uaddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[OFFSET_IDX]] ; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[OFFSET_IDX2]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr 
[[NEXT_GEP]], i32 16 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 32 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i32 48 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 16 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 32 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[NEXT_GEP]], i64 48 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i16>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i16>, ptr [[TMP1]], align 2 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i16>, ptr [[TMP2]], align 2 @@ -43,9 +43,9 @@ define void @uaddsat(ptr nocapture readonly %pSrc, i16 signext %offset, ptr noca ; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD4]], <16 x i16> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD5]], <16 x i16> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> [[WIDE_LOAD6]], <16 x i16> [[BROADCAST_SPLAT]]) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 16 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 32 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i32 48 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 16 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 32 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[NEXT_GEP3]], i64 48 ; CHECK-NEXT: store <16 x i16> [[TMP4]], ptr [[NEXT_GEP3]], align 2 ; CHECK-NEXT: store <16 x i16> [[TMP5]], ptr [[TMP8]], align 2 ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr [[TMP9]], align 2 @@ -160,9 +160,9 @@ define void @fshl(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PSRC:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PDST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 32 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 64 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 96 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 32 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 64 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 96 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[NEXT_GEP]], align 2 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <32 x i8>, ptr [[TMP1]], align 2 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <32 x i8>, ptr [[TMP2]], align 2 @@ -171,9 +171,9 @@ define void @fshl(ptr nocapture readonly %pSrc, i8 signext %offset, ptr nocaptur ; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD3]], <32 x i8> [[WIDE_LOAD3]], <32 x i8> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD4]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> [[BROADCAST_SPLAT]]) ; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i8> @llvm.fshl.v32i8(<32 x i8> [[WIDE_LOAD5]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> [[BROADCAST_SPLAT]]) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i32 32 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i32 64 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i32 96 +; CHECK-NEXT: [[TMP8:%.*]] = 
getelementptr i8, ptr [[NEXT_GEP2]], i64 32 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 64 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[NEXT_GEP2]], i64 96 ; CHECK-NEXT: store <32 x i8> [[TMP4]], ptr [[NEXT_GEP2]], align 2 ; CHECK-NEXT: store <32 x i8> [[TMP5]], ptr [[TMP8]], align 2 ; CHECK-NEXT: store <32 x i8> [[TMP6]], ptr [[TMP9]], align 2 diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll index b710236c026d2..751e885733f17 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll @@ -38,9 +38,9 @@ define i32 @inv_val_store_to_inv_address_with_reduction(ptr %a, i64 %n, ptr %b) ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 16 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 32 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 48 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 16 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 48 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP1]], align 8, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP2]], align 8, !alias.scope [[META0]] ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i32>, ptr [[TMP3]], align 8, !alias.scope [[META0]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll b/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll index bcb6b5c422343..a247285317a1e 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/iv-live-outs.ll @@ -18,9 +18,9 @@ define i64 @test_pr98660(ptr %dst, i64 %N) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP9]], i32 8 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP9]], i32 16 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 24 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP9]], i64 8 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP9]], i64 16 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i64 24 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP9]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP14]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP15]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll index 6e3b2a5390948..ea3ec99cf46e1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/limit-vf-by-tripcount.ll @@ -193,17 +193,17 @@ define void @test_tc_20(ptr noalias %src, ptr 
noalias %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 64 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 64 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 64 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP5]], align 64 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 8 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 12 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i64 12 ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD]], ptr [[TMP6]], align 64 ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD1]], ptr [[TMP8]], align 64 ; CHECK-NEXT: store <4 x i8> [[WIDE_LOAD2]], ptr [[TMP9]], align 64 diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index 6605338771c47..78363e13595cb 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -39,9 +39,9 @@ define i32 @test_explicit_pred(i64 %len) { ; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <4 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i32 4 -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP8]], i32 8 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP8]], i32 12 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP8]], i64 4 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP8]], i64 8 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP8]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4 ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP14]], align 4 @@ -171,9 +171,9 @@ define i32 @test_explicit_pred_generic(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: 
[[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP64]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP69]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP70]], align 4 @@ -718,9 +718,9 @@ define i32 @test_max_trip_count(i64 %len, ptr %test_base, i64 %n) { ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2 ; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3 ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i32 4 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i32 8 -; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i32 12 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i64 4 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i64 8 +; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP65]], <4 x i1> [[TMP40]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP48]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP71]], <4 x i1> [[TMP56]], <4 x i32> poison) @@ -877,9 +877,9 @@ define i32 @test_non_zero_start(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP64]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP69]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP70]], align 4 @@ -1231,9 +1231,9 @@ define i32 @neg_off_by_many(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> 
@llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) @@ -1362,9 +1362,9 @@ define i32 @neg_off_by_one_iteration(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) @@ -1493,9 +1493,9 @@ define i32 @neg_off_by_one_byte(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) @@ -1633,9 +1633,9 @@ define i32 @test_constant_max(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 2 ; CHECK-NEXT: [[TMP64:%.*]] = insertelement <4 x i1> [[TMP63]], i1 [[TMP60]], i32 3 ; CHECK-NEXT: [[TMP65:%.*]] = getelementptr i32, ptr [[ALLOCA]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i32 4 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i32 8 -; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i32 12 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP65]], i64 4 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP65]], i64 8 +; CHECK-NEXT: [[TMP72:%.*]] = getelementptr i32, ptr [[TMP65]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP65]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP70]], align 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = 
load <4 x i32>, ptr [[TMP71]], align 4 @@ -1793,9 +1793,9 @@ define i32 @test_allocsize(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) @@ -1925,9 +1925,9 @@ define i32 @test_allocsize_array(i64 %len, ptr %test_base) nofree nosync { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) @@ -2067,9 +2067,9 @@ define i32 @test_allocsize_cond_deref(i1 %allzero, ptr %test_base) { ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 ; CHECK-NEXT: [[TMP64:%.*]] = getelementptr i32, ptr [[ALLOCATION]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i32 4 -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i32 8 -; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i32 12 +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr i32, ptr [[TMP64]], i64 4 +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr i32, ptr [[TMP64]], i64 8 +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr i32, ptr [[TMP64]], i64 12 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP64]], <4 x i1> [[TMP39]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP69]], <4 x i1> [[TMP47]], <4 x i32> poison) ; CHECK-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = 
call <4 x i32> @llvm.masked.load.v4i32.p0(ptr align 4 [[TMP70]], <4 x i1> [[TMP55]], <4 x i32> poison) diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll index 2c172b2aecd16..1d0906902ad62 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked-store-cost.ll @@ -35,9 +35,9 @@ define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { ; CHECK-NEXT: [[TMP21:%.*]] = trunc <8 x i64> [[TMP13]] to <8 x i32> ; CHECK-NEXT: [[TMP22:%.*]] = trunc <8 x i64> [[TMP14]] to <8 x i32> ; CHECK-NEXT: [[TMP23:%.*]] = trunc <8 x i64> [[TMP15]] to <8 x i32> -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP16]], i32 8 -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP16]], i32 16 -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP16]], i32 24 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP16]], i64 8 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP16]], i64 16 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP16]], i64 24 ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP20]], ptr align 4 [[TMP16]], <8 x i1> [[TMP8]]) ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP21]], ptr align 4 [[TMP25]], <8 x i1> [[TMP9]]) ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP22]], ptr align 4 [[TMP26]], <8 x i1> [[TMP10]]) @@ -199,7 +199,7 @@ define void @test_scalar_cost_single_store_loop_varying_cond(ptr %dst, ptr noali ; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <16 x i32> [[WIDE_VEC4]], <16 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC]], splat (i32 123) ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[STRIDED_VEC5]], splat (i32 123) -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i32 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[NEXT_GEP]], i64 4 ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr align 4 [[NEXT_GEP]], <4 x i1> [[TMP8]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr align 4 [[TMP11]], <4 x i1> [[TMP9]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll index 932153a23bdbd..e4977ee642b09 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -72,9 +72,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2: [[VECTOR_BODY]]: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 8 -; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 16 -; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 24 +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 8 +; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 16 +; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 24 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 ; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; AVX2-NEXT: 
[[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 @@ -84,9 +84,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i32 8 -; AVX2-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP12]], i32 16 -; AVX2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP12]], i32 24 +; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i64 8 +; AVX2-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP12]], i64 16 +; AVX2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP12]], i64 24 ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr align 4 [[TMP12]], <8 x i1> [[TMP8]], <8 x i32> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr align 4 [[TMP14]], <8 x i1> [[TMP9]], <8 x i32> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr align 4 [[TMP15]], <8 x i1> [[TMP10]], <8 x i32> poison) @@ -96,9 +96,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[TMP19:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]] ; AVX2-NEXT: [[TMP20:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]] ; AVX2-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP21]], i32 8 -; AVX2-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i32 16 -; AVX2-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP21]], i32 24 +; AVX2-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP21]], i64 8 +; AVX2-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 16 +; AVX2-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP21]], i64 24 ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP17]], ptr align 4 [[TMP21]], <8 x i1> [[TMP8]]) ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP18]], ptr align 4 [[TMP23]], <8 x i1> [[TMP9]]) ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP19]], ptr align 4 [[TMP24]], <8 x i1> [[TMP10]]) @@ -151,9 +151,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512: [[VECTOR_BODY]]: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 16 -; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 32 -; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 48 +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 16 +; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 32 +; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 48 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP3]], align 4 ; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 ; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4 @@ -163,9 +163,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat 
(i32 100) ; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i32 16 -; AVX512-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP12]], i32 32 -; AVX512-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP12]], i32 48 +; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[TMP12]], i64 16 +; AVX512-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr [[TMP12]], i64 32 +; AVX512-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP12]], i64 48 ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr align 4 [[TMP12]], <16 x i1> [[TMP8]], <16 x i32> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr align 4 [[TMP14]], <16 x i1> [[TMP9]], <16 x i32> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr align 4 [[TMP15]], <16 x i1> [[TMP10]], <16 x i32> poison) @@ -175,9 +175,9 @@ define void @foo1(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP19:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]] ; AVX512-NEXT: [[TMP20:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]] ; AVX512-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP21]], i32 16 -; AVX512-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i32 32 -; AVX512-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP21]], i32 48 +; AVX512-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP21]], i64 16 +; AVX512-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP21]], i64 32 +; AVX512-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP21]], i64 48 ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP17]], ptr align 4 [[TMP21]], <16 x i1> [[TMP8]]) ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP18]], ptr align 4 [[TMP23]], <16 x i1> [[TMP9]]) ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP19]], ptr align 4 [[TMP24]], <16 x i1> [[TMP10]]) @@ -293,9 +293,9 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX2: [[VECTOR_BODY]]: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 8 -; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 16 -; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 24 +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 8 +; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 16 +; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 24 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP3]], align 4 ; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP5]], align 4 ; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr addrspace(1) [[TMP6]], align 4 @@ -305,9 +305,9 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x 
i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 8 -; AVX2-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 16 -; AVX2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 24 +; AVX2-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 8 +; AVX2-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 16 +; AVX2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 24 ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) align 4 [[TMP12]], <8 x i1> [[TMP8]], <8 x i32> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) align 4 [[TMP14]], <8 x i1> [[TMP9]], <8 x i32> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) align 4 [[TMP15]], <8 x i1> [[TMP10]], <8 x i32> poison) @@ -317,9 +317,9 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX2-NEXT: [[TMP19:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]] ; AVX2-NEXT: [[TMP20:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]] ; AVX2-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr addrspace(1) [[A]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 8 -; AVX2-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 16 -; AVX2-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 24 +; AVX2-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 8 +; AVX2-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 16 +; AVX2-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 24 ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> [[TMP17]], ptr addrspace(1) align 4 [[TMP21]], <8 x i1> [[TMP8]]) ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> [[TMP18]], ptr addrspace(1) align 4 [[TMP23]], <8 x i1> [[TMP9]]) ; AVX2-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> [[TMP19]], ptr addrspace(1) align 4 [[TMP24]], <8 x i1> [[TMP10]]) @@ -372,9 +372,9 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX512: [[VECTOR_BODY]]: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TRIGGER]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 16 -; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 32 -; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i32 48 +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 16 +; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 32 +; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP3]], i64 48 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP3]], align 4 ; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP5]], align 4 ; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr addrspace(1) [[TMP6]], align 4 @@ -384,9 +384,9 @@ define void @foo1_addrspace1(ptr addrspace(1) 
nocapture %A, ptr addrspace(1) noc ; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr addrspace(1) [[B]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 16 -; AVX512-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 32 -; AVX512-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i32 48 +; AVX512-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 16 +; AVX512-NEXT: [[TMP15:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 32 +; AVX512-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP12]], i64 48 ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1(ptr addrspace(1) align 4 [[TMP12]], <16 x i1> [[TMP8]], <16 x i32> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1(ptr addrspace(1) align 4 [[TMP14]], <16 x i1> [[TMP9]], <16 x i32> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p1(ptr addrspace(1) align 4 [[TMP15]], <16 x i1> [[TMP10]], <16 x i32> poison) @@ -396,9 +396,9 @@ define void @foo1_addrspace1(ptr addrspace(1) nocapture %A, ptr addrspace(1) noc ; AVX512-NEXT: [[TMP19:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD9]], [[WIDE_LOAD6]] ; AVX512-NEXT: [[TMP20:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD10]], [[WIDE_LOAD7]] ; AVX512-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr addrspace(1) [[A]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 16 -; AVX512-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 32 -; AVX512-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i32 48 +; AVX512-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 16 +; AVX512-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 32 +; AVX512-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr addrspace(1) [[TMP21]], i64 48 ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1(<16 x i32> [[TMP17]], ptr addrspace(1) align 4 [[TMP21]], <16 x i1> [[TMP8]]) ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1(<16 x i32> [[TMP18]], ptr addrspace(1) align 4 [[TMP23]], <16 x i1> [[TMP9]]) ; AVX512-NEXT: call void @llvm.masked.store.v16i32.p1(<16 x i32> [[TMP19]], ptr addrspace(1) align 4 [[TMP24]], <16 x i1> [[TMP10]]) @@ -524,9 +524,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2: [[VECTOR_BODY]]: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 8 -; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 16 -; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 24 +; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 8 +; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 16 +; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 24 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4 ; AVX2-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr 
[[TMP6]], align 4 @@ -536,9 +536,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[TMP10:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX2-NEXT: [[TMP11:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i32 8 -; AVX2-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[TMP12]], i32 16 -; AVX2-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP12]], i32 24 +; AVX2-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i64 8 +; AVX2-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[TMP12]], i64 16 +; AVX2-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP12]], i64 24 ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr align 4 [[TMP12]], <8 x i1> [[TMP8]], <8 x float> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr align 4 [[TMP14]], <8 x i1> [[TMP9]], <8 x float> poison) ; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr align 4 [[TMP15]], <8 x i1> [[TMP10]], <8 x float> poison) @@ -552,9 +552,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[TMP23:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD9]], [[TMP19]] ; AVX2-NEXT: [[TMP24:%.*]] = fadd <8 x float> [[WIDE_MASKED_LOAD10]], [[TMP20]] ; AVX2-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP25]], i32 8 -; AVX2-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP25]], i32 16 -; AVX2-NEXT: [[TMP29:%.*]] = getelementptr float, ptr [[TMP25]], i32 24 +; AVX2-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP25]], i64 8 +; AVX2-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP25]], i64 16 +; AVX2-NEXT: [[TMP29:%.*]] = getelementptr float, ptr [[TMP25]], i64 24 ; AVX2-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP21]], ptr align 4 [[TMP25]], <8 x i1> [[TMP8]]) ; AVX2-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP22]], ptr align 4 [[TMP27]], <8 x i1> [[TMP9]]) ; AVX2-NEXT: call void @llvm.masked.store.v8f32.p0(<8 x float> [[TMP23]], ptr align 4 [[TMP28]], <8 x i1> [[TMP10]]) @@ -608,9 +608,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512: [[VECTOR_BODY]]: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 16 -; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 32 -; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 48 +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 16 +; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 32 +; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 48 ; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP3]], align 4 ; AVX512-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 ; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4 @@ -620,9 +620,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP10:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD6]], splat (i32 
100) ; AVX512-NEXT: [[TMP11:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX512-NEXT: [[TMP12:%.*]] = getelementptr float, ptr [[B]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i32 16 -; AVX512-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[TMP12]], i32 32 -; AVX512-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP12]], i32 48 +; AVX512-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[TMP12]], i64 16 +; AVX512-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[TMP12]], i64 32 +; AVX512-NEXT: [[TMP16:%.*]] = getelementptr float, ptr [[TMP12]], i64 48 ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr align 4 [[TMP12]], <16 x i1> [[TMP8]], <16 x float> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD8:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr align 4 [[TMP14]], <16 x i1> [[TMP9]], <16 x float> poison) ; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <16 x float> @llvm.masked.load.v16f32.p0(ptr align 4 [[TMP15]], <16 x i1> [[TMP10]], <16 x float> poison) @@ -636,9 +636,9 @@ define void @foo2(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512-NEXT: [[TMP23:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD9]], [[TMP19]] ; AVX512-NEXT: [[TMP24:%.*]] = fadd <16 x float> [[WIDE_MASKED_LOAD10]], [[TMP20]] ; AVX512-NEXT: [[TMP25:%.*]] = getelementptr float, ptr [[A]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP25]], i32 16 -; AVX512-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP25]], i32 32 -; AVX512-NEXT: [[TMP29:%.*]] = getelementptr float, ptr [[TMP25]], i32 48 +; AVX512-NEXT: [[TMP27:%.*]] = getelementptr float, ptr [[TMP25]], i64 16 +; AVX512-NEXT: [[TMP28:%.*]] = getelementptr float, ptr [[TMP25]], i64 32 +; AVX512-NEXT: [[TMP29:%.*]] = getelementptr float, ptr [[TMP25]], i64 48 ; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP21]], ptr align 4 [[TMP25]], <16 x i1> [[TMP8]]) ; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP22]], ptr align 4 [[TMP27]], <16 x i1> [[TMP9]]) ; AVX512-NEXT: call void @llvm.masked.store.v16f32.p0(<16 x float> [[TMP23]], ptr align 4 [[TMP28]], <16 x i1> [[TMP10]]) @@ -732,25 +732,25 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX1: [[VECTOR_BODY]]: ; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 -; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 +; AVX1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8 +; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12 ; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META8:![0-9]+]] -; AVX1-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META8]] -; AVX1-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META8]] -; AVX1-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META8]] +; AVX1-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META8]] +; AVX1-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, 
ptr [[TMP2]], align 4, !alias.scope [[META8]] +; AVX1-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META8]] ; AVX1-NEXT: [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 100) ; AVX1-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX1-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX1-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD8]], splat (i32 100) ; AVX1-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 4 -; AVX1-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i32 8 -; AVX1-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP10]], i32 12 +; AVX1-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i64 4 +; AVX1-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i64 8 +; AVX1-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i64 12 ; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP10]], <4 x i1> [[TMP6]], <4 x double> poison), !alias.scope [[META11:![0-9]+]] ; AVX1-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP12]], <4 x i1> [[TMP7]], <4 x double> poison), !alias.scope [[META11]] ; AVX1-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP13]], <4 x i1> [[TMP8]], <4 x double> poison), !alias.scope [[META11]] -; AVX1-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP14]], <4 x i1> [[TMP9]], <4 x double> poison), !alias.scope [[META11]] +; AVX1-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP11]], <4 x i1> [[TMP9]], <4 x double> poison), !alias.scope [[META11]] ; AVX1-NEXT: [[TMP15:%.*]] = sitofp <4 x i32> [[WIDE_LOAD]] to <4 x double> ; AVX1-NEXT: [[TMP16:%.*]] = sitofp <4 x i32> [[WIDE_LOAD6]] to <4 x double> ; AVX1-NEXT: [[TMP17:%.*]] = sitofp <4 x i32> [[WIDE_LOAD7]] to <4 x double> @@ -760,13 +760,13 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX1-NEXT: [[TMP21:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD10]], [[TMP17]] ; AVX1-NEXT: [[TMP22:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD11]], [[TMP18]] ; AVX1-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX]] -; AVX1-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i32 4 -; AVX1-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i32 8 -; AVX1-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i32 12 +; AVX1-NEXT: [[TMP24:%.*]] = getelementptr double, ptr [[TMP23]], i64 4 +; AVX1-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i64 8 +; AVX1-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i64 12 ; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP19]], ptr align 8 [[TMP23]], <4 x i1> [[TMP6]]), !alias.scope [[META13:![0-9]+]], !noalias [[META15:![0-9]+]] -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP20]], ptr align 8 [[TMP25]], <4 x i1> [[TMP7]]), !alias.scope [[META13]], !noalias [[META15]] -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP21]], ptr align 8 [[TMP26]], <4 x i1> [[TMP8]]), !alias.scope [[META13]], !noalias [[META15]] -; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP22]], ptr align 8 [[TMP27]], <4 x i1> [[TMP9]]), !alias.scope [[META13]], !noalias [[META15]] +; AVX1-NEXT: 
call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP20]], ptr align 8 [[TMP24]], <4 x i1> [[TMP7]]), !alias.scope [[META13]], !noalias [[META15]] +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP21]], ptr align 8 [[TMP25]], <4 x i1> [[TMP8]]), !alias.scope [[META13]], !noalias [[META15]] +; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP22]], ptr align 8 [[TMP26]], <4 x i1> [[TMP9]]), !alias.scope [[META13]], !noalias [[META15]] ; AVX1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX1-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; AVX1-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] @@ -795,25 +795,25 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2: [[VECTOR_BODY]]: ; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 -; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12 +; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4 +; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8 +; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12 ; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META12:![0-9]+]] -; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META12]] -; AVX2-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META12]] -; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META12]] +; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META12]] +; AVX2-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META12]] +; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META12]] ; AVX2-NEXT: [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], splat (i32 100) ; AVX2-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD6]], splat (i32 100) ; AVX2-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD7]], splat (i32 100) ; AVX2-NEXT: [[TMP9:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD8]], splat (i32 100) ; AVX2-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 4 -; AVX2-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i32 8 -; AVX2-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP10]], i32 12 +; AVX2-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i64 4 +; AVX2-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i64 8 +; AVX2-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i64 12 ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP10]], <4 x i1> [[TMP6]], <4 x double> poison), !alias.scope [[META15:![0-9]+]] ; AVX2-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP12]], <4 x i1> [[TMP7]], <4 x double> poison), !alias.scope [[META15]] ; AVX2-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP13]], <4 x i1> [[TMP8]], <4 x double> poison), !alias.scope [[META15]] -; 
AVX2-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP14]], <4 x i1> [[TMP9]], <4 x double> poison), !alias.scope [[META15]] +; AVX2-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP11]], <4 x i1> [[TMP9]], <4 x double> poison), !alias.scope [[META15]] ; AVX2-NEXT: [[TMP15:%.*]] = sitofp <4 x i32> [[WIDE_LOAD]] to <4 x double> ; AVX2-NEXT: [[TMP16:%.*]] = sitofp <4 x i32> [[WIDE_LOAD6]] to <4 x double> ; AVX2-NEXT: [[TMP17:%.*]] = sitofp <4 x i32> [[WIDE_LOAD7]] to <4 x double> @@ -823,13 +823,13 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX2-NEXT: [[TMP21:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD10]], [[TMP17]] ; AVX2-NEXT: [[TMP22:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD11]], [[TMP18]] ; AVX2-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX]] -; AVX2-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i32 4 -; AVX2-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i32 8 -; AVX2-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i32 12 +; AVX2-NEXT: [[TMP24:%.*]] = getelementptr double, ptr [[TMP23]], i64 4 +; AVX2-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i64 8 +; AVX2-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i64 12 ; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP19]], ptr align 8 [[TMP23]], <4 x i1> [[TMP6]]), !alias.scope [[META17:![0-9]+]], !noalias [[META19:![0-9]+]] -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP20]], ptr align 8 [[TMP25]], <4 x i1> [[TMP7]]), !alias.scope [[META17]], !noalias [[META19]] -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP21]], ptr align 8 [[TMP26]], <4 x i1> [[TMP8]]), !alias.scope [[META17]], !noalias [[META19]] -; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP22]], ptr align 8 [[TMP27]], <4 x i1> [[TMP9]]), !alias.scope [[META17]], !noalias [[META19]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP20]], ptr align 8 [[TMP24]], <4 x i1> [[TMP7]]), !alias.scope [[META17]], !noalias [[META19]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP21]], ptr align 8 [[TMP25]], <4 x i1> [[TMP8]]), !alias.scope [[META17]], !noalias [[META19]] +; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[TMP22]], ptr align 8 [[TMP26]], <4 x i1> [[TMP9]]), !alias.scope [[META17]], !noalias [[META19]] ; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX2-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; AVX2-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] @@ -860,25 +860,25 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; AVX512: [[VECTOR_BODY]]: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDEX]] -; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8 -; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 16 -; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 24 +; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8 +; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 16 +; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 24 
; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4, !alias.scope [[META12:![0-9]+]]
-; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META12]]
-; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META12]]
-; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META12]]
+; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META12]]
+; AVX512-NEXT: [[WIDE_LOAD7:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META12]]
+; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META12]]
; AVX512-NEXT: [[TMP6:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], splat (i32 100)
; AVX512-NEXT: [[TMP7:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD6]], splat (i32 100)
; AVX512-NEXT: [[TMP8:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD7]], splat (i32 100)
; AVX512-NEXT: [[TMP9:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD8]], splat (i32 100)
; AVX512-NEXT: [[TMP10:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]]
-; AVX512-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i32 8
-; AVX512-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i32 16
-; AVX512-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP10]], i32 24
+; AVX512-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[TMP10]], i64 8
+; AVX512-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP10]], i64 16
+; AVX512-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP10]], i64 24
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP10]], <8 x i1> [[TMP6]], <8 x double> poison), !alias.scope [[META15:![0-9]+]]
; AVX512-NEXT: [[WIDE_MASKED_LOAD9:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP12]], <8 x i1> [[TMP7]], <8 x double> poison), !alias.scope [[META15]]
; AVX512-NEXT: [[WIDE_MASKED_LOAD10:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP13]], <8 x i1> [[TMP8]], <8 x double> poison), !alias.scope [[META15]]
-; AVX512-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP14]], <8 x i1> [[TMP9]], <8 x double> poison), !alias.scope [[META15]]
+; AVX512-NEXT: [[WIDE_MASKED_LOAD11:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP11]], <8 x i1> [[TMP9]], <8 x double> poison), !alias.scope [[META15]]
; AVX512-NEXT: [[TMP15:%.*]] = sitofp <8 x i32> [[WIDE_LOAD]] to <8 x double>
; AVX512-NEXT: [[TMP16:%.*]] = sitofp <8 x i32> [[WIDE_LOAD6]] to <8 x double>
; AVX512-NEXT: [[TMP17:%.*]] = sitofp <8 x i32> [[WIDE_LOAD7]] to <8 x double>
@@ -888,13 +888,13 @@ define void @foo3(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea
; AVX512-NEXT: [[TMP21:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD10]], [[TMP17]]
; AVX512-NEXT: [[TMP22:%.*]] = fadd <8 x double> [[WIDE_MASKED_LOAD11]], [[TMP18]]
; AVX512-NEXT: [[TMP23:%.*]] = getelementptr double, ptr [[A]], i64 [[INDEX]]
-; AVX512-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i32 8
-; AVX512-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i32 16
-; AVX512-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[TMP23]], i32 24
+; AVX512-NEXT: [[TMP24:%.*]] = getelementptr double, ptr [[TMP23]], i64 8
+; AVX512-NEXT: [[TMP25:%.*]] = getelementptr double, ptr [[TMP23]], i64 16
+; AVX512-NEXT: [[TMP26:%.*]] = getelementptr double, ptr [[TMP23]], i64 24
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP19]], ptr align 8 [[TMP23]], <8 x i1> [[TMP6]]), !alias.scope [[META17:![0-9]+]], !noalias [[META19:![0-9]+]]
-; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP20]], ptr align 8 [[TMP25]], <8 x i1> [[TMP7]]), !alias.scope [[META17]], !noalias [[META19]]
-; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP21]], ptr align 8 [[TMP26]], <8 x i1> [[TMP8]]), !alias.scope [[META17]], !noalias [[META19]]
-; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP22]], ptr align 8 [[TMP27]], <8 x i1> [[TMP9]]), !alias.scope [[META17]], !noalias [[META19]]
+; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP20]], ptr align 8 [[TMP24]], <8 x i1> [[TMP7]]), !alias.scope [[META17]], !noalias [[META19]]
+; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP21]], ptr align 8 [[TMP25]], <8 x i1> [[TMP8]]), !alias.scope [[META17]], !noalias [[META19]]
+; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[TMP22]], ptr align 8 [[TMP26]], <8 x i1> [[TMP9]]), !alias.scope [[META17]], !noalias [[META19]]
; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; AVX512-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984
; AVX512-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
@@ -1117,68 +1117,68 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr
; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; AVX2-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]]
; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[OFFSET_IDX]]
-; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -3
-; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -4
-; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 -3
-; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -8
-; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3
-; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -12
-; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3
-; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META22:![0-9]+]]
+; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0
+; AVX2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -3
+; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -4
+; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -3
+; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -8
+; AVX2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -3
+; AVX2-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -12
+; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -3
+; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META22:![0-9]+]]
; AVX2-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META22]]
+; AVX2-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META22]]
; AVX2-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD6]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META22]]
+; AVX2-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META22]]
; AVX2-NEXT: [[REVERSE9:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD8]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4, !alias.scope [[META22]]
+; AVX2-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META22]]
; AVX2-NEXT: [[REVERSE11:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD10]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i32> [[REVERSE]], zeroinitializer
; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[REVERSE7]], zeroinitializer
; AVX2-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[REVERSE9]], zeroinitializer
; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt <4 x i32> [[REVERSE11]], zeroinitializer
; AVX2-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[IN]], i64 [[OFFSET_IDX]]
-; AVX2-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i32 0
-; AVX2-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP15]], i32 -3
-; AVX2-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP14]], i32 -4
-; AVX2-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP17]], i32 -3
-; AVX2-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP14]], i32 -8
-; AVX2-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP19]], i32 -3
-; AVX2-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP14]], i32 -12
-; AVX2-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP21]], i32 -3
+; AVX2-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP14]], i64 0
+; AVX2-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP22]], i64 -3
+; AVX2-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP14]], i64 -4
+; AVX2-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i64 -3
+; AVX2-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP14]], i64 -8
+; AVX2-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP18]], i64 -3
+; AVX2-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP14]], i64 -12
+; AVX2-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP20]], i64 -3
; AVX2-NEXT: [[REVERSE12:%.*]] = shufflevector <4 x i1> [[TMP10]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP16]], <4 x i1> [[REVERSE12]], <4 x double> poison), !alias.scope [[META25:![0-9]+]]
+; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP15]], <4 x i1> [[REVERSE12]], <4 x double> poison), !alias.scope [[META25:![0-9]+]]
; AVX2-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i1> [[TMP11]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP18]], <4 x i1> [[REVERSE14]], <4 x double> poison), !alias.scope [[META25]]
+; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP17]], <4 x i1> [[REVERSE14]], <4 x double> poison), !alias.scope [[META25]]
; AVX2-NEXT: [[REVERSE16:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD15]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i1> [[TMP12]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP20]], <4 x i1> [[REVERSE17]], <4 x double> poison), !alias.scope [[META25]]
+; AVX2-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP19]], <4 x i1> [[REVERSE17]], <4 x double> poison), !alias.scope [[META25]]
; AVX2-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD18]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP13]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP22]], <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope [[META25]]
+; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr align 8 [[TMP21]], <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope [[META25]]
; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP23:%.*]] = fadd <4 x double> [[REVERSE13]], splat (double 5.000000e-01)
; AVX2-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[REVERSE16]], splat (double 5.000000e-01)
; AVX2-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[REVERSE19]], splat (double 5.000000e-01)
; AVX2-NEXT: [[TMP26:%.*]] = fadd <4 x double> [[REVERSE22]], splat (double 5.000000e-01)
; AVX2-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[OUT]], i64 [[OFFSET_IDX]]
-; AVX2-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP27]], i32 0
-; AVX2-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 -3
-; AVX2-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP27]], i32 -4
-; AVX2-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP30]], i32 -3
-; AVX2-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP27]], i32 -8
-; AVX2-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP32]], i32 -3
-; AVX2-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP27]], i32 -12
-; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP34]], i32 -3
+; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP27]], i64 0
+; AVX2-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP35]], i64 -3
+; AVX2-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP27]], i64 -4
+; AVX2-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP29]], i64 -3
+; AVX2-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP27]], i64 -8
+; AVX2-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP31]], i64 -3
+; AVX2-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP27]], i64 -12
+; AVX2-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP33]], i64 -3
; AVX2-NEXT: [[REVERSE24:%.*]] = shufflevector <4 x double> [[TMP23]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE24]], ptr align 8 [[TMP29]], <4 x i1> [[REVERSE12]]), !alias.scope [[META27:![0-9]+]], !noalias [[META29:![0-9]+]]
+; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE24]], ptr align 8 [[TMP28]], <4 x i1> [[REVERSE12]]), !alias.scope [[META27:![0-9]+]], !noalias [[META29:![0-9]+]]
; AVX2-NEXT: [[REVERSE26:%.*]] = shufflevector <4 x double> [[TMP24]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE26]], ptr align 8 [[TMP31]], <4 x i1> [[REVERSE14]]), !alias.scope [[META27]], !noalias [[META29]]
+; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE26]], ptr align 8 [[TMP30]], <4 x i1> [[REVERSE14]]), !alias.scope [[META27]], !noalias [[META29]]
; AVX2-NEXT: [[REVERSE28:%.*]] = shufflevector <4 x double> [[TMP25]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE28]], ptr align 8 [[TMP33]], <4 x i1> [[REVERSE17]]), !alias.scope [[META27]], !noalias [[META29]]
+; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE28]], ptr align 8 [[TMP32]], <4 x i1> [[REVERSE17]]), !alias.scope [[META27]], !noalias [[META29]]
; AVX2-NEXT: [[REVERSE30:%.*]] = shufflevector <4 x double> [[TMP26]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE30]], ptr align 8 [[TMP35]], <4 x i1> [[REVERSE20]]), !alias.scope [[META27]], !noalias [[META29]]
+; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> [[REVERSE30]], ptr align 8 [[TMP34]], <4 x i1> [[REVERSE20]]), !alias.scope [[META27]], !noalias [[META29]]
; AVX2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; AVX2-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
; AVX2-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
@@ -1208,68 +1208,68 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr
; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; AVX512-NEXT: [[OFFSET_IDX:%.*]] = sub i64 4095, [[INDEX]]
; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[OFFSET_IDX]]
-; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
-; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -7
-; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -8
-; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 -7
-; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -16
-; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -7
-; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 -24
-; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -7
-; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4, !alias.scope [[META34:![0-9]+]]
+; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0
+; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -7
+; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -8
+; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -7
+; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -16
+; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -7
+; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 -24
+; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -7
+; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP2]], align 4, !alias.scope [[META34:![0-9]+]]
; AVX512-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4, !alias.scope [[META34]]
+; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META34]]
; AVX512-NEXT: [[REVERSE7:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD6]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META34]]
+; AVX512-NEXT: [[WIDE_LOAD8:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META34]]
; AVX512-NEXT: [[REVERSE9:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD8]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i32>, ptr [[TMP9]], align 4, !alias.scope [[META34]]
+; AVX512-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META34]]
; AVX512-NEXT: [[REVERSE11:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD10]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP10:%.*]] = icmp sgt <8 x i32> [[REVERSE]], zeroinitializer
; AVX512-NEXT: [[TMP11:%.*]] = icmp sgt <8 x i32> [[REVERSE7]], zeroinitializer
; AVX512-NEXT: [[TMP12:%.*]] = icmp sgt <8 x i32> [[REVERSE9]], zeroinitializer
; AVX512-NEXT: [[TMP13:%.*]] = icmp sgt <8 x i32> [[REVERSE11]], zeroinitializer
; AVX512-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[IN]], i64 [[OFFSET_IDX]]
-; AVX512-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i32 0
-; AVX512-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP15]], i32 -7
-; AVX512-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP14]], i32 -8
-; AVX512-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP17]], i32 -7
-; AVX512-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP14]], i32 -16
-; AVX512-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP19]], i32 -7
-; AVX512-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP14]], i32 -24
-; AVX512-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP21]], i32 -7
+; AVX512-NEXT: [[TMP22:%.*]] = getelementptr double, ptr [[TMP14]], i64 0
+; AVX512-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[TMP22]], i64 -7
+; AVX512-NEXT: [[TMP16:%.*]] = getelementptr double, ptr [[TMP14]], i64 -8
+; AVX512-NEXT: [[TMP17:%.*]] = getelementptr double, ptr [[TMP16]], i64 -7
+; AVX512-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[TMP14]], i64 -16
+; AVX512-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[TMP18]], i64 -7
+; AVX512-NEXT: [[TMP20:%.*]] = getelementptr double, ptr [[TMP14]], i64 -24
+; AVX512-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[TMP20]], i64 -7
; AVX512-NEXT: [[REVERSE12:%.*]] = shufflevector <8 x i1> [[TMP10]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP16]], <8 x i1> [[REVERSE12]], <8 x double> poison), !alias.scope [[META37:![0-9]+]]
+; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP15]], <8 x i1> [[REVERSE12]], <8 x double> poison), !alias.scope [[META37:![0-9]+]]
; AVX512-NEXT: [[REVERSE13:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE14:%.*]] = shufflevector <8 x i1> [[TMP11]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP18]], <8 x i1> [[REVERSE14]], <8 x double> poison), !alias.scope [[META37]]
+; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP17]], <8 x i1> [[REVERSE14]], <8 x double> poison), !alias.scope [[META37]]
; AVX512-NEXT: [[REVERSE16:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD15]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i1> [[TMP12]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP20]], <8 x i1> [[REVERSE17]], <8 x double> poison), !alias.scope [[META37]]
+; AVX512-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP19]], <8 x i1> [[REVERSE17]], <8 x double> poison), !alias.scope [[META37]]
; AVX512-NEXT: [[REVERSE19:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD18]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP13]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP22]], <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope [[META37]]
+; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr align 8 [[TMP21]], <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope [[META37]]
; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP23:%.*]] = fadd <8 x double> [[REVERSE13]], splat (double 5.000000e-01)
; AVX512-NEXT: [[TMP24:%.*]] = fadd <8 x double> [[REVERSE16]], splat (double 5.000000e-01)
; AVX512-NEXT: [[TMP25:%.*]] = fadd <8 x double> [[REVERSE19]], splat (double 5.000000e-01)
; AVX512-NEXT: [[TMP26:%.*]] = fadd <8 x double> [[REVERSE22]], splat (double 5.000000e-01)
; AVX512-NEXT: [[TMP27:%.*]] = getelementptr double, ptr [[OUT]], i64 [[OFFSET_IDX]]
-; AVX512-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP27]], i32 0
-; AVX512-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP28]], i32 -7
-; AVX512-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP27]], i32 -8
-; AVX512-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP30]], i32 -7
-; AVX512-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP27]], i32 -16
-; AVX512-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP32]], i32 -7
-; AVX512-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP27]], i32 -24
-; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP34]], i32 -7
+; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[TMP27]], i64 0
+; AVX512-NEXT: [[TMP28:%.*]] = getelementptr double, ptr [[TMP35]], i64 -7
+; AVX512-NEXT: [[TMP29:%.*]] = getelementptr double, ptr [[TMP27]], i64 -8
+; AVX512-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP29]], i64 -7
+; AVX512-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP27]], i64 -16
+; AVX512-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[TMP31]], i64 -7
+; AVX512-NEXT: [[TMP33:%.*]] = getelementptr double, ptr [[TMP27]], i64 -24
+; AVX512-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[TMP33]], i64 -7
; AVX512-NEXT: [[REVERSE24:%.*]] = shufflevector <8 x double> [[TMP23]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE24]], ptr align 8 [[TMP29]], <8 x i1> [[REVERSE12]]), !alias.scope [[META39:![0-9]+]], !noalias [[META41:![0-9]+]]
+; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE24]], ptr align 8 [[TMP28]], <8 x i1> [[REVERSE12]]), !alias.scope [[META39:![0-9]+]], !noalias [[META41:![0-9]+]]
; AVX512-NEXT: [[REVERSE26:%.*]] = shufflevector <8 x double> [[TMP24]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE26]], ptr align 8 [[TMP31]], <8 x i1> [[REVERSE14]]), !alias.scope [[META39]], !noalias [[META41]]
+; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE26]], ptr align 8 [[TMP30]], <8 x i1> [[REVERSE14]]), !alias.scope [[META39]], !noalias [[META41]]
; AVX512-NEXT: [[REVERSE28:%.*]] = shufflevector <8 x double> [[TMP25]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE28]], ptr align 8 [[TMP33]], <8 x i1> [[REVERSE17]]), !alias.scope [[META39]], !noalias [[META41]]
+; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE28]], ptr align 8 [[TMP32]], <8 x i1> [[REVERSE17]]), !alias.scope [[META39]], !noalias [[META41]]
; AVX512-NEXT: [[REVERSE30:%.*]] = shufflevector <8 x double> [[TMP26]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE30]], ptr align 8 [[TMP35]], <8 x i1> [[REVERSE20]]), !alias.scope [[META39]], !noalias [[META41]]
+; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> [[REVERSE30]], ptr align 8 [[TMP34]], <8 x i1> [[REVERSE20]]), !alias.scope [[META39]], !noalias [[META41]]
; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; AVX512-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
; AVX512-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
@@ -1332,9 +1332,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX1: [[VECTOR_BODY]]:
; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]]
-; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4
-; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8
-; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12
+; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4
+; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
+; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; AVX1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; AVX1-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
@@ -1348,9 +1348,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX1-NEXT: [[TMP16:%.*]] = icmp ne <4 x i8> [[TMP8]], zeroinitializer
; AVX1-NEXT: [[TMP17:%.*]] = icmp ne <4 x i8> [[TMP9]], zeroinitializer
; AVX1-NEXT: [[TMP13:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]]
-; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 4
-; AVX1-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 8
-; AVX1-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 12
+; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 4
+; AVX1-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 8
+; AVX1-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 12
; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP13]], <4 x i1> [[TMP14]], <4 x ptr> poison)
; AVX1-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP20]], <4 x i1> [[TMP15]], <4 x ptr> poison)
; AVX1-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP21]], <4 x i1> [[TMP16]], <4 x ptr> poison)
@@ -1364,9 +1364,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX1-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer
; AVX1-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> [[TMP30]], <4 x i1> zeroinitializer
; AVX1-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]]
-; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4
-; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8
-; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12
+; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 4
+; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 8
+; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 12
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <4 x i1> [[TMP31]])
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <4 x i1> [[TMP32]])
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <4 x i1> [[TMP33]])
@@ -1424,9 +1424,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX2: [[VECTOR_BODY]]:
; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]]
-; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4
-; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8
-; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12
+; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4
+; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
+; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; AVX2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; AVX2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
@@ -1440,9 +1440,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX2-NEXT: [[TMP16:%.*]] = icmp ne <4 x i8> [[TMP8]], zeroinitializer
; AVX2-NEXT: [[TMP17:%.*]] = icmp ne <4 x i8> [[TMP9]], zeroinitializer
; AVX2-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]]
-; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4
-; AVX2-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8
-; AVX2-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 12
+; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 4
+; AVX2-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 8
+; AVX2-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 12
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP18]], <4 x i1> [[TMP14]], <4 x ptr> poison)
; AVX2-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP20]], <4 x i1> [[TMP15]], <4 x ptr> poison)
; AVX2-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP21]], <4 x i1> [[TMP16]], <4 x ptr> poison)
@@ -1456,9 +1456,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX2-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer
; AVX2-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> [[TMP30]], <4 x i1> zeroinitializer
; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]]
-; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4
-; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8
-; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12
+; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 4
+; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 8
+; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 12
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <4 x i1> [[TMP31]])
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <4 x i1> [[TMP32]])
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <4 x i1> [[TMP33]])
@@ -1516,9 +1516,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX512: [[VECTOR_BODY]]:
; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]]
-; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8
-; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16
-; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 24
+; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
+; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24
; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
; AVX512-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1
; AVX512-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP4]], align 1
@@ -1532,9 +1532,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX512-NEXT: [[TMP16:%.*]] = icmp ne <8 x i8> [[TMP8]], zeroinitializer
; AVX512-NEXT: [[TMP17:%.*]] = icmp ne <8 x i8> [[TMP9]], zeroinitializer
; AVX512-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]]
-; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8
-; AVX512-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 16
-; AVX512-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 24
+; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 8
+; AVX512-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 16
+; AVX512-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 24
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP18]], <8 x i1> [[TMP14]], <8 x ptr> poison)
; AVX512-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP20]], <8 x i1> [[TMP15]], <8 x ptr> poison)
; AVX512-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP21]], <8 x i1> [[TMP16]], <8 x ptr> poison)
@@ -1548,9 +1548,9 @@ define void @foo7(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX512-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP16]], <8 x i1> [[TMP29]], <8 x i1> zeroinitializer
; AVX512-NEXT: [[TMP34:%.*]] = select <8 x i1> [[TMP17]], <8 x i1> [[TMP30]], <8 x i1> zeroinitializer
; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]]
-; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 8
-; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 16
-; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 24
+; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 8
+; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 16
+; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 24
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <8 x i1> [[TMP31]])
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <8 x i1> [[TMP32]])
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <8 x i1> [[TMP33]])
@@ -1653,9 +1653,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX1: [[VECTOR_BODY]]:
; AVX1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; AVX1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]]
-; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4
-; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8
-; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12
+; AVX1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4
+; AVX1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
+; AVX1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
; AVX1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; AVX1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; AVX1-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
@@ -1669,9 +1669,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX1-NEXT: [[TMP16:%.*]] = icmp ne <4 x i8> [[TMP8]], zeroinitializer
; AVX1-NEXT: [[TMP17:%.*]] = icmp ne <4 x i8> [[TMP9]], zeroinitializer
; AVX1-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]]
-; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 4
-; AVX1-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8
-; AVX1-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 12
+; AVX1-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 4
+; AVX1-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 8
+; AVX1-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 12
; AVX1-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP18]], <4 x i1> [[TMP14]], <4 x ptr> poison)
; AVX1-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP20]], <4 x i1> [[TMP15]], <4 x ptr> poison)
; AVX1-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP21]], <4 x i1> [[TMP16]], <4 x ptr> poison)
@@ -1685,9 +1685,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX1-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer
; AVX1-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> [[TMP30]], <4 x i1> zeroinitializer
; AVX1-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]]
-; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4
-; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8
-; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12
+; AVX1-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 4
+; AVX1-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 8
+; AVX1-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 12
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <4 x i1> [[TMP31]])
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <4 x i1> [[TMP32]])
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <4 x i1> [[TMP33]])
@@ -1745,9 +1745,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX2: [[VECTOR_BODY]]:
; AVX2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; AVX2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]]
-; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4
-; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8
-; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12
+; AVX2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4
+; AVX2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
+; AVX2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 12
; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; AVX2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; AVX2-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
@@ -1761,9 +1761,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX2-NEXT: [[TMP16:%.*]] = icmp ne <4 x i8> [[TMP8]], zeroinitializer
; AVX2-NEXT: [[TMP12:%.*]] = icmp ne <4 x i8> [[TMP9]], zeroinitializer
; AVX2-NEXT: [[TMP13:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]]
-; AVX2-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 4
-; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 8
-; AVX2-NEXT: [[TMP23:%.*]] = getelementptr ptr, ptr [[TMP13]], i32 12
+; AVX2-NEXT: [[TMP19:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 4
+; AVX2-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 8
+; AVX2-NEXT: [[TMP23:%.*]] = getelementptr ptr, ptr [[TMP13]], i64 12
; AVX2-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP13]], <4 x i1> [[TMP17]], <4 x ptr> poison)
; AVX2-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP19]], <4 x i1> [[TMP15]], <4 x ptr> poison)
; AVX2-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr align 8 [[TMP20]], <4 x i1> [[TMP16]], <4 x ptr> poison)
@@ -1777,9 +1777,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX2-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> [[TMP29]], <4 x i1> zeroinitializer
; AVX2-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP12]], <4 x i1> [[TMP21]], <4 x i1> zeroinitializer
; AVX2-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]]
-; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 4
-; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 8
-; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 12
+; AVX2-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 4
+; AVX2-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 8
+; AVX2-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 12
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <4 x i1> [[TMP31]])
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <4 x i1> [[TMP32]])
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0(<4 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <4 x i1> [[TMP33]])
@@ -1837,9 +1837,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX512: [[VECTOR_BODY]]:
; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TRIGGER]], i64 [[INDEX]]
-; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 8
-; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 16
-; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 24
+; AVX512-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 8
+; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16
+; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 24
; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
; AVX512-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP3]], align 1
; AVX512-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i8>, ptr [[TMP4]], align 1
@@ -1853,9 +1853,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX512-NEXT: [[TMP16:%.*]] = icmp ne <8 x i8> [[TMP8]], zeroinitializer
; AVX512-NEXT: [[TMP17:%.*]] = icmp ne <8 x i8> [[TMP9]], zeroinitializer
; AVX512-NEXT: [[TMP18:%.*]] = getelementptr ptr, ptr [[IN]], i64 [[INDEX]]
-; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 8
-; AVX512-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 16
-; AVX512-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i32 24
+; AVX512-NEXT: [[TMP20:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 8
+; AVX512-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 16
+; AVX512-NEXT: [[TMP22:%.*]] = getelementptr ptr, ptr [[TMP18]], i64 24
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP18]], <8 x i1> [[TMP14]], <8 x ptr> poison)
; AVX512-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP20]], <8 x i1> [[TMP15]], <8 x ptr> poison)
; AVX512-NEXT: [[WIDE_MASKED_LOAD5:%.*]] = call <8 x ptr> @llvm.masked.load.v8p0.p0(ptr align 8 [[TMP21]], <8 x i1> [[TMP16]], <8 x ptr> poison)
@@ -1869,9 +1869,9 @@ define void @foo8(ptr noalias nocapture %out, ptr noalias nocapture readonly %in
; AVX512-NEXT: [[TMP33:%.*]] = select <8 x i1> [[TMP16]], <8 x i1> [[TMP29]], <8 x i1> zeroinitializer
; AVX512-NEXT: [[TMP34:%.*]] = select <8 x i1> [[TMP17]], <8 x i1> [[TMP30]], <8 x i1> zeroinitializer
; AVX512-NEXT: [[TMP35:%.*]] = getelementptr double, ptr [[OUT]], i64 [[INDEX]]
-; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i32 8
-; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i32 16
-; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i32 24
+; AVX512-NEXT: [[TMP37:%.*]] = getelementptr double, ptr [[TMP35]], i64 8
+; AVX512-NEXT: [[TMP38:%.*]] = getelementptr double, ptr [[TMP35]], i64 16
+; AVX512-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP35]], i64 24
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP35]], <8 x i1> [[TMP31]])
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP37]], <8 x i1> [[TMP32]])
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0(<8 x double> splat (double 5.000000e-01), ptr align 8 [[TMP38]], <8 x i1> [[TMP33]])
diff --git a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
index e23f8a9b63ef0..d514ab6bc72b7 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -1186,13 +1186,13 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
; O1VEC2: vector.body:
; O1VEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; O1VEC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDEX]]
-; O1VEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 4
+; O1VEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 4
; O1VEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; O1VEC2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
; O1VEC2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; O1VEC2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
; O1VEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDEX]]
-; O1VEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i32 4
+; O1VEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i64 4
; O1VEC2-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4
; O1VEC2-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP8]], align 4
; O1VEC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -1214,13 +1214,13 @@ define i32 @nopragma(ptr noalias nocapture %a, ptr noalias nocapture readonly %b
; OzVEC2: vector.body:
; OzVEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; OzVEC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[B:%.*]], i64 [[INDEX]]
-; OzVEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i32 4
+; OzVEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 4
; OzVEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; OzVEC2-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
; OzVEC2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; OzVEC2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT]]
; OzVEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[INDEX]]
-; OzVEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i32 4
+; OzVEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i64 4
; OzVEC2-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4
; OzVEC2-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP8]], align 4
; OzVEC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll
index de6418066dea0..2809a77b36f1a 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll
@@ -30,8 +30,8 @@ define i64 @test_value_in_exit_compare_chain_used_outside(ptr %src, i64 %x, i64
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i8> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP10]], 1
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP18]]
-; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP26]], i32 0
-; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[TMP27]], i32 -7
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP26]], i64 0
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[TMP12]], i64 -7
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP28]], align 1
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i8> [[WIDE_LOAD]], <8 x i8> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[TMP29]] = xor <8 x i8> [[REVERSE]], [[VEC_PHI]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
index 31269b1b8c221..85d77eaadc632 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
@@ -35,17 +35,17 @@ define void @foo(ptr addrspace(1) align 8 dereferenceable_or_null(16), ptr addrs
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[DOT12]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i32 4
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i32 8
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i32 12
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i64 4
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i64 8
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP5]], i64 12
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP5]], align 8, !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP6]], align 8, !alias.scope [[META0]]
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP7]], align 8, !alias.scope [[META0]]
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x ptr addrspace(1)>, ptr addrspace(1) [[TMP8]], align 8, !alias.scope [[META0]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[DOT10]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i32 4
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i32 8
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i32 12
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i64 4
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i64 8
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[TMP9]], i64 12
; CHECK-NEXT: store <4 x ptr addrspace(1)> [[WIDE_LOAD]], ptr addrspace(1) [[TMP9]], align 8, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
; CHECK-NEXT: store <4 x ptr addrspace(1)> [[WIDE_LOAD4]], ptr addrspace(1) [[TMP10]], align 8, !alias.scope [[META3]], !noalias [[META0]]
; CHECK-NEXT: store <4 x ptr addrspace(1)> [[WIDE_LOAD5]], ptr addrspace(1) [[TMP11]], align 8, !alias.scope [[META3]], !noalias [[META0]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
index 3c618d71fc974..9217c905945ac 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
@@ -67,7 +67,7 @@ define i32 @main(ptr %ptr) {
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[DOTPROMOTED]], [[INDEX]]
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP20]]
-; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 4
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 4
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP22]], align 4
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP25]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll b/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll
index 737bcf35fbd2c..38db41271d1f6 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr47437.ll
@@ -124,7 +124,7 @@ define void @test_muladd(ptr noalias nocapture %d1, ptr noalias nocapture readon
; SSE41-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[TMP22]], [[TMP16]]
; SSE41-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[TMP23]], [[TMP17]]
; SSE41-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[D1:%.*]], i64 [[INDEX]]
-; SSE41-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 4
+; SSE41-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 4
; SSE41-NEXT: store <4 x i32> [[TMP24]], ptr [[TMP26]], align 4
; SSE41-NEXT: store <4 x i32> [[TMP25]], ptr [[TMP29]], align 4
; SSE41-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -250,9 +250,9 @@ define void @test_muladd(ptr noalias nocapture %d1, ptr noalias nocapture readon
; AVX1-NEXT: [[TMP69:%.*]] = add nsw <4 x i32> [[TMP67]], [[TMP46]]
; AVX1-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[TMP68]], [[TMP47]]
; AVX1-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[D1:%.*]], i64 [[INDEX]]
-; AVX1-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 4
-; AVX1-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 8
-; AVX1-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 12
+; AVX1-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 4
+; AVX1-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 8
+; AVX1-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i64 12
; AVX1-NEXT: store <4 x i32> [[TMP19]], ptr [[TMP21]], align 4
; AVX1-NEXT: store <4 x i32> [[TMP20]], ptr [[TMP26]], align 4
; AVX1-NEXT: store <4 x i32> [[TMP69]], ptr [[TMP71]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
index 08855fe9ecba5..c756a54ec6d2b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
@@ -30,8 +30,8 @@ define void @test(ptr noundef align 8 dereferenceable_or_null(16) %arr) #0 {
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[ARR]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i64 -3
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr align 8 [[TMP8]], <4 x i1> [[REVERSE]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll
index 2aceb279d47db..5a396f88b1a64 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll
@@ -76,7 +76,7 @@ define void @switch_default_to_latch_common_dest(ptr %start, ptr %end) {
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
-; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
+; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
@@ -214,7 +214,7 @@ define void @switch_default_to_latch_common_dest_using_branches(ptr %start, ptr
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
-; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
+; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
@@ -337,7 +337,7 @@ define void @switch_all_dests_distinct(ptr %start, ptr %end) {
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
-; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
+; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
@@ -527,7 +527,7 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
-; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
+; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
@@ -687,7 +687,7 @@ define void @switch_multiple_common_dests(ptr %start, ptr %end) {
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
-; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
+; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP23:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
@@ -836,7 +836,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
-; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
+; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP15:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
@@ -1014,7 +1014,7 @@ define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end,
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
-; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
+; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
@@ -1167,7 +1167,7 @@ define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end,
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
-; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
+; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
@@ -1319,7 +1319,7 @@ define void @large_number_of_cases(ptr %start, ptr %end) {
; FORCED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; FORCED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; FORCED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
-; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 4
+; FORCED-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 4
; FORCED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
; FORCED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP8]], align 1
; FORCED-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 1)
diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll
index 52e90e4475208..3afdf947081b6 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll
@@ -60,7 +60,7 @@ define float @reduction_sum_float_fastmath(i32 %n, ptr %array) {
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i32 4
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i64 4
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP6]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
@@ -111,7 +111,7 @@ define float @reduction_sum_float_only_reassoc(i32 %n, ptr %array) {
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i32 4
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i64 4
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP6]] = fadd reassoc <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
@@ -162,7 +162,7 @@ define float @reduction_sum_float_only_reassoc_and_contract(i32 %n, ptr %array)
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -0.000000e+00), [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[ARRAY:%.*]], i32 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i32 4
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr float, ptr [[TMP2]], i64 4
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP6]] = fadd reassoc contract <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
@@ -220,7 +220,7 @@ define float @PR35538(ptr nocapture readonly %a, i32 %N) #0 {
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = fcmp nnan ninf nsz oge <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
@@ -301,7 +301,7 @@ define float @PR35538_more_FMF(ptr nocapture readonly %a, i32 %N) #0 {
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+00), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = fcmp nnan ninf oge <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
index 602a3921eb34c..da48f984cb329 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/strided_load_cost.ll
@@ -59,13 +59,13 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; CHECK-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30
; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 8
-; CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 16
-; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 24
+; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8
+; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16
+; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[INT_TBAA1:![0-9]+]]
-; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[INT_TBAA1]]
-; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[INT_TBAA1]]
-; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP34]], align 4, !tbaa [[INT_TBAA1]]
+; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]]
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]]
; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]]
@@ -290,13 +290,13 @@ define i32 @matrix_row_col(ptr nocapture readonly %data, i32 %i, i32 %j) local_u
; MAX-BW-NEXT: [[TMP30:%.*]] = add i64 [[INDEX]], 30
; MAX-BW-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 31
; MAX-BW-NEXT: [[TMP32:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[IDXPROM]], i64 [[TMP0]]
-; MAX-BW-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 8
-; MAX-BW-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 16
-; MAX-BW-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i32 24
+; MAX-BW-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 8
+; MAX-BW-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 16
+; MAX-BW-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP32]], i64 24
; MAX-BW-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP32]], align 4, !tbaa [[INT_TBAA1:![0-9]+]]
-; MAX-BW-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4, !tbaa [[INT_TBAA1]]
-; MAX-BW-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP38]], align 4, !tbaa [[INT_TBAA1]]
-; MAX-BW-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP39]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[WIDE_LOAD4:%.*]] = load <8 x i32>, ptr [[TMP33]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[WIDE_LOAD5:%.*]] = load <8 x i32>, ptr [[TMP34]], align 4, !tbaa [[INT_TBAA1]]
+; MAX-BW-NEXT: [[WIDE_LOAD6:%.*]] = load <8 x i32>, ptr [[TMP35]], align 4, !tbaa [[INT_TBAA1]]
; MAX-BW-NEXT: [[TMP40:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP0]], i64 [[IDXPROM5]]
; MAX-BW-NEXT: [[TMP41:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP1]], i64 [[IDXPROM5]]
; MAX-BW-NEXT: [[TMP42:%.*]] = getelementptr inbounds [100 x i32], ptr [[DATA]], i64 [[TMP2]], i64 [[IDXPROM5]]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
index 8081c0e17f865..692ab3db0aa42 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll
@@ -25,9 +25,9 @@ define void @foo(ptr nocapture noalias %A, i64 %N) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr @inc, align 4
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[TMP1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 8
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 16
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 24
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 8
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 16
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 24
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x float>, ptr [[A]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x float>, ptr [[TMP4]], align 4
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <8 x float>, ptr [[TMP5]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
index fda944e072d4a..714d01315e507 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/vect.omp.force.small-tc.ll
@@ -29,29 +29,29 @@ define void @vectorized(ptr noalias nocapture %A, ptr noalias nocapture readonly
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4
+;
CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4, !llvm.access.group [[ACC_GRP0:![0-9]+]] -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 8 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 12 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 8 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP5]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP7]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[TMP11:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD4]] ; CHECK-NEXT: [[TMP12:%.*]] = fadd fast <4 x float> [[WIDE_LOAD1]], [[WIDE_LOAD5]] ; CHECK-NEXT: [[TMP13:%.*]] = fadd fast <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD6]] ; CHECK-NEXT: [[TMP14:%.*]] = fadd fast <4 x float> [[WIDE_LOAD3]], [[WIDE_LOAD7]] ; CHECK-NEXT: store <4 x float> [[TMP11]], ptr [[TMP6]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: store <4 x float> [[TMP12]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP0]] -; CHECK-NEXT: store <4 x float> [[TMP14]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: store <4 x float> [[TMP12]], ptr [[TMP5]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: store <4 x float> [[TMP14]], ptr [[TMP7]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP1:![0-9]+]] diff --git 
a/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll index c8e3766aa936e..a792d2463e647 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll @@ -56,17 +56,17 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP: vector.body: ; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 16 -; NO-VP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 32 -; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 48 +; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 16 +; NO-VP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 32 +; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 48 ; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP4]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i32>, ptr [[TMP9]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i32>, ptr [[TMP10]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i32>, ptr [[TMP11]], align 4 ; NO-VP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 16 -; NO-VP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 32 -; NO-VP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 48 +; NO-VP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 16 +; NO-VP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 32 +; NO-VP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 48 ; NO-VP-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP12]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP17]], align 4 ; NO-VP-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP18]], align 4 @@ -76,9 +76,9 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP-NEXT: [[TMP22:%.*]] = add nsw <16 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD3]] ; NO-VP-NEXT: [[TMP23:%.*]] = add nsw <16 x i32> [[WIDE_LOAD8]], [[WIDE_LOAD4]] ; NO-VP-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; NO-VP-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 16 -; NO-VP-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 32 -; NO-VP-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 48 +; NO-VP-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 16 +; NO-VP-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 32 +; NO-VP-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 48 ; NO-VP-NEXT: store <16 x i32> [[TMP20]], ptr [[TMP24]], align 4 ; NO-VP-NEXT: store <16 x i32> [[TMP21]], ptr [[TMP29]], align 4 ; NO-VP-NEXT: store <16 x i32> [[TMP22]], ptr [[TMP30]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll index 8184cad22ae8b..26268f1ff4e94 100644 --- 
a/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/widened-value-used-as-scalar-and-first-lane.ll @@ -18,9 +18,9 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 8 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 12 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 4 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 8 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8 @@ -36,9 +36,9 @@ define void @iv.4_used_as_vector_and_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD6]], splat (i64 128) ; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[TMP26]], 1 ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP27]] -; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i64, ptr [[TMP28]], i32 4 -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[TMP28]], i32 8 -; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP28]], i32 12 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i64, ptr [[TMP28]], i64 4 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i64, ptr [[TMP28]], i64 8 +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i64, ptr [[TMP28]], i64 12 ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP12]], ptr align 4 [[TMP28]], <4 x i1> [[TMP16]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP13]], ptr align 4 [[TMP33]], <4 x i1> [[TMP17]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[TMP14]], ptr align 4 [[TMP34]], <4 x i1> [[TMP18]]) @@ -88,9 +88,9 @@ define void @iv.4_used_as_first_lane(ptr %src, ptr noalias %dst) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4 -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 8 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 12 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 4 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 8 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 12 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8 @@ -102,9 +102,9 @@ define void @iv.4_used_as_first_lane(ptr %src, ptr noalias %dst) { ; CHECK-NEXT: [[TMP19:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD3]], splat (i64 128) ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP15]], 1 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i64, ptr [[DST]], 
i64 [[TMP23]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[TMP24]], i32 4 -; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[TMP24]], i32 8 -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[TMP24]], i32 12 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i64, ptr [[TMP24]], i64 4 +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr i64, ptr [[TMP24]], i64 8 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i64, ptr [[TMP24]], i64 12 ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[WIDE_LOAD]], ptr align 4 [[TMP24]], <4 x i1> [[TMP16]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[WIDE_LOAD1]], ptr align 4 [[TMP29]], <4 x i1> [[TMP17]]) ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[WIDE_LOAD2]], ptr align 4 [[TMP30]], <4 x i1> [[TMP18]]) diff --git a/llvm/test/Transforms/LoopVectorize/assume.ll b/llvm/test/Transforms/LoopVectorize/assume.ll index a9a0b33f542af..eddd5f9ddc584 100644 --- a/llvm/test/Transforms/LoopVectorize/assume.ll +++ b/llvm/test/Transforms/LoopVectorize/assume.ll @@ -11,7 +11,7 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 2 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD]], splat (float 1.000000e+02) @@ -27,7 +27,7 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i32 2 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP10]], i64 2 ; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[TMP10]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP9]], ptr [[TMP11]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -80,13 +80,13 @@ define void @test2(ptr noalias %a, ptr noalias %b) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 2 ; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP7]], align 4 ; 
CHECK-NEXT: store <2 x float> [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -151,13 +151,13 @@ define void @predicated_assume(ptr noalias nocapture readonly %a, ptr noalias no ; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x float> splat (float 2.300000e+01), <2 x float> splat (float 4.200000e+01) ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <2 x i1> [[TMP2]], <2 x float> splat (float 2.300000e+01), <2 x float> splat (float 4.200000e+01) ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[PREDPHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x float> [[PREDPHI1]], [[WIDE_LOAD2]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 2 ; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[TMP7]], align 4 ; CHECK-NEXT: store <2 x float> [[TMP6]], ptr [[TMP8]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll index 1fe3962dfd072..6c63b823b7666 100644 --- a/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll @@ -130,8 +130,8 @@ define i32 @consecutive_ptr_reverse(ptr %a, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[VEC_PHI]], [[REVERSE]] @@ -177,8 +177,8 @@ define i32 @consecutive_ptr_reverse(ptr %a, i64 %n) { ; INTER-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; INTER-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[N]], [[INDEX]] ; INTER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]] -; INTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 -; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 +; INTER-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 0 +; INTER-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3 ; INTER-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 8 ; INTER-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; INTER-NEXT: [[TMP5]] = add <4 x i32> [[VEC_PHI]], 
[[REVERSE]] diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll index fb45745eff1cb..4737a56df2735 100644 --- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll @@ -19,7 +19,7 @@ define i8 @preserve_flags_when_cloning_trunc(i8 %start, ptr noalias %src, ptr no ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP2]] to <4 x i16> ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i64 4 ; CHECK-NEXT: store <4 x i16> [[TMP3]], ptr [[TMP4]], align 2 ; CHECK-NEXT: store <4 x i16> [[TMP3]], ptr [[TMP5]], align 2 ; CHECK-NEXT: [[TMP6]] = mul <4 x i8> [[VEC_PHI]], splat (i8 3) diff --git a/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll b/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll index 5d92c127aff93..901652537a5c5 100644 --- a/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll +++ b/llvm/test/Transforms/LoopVectorize/cse-gep-source-element-type.ll @@ -16,19 +16,19 @@ define void @cse_replicate_gep(ptr noalias %A, ptr noalias %B, ptr noalias %C, i ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP0]], i32 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP0]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP8]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP8]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i16>, ptr [[TMP8]], align 2 ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP3]], i32 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP3]], i64 4 ; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD]], ptr [[TMP3]], align 4 ; CHECK-NEXT: store <4 x i32> [[WIDE_LOAD1]], ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[C]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[TMP5]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[TMP5]], i64 4 ; CHECK-NEXT: store <4 x i16> [[WIDE_LOAD2]], ptr [[TMP5]], align 2 ; CHECK-NEXT: store <4 x i16> [[WIDE_LOAD3]], ptr [[TMP6]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -79,11 +79,11 @@ define void @cse_wide_gep(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %n ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[A]], <4 x i64> [[VEC_IND]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[A]], <4 x i64> [[STEP_ADD]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX1]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[TMP4]], i64 4 ; CHECK-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP4]], align 8 ; CHECK-NEXT: store <4 x ptr> [[TMP1]], ptr [[TMP5]], align 8 ; 
CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[C]], i64 [[INDEX1]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr ptr, ptr [[TMP6]], i32 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr ptr, ptr [[TMP6]], i64 4 ; CHECK-NEXT: store <4 x ptr> [[TMP2]], ptr [[TMP6]], align 8 ; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[TMP8]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll index 02e1d0e9e7004..6e5213568c735 100644 --- a/llvm/test/Transforms/LoopVectorize/dead_instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/dead_instructions.ll @@ -25,7 +25,7 @@ define i64 @dead_instructions_01(ptr %a, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8 ; CHECK-NEXT: [[TMP6]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] @@ -133,13 +133,13 @@ define void @dead_load_and_vector_pointer(ptr %a, ptr %b) { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 8, !alias.scope [[META5:![0-9]+]], !noalias [[META8:![0-9]+]] -; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP5]], align 8, !alias.scope [[META5]], !noalias [[META8]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP1]], align 8, !alias.scope [[META5]], !noalias [[META8]] ; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[WIDE_LOAD]], splat (i32 1) ; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i32> [[WIDE_LOAD2]], splat (i32 1) ; CHECK-NEXT: store <2 x i32> [[TMP6]], ptr [[TMP2]], align 4, !alias.scope [[META5]], !noalias [[META8]] -; CHECK-NEXT: store <2 x i32> [[TMP7]], ptr [[TMP5]], align 4, !alias.scope [[META5]], !noalias [[META8]] +; CHECK-NEXT: store <2 x i32> [[TMP7]], ptr [[TMP1]], align 4, !alias.scope [[META5]], !noalias [[META8]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128 ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll index 274bd043cd86b..c23d28cdd0f3a 100644 --- a/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll +++ b/llvm/test/Transforms/LoopVectorize/dont-fold-tail-for-const-TC.ll @@ -15,8 +15,8 @@ define dso_local void @constTC(ptr noalias nocapture %A) optsize { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr 
[[A:%.*]], i32 [[INDEX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 2 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 2 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 4 ; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP3]], align 1 ; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP7]], align 1 ; CHECK-NEXT: store <2 x i32> splat (i32 13), ptr [[TMP8]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/expand-scev-after-invoke.ll b/llvm/test/Transforms/LoopVectorize/expand-scev-after-invoke.ll index 4af9f4a13b62b..50e55f6051485 100644 --- a/llvm/test/Transforms/LoopVectorize/expand-scev-after-invoke.ll +++ b/llvm/test/Transforms/LoopVectorize/expand-scev-after-invoke.ll @@ -26,7 +26,7 @@ define void @test(ptr %dst) personality ptr null { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], [[TMP1]] ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 4 ; CHECK-NEXT: store <4 x i32> [[VEC_IND]], ptr [[TMP4]], align 8 ; CHECK-NEXT: store <4 x i32> [[STEP_ADD]], ptr [[TMP6]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/fcmp-uno-fold-interleave.ll b/llvm/test/Transforms/LoopVectorize/fcmp-uno-fold-interleave.ll index 22226a711bcf0..5edd83bd1e0d1 100644 --- a/llvm/test/Transforms/LoopVectorize/fcmp-uno-fold-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/fcmp-uno-fold-interleave.ll @@ -19,8 +19,8 @@ define float @fmaxnum(ptr %src, i64 %n) { ; IC3-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; IC3-NEXT: [[VEC_PHI2:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; IC3-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX]] -; IC3-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 4 -; IC3-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 8 +; IC3-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 4 +; IC3-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 8 ; IC3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; IC3-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; IC3-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 @@ -71,9 +71,9 @@ define float @fmaxnum(ptr %src, i64 %n) { ; IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX]] -; IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 4 -; IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 8 -; IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 12 +; IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr 
[[TMP0]], i64 4 +; IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 8 +; IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 12 ; IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 @@ -129,10 +129,10 @@ define float @fmaxnum(ptr %src, i64 %n) { ; IC5-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; IC5-NEXT: [[VEC_PHI4:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] ; IC5-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[INDEX]] -; IC5-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 4 -; IC5-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 8 -; IC5-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 12 -; IC5-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i32 16 +; IC5-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 4 +; IC5-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 8 +; IC5-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 12 +; IC5-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw float, ptr [[TMP0]], i64 16 ; IC5-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; IC5-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; IC5-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll index eca39e6f0b6ba..cf2e7ccd1b2f0 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-dead-instructions.ll @@ -98,7 +98,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) { ; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i16> [[TMP0]], [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP3]], i32 4 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP3]], i64 4 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP3]], align 4 ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 @@ -160,7 +160,7 @@ define void @sink_dead_inst(ptr %a) { ; CHECK-NEXT: [[TMP7:%.*]] = sub <4 x i16> [[TMP5]], splat (i16 10) ; CHECK-NEXT: [[TMP8:%.*]] = sub <4 x i16> [[TMP6]], splat (i16 10) ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[A]], i16 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP9]], i32 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i16, ptr [[TMP9]], i64 4 ; CHECK-NEXT: store <4 x i16> [[TMP7]], ptr [[TMP9]], align 2 ; CHECK-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP11]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index cebd52fa7f866..063f47ce2b32d 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ 
b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -33,7 +33,7 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP4]] -; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i32>, ptr [[TMP7]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> @@ -41,7 +41,7 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP8]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add <4 x i32> [[WIDE_LOAD1]], [[TMP9]] -; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP10]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP14]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -231,7 +231,7 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2]] = load <4 x i32>, ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> @@ -485,7 +485,7 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP4]], i64 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP4]], align 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP6]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> @@ -499,7 +499,7 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = fsub fast <4 x double> [[TMP9]], [[TMP13]] ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = fsub fast <4 x double> [[TMP10]], [[TMP14]] ; 
UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[TMP17]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP15]], ptr [[TMP17]], align 8 ; UNROLL-NO-IC-NEXT: store <4 x double> [[TMP16]], ptr [[TMP19]], align 8 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -1700,7 +1700,7 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i64 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP4]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> @@ -1712,7 +1712,7 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP7]] ; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = mul nsw <4 x i32> [[TMP10]], [[TMP8]] ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP11]], ptr [[TMP13]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP12]], ptr [[TMP15]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -1915,7 +1915,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP5]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP6]], i64 1 ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i16], ptr [[A]], i64 [[TMP7]], i64 1 -; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP8]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> splat (i32 7), ptr [[TMP18]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP9]], align 2 @@ -1943,7 +1943,7 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP37]] ; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = mul nsw <4 x i32> [[TMP40]], [[TMP38]] ; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] -; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, ptr [[TMP43]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP41]], ptr [[TMP43]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP42]], ptr [[TMP45]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -2146,7 
+2146,7 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 [[TMP1]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[TMP2]], i64 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD1]] = load <4 x i16>, ptr [[TMP4]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> @@ -2160,7 +2160,7 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP11]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = mul nsw <4 x i32> [[TMP10]], [[TMP12]] ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP15]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP17]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -2449,7 +2449,7 @@ define void @sink_dead_inst(ptr %a) { ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = sub <4 x i16> [[TMP6]], splat (i16 10) ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = sub <4 x i16> [[TMP7]], splat (i16 10) ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[A:%.*]], i16 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP10]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[TMP10]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP10]], align 2 ; UNROLL-NO-IC-NEXT: store <4 x i16> [[TMP9]], ptr [[TMP12]], align 2 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 @@ -3218,7 +3218,7 @@ define i32 @sink_after_dead_inst(ptr %A.ptr, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]] ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 4 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i64 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP6]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll index 0745f286b2608..0d9d28d079b92 100644 --- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll @@ -53,7 +53,7 @@ define float @fmaxnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ splat (float -1.000000e+07), %[[VECTOR_PH]] 
], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw float, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP7]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]) @@ -127,10 +127,10 @@ define float @test_fmax_and_fmin(ptr %src.0, ptr %src.1, i64 %n) { ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_0]], i64 [[IV]] ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds nuw float, ptr [[SRC_1]], i64 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_0]], i32 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_0]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[GEP_SRC_0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_1]], i32 4 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[GEP_SRC_1]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[GEP_SRC_1]], align 4 ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[TMP4]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VEC_PHI2]], <4 x float> [[WIDE_LOAD]]) diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll index 73a2203c3115b..eab9df558f608 100644 --- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll @@ -1648,8 +1648,8 @@ define i32 @fcmp_0_sub_select1(ptr noalias %x, i32 %N) nounwind readonly { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 -3 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[REVERSE]], zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll b/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll index 4e51d6e9ba708..dee377d61ba30 100644 --- a/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll +++ b/llvm/test/Transforms/LoopVectorize/induction-wrapflags.ll @@ -81,8 +81,8 @@ define i32 @induction_trunc_wrapflags(ptr %p) { ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 326, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[P]], i64 
[[OFFSET_IDX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 -3 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[VEC_IND]], <4 x i8> poison, <4 x i32> ; CHECK-NEXT: store <4 x i8> [[REVERSE]], ptr [[TMP2]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index bacb8eb10c755..b6fb378a042fd 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -155,7 +155,7 @@ define void @multi_int_induction(ptr %A, i32 %N) { ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP6]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -469,20 +469,20 @@ define void @scalar_use(ptr %a, float %b, i64 %offset, i64 %offset2, i64 %n) { ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], [[OFFSET]] ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]] -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP7]], align 4, !alias.scope [[META4:![0-9]+]], !noalias [[META7:![0-9]+]] -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x float>, ptr [[TMP9]], align 4, !alias.scope [[META4]], !noalias [[META7]] +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x float>, ptr [[TMP8]], align 4, !alias.scope [[META4]], !noalias [[META7]] ; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], [[OFFSET2]] ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP10]] -; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i64 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x float>, ptr [[TMP11]], align 4, !alias.scope [[META7]] -; UNROLL-NO-IC-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x float>, ptr [[TMP13]], align 4, !alias.scope [[META7]] +; UNROLL-NO-IC-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x float>, ptr [[TMP12]], align 4, !alias.scope [[META7]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD5]] ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD6]] ; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP14]] ; UNROLL-NO-IC-NEXT: [[TMP17:%.*]] = fadd fast <2 x float> [[WIDE_LOAD4]], [[TMP15]] ; UNROLL-NO-IC-NEXT: store <2 x float> [[TMP16]], ptr [[TMP7]], align 4, !alias.scope [[META4]], 
!noalias [[META7]] -; UNROLL-NO-IC-NEXT: store <2 x float> [[TMP17]], ptr [[TMP9]], align 4, !alias.scope [[META4]], !noalias [[META7]] +; UNROLL-NO-IC-NEXT: store <2 x float> [[TMP17]], ptr [[TMP8]], align 4, !alias.scope [[META4]], !noalias [[META7]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; UNROLL-NO-IC-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] @@ -743,7 +743,7 @@ define i64 @scalarize_induction_variable_01(ptr %a, i64 %n) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 8 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, ptr [[TMP3]], align 8 ; UNROLL-NO-IC-NEXT: [[TMP4]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]] @@ -2179,7 +2179,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP32:%.*]], [[PRED_UDIV_CONTINUE8]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP33:%.*]], [[PRED_UDIV_CONTINUE8]] ] ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 ; UNROLL-NO-IC-NEXT: br i1 [[C:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] @@ -3512,7 +3512,7 @@ define void @wrappingindvars1(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i8 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP13]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP15]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -3897,7 +3897,7 @@ define void @wrappingindvars2(i8 %t, i32 %len, ptr %A) { ; UNROLL-NO-IC-NEXT: [[DOTCAST4:%.*]] = trunc i32 [[INDEX]] to i8 ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i8 [[T]], [[DOTCAST4]] ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i8 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP14]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP16]], align 4 ; 
UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -4151,7 +4151,7 @@ define void @veciv(ptr nocapture %a, i32 %start, i32 %k) { ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -4373,7 +4373,7 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) { ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = trunc i64 [[INDEX]] to i32 ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP5]] -; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP7]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP9]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -4600,7 +4600,7 @@ define void @nonprimary(ptr nocapture %a, i32 %start, i32 %i, i32 %k) { ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; UNROLL-NO-IC-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]] ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[OFFSET_IDX]] -; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP2]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP4]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -4815,7 +4815,7 @@ define void @non_primary_iv_trunc(ptr %a, i64 %n) { ; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; UNROLL-NO-IC-NEXT: [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[VEC_IND]], ptr [[TMP1]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[STEP_ADD]], ptr [[TMP3]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -5774,7 +5774,7 @@ define void @pr52460_first_order_recurrence_truncated_iv(ptr noalias %src, ptr % ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add <2 x i32> [[VEC_IND]], [[TMP5]] ; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = add <2 x i32> [[STEP_ADD]], [[TMP6]] -; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP7]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP7]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP8]], ptr [[TMP7]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x 
i32> [[TMP9]], ptr [[TMP11]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -6131,7 +6131,7 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP21:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[INDEX]] -; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 2 +; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 2 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP20]], ptr [[TMP22]], align 4 ; UNROLL-NO-IC-NEXT: store <2 x i32> [[TMP21]], ptr [[TMP24]], align 4 ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll index 2c97bb7622740..ffe9da09ca680 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-with-i65-induction.ll @@ -19,10 +19,10 @@ define void @i65_induction_with_negative_step(ptr %dst) { ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[VEC_IND]], <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i64> [[VEC_IND]], <4 x i64> [[STEP_ADD]], <4 x i32> ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 -3 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 -4 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 -3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i64 -4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 -3 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> poison, <4 x i32> ; CHECK-NEXT: store <4 x i64> [[REVERSE]], ptr [[TMP6]], align 8 ; CHECK-NEXT: [[REVERSE1:%.*]] = shufflevector <4 x i64> [[TMP3]], <4 x i64> poison, <4 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll index 69d2aa4c620c1..2200a7d0431d2 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll @@ -16,8 +16,8 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 9223372036854775807), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]] ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] -; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3 +; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr 
[[TMP2]], align 8 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3) @@ -52,14 +52,14 @@ define i64 @select_decreasing_induction_icmp_const_start(ptr %a) { ; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4) ; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 19999, [[INDEX]] ; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] -; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3 -; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -4 -; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 -3 -; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -8 -; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 -3 -; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -12 -; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 -3 +; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0 +; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3 +; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -4 +; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 -3 +; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -8 +; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3 +; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -12 +; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 -3 ; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> ; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -172,8 +172,8 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) { ; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]] ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]] -; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 0 -; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 -3 +; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i64 0 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 1 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> poison, <4 x i32> ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i16> [[REVERSE]], [[BROADCAST_SPLAT]] @@ -498,8 +498,8 @@ define i16 @select_decreasing_induction_icmp_table_half(half noundef %val) { ; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16 ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]] ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]] -; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i32 0 -; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[TMP1]], i32 -3 +; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i64 0 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr 
[[TMP1]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x half>, ptr [[TMP2]], align 1 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x half> [[WIDE_LOAD]], <4 x half> poison, <4 x i32> ; IC1VF4-NEXT: [[TMP3:%.*]] = fcmp ugt <4 x half> [[REVERSE]], [[BROADCAST_SPLAT]] @@ -822,8 +822,8 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]] ; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] -; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3 +; IC1VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0 +; IC1VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3 ; IC1VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> ; IC1VF4-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i64> [[REVERSE]], splat (i64 3) @@ -858,14 +858,14 @@ define i64 @select_decreasing_induction_icmp_iv_unsigned(ptr %a) { ; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 -4) ; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 9223372036854775807, [[INDEX]] ; IC4VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[OFFSET_IDX]] -; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 -3 -; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -4 -; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 -3 -; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -8 -; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 -3 -; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 -12 -; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 -3 +; IC4VF4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 0 +; IC4VF4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 -3 +; IC4VF4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -4 +; IC4VF4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i64 -3 +; IC4VF4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -8 +; IC4VF4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 -3 +; IC4VF4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 -12 +; IC4VF4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i64 -3 ; IC4VF4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; IC4VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32> ; IC4VF4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll index 24c5602a580da..ba326e254f3cd 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-nested-loop.ll @@ -82,9 +82,9 @@ define i64 @select_iv_def_from_outer_loop(ptr %a, i64 %start, i64 %n) { ; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ 
zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 [[TMP1]] -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll index b991d58eb2b8d..45c2abd43c36a 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll @@ -85,9 +85,9 @@ define i32 @select_icmp_const_truncated_iv_widened_exit(ptr %a, i32 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -281,9 +281,9 @@ define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -424,9 +424,9 
@@ define i32 @select_fcmp_max_valid_const_ub(ptr %a) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 @@ -573,9 +573,9 @@ define i32 @select_icmp_truncated_unsigned_iv_range(ptr %a) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 2147483646, [[INDEX]] ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[OFFSET_IDX]] -; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll index 8d3bd267b9482..a071949f82062 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll @@ -69,9 +69,9 @@ define i64 @select_icmp_const_1(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -262,9 +262,9 @@ define 
i64 @select_icmp_const_2(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -455,9 +455,9 @@ define i64 @select_icmp_const_3_variable_rdx_start(ptr %a, i64 %rdx.start, i64 % ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 @@ -648,9 +648,9 @@ define i64 @select_fcmp_const_fast(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 @@ -841,9 +841,9 @@ define i64 @select_fcmp_const(ptr %a, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 @@ -1038,17 +1038,17 @@ define i64 @select_icmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 ; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 @@ -1257,17 +1257,17 @@ define i64 @select_fcmp(ptr %a, ptr %b, i64 %rdx.start, i64 %n) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: 
[[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x float>, ptr [[TMP5]], align 4 ; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP6]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP9]], align 4 @@ -1481,17 +1481,17 @@ define i64 @select_icmp_min_valid_iv_start(ptr %a, ptr %b, i64 %rdx.start, i64 % ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 ; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP6]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD10:%.*]] = load <4 x i64>, ptr [[TMP9]], align 8 @@ -1706,17 +1706,17 @@ define i64 @select_icmp_unsigned_iv_range(ptr %a, ptr %b, i64 %rdx.start) { ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], 
splat (i64 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4) ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 ; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] -; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 4 -; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 8 -; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 12 +; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 4 +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 8 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 12 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i64>, ptr [[TMP7]], align 8 ; CHECK-VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i64>, ptr [[TMP8]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 162803a377bc0..b3368a46f706d 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -1248,8 +1248,8 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] ; VEC: [[VECTOR_BODY]]: ; VEC-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[DST]], i64 3 -; VEC-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[TMP0]], i32 0 -; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i32 -1 +; VEC-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[TMP0]], i64 0 +; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i64 -1 ; VEC-NEXT: store <2 x i16> splat (i16 1), ptr [[TMP2]], align 2 ; VEC-NEXT: [[TMP5:%.*]] = add i64 1, -1 ; VEC-NEXT: [[IV_1_NEXT_LCSSA1:%.*]] = add i64 [[TMP5]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll index 4b3f370f4cdaf..8d3d0ff7a6406 100644 --- a/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll +++ b/llvm/test/Transforms/LoopVectorize/load-deref-pred-align.ll @@ -296,14 +296,14 @@ define void @test_rev_loops_deref_loads(ptr nocapture noundef writeonly %dest) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], 
i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr [1024 x i32], ptr [[LOCAL_SRC]], i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i32 -1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD1]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0 @@ -388,8 +388,8 @@ define void @test_rev_loops_non_deref_loads(ptr nocapture noundef writeonly %des ; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 -1) ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 -1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3) @@ -544,8 +544,8 @@ define void @test_rev_loops_strided_deref_loads(ptr nocapture noundef writeonly ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 511, [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1024 x i32], ptr [[LOCAL_CMP]], i64 0, i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 -1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x i32> [[WIDE_LOAD]], <2 x i32> poison, <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <2 x i32> [[REVERSE]], splat (i32 3) diff --git a/llvm/test/Transforms/LoopVectorize/metadata.ll b/llvm/test/Transforms/LoopVectorize/metadata.ll index ed027e8b9a895..fed5df2b65228 100644 --- a/llvm/test/Transforms/LoopVectorize/metadata.ll +++ b/llvm/test/Transforms/LoopVectorize/metadata.ll @@ -62,20 +62,20 @@ define void @fp_math(ptr nocapture %a, ptr noalias %b, i64 %size) { ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds float, ptr [[B]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0:![0-9]+]] -; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP3]], align 4, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP2]], align 4, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 9.900000e+01), !fpmath [[META3:![0-9]+]] ; INTERLEAVE-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[WIDE_LOAD1]], splat (double 9.900000e+01), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP6:%.*]] = fcmp oge <2 x double> [[TMP4]], splat (double 1.000000e+01) ; INTERLEAVE-NEXT: [[TMP7:%.*]] = fcmp oge <2 x double> [[TMP5]], splat (double 1.000000e+01) -; INTERLEAVE-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP6]], <2 x double> [[WIDE_LOAD]], <2 x double> zeroinitializer, !fpmath [[META3]] +; INTERLEAVE-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP6]], <2 x double> [[WIDE_LOAD]], <2 x double> zeroinitializer, !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x double> [[WIDE_LOAD1]], <2 x double> zeroinitializer, !fpmath [[META3]] -; INTERLEAVE-NEXT: [[TMP9:%.*]] = fptrunc <2 x double> [[TMP11]] to <2 x float>, !fpmath [[META3]] +; INTERLEAVE-NEXT: [[TMP9:%.*]] = fptrunc <2 x double> [[TMP12]] to <2 x float>, !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP10:%.*]] = fptrunc <2 x double> [[TMP8]] to <2 x float>, !fpmath [[META3]] -; INTERLEAVE-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 2 +; INTERLEAVE-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 2 ; INTERLEAVE-NEXT: store <2 x float> [[TMP9]], ptr [[TMP1]], align 4, !tbaa [[CHAR_TBAA0]] -; INTERLEAVE-NEXT: store <2 x float> [[TMP10]], ptr [[TMP13]], align 4, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: store <2 x float> [[TMP10]], ptr [[TMP11]], align 4, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -154,13 +154,13 @@ define void @widen_call_range(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[VECTOR_BODY]]: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4, !tbaa [[CHAR_TBAA0]] -; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP2]], align 4, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP1]], align 4, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x i64> @foo_vector_fixed2_nomask(<2 x i64> [[WIDE_LOAD]]) ; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x i64> @foo_vector_fixed2_nomask(<2 x i64> [[WIDE_LOAD1]]) ; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
i64, ptr [[TMP5]], i64 2 ; INTERLEAVE-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 4 ; INTERLEAVE-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP7]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -220,13 +220,13 @@ define void @widen_call_fpmath(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[VECTOR_BODY]]: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[CHAR_TBAA0]] -; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP2]], align 8, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP1]], align 8, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x double> @bar_vector_fixed2_nomask(<2 x double> [[WIDE_LOAD]]), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x double> @bar_vector_fixed2_nomask(<2 x double> [[WIDE_LOAD1]]), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i64 2 ; INTERLEAVE-NEXT: store <2 x double> [[TMP3]], ptr [[TMP5]], align 8 ; INTERLEAVE-NEXT: store <2 x double> [[TMP4]], ptr [[TMP7]], align 8 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -286,13 +286,13 @@ define void @widen_intrinsic(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[VECTOR_BODY]]: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 4 ; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP2]], align 4 ; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[WIDE_LOAD]], i1 true) ; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[WIDE_LOAD1]], i1 true) ; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i64 2 ; INTERLEAVE-NEXT: store <2 x i64> [[TMP3]], ptr [[TMP5]], align 4 ; INTERLEAVE-NEXT: store <2 x i64> [[TMP4]], ptr [[TMP7]], align 4 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -352,13 +352,13 @@ define void @widen_intrinsic_fpmath(ptr noalias %a, ptr readonly %b) { ; INTERLEAVE: [[VECTOR_BODY]]: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP0:%.*]] = getelementptr double, ptr [[B]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8, !tbaa [[CHAR_TBAA0]] -; INTERLEAVE-NEXT: 
[[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP2]], align 8, !tbaa [[CHAR_TBAA0]] +; INTERLEAVE-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x double>, ptr [[TMP1]], align 8, !tbaa [[CHAR_TBAA0]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[WIDE_LOAD]]), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.sin.v2f64(<2 x double> [[WIDE_LOAD1]]), !fpmath [[META3]] ; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i64 2 ; INTERLEAVE-NEXT: store <2 x double> [[TMP3]], ptr [[TMP5]], align 8 ; INTERLEAVE-NEXT: store <2 x double> [[TMP4]], ptr [[TMP7]], align 8 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -449,10 +449,10 @@ define void @unknown_metadata(ptr nocapture %a, ptr noalias %b, i64 %size) { ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], <2 x i64> [[VEC_IND]] ; INTERLEAVE-NEXT: [[TMP3:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], <2 x i64> [[STEP_ADD]] -; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 2 +; INTERLEAVE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 2 ; INTERLEAVE-NEXT: store <2 x i32> [[VEC_IND1]], ptr [[TMP3]], align 4 ; INTERLEAVE-NEXT: store <2 x i32> [[STEP_ADD3]], ptr [[TMP5]], align 4 -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 2 ; INTERLEAVE-NEXT: store <2 x ptr> [[TMP1]], ptr [[TMP0]], align 8 ; INTERLEAVE-NEXT: store <2 x ptr> [[TMP2]], ptr [[TMP7]], align 8 ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 @@ -582,7 +582,7 @@ define void @noalias_metadata(ptr align 8 %dst, ptr align 8 %src) { ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; INTERLEAVE-NEXT: [[TMP26:%.*]] = mul i64 [[INDEX]], 8 ; INTERLEAVE-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP26]] -; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 2 +; INTERLEAVE-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i64 2 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x ptr>, ptr [[TMP7]], align 8, !alias.scope [[META14:![0-9]+]] ; INTERLEAVE-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[WIDE_LOAD]], i32 1 ; INTERLEAVE-NEXT: store ptr [[TMP8]], ptr [[DST]], align 8, !alias.scope [[META17:![0-9]+]], !noalias [[META19:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll b/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll index 47a2a84b44601..120307629c82e 100644 --- a/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/minimumnum-maximumnum-reductions.ll @@ -14,7 +14,7 @@ define float @maximumnum_intrinsic(ptr readonly %x) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 2 +; 
CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3]] = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[VEC_PHI]], <2 x float> [[WIDE_LOAD]]) @@ -58,7 +58,7 @@ define float @maximumnum_intrinsic_fast(ptr readonly %x) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3]] = call fast <2 x float> @llvm.maximumnum.v2f32(<2 x float> [[VEC_PHI]], <2 x float> [[WIDE_LOAD]]) @@ -102,7 +102,7 @@ define float @minimumnum_intrinsic(ptr readonly %x) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3]] = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> [[VEC_PHI]], <2 x float> [[WIDE_LOAD]]) @@ -146,7 +146,7 @@ define float @minimumnum_intrinsic_fast(ptr readonly %x) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds float, ptr [[X]], i32 [[IV]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i32 2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[GEP]], i64 2 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3]] = call fast <2 x float> @llvm.minimumnum.v2f32(<2 x float> [[VEC_PHI]], <2 x float> [[WIDE_LOAD]]) diff --git a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll index 440309d246899..5385a83dfac65 100644 --- a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll +++ b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll @@ -197,10 +197,10 @@ define void @narrow_widen_store_user(i32 %x, ptr noalias %A, ptr noalias %B) { ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]] ; VF2IC2-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]] -; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP2]], i32 2 +; 
VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP2]], i64 2 ; VF2IC2-NEXT: store <2 x i32> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 4 ; VF2IC2-NEXT: store <2 x i32> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 4 -; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP3]], i32 2 +; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP3]], i64 2 ; VF2IC2-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP3]], align 4 ; VF2IC2-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP5]], align 4 ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll index 3bf5c0d1d13a9..1f1bbf7c27ada 100644 --- a/llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/nested-loops-scev-expansion.ll @@ -254,7 +254,7 @@ define void @pr52024(ptr %dst, i16 %N) { ; CHECK-NEXT: [[TMP10:%.*]] = zext <2 x i16> [[TMP8]] to <2 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = zext <2 x i16> [[TMP9]] to <2 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[DST]], i32 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 2 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i64 2 ; CHECK-NEXT: store <2 x i32> [[TMP10]], ptr [[TMP12]], align 4 ; CHECK-NEXT: store <2 x i32> [[TMP11]], ptr [[TMP13]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -350,7 +350,7 @@ define void @test_expand_secv_in_entry_before_gep(ptr %dst) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[OUTER_IV]], [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[GEP_M]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 2 ; CHECK-NEXT: store <2 x double> zeroinitializer, ptr [[TMP3]], align 8 ; CHECK-NEXT: store <2 x double> zeroinitializer, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll index 30d01e8b790a7..4fab2995f14bd 100644 --- a/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll +++ b/llvm/test/Transforms/LoopVectorize/noalias-scope-decl.ll @@ -11,14 +11,14 @@ define void @test1(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 4 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]]) ; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 4 +; CHECK-NEXT: 
[[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 4
 ; CHECK-NEXT: store <4 x float> [[TMP2]], ptr [[TMP4]], align 4
 ; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP5]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -78,14 +78,14 @@ define void @test2(ptr nocapture readonly %d) {
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META0]])
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 4
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
 ; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[WIDE_LOAD]], splat (float 1.000000e+00)
 ; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x float> [[WIDE_LOAD1]], splat (float 1.000000e+00)
 ; CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 4
 ; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP7]], align 4
 ; CHECK-NEXT: store <4 x float> [[TMP6]], ptr [[TMP8]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -165,13 +165,13 @@ define void @predicated_noalias_scope_decl(ptr noalias nocapture readonly %a, pt
 ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x float> splat (float 2.300000e+01), <4 x float> splat (float 4.200000e+01)
 ; CHECK-NEXT: [[PREDPHI1:%.*]] = select <4 x i1> [[TMP2]], <4 x float> splat (float 2.300000e+01), <4 x float> splat (float 4.200000e+01)
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 4
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 4
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
 ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[PREDPHI]], [[WIDE_LOAD]]
 ; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[PREDPHI1]], [[WIDE_LOAD2]]
 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i64 4
 ; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP7]], align 4
 ; CHECK-NEXT: store <4 x float> [[TMP6]], ptr [[TMP8]], align 4
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
index b5d74687dc808..1a1c05187590e 100644
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization.ll
@@ -173,8 +173,8 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n)
 ; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], [[N]]
 ; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i32 0
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 -3
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[TMP13]], i64 0
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i64 -3
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP15]], align 4
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x float> [[WIDE_LOAD]], <4 x float> poison, <4 x i32>
 ; CHECK-NEXT: [[TMP16:%.*]] = fadd fast <4 x float> [[REVERSE]], splat (float 1.000000e+00)
@@ -203,8 +203,8 @@ define dso_local signext i32 @f2(ptr noalias %A, ptr noalias %B, i32 signext %n)
 ; CHECK-NEXT: [[TMP23:%.*]] = add i32 [[TMP22]], [[N]]
 ; CHECK-NEXT: [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
 ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP24]]
-; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i32 0
-; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i32 -3
+; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, ptr [[TMP25]], i64 0
+; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds float, ptr [[TMP26]], i64 -3
 ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x float>, ptr [[TMP27]], align 4
 ; CHECK-NEXT: [[REVERSE10:%.*]] = shufflevector <4 x float> [[WIDE_LOAD9]], <4 x float> poison, <4 x i32>
 ; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <4 x float> [[REVERSE10]], splat (float 1.000000e+00)
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index 5c62ca3ff3d01..d96134e8adf1d 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -38,8 +38,8 @@ define void @a(ptr readnone %b) {
 ; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2
 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3
+; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -3
 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1
 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32>
 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <4 x i8> [[REVERSE]], zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/pr37248.ll b/llvm/test/Transforms/LoopVectorize/pr37248.ll
index 28d5ef552482b..33b3d263e634a 100644
--- a/llvm/test/Transforms/LoopVectorize/pr37248.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr37248.ll
@@ -56,8 +56,8 @@ define void @f1(ptr noalias %b, i1 %c, i32 %start) {
 ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]]
 ; CHECK: [[PRED_STORE_CONTINUE3]]:
 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP12]]
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 0
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP16]], i32 -1
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i64 0
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP16]], i64 -1
 ; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP17]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
@@ -119,8 +119,8 @@ define void @f2(ptr noalias %b, i1 %c, i32 %start) {
 ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[START]], [[INDEX]]
 ; CHECK-NEXT: [[TMP11:%.*]] = trunc i32 [[OFFSET_IDX]] to i16
 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP13]], i32 -1
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[TMP12]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP13]], i64 -1
 ; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP14]], align 1
 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
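All of the hunks above make the same mechanical update: the constant offsets used to address the extra unrolled parts, and the 0/-(VF-1) offsets of reversed accesses, are now emitted as i64 instead of i32. As a reading aid, here is a minimal hypothetical sketch of the new shape, assuming a 64-bit DataLayout where the index type for a plain ptr is i64; the function name and values are illustrative, not taken from these tests.

; Hypothetical reduced example: part 1 of a VF=4 unit-stride access is now
; addressed with an i64 constant offset (previously a hard-coded i32 4).
define <4 x float> @part1_addr(ptr %base, i64 %index) {
  %part0 = getelementptr inbounds float, ptr %base, i64 %index
  %part1 = getelementptr inbounds float, ptr %part0, i64 4
  %wide = load <4 x float>, ptr %part1, align 4
  ret <4 x float> %wide
}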
diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
index 565e203e68f72..3276528e54225 100644
--- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
@@ -125,7 +125,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
 ; IC2-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
 ; IC2-NEXT: [[TMP30:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP4]], i32 0
 ; IC2-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> [[TMP30]], ptr [[NEXT_GEP5]], i32 1
-; IC2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 2
+; IC2-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 2
 ; IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[NEXT_GEP]], align 1
 ; IC2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i8>, ptr [[TMP6]], align 1
 ; IC2-NEXT: [[TMP13:%.*]] = icmp eq <2 x i8> [[WIDE_LOAD]], splat (i8 -12)
@@ -344,21 +344,21 @@ define void @switch_to_header(ptr %start) {
 ; IC1-NEXT: [[ENTRY:.*]]:
 ; IC1-NEXT: br label %[[LOOP_HEADER:.*]]
 ; IC1: [[LOOP_HEADER]]:
-; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ]
+; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ]
 ; IC1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
 ; IC1-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [
-; IC1-NEXT: i64 120, label %[[IF_THEN1]]
+; IC1-NEXT: i64 120, label %[[IF_THEN]]
 ; IC1-NEXT: i64 100, label %[[LOOP_LATCH]]
 ; IC1-NEXT: ]
-; IC1: [[IF_THEN1]]:
+; IC1: [[IF_THEN]]:
 ; IC1-NEXT: br label %[[LOOP_HEADER]]
-; IC1: [[IF_THEN:.*:]]
+; IC1: [[IF_THEN1:.*:]]
 ; IC1-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison
 ; IC1-NEXT: store i64 42, ptr [[GEP]], align 1
 ; IC1-NEXT: unreachable
 ; IC1: [[LOOP_LATCH]]:
 ; IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100
-; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]]
+; IC1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]]
 ; IC1: [[EXIT]]:
 ; IC1-NEXT: ret void
 ;
@@ -367,21 +367,21 @@ define void @switch_to_header(ptr %start) {
 ; IC2-NEXT: [[ENTRY:.*]]:
 ; IC2-NEXT: br label %[[LOOP_HEADER:.*]]
 ; IC2: [[LOOP_HEADER]]:
-; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN1:.*]] ]
+; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[IF_THEN:.*]] ]
 ; IC2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
 ; IC2-NEXT: switch i64 [[IV]], label %[[LOOP_LATCH:.*]] [
-; IC2-NEXT: i64 120, label %[[IF_THEN1]]
+; IC2-NEXT: i64 120, label %[[IF_THEN]]
 ; IC2-NEXT: i64 100, label %[[LOOP_LATCH]]
 ; IC2-NEXT: ]
-; IC2: [[IF_THEN1]]:
+; IC2: [[IF_THEN]]:
 ; IC2-NEXT: br label %[[LOOP_HEADER]]
-; IC2: [[IF_THEN:.*:]]
+; IC2: [[IF_THEN1:.*:]]
 ; IC2-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 poison
 ; IC2-NEXT: store i64 42, ptr [[GEP]], align 1
 ; IC2-NEXT: unreachable
 ; IC2: [[LOOP_LATCH]]:
 ; IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100
-; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN1]]
+; IC2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[IF_THEN]]
 ; IC2: [[EXIT]]:
 ; IC2-NEXT: ret void
 ;
@@ -437,7 +437,7 @@ define void @switch_all_to_default(ptr %start) {
 ; IC2: [[VECTOR_BODY]]:
 ; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[START]], i64 [[INDEX]]
-; IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
+; IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i64 2
 ; IC2-NEXT: store <2 x i64> splat (i64 42), ptr [[TMP2]], align 1
 ; IC2-NEXT: store <2 x i64> splat (i64 42), ptr [[TMP5]], align 1
 ; IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -497,7 +497,7 @@ define void @switch_unconditional(ptr %start) {
 ; IC2: [[VECTOR_BODY]]:
 ; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; IC2-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[START]], i64 [[INDEX]]
-; IC2-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP2]], i32 2
+; IC2-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP2]], i64 2
 ; IC2-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP2]], align 4
 ; IC2-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP1]], align 4
 ; IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
index fafa82c211dc6..43dede0b612f3 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll
@@ -37,7 +37,7 @@ define i32 @reduction_sum_single(ptr noalias nocapture %A) {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
@@ -115,11 +115,11 @@ define i32 @reduction_sum(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]])
@@ -203,7 +203,7 @@ define i32 @reduction_sum_const(ptr noalias nocapture %A) {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
@@ -286,11 +286,11 @@ define i32 @reduction_prod(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[VEC_IND]])
@@ -381,11 +381,11 @@ define i32 @reduction_mix(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]])
@@ -469,11 +469,11 @@ define i32 @reduction_mul(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 19, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 1, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[WIDE_LOAD]])
@@ -551,11 +551,11 @@ define i32 @start_at_non_zero(ptr nocapture %in, ptr nocapture %coeff, ptr nocap
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 120, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[COEFF]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]]
@@ -632,11 +632,11 @@ define i32 @reduction_and(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -1, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ -1, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[WIDE_LOAD]])
@@ -714,11 +714,11 @@ define i32 @reduction_or(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]]
@@ -794,11 +794,11 @@ define i32 @reduction_xor(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD3]], [[WIDE_LOAD]]
@@ -875,11 +875,11 @@ define float @reduction_fadd(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[WIDE_LOAD]])
@@ -958,11 +958,11 @@ define float @reduction_fmul(ptr nocapture %A, ptr nocapture %B) {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi float [ 1.000000e+00, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[WIDE_LOAD]])
@@ -1038,7 +1038,7 @@ define i32 @reduction_sub_lhs(ptr noalias nocapture %A) {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 3, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[WIDE_LOAD]])
@@ -1122,11 +1122,11 @@ define float @reduction_conditional(ptr %A, ptr %B, ptr %C, float %S) {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ [[TMP0]], %[[VECTOR_PH]] ], [ [[PREDPHI6:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI9:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]]
@@ -1207,11 +1207,11 @@ for.end:
 define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocapture %B) {
 ; CHECK-LABEL: define i32 @reduction_sum_multiuse(
 ; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) {
-; CHECK-NEXT: [[_LR_PH:.*]]:
+; CHECK-NEXT: [[_LR_PH1:.*]]:
 ; CHECK-NEXT: br label %[[DOTLR_PH:.*]]
-; CHECK: [[_LR_PH1:.*:]]
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ]
-; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ]
+; CHECK: [[_LR_PH:.*:]]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ]
+; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ]
 ; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4
 ; CHECK-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
@@ -1231,11 +1231,11 @@ define i32 @reduction_sum_multiuse(ptr noalias nocapture %A, ptr noalias nocaptu
 ;
 ; CHECK-INTERLEAVED-LABEL: define i32 @reduction_sum_multiuse(
 ; CHECK-INTERLEAVED-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias captures(none) [[B:%.*]]) {
-; CHECK-INTERLEAVED-NEXT: [[_LR_PH:.*]]:
+; CHECK-INTERLEAVED-NEXT: [[_LR_PH1:.*]]:
 ; CHECK-INTERLEAVED-NEXT: br label %[[DOTLR_PH:.*]]
-; CHECK-INTERLEAVED: [[_LR_PH1:.*:]]
-; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ]
-; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH]] ]
+; CHECK-INTERLEAVED: [[_LR_PH:.*:]]
+; CHECK-INTERLEAVED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ]
+; CHECK-INTERLEAVED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], %[[DOTLR_PH]] ], [ 0, %[[_LR_PH1]] ]
 ; CHECK-INTERLEAVED-NEXT: [[L2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]]
 ; CHECK-INTERLEAVED-NEXT: [[L3:%.*]] = load i32, ptr [[L2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[L4:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]]
@@ -1322,11 +1322,11 @@ define i32 @reduction_predicated(ptr noalias nocapture %A, ptr noalias nocapture
 ; CHECK-INTERLEAVED-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[VEC_IND]])
@@ -1416,7 +1416,7 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255)
 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI1]], splat (i32 255)
 ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32>
@@ -1502,7 +1502,7 @@ define i8 @reduction_and_trunc(ptr noalias nocapture %A) {
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = and <4 x i32> [[VEC_PHI]], splat (i32 255)
 ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI1]], splat (i32 255)
 ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32>
@@ -1606,11 +1606,11 @@ define float @reduction_fmuladd(ptr %a, ptr %b, i64 %n) {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi float [ -0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = fmul <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]]
@@ -1856,11 +1856,11 @@ define float @reduction_fmuladd_blend(ptr %a, ptr %b, i64 %n, i1 %c) {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi float [ -0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD3]]
@@ -2005,7 +2005,7 @@ define i32 @predicated_not_dominates_reduction(ptr nocapture noundef readonly %h
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[H]], i32 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], zeroinitializer
@@ -2162,7 +2162,7 @@ define i32 @predicated_not_dominates_reduction_twoadd(ptr nocapture noundef read
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[H]], i32 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP27:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], zeroinitializer
@@ -2268,10 +2268,10 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
 ; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 2
 ; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 3
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP1]], i32 1
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP3]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]], i32 1
 ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP4]], align 4
 ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP6]], align 4
 ; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP8]], align 4
@@ -2284,7 +2284,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP19]], i32 0
 ; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
 ; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP0]]
+; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP0]]
 ; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> poison, i32 [[TMP23]], i32 0
 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
@@ -2293,7 +2293,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP19]], i32 1
 ; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]]
 ; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP1]]
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]]
 ; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
 ; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x i32> [[TMP25]], i32 [[TMP29]], i32 1
 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
@@ -2302,7 +2302,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[TMP19]], i32 2
 ; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]]
 ; CHECK: [[PRED_LOAD_IF3]]:
-; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP2]]
+; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]]
 ; CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP34]], align 4
 ; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP35]], i32 2
 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]]
@@ -2311,7 +2311,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x i1> [[TMP19]], i32 3
 ; CHECK-NEXT: br i1 [[TMP38]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6]]
 ; CHECK: [[PRED_LOAD_IF5]]:
-; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP3]]
+; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]]
 ; CHECK-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
 ; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP37]], i32 [[TMP41]], i32 3
 ; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]]
@@ -2350,14 +2350,14 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 5
 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 6
 ; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 7
-; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP0]], i32 1
-; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP1]], i32 1
-; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP2]], i32 1
-; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP3]], i32 1
-; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP4]], i32 1
-; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP5]], i32 1
-; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP6]], i32 1
-; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP7]], i32 1
+; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds [0 x [[STRUCT_E:%.*]]], ptr [[B]], i32 0, i32 [[TMP0]], i32 1
+; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]], i32 1
+; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]], i32 1
+; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]], i32 1
+; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP4]], i32 1
+; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP5]], i32 1
+; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP6]], i32 1
+; CHECK-INTERLEAVED-NEXT: [[TMP22:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP7]], i32 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP8]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP10]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP12]], align 4
@@ -2379,7 +2379,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-INTERLEAVED-NEXT: [[TMP41:%.*]] = extractelement <4 x i1> [[TMP39]], i32 0
 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP41]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
 ; CHECK-INTERLEAVED: [[PRED_LOAD_IF]]:
-; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP0]]
+; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP0]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP45:%.*]] = insertelement <4 x i32> poison, i32 [[TMP44]], i32 0
 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE]]
@@ -2388,7 +2388,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-INTERLEAVED-NEXT: [[TMP47:%.*]] = extractelement <4 x i1> [[TMP39]], i32 1
 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP47]], label %[[PRED_LOAD_IF2:.*]], label %[[PRED_LOAD_CONTINUE3:.*]]
 ; CHECK-INTERLEAVED: [[PRED_LOAD_IF2]]:
-; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP1]]
+; CHECK-INTERLEAVED-NEXT: [[TMP49:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP1]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP50:%.*]] = load i32, ptr [[TMP49]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP51:%.*]] = insertelement <4 x i32> [[TMP46]], i32 [[TMP50]], i32 1
 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE3]]
@@ -2397,7 +2397,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-INTERLEAVED-NEXT: [[TMP53:%.*]] = extractelement <4 x i1> [[TMP39]], i32 2
 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP53]], label %[[PRED_LOAD_IF4:.*]], label %[[PRED_LOAD_CONTINUE5:.*]]
 ; CHECK-INTERLEAVED: [[PRED_LOAD_IF4]]:
-; CHECK-INTERLEAVED-NEXT: [[TMP55:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP2]]
+; CHECK-INTERLEAVED-NEXT: [[TMP55:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP2]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP56:%.*]] = load i32, ptr [[TMP55]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP57:%.*]] = insertelement <4 x i32> [[TMP52]], i32 [[TMP56]], i32 2
 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE5]]
@@ -2406,7 +2406,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-INTERLEAVED-NEXT: [[TMP59:%.*]] = extractelement <4 x i1> [[TMP39]], i32 3
 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP59]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]]
 ; CHECK-INTERLEAVED: [[PRED_LOAD_IF6]]:
-; CHECK-INTERLEAVED-NEXT: [[TMP61:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP3]]
+; CHECK-INTERLEAVED-NEXT: [[TMP61:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP3]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP62:%.*]] = load i32, ptr [[TMP61]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP63:%.*]] = insertelement <4 x i32> [[TMP58]], i32 [[TMP62]], i32 3
 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE7]]
@@ -2415,7 +2415,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-INTERLEAVED-NEXT: [[TMP65:%.*]] = extractelement <4 x i1> [[TMP40]], i32 0
 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP65]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]]
 ; CHECK-INTERLEAVED: [[PRED_LOAD_IF8]]:
-; CHECK-INTERLEAVED-NEXT: [[TMP67:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP4]]
+; CHECK-INTERLEAVED-NEXT: [[TMP67:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP4]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP68:%.*]] = load i32, ptr [[TMP67]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP69:%.*]] = insertelement <4 x i32> poison, i32 [[TMP68]], i32 0
 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE9]]
@@ -2424,7 +2424,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-INTERLEAVED-NEXT: [[TMP71:%.*]] = extractelement <4 x i1> [[TMP40]], i32 1
 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP71]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11:.*]]
 ; CHECK-INTERLEAVED: [[PRED_LOAD_IF10]]:
-; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP5]]
+; CHECK-INTERLEAVED-NEXT: [[TMP73:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP5]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP74:%.*]] = load i32, ptr [[TMP73]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP75:%.*]] = insertelement <4 x i32> [[TMP70]], i32 [[TMP74]], i32 1
 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE11]]
@@ -2433,7 +2433,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-INTERLEAVED-NEXT: [[TMP77:%.*]] = extractelement <4 x i1> [[TMP40]], i32 2
 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP77]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]]
 ; CHECK-INTERLEAVED: [[PRED_LOAD_IF12]]:
-; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP6]]
+; CHECK-INTERLEAVED-NEXT: [[TMP79:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP6]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP79]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP81:%.*]] = insertelement <4 x i32> [[TMP76]], i32 [[TMP80]], i32 2
 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE13]]
@@ -2442,7 +2442,7 @@ define i32 @predicated_or_dominates_reduction(ptr %b) {
 ; CHECK-INTERLEAVED-NEXT: [[TMP83:%.*]] = extractelement <4 x i1> [[TMP40]], i32 3
 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP83]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15]]
 ; CHECK-INTERLEAVED: [[PRED_LOAD_IF14]]:
-; CHECK-INTERLEAVED-NEXT: [[TMP85:%.*]] = getelementptr inbounds [0 x %struct.e], ptr [[B]], i32 0, i32 [[TMP7]]
+; CHECK-INTERLEAVED-NEXT: [[TMP85:%.*]] = getelementptr inbounds [0 x [[STRUCT_E]]], ptr [[B]], i32 0, i32 [[TMP7]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP87:%.*]] = insertelement <4 x i32> [[TMP82]], i32 [[TMP86]], i32 3
 ; CHECK-INTERLEAVED-NEXT: br label %[[PRED_LOAD_CONTINUE15]]
@@ -2543,10 +2543,10 @@ define i32 @reduction_add_sub(ptr noalias nocapture %A, ptr noalias nocapture %B
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sub <4 x i32> zeroinitializer, [[WIDE_LOAD3]]
@@ -2630,10 +2630,10 @@ define i32 @reduction_sub_add(ptr noalias nocapture %A, ptr noalias nocapture %B
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]]
 ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
-; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sub <4 x i32> zeroinitializer, [[WIDE_LOAD]]
@@ -2739,7 +2739,7 @@ define i64 @reduction_expression_same_operands(ptr nocapture readonly %x, ptr no
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP1]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i16>, ptr [[TMP2]], align 4
 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i64>
@@ -2840,7 +2840,7 @@ define i32 @reduction_expression_ext_mulacc_livein(ptr %a, i16 %c) {
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
-; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 4
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
 ; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i16>
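The interleave-count tests below repeat the pattern for every unrolled part: with VF = 4, part k is addressed at a constant offset of k * 4 elements, and those constants now use i64. A hypothetical reduced example for UF = 3, again with illustrative names only, not taken from the tests:

; Illustrative only: the three part pointers of a VF=4, UF=3 interleaved
; load; the 4- and 8-element offsets are now i64 (previously i32).
define void @three_parts(ptr %A, i64 %iv) {
  %gep0 = getelementptr inbounds i32, ptr %A, i64 %iv
  %gep1 = getelementptr inbounds i32, ptr %gep0, i64 4
  %gep2 = getelementptr inbounds i32, ptr %gep0, i64 8
  %l0 = load <4 x i32>, ptr %gep0, align 4
  %l1 = load <4 x i32>, ptr %gep1, align 4
  %l2 = load <4 x i32>, ptr %gep2, align 4
  ret void
}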
[[L_GEP2:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 8 +; UF3-NEXT: [[L_GEP1:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 4 +; UF3-NEXT: [[L_GEP2:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 8 ; UF3-NEXT: [[L0:%.+]] = load <4 x i32>, ptr [[GEP0]], align 4 ; UF3-NEXT: [[L1:%.+]] = load <4 x i32>, ptr [[L_GEP1]], align 4 ; UF3-NEXT: [[L2:%.+]] = load <4 x i32>, ptr [[L_GEP2]], align 4 @@ -34,10 +34,10 @@ define i32 @reduction_sum(i64 %n, ptr noalias nocapture %A) { ; UF5-NEXT: [[SUM3:%.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[SUM3_NEXT:%.+]], %vector.body ] ; UF5-NEXT: [[SUM4:%.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[SUM4_NEXT:%.+]], %vector.body ] ; UF5-NEXT: [[GEP0:%.+]] = getelementptr inbounds i32, ptr %A, i64 [[IV]] -; UF5-NEXT: [[L_GEP1:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 4 -; UF5-NEXT: [[L_GEP2:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 8 -; UF5-NEXT: [[L_GEP3:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 12 -; UF5-NEXT: [[L_GEP4:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i32 16 +; UF5-NEXT: [[L_GEP1:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 4 +; UF5-NEXT: [[L_GEP2:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 8 +; UF5-NEXT: [[L_GEP3:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 12 +; UF5-NEXT: [[L_GEP4:%.+]] = getelementptr inbounds i32, ptr [[GEP0]], i64 16 ; UF5-NEXT: [[L0:%.+]] = load <4 x i32>, ptr [[GEP0]], align 4 ; UF5-NEXT: [[L1:%.+]] = load <4 x i32>, ptr [[L_GEP1]], align 4 ; UF5-NEXT: [[L2:%.+]] = load <4 x i32>, ptr [[L_GEP2]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll index faca86a41b023..a2649053680d2 100644 --- a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll +++ b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll @@ -36,8 +36,8 @@ define void @reuse_lcssa_phi_for_add_rec1(ptr %head) { ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[IV_LCSSA]], [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr ptr, ptr [[SRC_2]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP6]], i32 -1 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP6]], i64 -1 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x ptr>, ptr [[TMP7]], align 8 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <2 x ptr> [[WIDE_LOAD]], <2 x ptr> poison, <2 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[REVERSE]], i32 0 diff --git a/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll b/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll index 0896848905c6c..71c75e52d4050 100644 --- a/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse-induction-gep-nowrap-flags.ll @@ -14,8 +14,8 @@ define i32 @preserve_inbounds(i64 %start, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds i32, ptr [[TMP2]], i32 -3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]] @@ -60,8 +60,8 @@ define i32 @preserve_nusw(i64 %start, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i32, ptr [[PTR]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr nusw i32, ptr [[TMP2]], i32 -3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr nusw i32, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr nusw i32, ptr [[TMP2]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]] @@ -106,8 +106,8 @@ define i32 @drop_nuw(i64 %start, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[START]], [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nuw i32, ptr [[PTR]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 -3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[REVERSE]], [[VEC_PHI]] diff --git a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll index 31129d3bcc2f4..d3e291e4f3ed2 100644 --- a/llvm/test/Transforms/LoopVectorize/reverse_induction.ll +++ b/llvm/test/Transforms/LoopVectorize/reverse_induction.ll @@ -20,10 +20,10 @@ define i32 @reverse_induction_i64(i64 %startval, ptr %ptr) { ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[INDEX]] ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -1 ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -4 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -3 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -4 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 -3 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4 @@ -74,10 +74,10 @@ define i32 @reverse_induction_i128(i128 %startval, ptr %ptr) { 
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i128 [[STARTVAL]], [[INDEX]]
; CHECK-NEXT: [[TMP3:%.*]] = add i128 [[OFFSET_IDX]], -1
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i128 [[TMP3]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -3
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -4
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 -3
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -4
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i64 -3
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32>
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP9]], align 4
@@ -134,10 +134,10 @@ define i32 @reverse_induction_i16(i16 %startval, ptr %ptr) {
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i16 [[STARTVAL]], [[DOTCAST]]
; CHECK-NEXT: [[TMP7:%.*]] = add i16 [[OFFSET_IDX]], -1
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i16 [[TMP7]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -3
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 -4
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 -3
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 -3
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 -4
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP11]], align 4
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32>
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4
@@ -221,10 +221,10 @@ define void @reverse_forward_induction_i64_i8() {
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -4
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -3
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -4
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -3
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32>
; CHECK-NEXT: store <4 x i32> [[REVERSE]], ptr [[TMP9]], align 4
; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32>
@@ -274,10 +274,10 @@ define void @reverse_forward_induction_i64_i8_signed() {
; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i8> [[TMP2]] to <4 x i32>
; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i8> [[TMP3]] to <4 x i32>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @a, i64 0, i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 -3
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 -4
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -3
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -3
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -4
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 -3
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> poison, <4 x i32>
; CHECK-NEXT: store <4 x i32> [[REVERSE]], ptr [[TMP9]], align 4
; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32>
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll
index 830bd92c70b16..5a1844ac450e7 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-known-true.ll
@@ -39,12 +39,12 @@ define void @test_runtime_check_known_false_after_construction(ptr %start.1, ptr
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], -8
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[START_2_DIFF]], i64 [[OFFSET_IDX2]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[NEXT_GEP3]], i32 0
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[TMP13]], i32 -3
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[NEXT_GEP3]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[TMP13]], i64 -3
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP14]], align 8
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i64> [[WIDE_LOAD]], <4 x i64> poison, <4 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP15]], i32 -3
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 0
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP15]], i64 -3
; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i64> [[REVERSE]], <4 x i64> poison, <4 x i32>
; CHECK-NEXT: store <4 x i64> [[REVERSE4]], ptr [[TMP16]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll
index 1c16ef1114371..af272955abbd2 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-check-needed-but-empty.ll
@@ -99,8 +99,8 @@ define void @diff_memcheck_known_false_for_vf_4(ptr %B, ptr %A, ptr %end) {
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], -8
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i32 -3
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[NEXT_GEP]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[TMP7]], i64 -3
; CHECK-NEXT: store <4 x i64> zeroinitializer, ptr [[TMP8]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll
index 25f40be238338..5be2b09a504c0 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll
@@ -996,14 +996,14 @@ define void @decreasing_inner_iv(ptr nocapture noundef %dst, ptr nocapture nound
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[TMP0]], [[INDEX]]
; CHECK-NEXT: [[TMP21:%.*]] = add nsw i64 [[OFFSET_IDX]], [[TMP16]]
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP21]]
-; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i32 0
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i32 -3
+; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[TMP22]], i64 0
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 -3
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META46:![0-9]+]]
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32>
; CHECK-NEXT: [[TMP25:%.*]] = add nsw i64 [[OFFSET_IDX]], [[TMP17]]
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP25]]
-; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i32 0
-; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i32 -3
+; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[TMP26]], i64 0
+; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP27]], i64 -3
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP28]], align 4, !alias.scope [[META49:![0-9]+]], !noalias [[META46]]
; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD3]], <4 x i32> poison, <4 x i32>
; CHECK-NEXT: [[TMP29:%.*]] = add nsw <4 x i32> [[REVERSE4]], [[REVERSE]]
diff --git a/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll b/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll
index 7811b17f1b7e1..306bcf336e5af 100644
--- a/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/scalar_after_vectorization.ll
@@ -24,7 +24,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; NO-IC: %[[T4:.+]] = add nuw nsw i64 [[OFFSET_IDX]], %tmp0
; NO-IC: %[[T6:.+]] = sub nsw i64 %[[T4]], %x
; NO-IC: %[[T8:.+]] = getelementptr inbounds i32, ptr %a, i64 %[[T6]]
-; NO-IC: %[[T12:.+]] = getelementptr inbounds i32, ptr %[[T8]], i32 4
+; NO-IC: %[[T12:.+]] = getelementptr inbounds i32, ptr %[[T8]], i64 4
; NO-IC: load <4 x i32>, ptr %[[T8]], align 4
; NO-IC: load <4 x i32>, ptr %[[T12]], align 4
; NO-IC: br {{.*}}, label %middle.block, label %vector.body
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
index 64e12cc8c9cb8..fa03c62bb4927 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp-multiuse.ll
@@ -88,7 +88,7 @@ define i32 @multi_user_cmp(ptr readonly %a, i64 noundef %n) {
; CHECK-VF4-IC2-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
; CHECK-VF4-IC2-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; CHECK-VF4-IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4
+; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4
; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-VF4-IC2-NEXT: [[TMP6:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
@@ -307,7 +307,7 @@ define i32 @multi_user_cmp_int(ptr readonly %a, i64 noundef %n) {
; CHECK-VF4-IC2-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
; CHECK-VF4-IC2-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; CHECK-VF4-IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 4
+; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 4
; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
; CHECK-VF4-IC2-NEXT: [[TMP6:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], zeroinitializer
@@ -593,9 +593,9 @@ define i32 @multi_user_cmp_branch_use(ptr readonly %a, ptr %b, i64 noundef %n) {
; CHECK-VF4-IC2-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE19]] ]
; CHECK-VF4-IC2-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[PRED_STORE_CONTINUE19]] ]
; CHECK-VF4-IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-VF4-IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 4
+; CHECK-VF4-IC2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 4
; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP3]], align 4, !alias.scope [[META6:![0-9]+]]
-; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP6]], align 4, !alias.scope [[META6]]
+; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4, !alias.scope [[META6]]
; CHECK-VF4-IC2-NEXT: [[TMP7:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
; CHECK-VF4-IC2-NEXT: [[TMP8:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD5]], zeroinitializer
; CHECK-VF4-IC2-NEXT: [[TMP9]] = or <4 x i1> [[VEC_PHI3]], [[TMP7]]
@@ -947,7 +947,7 @@ define i32 @multi_user_cmp_branch_use_and_outside_bb_use(ptr readonly %a, i64 no
; CHECK-VF4-IC2-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
; CHECK-VF4-IC2-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; CHECK-VF4-IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 4
+; CHECK-VF4-IC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4
; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-VF4-IC2-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-VF4-IC2-NEXT: [[TMP6:%.*]] = fcmp olt <4 x float> [[WIDE_LOAD]], zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
index e4922d3e4f627..2b352abe9f7a1 100644
--- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
@@ -63,9 +63,9 @@ define i32 @select_const_i32_from_icmp(ptr %v, i64 %n) {
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]]
-; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4
-; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8
-; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12
+; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8
+; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
@@ -249,9 +249,9 @@ define i32 @select_const_i32_from_icmp2(ptr %v, i64 %n) {
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]]
-; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4
-; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8
-; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12
+; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8
+; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
@@ -435,9 +435,9 @@ define i32 @select_i32_from_icmp(ptr %v, i32 %a, i32 %b, i64 %n) {
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]]
-; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4
-; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 8
-; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 12
+; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 4
+; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 8
+; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
@@ -621,9 +621,9 @@ define i32 @select_const_i32_from_fcmp_fast(ptr %v, i64 %n) {
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[INDEX]]
-; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4
-; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8
-; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12
+; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4
+; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
+; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
@@ -807,9 +807,9 @@ define i32 @select_const_i32_from_fcmp(ptr %v, i64 %n) {
; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP17:%.*]], %[[VECTOR_BODY]] ]
; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[INDEX]]
-; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 4
-; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 8
-; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 12
+; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 4
+; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 8
+; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 12
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP3]], align 4
; CHECK-VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll
index d8e62c7b3b8d4..2183c520d9e81 100644
--- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave-hint.ll
@@ -14,9 +14,9 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() {
; VF4IC4: [[VECTOR_BODY]]:
; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]]
-; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
-; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8
-; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 12
+; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
+; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 8
+; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 12
; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP12]], align 4
diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll
index 053863117bdc8..bdf73d6a52c22 100644
--- a/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-interleave.ll
@@ -14,9 +14,9 @@ define i64 @multi_exiting_to_different_exits_live_in_exit_values() {
; VF4IC4: vector.body:
; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]]
-; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
-; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8
-; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 12
+; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 4
+; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 8
+; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 12
; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
@@ -88,17 +88,17 @@ define i64 @same_exit_block_pre_inc_use1() {
; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]]
; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4
-; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8
-; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12
+; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
+; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12
; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 4
-; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 8
-; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 12
+; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 4
+; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 8
+; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 12
; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1
@@ -183,9 +183,9 @@ define ptr @same_exit_block_pre_inc_use1_ivptr() {
; VF4IC4: vector.body:
; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4IC4-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[P1]], i64 [[INDEX]]
-; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 4
-; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8
-; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 12
+; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 4
+; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 8
+; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i64 12
; VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[NEXT_GEP]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
@@ -268,17 +268,17 @@ define i64 @same_exit_block_post_inc_use() {
; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]]
; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4
-; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8
-; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12
+; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
+; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12
; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 4
-; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 8
-; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 12
+; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 4
+; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 8
+; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 12
; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1
@@ -365,17 +365,17 @@ define i64 @diff_exit_block_pre_inc_use1() {
; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]]
; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4
-; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8
-; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12
+; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
+; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12
; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 4
-; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 8
-; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 12
+; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 4
+; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 8
+; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 12
; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1
@@ -467,17 +467,17 @@ define i64 @diff_exit_block_post_inc_use1() {
; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]]
; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4
-; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8
-; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12
+; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
+; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12
; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 4
-; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 8
-; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 12
+; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 4
+; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 8
+; VF4IC4-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 12
; VF4IC4-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP18]], align 1
@@ -569,14 +569,14 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0
-; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 -3
-; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -4
-; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 -3
-; VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -8
-; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 -3
-; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -12
-; VF4IC4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i32 -3
+; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 0
+; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 -3
+; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -4
+; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3
+; VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -8
+; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3
+; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -12
+; VF4IC4-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP18]], i64 -3
; VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1
; VF4IC4-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD13]], <4 x i8> poison, <4 x i32>
; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
@@ -586,14 +586,14 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP27]], align 1
; VF4IC4-NEXT: [[REVERSE6:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD5]], <4 x i8> poison, <4 x i32>
; VF4IC4-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; VF4IC4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 0
-; VF4IC4-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i32 -3
-; VF4IC4-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 -4
-; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP41]], i32 -3
-; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 -8
-; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 -3
-; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i32 -12
-; VF4IC4-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 -3
+; VF4IC4-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 0
+; VF4IC4-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i64 -3
+; VF4IC4-NEXT: [[TMP41:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -4
+; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP41]], i64 -3
+; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -8
+; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -3
+; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP25]], i64 -12
+; VF4IC4-NEXT: [[TMP42:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 -3
; VF4IC4-NEXT: [[WIDE_LOAD14:%.*]] = load <4 x i8>, ptr [[TMP46]], align 1
; VF4IC4-NEXT: [[REVERSE15:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD14]], <4 x i8> poison, <4 x i32>
; VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1
@@ -698,17 +698,17 @@ define i8 @same_exit_block_use_loaded_value() {
; VF4IC4: vector.body:
; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]]
-; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4
-; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8
-; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 12
+; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4
+; VF4IC4-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 8
+; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 12
; VF4IC4-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]]
-; VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 4
-; VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 8
-; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 12
+; VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 4
+; VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 8
+; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 12
; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1
; VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1
@@ -807,14 +807,14 @@ define i8 @same_exit_block_reverse_use_loaded_value() {
; VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VF4IC4-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
; VF4IC4-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 0
-; VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 -3
-; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -4
-; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 -3
-; VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -8
-; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 -3
-; VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 -12
-; VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 -3
+; VF4IC4-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 0
+; VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 -3
+; VF4IC4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -4
+; VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 -3
+; VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -8
+; VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3
+; VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 -12
+; VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 -3
; VF4IC4-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1
; VF4IC4-NEXT: [[REVERSE6:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD5]], <4 x i8> poison, <4 x i32>
; VF4IC4-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1
@@ -824,14 +824,14 @@ define i8 @same_exit_block_reverse_use_loaded_value() {
; VF4IC4-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP11]], align 1
; VF4IC4-NEXT: [[REVERSE7:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD6]], <4 x i8> poison, <4 x i32>
; VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; VF4IC4-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
-; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i32 -3
-; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 -4
-; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i32 -3
-; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 -8
-; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i32 -3
-; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 -12
-; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 -3
+; VF4IC4-NEXT: [[TMP38:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 0
+; VF4IC4-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP38]], i64 -3
+; VF4IC4-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -4
+; VF4IC4-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 -3
+; VF4IC4-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -8
+; VF4IC4-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr [[TMP14]], i64 -3
+; VF4IC4-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i64 -12
+; VF4IC4-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 -3
; VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i8>, ptr [[TMP17]], align 1
; VF4IC4-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD13]], <4 x i8> poison, <4 x i32>
; VF4IC4-NEXT: [[WIDE_LOAD9:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
index 55682bc410527..f76634d954dd3 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
@@ -1573,13 +1573,13 @@ define i64 @same_exit_block_pre_inc_use1_reverse() {
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT4:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX1]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 -3
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -3
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1
; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 -3
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 -3
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1
; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD2]], <4 x i8> poison, <4 x i32>
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <4 x i8> [[REVERSE]], [[REVERSE3]]
diff --git a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll
index 1e4598e756645..e10cb2794aadb 100644
--- a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll
@@ -153,8 +153,8 @@ define void @test2(ptr %dst) {
; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[OFFSET_IDX]], -1
; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 4294967295
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i64 [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 -1
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i64 -1
; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP11]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
diff --git a/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll b/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll
index 8123092df1ccc..b9be77ff224ff 100644
--- a/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll
+++ b/llvm/test/Transforms/LoopVectorize/struct-return-replicate.ll
@@ -53,7 +53,7 @@ define void @struct_return_1xi64_replicate(ptr noalias %in, ptr noalias writeonl
; VF2IC2: [[VECTOR_BODY]]:
; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF2IC2-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[IN]], i64 [[INDEX]]
-; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 2
+; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 2
; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 4
; VF2IC2-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0
; VF2IC2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1
@@ -81,7 +81,7 @@ define void @struct_return_1xi64_replicate(ptr noalias %in, ptr noalias writeonl
; VF2IC2-NEXT: [[TMP25:%.*]] = extractvalue { <2 x i64> } [[TMP13]], 0
; VF2IC2-NEXT: [[TMP26:%.*]] = extractvalue { <2 x i64> } [[TMP24]], 0
; VF2IC2-NEXT: [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[OUT_A]], i64 [[INDEX]]
-; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[TMP27]], i32 2
+; VF2IC2-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[TMP27]], i64 2
; VF2IC2-NEXT: store <2 x i64> [[TMP25]], ptr [[TMP27]], align 4
; VF2IC2-NEXT: store <2 x i64> [[TMP26]], ptr [[TMP29]], align 4
; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -166,7 +166,7 @@ define void @struct_return_2xf32_replicate(ptr noalias %in, ptr noalias writeonl
; VF4-NEXT: store <4 x float> [[TMP42]], ptr [[TMP45]], align 4
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; VF4-NEXT: br i1 [[TMP47]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VF4-NEXT: br i1 [[TMP47]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; VF4: [[MIDDLE_BLOCK]]:
;
; VF2IC2-LABEL: define void @struct_return_2xf32_replicate(
@@ -178,7 +178,7 @@ define void @struct_return_2xf32_replicate(ptr noalias %in, ptr noalias writeonl
; VF2IC2: [[VECTOR_BODY]]:
; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF2IC2-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[IN]], i64 [[INDEX]]
-; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 2
+; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 2
; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 4
; VF2IC2-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 0
; VF2IC2-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i32 1
@@ -224,16 +224,16 @@ define void @struct_return_2xf32_replicate(ptr noalias %in, ptr noalias writeonl
; VF2IC2-NEXT: [[TMP43:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP21]], 1
; VF2IC2-NEXT: [[TMP44:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP40]], 1
; VF2IC2-NEXT: [[TMP45:%.*]] = getelementptr inbounds float, ptr [[OUT_A]], i64 [[INDEX]]
-; VF2IC2-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[TMP45]], i32 2
+; VF2IC2-NEXT: [[TMP47:%.*]] = getelementptr inbounds float, ptr [[TMP45]], i64 2
; VF2IC2-NEXT: store <2 x float> [[TMP41]], ptr [[TMP45]], align 4
; VF2IC2-NEXT: store <2 x float> [[TMP42]], ptr [[TMP47]], align 4
; VF2IC2-NEXT: [[TMP48:%.*]] = getelementptr inbounds float, ptr [[OUT_B]], i64 [[INDEX]]
-; VF2IC2-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i32 2
+; VF2IC2-NEXT: [[TMP50:%.*]] = getelementptr inbounds float, ptr [[TMP48]], i64 2
; VF2IC2-NEXT: store <2 x float> [[TMP43]], ptr [[TMP48]], align 4
; VF2IC2-NEXT: store <2 x float> [[TMP44]], ptr [[TMP50]], align 4
; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF2IC2-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; VF2IC2-NEXT: br i1 [[TMP51]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VF2IC2-NEXT: br i1 [[TMP51]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; VF2IC2: [[MIDDLE_BLOCK]]:
;
entry:
@@ -336,7 +336,7 @@ define void @struct_return_3xi32_replicate(ptr noalias %in, ptr noalias writeonl
; VF4-NEXT: store <4 x i32> [[TMP63]], ptr [[TMP64]], align 4
; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF4-NEXT: [[TMP66:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; VF4-NEXT: br i1 [[TMP66]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VF4-NEXT: br i1 [[TMP66]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VF4: [[MIDDLE_BLOCK]]:
;
; VF2IC2-LABEL: define void @struct_return_3xi32_replicate(
@@ -348,7 +348,7 @@ define void @struct_return_3xi32_replicate(ptr noalias %in, ptr noalias writeonl
; VF2IC2: [[VECTOR_BODY]]:
; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF2IC2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[INDEX]]
-; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2
+; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
; VF2IC2-NEXT: [[TMP30:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
; VF2IC2-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
@@ -408,24 +408,24 @@ define void @struct_return_3xi32_replicate(ptr noalias %in, ptr noalias writeonl
; VF2IC2-NEXT: [[TMP57:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP29]], 0
; VF2IC2-NEXT: [[TMP58:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP56]], 0
; VF2IC2-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[DST_A]], i64 [[INDEX]]
-; VF2IC2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, ptr [[TMP59]], i32 2
+; VF2IC2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, ptr [[TMP59]], i64 2
; VF2IC2-NEXT: store <2 x i32> [[TMP57]], ptr [[TMP59]], align 4
; VF2IC2-NEXT: store <2 x i32> [[TMP58]], ptr [[TMP61]], align 4
; VF2IC2-NEXT: [[TMP62:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP29]], 1
; VF2IC2-NEXT: [[TMP63:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP56]], 1
; VF2IC2-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[DST_B]], i64 [[INDEX]]
-; VF2IC2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[TMP64]], i32 2
+; VF2IC2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[TMP64]], i64 2
; VF2IC2-NEXT: store <2 x i32> [[TMP62]], ptr [[TMP64]], align 4
; VF2IC2-NEXT: store <2 x i32> [[TMP63]], ptr [[TMP66]], align 4
; VF2IC2-NEXT: [[TMP67:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP29]], 2
; VF2IC2-NEXT: [[TMP68:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP56]], 2
; VF2IC2-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, ptr [[DST_C]], i64 [[INDEX]]
-; VF2IC2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i32 2
+; VF2IC2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[TMP69]], i64 2
; VF2IC2-NEXT: store <2 x i32> [[TMP67]], ptr [[TMP69]], align 4
; VF2IC2-NEXT: store <2 x i32> [[TMP68]], ptr [[TMP71]], align 4
; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VF2IC2-NEXT: [[TMP72:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; VF2IC2-NEXT: br i1 [[TMP72]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VF2IC2-NEXT: br i1 [[TMP72]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VF2IC2: [[MIDDLE_BLOCK]]:
;
entry:
@@ -580,7 +580,7 @@ define void @struct_return_2xf32_replicate_predicated(ptr %a) {
; VF2IC2: [[VECTOR_BODY]]:
; VF2IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
; VF2IC2-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
-; VF2IC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 2
+; VF2IC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 2
; VF2IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP0]], align 8
; VF2IC2-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x float>, ptr [[TMP1]], align 8
; VF2IC2-NEXT: [[TMP2:%.*]] = fcmp ogt <2 x float> [[WIDE_LOAD]], zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll
index 9adcba3c0d024..31c37a8abe845 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-branch-weights.ll
@@ -26,7 +26,7 @@ define void @test_tc_between_8_and_17(ptr %A, i64 range(i64 8, 17) %N) {
; VF8UF1-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !prof [[PROF0:![0-9]+]], !llvm.loop [[LOOP1:![0-9]+]]
; VF8UF1: [[MIDDLE_BLOCK]]:
; VF8UF1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
-; VF8UF1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH:.*]], !prof [[PROF4:![0-9]+]]
+; VF8UF1-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH:.*]], !prof [[PROF5:![0-9]+]]
; VF8UF1: [[SCALAR_PH]]:
; VF8UF1-NEXT: br label %[[LOOP:.*]]
; VF8UF1: [[LOOP]]:
@@ -38,7 +38,7 @@ define void @test_tc_between_8_and_17(ptr %A, i64 range(i64 8, 17) %N) {
; VF8UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
-; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !prof [[PROF5:![0-9]+]], !llvm.loop [[LOOP6:![0-9]+]]
+; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !prof [[PROF6:![0-9]+]], !llvm.loop [[LOOP7:![0-9]+]]
; VF8UF1: [[EXIT]]:
; VF8UF1-NEXT: ret void
;
@@ -53,7 +53,7 @@ define void @test_tc_between_8_and_17(ptr %A, i64 range(i64 8, 17) %N) {
; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 [[N_VEC]]
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
-; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[A]], i32 8
+; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[A]], i64 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
; VF8UF2-NEXT: [[TMP3:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
index 2f6c2cc275b72..b1dc6bdcb1d38 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
@@ -42,7 +42,7 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
-; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 8
+; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
; VF8UF2-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
@@ -141,7 +141,7 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
-; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 8
+; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
; VF8UF2-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
@@ -257,7 +257,7 @@ define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosyn
; VF8UF2: [[VECTOR_PH]]:
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
-; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i32 8
+; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
; VF8UF2-NEXT: [[TMP2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll
index 2317af5619749..cce9ed2783e4e 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-outside-iv-users.ll
@@ -11,7 +11,7 @@ define i64 @remove_loop_region_int_iv_used_outside(ptr %dst) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 8
; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[DST]], align 8
; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
@@ -44,7 +44,7 @@ define i64 @remove_loop_region_int_iv_inc_used_outside(ptr %dst) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 8
; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[DST]], align 8
; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
@@ -78,7 +78,7 @@ define ptr @remove_loop_region_ptr_iv_used_outside(ptr %dst) {
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 128
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 8
; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[DST]], align 8
; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
@@ -114,7 +114,7 @@ define ptr @remove_loop_region_ptr_iv_inc_used_outside(ptr %dst) {
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 128
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i32 8
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr ptr, ptr [[DST]], i64 8
; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[DST]], align 8
; CHECK-NEXT: store <8 x ptr> zeroinitializer, ptr [[TMP2]], align 8
; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
index bba459f776050..5da6fc3179043 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
@@ -512,10 +512,10 @@ define void @remove_loop_region_outer_loop(i64 range(i64 8, 17) %N, ptr noalias
; VF8UF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
-; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i32 8
+; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
-; VF8UF2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i32 8
+; VF8UF2-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 8
; VF8UF2-NEXT: store <8 x i8> [[WIDE_LOAD]], ptr [[DST]], align 1
; VF8UF2-NEXT: store <8 x i8> [[WIDE_LOAD1]], ptr [[TMP5]], align 1
; VF8UF2-NEXT: br label %[[MIDDLE_BLOCK:.*]]
@@ -1140,7 +1140,7 @@ define void @test_vector_tc_eq_16(ptr %A) {
; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
; VF8UF2: [[VECTOR_BODY]]:
-; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i32 8
+; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 8
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[A]], align 1
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
; VF8UF2-NEXT: [[TMP2:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
diff --git a/llvm/test/Transforms/LoopVectorize/vector-pointer-gep-idxty-addrspace.ll b/llvm/test/Transforms/LoopVectorize/vector-pointer-gep-idxty-addrspace.ll
new file mode 100644
index 0000000000000..4f459209b3683
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/vector-pointer-gep-idxty-addrspace.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 6
+; RUN: opt -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=4 -S %s | FileCheck %s
+
+target datalayout = "p1:16:16"
+
+define void @vector_pointer_gep_idxty_addrspace(ptr addrspace(1) noalias %a, ptr addrspace(1) noalias %b) {
+; CHECK-LABEL: define void @vector_pointer_gep_idxty_addrspace(
+; CHECK-SAME: ptr addrspace(1) noalias [[A:%.*]], ptr addrspace(1) noalias [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[B]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0]], i16 2
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0]], i16 4
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP0]], i16 6
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP0]], align 4
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP1]], align 4
+; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP2]], align 4
+; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <2 x i32>, ptr addrspace(1) [[TMP3]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[WIDE_LOAD]], splat (i32 1)
+; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[WIDE_LOAD1]], splat (i32 1)
+; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[WIDE_LOAD2]], splat (i32 1)
+; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i32> [[WIDE_LOAD3]], splat (i32 1)
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[A]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP8]], i16 2
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP8]], i16 4
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[TMP8]], i16 6
+; CHECK-NEXT: store <2 x i32> [[TMP4]], ptr addrspace(1) [[TMP8]], align 4
+; CHECK-NEXT: store <2 x i32> [[TMP5]], ptr addrspace(1) [[TMP9]], align 4
+; CHECK-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(1) [[TMP10]], align 4
+; CHECK-NEXT: store <2 x i32> [[TMP7]], ptr addrspace(1) [[TMP11]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH:.*]]
+; CHECK: [[SCALAR_PH]]:
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %gep.b.iv = getelementptr inbounds i32, ptr addrspace(1) %b, i64 %iv
+  %ld.b = load i32, ptr addrspace(1) %gep.b.iv
+  %add = add i32 %ld.b, 1
+  %gep.a.iv = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %iv
+  store i32 %add, ptr addrspace(1) %gep.a.iv
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv, 1024
+  br i1 %exitcond, label %end, label %loop

+end:
+  ret void
+}