diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index bbeb5da2cfec3..ae2fc522ba400 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -597,13 +597,15 @@ void VPWidenCallRecipe::execute(VPTransformState &State) { for (const auto &I : enumerate(operands())) { // Some intrinsics have a scalar argument - don't replace it with a // vector. - // Some vectorized function variants may also take a scalar argument, - // e.g. linear parameters for pointers. Value *Arg; - if ((VFTy && !VFTy->getParamType(I.index())->isVectorTy()) || - (UseIntrinsic && - isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))) + if (UseIntrinsic && + isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index())) Arg = State.get(I.value(), VPIteration(0, 0)); + // Some vectorized function variants may also take a scalar argument, + // e.g. linear parameters for pointers. This needs to be the scalar value + // from the start of the respective part when interleaving. + else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy()) + Arg = State.get(I.value(), VPIteration(Part, 0)); else Arg = State.get(I.value(), Part); if (UseIntrinsic && diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll index 29440ca174248..f60ab5e848dd3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll @@ -21,7 +21,8 @@ define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) { ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { ; NEON_INTERLEAVE: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP2:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP5:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP4]]) -; NEON_INTERLEAVE: [[TMP6:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP4]]) +; NEON_INTERLEAVE: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP3:%.*]], i32 0 +; NEON_INTERLEAVE: [[TMP7:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP6]]) ; NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear8 @@ -34,8 +35,9 @@ define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) { ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; SVE_OR_NEON_INTERLEAVE: [[TMP33:%.*]] = extractelement [[TMP31:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: [[TMP34:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP33]], [[ACTIVE_LANE_MASK:%.*]]) -; SVE_OR_NEON_INTERLEAVE: [[TMP35:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP33]], [[ACTIVE_LANE_MASK2:%.*]]) -; SVE_OR_NEON_INTERLEAVE: [[TMP47:%.*]] = extractelement [[TMP45:%.*]], i32 0 +; SVE_OR_NEON_INTERLEAVE: [[TMP35:%.*]] = extractelement [[TMP32:%.*]], i32 0 +; SVE_OR_NEON_INTERLEAVE: [[TMP36:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP35]], [[ACTIVE_LANE_MASK2:%.*]]) +; SVE_OR_NEON_INTERLEAVE: [[TMP48:%.*]] = extractelement [[TMP46:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear8 @@ -49,8 +51,9 @@ define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) { ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; SVE_TF_INTERLEAVE: [[TMP33:%.*]] = extractelement [[TMP31:%.*]], i32 0 ; SVE_TF_INTERLEAVE: [[TMP34:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP33]], [[ACTIVE_LANE_MASK:%.*]]) -; SVE_TF_INTERLEAVE: [[TMP35:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP33]], [[ACTIVE_LANE_MASK2:%.*]]) -; SVE_TF_INTERLEAVE: [[TMP47:%.*]] = extractelement [[TMP45:%.*]], i32 0 +; SVE_TF_INTERLEAVE: [[TMP35:%.*]] = extractelement [[TMP32:%.*]], i32 0 +; SVE_TF_INTERLEAVE: [[TMP36:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP35]], [[ACTIVE_LANE_MASK2:%.*]]) +; SVE_TF_INTERLEAVE: [[TMP48:%.*]] = extractelement [[TMP46:%.*]], i32 0 ; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] ; entry: @@ -81,7 +84,8 @@ define void @test_vector_linear4(ptr noalias %a, ptr readnone %b, ptr readonly % ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) { ; NEON_INTERLEAVE: [[TMP8:%.*]] = extractelement <4 x ptr> [[TMP6:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP9:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD:%.*]], ptr [[TMP8]]) -; NEON_INTERLEAVE: [[TMP10:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD2:%.*]], ptr [[TMP8]]) +; NEON_INTERLEAVE: [[TMP10:%.*]] = extractelement <4 x ptr> [[TMP7:%.*]], i32 0 +; NEON_INTERLEAVE: [[TMP11:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD2:%.*]], ptr [[TMP10]]) ; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_vector_linear4 @@ -176,7 +180,8 @@ define void @test_linear16_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { ; NEON_INTERLEAVE: [[TMP6:%.*]] = extractelement <2 x ptr> [[TMP4:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP7:%.*]] = call <2 x i64> @vec_foo_linear16_nomask_neon(ptr [[TMP6]]) -; NEON_INTERLEAVE: [[TMP8:%.*]] = call <2 x i64> @vec_foo_linear16_nomask_neon(ptr [[TMP6]]) +; NEON_INTERLEAVE: [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP5:%.*]], i32 0 +; NEON_INTERLEAVE: [[TMP9:%.*]] = call <2 x i64> @vec_foo_linear16_nomask_neon(ptr [[TMP8]]) ; NEON_INTERLEAVE: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]] ; ; SVE_OR_NEON-LABEL: define void @test_linear16_wide_stride @@ -228,7 +233,9 @@ define void @test_linear4_linear8(ptr noalias %a, ptr readnone %b, ptr readonly ; NEON_INTERLEAVE: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP2:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP7:%.*]] = extractelement <4 x ptr> [[TMP4:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP8:%.*]] = call <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr [[TMP6]], ptr [[TMP7]]) -; NEON_INTERLEAVE: [[TMP9:%.*]] = call <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr [[TMP6]], ptr [[TMP7]]) +; NEON_INTERLEAVE: [[TMP9:%.*]] = extractelement <4 x ptr> [[TMP3:%.*]], i32 0 +; NEON_INTERLEAVE: [[TMP10:%.*]] = extractelement <4 x ptr> [[TMP5:%.*]], i32 0 +; NEON_INTERLEAVE: [[TMP11:%.*]] = call <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr [[TMP9]], ptr [[TMP10]]) ; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear4_linear8 @@ -243,8 +250,10 @@ define void @test_linear4_linear8(ptr noalias %a, ptr readnone %b, ptr readonly ; SVE_OR_NEON_INTERLEAVE: [[TMP35:%.*]] = extractelement [[TMP31:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: [[TMP36:%.*]] = extractelement [[TMP33:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: [[TMP37:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP35]], ptr [[TMP36]], [[ACTIVE_LANE_MASK:%.*]]) -; SVE_OR_NEON_INTERLEAVE: [[TMP38:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP35]], ptr [[TMP36]], [[ACTIVE_LANE_MASK2:%.*]]) -; SVE_OR_NEON_INTERLEAVE: [[TMP50:%.*]] = extractelement [[TMP48:%.*]], i32 0 +; SVE_OR_NEON_INTERLEAVE: [[TMP38:%.*]] = extractelement [[TMP32:%.*]], i32 0 +; SVE_OR_NEON_INTERLEAVE: [[TMP39:%.*]] = extractelement [[TMP34:%.*]], i32 0 +; SVE_OR_NEON_INTERLEAVE: [[TMP40:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP38]], ptr [[TMP39]], [[ACTIVE_LANE_MASK2:%.*]]) +; SVE_OR_NEON_INTERLEAVE: [[TMP52:%.*]] = extractelement [[TMP50:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR7:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear4_linear8 @@ -260,8 +269,10 @@ define void @test_linear4_linear8(ptr noalias %a, ptr readnone %b, ptr readonly ; SVE_TF_INTERLEAVE: [[TMP35:%.*]] = extractelement [[TMP31:%.*]], i32 0 ; SVE_TF_INTERLEAVE: [[TMP36:%.*]] = extractelement [[TMP33:%.*]], i32 0 ; SVE_TF_INTERLEAVE: [[TMP37:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP35]], ptr [[TMP36]], [[ACTIVE_LANE_MASK:%.*]]) -; SVE_TF_INTERLEAVE: [[TMP38:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP35]], ptr [[TMP36]], [[ACTIVE_LANE_MASK2:%.*]]) -; SVE_TF_INTERLEAVE: [[TMP50:%.*]] = extractelement [[TMP48:%.*]], i32 0 +; SVE_TF_INTERLEAVE: [[TMP38:%.*]] = extractelement [[TMP32:%.*]], i32 0 +; SVE_TF_INTERLEAVE: [[TMP39:%.*]] = extractelement [[TMP34:%.*]], i32 0 +; SVE_TF_INTERLEAVE: [[TMP40:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP38]], ptr [[TMP39]], [[ACTIVE_LANE_MASK2:%.*]]) +; SVE_TF_INTERLEAVE: [[TMP52:%.*]] = extractelement [[TMP50:%.*]], i32 0 ; SVE_TF_INTERLEAVE: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR7:[0-9]+]] ; entry: @@ -293,7 +304,8 @@ define void @test_linear3_non_ptr(ptr noalias %a, i64 %n) { ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) { ; NEON_INTERLEAVE: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP5:%.*]] = call <4 x i32> @vec_bar_linear3_nomask_neon(i32 [[TMP4]]) -; NEON_INTERLEAVE: [[TMP6:%.*]] = call <4 x i32> @vec_bar_linear3_nomask_neon(i32 [[TMP4]]) +; NEON_INTERLEAVE: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3:%.*]], i32 0 +; NEON_INTERLEAVE: [[TMP7:%.*]] = call <4 x i32> @vec_bar_linear3_nomask_neon(i32 [[TMP6]]) ; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR4:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear3_non_ptr @@ -343,7 +355,8 @@ define void @test_linearn5_non_ptr_neg_stride(ptr noalias %a, i64 %n) { ; NEON_INTERLEAVE-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) { ; NEON_INTERLEAVE: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2:%.*]], i32 0 ; NEON_INTERLEAVE: [[TMP5:%.*]] = call <4 x i32> @vec_bar_linearn5_nomask_neon(i32 [[TMP4]]) -; NEON_INTERLEAVE: [[TMP6:%.*]] = call <4 x i32> @vec_bar_linearn5_nomask_neon(i32 [[TMP4]]) +; NEON_INTERLEAVE: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3:%.*]], i32 0 +; NEON_INTERLEAVE: [[TMP7:%.*]] = call <4 x i32> @vec_bar_linearn5_nomask_neon(i32 [[TMP6]]) ; NEON_INTERLEAVE: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR5:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride @@ -393,7 +406,8 @@ define void @test_linear8_return_void(ptr noalias %in, ptr noalias %out, i64 %n) ; NEON_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) { ; NEON_INTERLEAVE: [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP6:%.*]], i32 0 ; NEON_INTERLEAVE: call void @vec_goo_linear8_nomask_neon(<2 x i64> [[WIDE_LOAD:%.*]], ptr [[TMP8]]) -; NEON_INTERLEAVE: call void @vec_goo_linear8_nomask_neon(<2 x i64> [[WIDE_LOAD2:%.*]], ptr [[TMP8]]) +; NEON_INTERLEAVE: [[TMP9:%.*]] = extractelement <2 x ptr> [[TMP7:%.*]], i32 0 +; NEON_INTERLEAVE: call void @vec_goo_linear8_nomask_neon(<2 x i64> [[WIDE_LOAD2:%.*]], ptr [[TMP9]]) ; NEON_INTERLEAVE: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR6:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear8_return_void @@ -406,8 +420,9 @@ define void @test_linear8_return_void(ptr noalias %in, ptr noalias %out, i64 %n) ; SVE_OR_NEON_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_OR_NEON_INTERLEAVE: [[TMP39:%.*]] = extractelement [[TMP37:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: call void @vec_goo_linear8_mask_sve( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP39]], [[ACTIVE_LANE_MASK:%.*]]) -; SVE_OR_NEON_INTERLEAVE: call void @vec_goo_linear8_mask_sve( [[WIDE_MASKED_LOAD4:%.*]], ptr [[TMP39]], [[ACTIVE_LANE_MASK2:%.*]]) -; SVE_OR_NEON_INTERLEAVE: [[TMP45:%.*]] = extractelement [[TMP43:%.*]], i32 0 +; SVE_OR_NEON_INTERLEAVE: [[TMP40:%.*]] = extractelement [[TMP38:%.*]], i32 0 +; SVE_OR_NEON_INTERLEAVE: call void @vec_goo_linear8_mask_sve( [[WIDE_MASKED_LOAD4:%.*]], ptr [[TMP40]], [[ACTIVE_LANE_MASK2:%.*]]) +; SVE_OR_NEON_INTERLEAVE: [[TMP46:%.*]] = extractelement [[TMP44:%.*]], i32 0 ; SVE_OR_NEON_INTERLEAVE: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR10:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear8_return_void @@ -421,8 +436,9 @@ define void @test_linear8_return_void(ptr noalias %in, ptr noalias %out, i64 %n) ; SVE_TF_INTERLEAVE-SAME: (ptr noalias [[IN:%.*]], ptr noalias [[OUT:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; SVE_TF_INTERLEAVE: [[TMP39:%.*]] = extractelement [[TMP37:%.*]], i32 0 ; SVE_TF_INTERLEAVE: call void @vec_goo_linear8_mask_sve( [[WIDE_MASKED_LOAD:%.*]], ptr [[TMP39]], [[ACTIVE_LANE_MASK:%.*]]) -; SVE_TF_INTERLEAVE: call void @vec_goo_linear8_mask_sve( [[WIDE_MASKED_LOAD4:%.*]], ptr [[TMP39]], [[ACTIVE_LANE_MASK2:%.*]]) -; SVE_TF_INTERLEAVE: [[TMP45:%.*]] = extractelement [[TMP43:%.*]], i32 0 +; SVE_TF_INTERLEAVE: [[TMP40:%.*]] = extractelement [[TMP38:%.*]], i32 0 +; SVE_TF_INTERLEAVE: call void @vec_goo_linear8_mask_sve( [[WIDE_MASKED_LOAD4:%.*]], ptr [[TMP40]], [[ACTIVE_LANE_MASK2:%.*]]) +; SVE_TF_INTERLEAVE: [[TMP46:%.*]] = extractelement [[TMP44:%.*]], i32 0 ; SVE_TF_INTERLEAVE: call void @goo(i64 [[NUM:%.*]], ptr [[GEP_OUT:%.*]]) #[[ATTR10:[0-9]+]] ; entry: