diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index c07e9f043b7b8..d84c2717077f2 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6888,6 +6888,30 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) { ParamsOk = false; break; } + case VFParamKind::OMP_Linear: { + Value *ScalarParam = CI->getArgOperand(Param.ParamPos); + // Find the stride for the scalar parameter in this loop and see if + // it matches the stride for the variant. + // TODO: do we need to figure out the cost of an extract to get the + // first lane? Or do we hope that it will be folded away? + ScalarEvolution *SE = PSE.getSE(); + const auto *SAR = + dyn_cast(SE->getSCEV(ScalarParam)); + + if (!SAR || SAR->getLoop() != TheLoop) { + ParamsOk = false; + break; + } + + const SCEVConstant *Step = + dyn_cast(SAR->getStepRecurrence(*SE)); + + if (!Step || + Step->getAPInt().getSExtValue() != Param.LinearStepOrPos) + ParamsOk = false; + + break; + } case VFParamKind::GlobalPredicate: UsesMask = true; break; diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll index cd133371f66ce..876d58131bd7a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call" --version 2 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(foo|bar|baz|quux)" --version 2 ; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=NEON ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=SVE_OR_NEON ; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_TF @@ -10,15 +10,18 @@ target triple = "aarch64-unknown-linux-gnu" define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) { ; NEON-LABEL: define void @test_linear8 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { +; NEON: [[TMP3:%.*]] = call <2 x i64> @vec_foo_linear8_nomask_neon(ptr [[TMP2:%.*]]) ; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear8 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] +; SVE_OR_NEON: [[TMP13:%.*]] = call @vec_foo_linear8_nomask_sve(ptr [[TMP12:%.*]]) +; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear8 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] +; SVE_TF: [[TMP19:%.*]] = call @vec_foo_linear8_mask_sve(ptr [[TMP18:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; entry: br label %for.body @@ -40,15 +43,17 @@ for.cond.cleanup: define void @test_vector_linear4(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) { ; NEON-LABEL: define void @test_vector_linear4 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) { +; NEON: [[TMP5:%.*]] = call <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32> [[WIDE_LOAD:%.*]], ptr [[TMP4:%.*]]) ; NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_vector_linear4 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]] +; SVE_OR_NEON: [[TMP15:%.*]] = call @vec_baz_vector_linear4_nomask_sve( [[WIDE_LOAD:%.*]], ptr [[TMP14:%.*]]) +; SVE_OR_NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_vector_linear4 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]] +; SVE_TF: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] ; entry: br label %for.body @@ -76,11 +81,11 @@ define void @test_linear8_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) { ; ; SVE_OR_NEON-LABEL: define void @test_linear8_bad_stride ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] +; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear8_bad_stride ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] +; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]] ; entry: br label %for.body @@ -102,15 +107,17 @@ for.cond.cleanup: define void @test_linear16_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) { ; NEON-LABEL: define void @test_linear16_wide_stride ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { +; NEON: [[TMP4:%.*]] = call <2 x i64> @vec_foo_linear16_nomask_neon(ptr [[TMP3:%.*]]) ; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]] ; ; SVE_OR_NEON-LABEL: define void @test_linear16_wide_stride ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]] +; SVE_OR_NEON: [[TMP14:%.*]] = call @vec_foo_linear16_nomask_sve(ptr [[TMP13:%.*]]) +; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR4]] ; ; SVE_TF-LABEL: define void @test_linear16_wide_stride ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]] +; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR5]] ; entry: br label %for.body @@ -133,15 +140,18 @@ for.cond.cleanup: define void @test_linear4_linear8(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) { ; NEON-LABEL: define void @test_linear4_linear8 ; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) { +; NEON: [[TMP5:%.*]] = call <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr [[TMP3:%.*]], ptr [[TMP4:%.*]]) ; NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear4_linear8 ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] +; SVE_OR_NEON: [[TMP15:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP13:%.*]], ptr [[TMP14:%.*]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; SVE_OR_NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR5:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear4_linear8 ; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] +; SVE_TF: [[TMP21:%.*]] = call @vec_quux_linear4_linear8_mask_sve(ptr [[TMP19:%.*]], ptr [[TMP20:%.*]], [[ACTIVE_LANE_MASK:%.*]]) +; SVE_TF: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR6:[0-9]+]] ; entry: br label %for.body @@ -164,15 +174,17 @@ for.cond.cleanup: define void @test_linear3_non_ptr(ptr noalias %a, i64 %n) { ; NEON-LABEL: define void @test_linear3_non_ptr ; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) { +; NEON: [[TMP3:%.*]] = call <4 x i32> @vec_bar_linear3_nomask_neon(i32 [[TMP2:%.*]]) ; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR4:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linear3_non_ptr ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]] +; SVE_OR_NEON: [[TMP13:%.*]] = call @vec_bar_linear3_nomask_sve(i32 [[TMP12:%.*]]) +; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR6:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linear3_non_ptr ; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]] +; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR7:[0-9]+]] ; entry: br label %for.body @@ -195,15 +207,17 @@ for.cond.cleanup: define void @test_linearn5_non_ptr_neg_stride(ptr noalias %a, i64 %n) { ; NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride ; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) { +; NEON: [[TMP3:%.*]] = call <4 x i32> @vec_bar_linearn5_nomask_neon(i32 [[TMP2:%.*]]) ; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR5:[0-9]+]] ; ; SVE_OR_NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride ; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]] +; SVE_OR_NEON: [[TMP13:%.*]] = call @vec_bar_linearn5_nomask_sve(i32 [[TMP12:%.*]]) +; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR7:[0-9]+]] ; ; SVE_TF-LABEL: define void @test_linearn5_non_ptr_neg_stride ; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]] +; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR8:[0-9]+]] ; entry: br label %for.body