diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
new file mode 100644
index 0000000000000..cd133371f66ce
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll
@@ -0,0 +1,253 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call" --version 2
+; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=NEON
+; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=SVE_OR_NEON
+; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_TF
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; A call whose argument can remain a scalar because it's sequential and only the
+; starting value is required.
+define void @test_linear8(ptr noalias %a, ptr readnone %b, i64 %n) {
+; NEON-LABEL: define void @test_linear8
+; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
+; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]]
+;
+; SVE_OR_NEON-LABEL: define void @test_linear8
+; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
+;
+; SVE_TF-LABEL: define void @test_linear8
+; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
+  %data = call i64 @foo(ptr %gepb) #0
+  %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+  store i64 %data, ptr %gepa
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_vector_linear4(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) {
+; NEON-LABEL: define void @test_vector_linear4
+; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
+; NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
+;
+; SVE_OR_NEON-LABEL: define void @test_vector_linear4
+; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_OR_NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
+;
+; SVE_TF-LABEL: define void @test_vector_linear4
+; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_TF: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepc = getelementptr i32, ptr %c, i64 %indvars.iv
+  %input = load i32, ptr %gepc, align 8
+  %gepb = getelementptr i32, ptr %b, i64 %indvars.iv
+  %data = call i32 @baz(i32 %input, ptr %gepb) #1
+  %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 %data, ptr %gepa, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_linear8_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) {
+; NEON-LABEL: define void @test_linear8_bad_stride
+; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
+; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
+;
+; SVE_OR_NEON-LABEL: define void @test_linear8_bad_stride
+; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
+;
+; SVE_TF-LABEL: define void @test_linear8_bad_stride
+; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
+  %data = call i64 @foo(ptr %gepb) #2
+  %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+  store i64 %data, ptr %gepa
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_linear16_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) {
+; NEON-LABEL: define void @test_linear16_wide_stride
+; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) {
+; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]]
+;
+; SVE_OR_NEON-LABEL: define void @test_linear16_wide_stride
+; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]]
+;
+; SVE_TF-LABEL: define void @test_linear16_wide_stride
+; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %double = mul i64 %indvars.iv, 2
+  %gepb = getelementptr i64, ptr %b, i64 %double
+  %data = call i64 @foo(ptr %gepb) #2
+  %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+  store i64 %data, ptr %gepa
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_linear4_linear8(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) {
+; NEON-LABEL: define void @test_linear4_linear8
+; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) {
+; NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
+;
+; SVE_OR_NEON-LABEL: define void @test_linear4_linear8
+; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_OR_NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
+;
+; SVE_TF-LABEL: define void @test_linear4_linear8
+; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_TF: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepc = getelementptr i32, ptr %c, i64 %indvars.iv
+  %gepb = getelementptr i64, ptr %b, i64 %indvars.iv
+  %data = call i32 @quux(ptr %gepc, ptr %gepb) #3
+  %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 %data, ptr %gepa, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_linear3_non_ptr(ptr noalias %a, i64 %n) {
+; NEON-LABEL: define void @test_linear3_non_ptr
+; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
+; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR4:[0-9]+]]
+;
+; SVE_OR_NEON-LABEL: define void @test_linear3_non_ptr
+; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]]
+;
+; SVE_TF-LABEL: define void @test_linear3_non_ptr
+; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %little.iv = trunc i64 %indvars.iv to i32
+  %trebled = mul i32 %little.iv, 3
+  %data = call i32 @bar(i32 %trebled) #4
+  %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 %data, ptr %gepa
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_linearn5_non_ptr_neg_stride(ptr noalias %a, i64 %n) {
+; NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride
+; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) {
+; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR5:[0-9]+]]
+;
+; SVE_OR_NEON-LABEL: define void @test_linearn5_non_ptr_neg_stride
+; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]]
+;
+; SVE_TF-LABEL: define void @test_linearn5_non_ptr_neg_stride
+; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %little.iv = trunc i64 %indvars.iv to i32
+  %negstride = mul i32 %little.iv, -5
+  %data = call i32 @bar(i32 %negstride) #5
+  %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 %data, ptr %gepa
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+declare i64 @foo(ptr)
+declare i32 @baz(i32, ptr)
+declare i32 @quux(ptr, ptr)
+declare i32 @bar(i32)
+
+; NEON (fixed-width) vector variants of foo, baz, quux and bar
+declare <2 x i64> @vec_foo_linear8_nomask_neon(ptr)
+declare <2 x i64> @vec_foo_linear16_nomask_neon(ptr)
+declare <4 x i32> @vec_baz_vector_linear4_nomask_neon(<4 x i32>, ptr)
+declare <4 x i32> @vec_quux_linear4_linear8_nomask_neon(ptr, ptr)
+declare <4 x i32> @vec_bar_linear3_nomask_neon(i32)
+declare <4 x i32> @vec_bar_linearn5_nomask_neon(i32)
+
+; SVE (scalable) vector variants of foo, baz, quux and bar; masked variants take a trailing predicate
+declare <vscale x 2 x i64> @vec_foo_linear8_mask_sve(ptr, <vscale x 2 x i1>)
+declare <vscale x 2 x i64> @vec_foo_linear8_nomask_sve(ptr)
+declare <vscale x 2 x i64> @vec_foo_linear16_nomask_sve(ptr)
+declare <vscale x 4 x i32> @vec_baz_vector_linear4_nomask_sve(<vscale x 4 x i32>, ptr)
+declare <vscale x 4 x i32> @vec_quux_linear4_linear8_mask_sve(ptr, ptr, <vscale x 4 x i1>)
+declare <vscale x 4 x i32> @vec_bar_linear3_nomask_sve(i32)
+declare <vscale x 4 x i32> @vec_bar_linearn5_nomask_sve(i32)
+
+attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVsNxl8_foo(vec_foo_linear8_nomask_sve),_ZGVsMxl8_foo(vec_foo_linear8_mask_sve),_ZGVnN2l8_foo(vec_foo_linear8_nomask_neon)" }
+attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsNxvl4_baz(vec_baz_vector_linear4_nomask_sve),_ZGVnN4vl4_baz(vec_baz_vector_linear4_nomask_neon)" }
+attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVsNxl16_foo(vec_foo_linear16_nomask_sve),_ZGVnN2l16_foo(vec_foo_linear16_nomask_neon)" }
+attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsMxl4l8_quux(vec_quux_linear4_linear8_mask_sve),_ZGVnN4l4l8_quux(vec_quux_linear4_linear8_nomask_neon)" }
+attributes #4 = { nounwind "vector-function-abi-variant"="_ZGVsNxl3_bar(vec_bar_linear3_nomask_sve),_ZGVnN4l3_bar(vec_bar_linear3_nomask_neon)" }
+attributes #5 = { nounwind "vector-function-abi-variant"="_ZGVsNxln5_bar(vec_bar_linearn5_nomask_sve),_ZGVnN4ln5_bar(vec_bar_linearn5_nomask_neon)" }