diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll index ef6b8e1d83f38..ba9d57e1e4a16 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-call-linear-args.ll @@ -1,26 +1,24 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt < %s -passes=loop-vectorize,instsimplify -force-vector-interleave=1 -S | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call" --version 2 +; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=NEON +; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S | FileCheck %s --check-prefixes=SVE_OR_NEON +; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -S -prefer-predicate-over-epilogue=predicate-dont-vectorize | FileCheck %s --check-prefixes=SVE_TF target triple = "aarch64-unknown-linux-gnu" ; A call whose argument can remain a scalar because it's sequential and only the ; starting value is required. 
-define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) #0 { -; CHECK-LABEL: define void @test_linear -; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR1:[0-9]+]] -; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i64 [[CALL]], ptr [[GEPA]], align 8 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void +define void @test_linear(ptr noalias %a, ptr readnone %b, i64 %n) { +; NEON-LABEL: define void @test_linear +; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { +; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR0:[0-9]+]] +; +; SVE_OR_NEON-LABEL: define void @test_linear +; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] +; +; SVE_TF-LABEL: define void @test_linear +; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] ; entry: br label %for.body @@ -28,9 +26,9 @@ entry: for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %gepb = getelementptr i64, ptr %b, i64 %indvars.iv - %call = call i64 @foo(ptr %gepb) #1 + %data = call i64 @foo(ptr %gepb) #0 %gepa = getelementptr inbounds i64, ptr 
%a, i64 %indvars.iv - store i64 %call, ptr %gepa + store i64 %data, ptr %gepa %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond, label %for.cond.cleanup, label %for.body @@ -39,32 +37,30 @@ for.cond.cleanup: ret void } -define void @test_linear_with_mask(ptr noalias %a, ptr readnone %b, i64 %n) #0 { -; CHECK-LABEL: define void @test_linear_with_mask -; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR2:[0-9]+]] -; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i64 [[CALL]], ptr [[GEPA]], align 8 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void +define void @test_linear_with_vector(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) { +; NEON-LABEL: define void @test_linear_with_vector +; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) { +; NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]] +; +; SVE_OR_NEON-LABEL: define void @test_linear_with_vector +; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SVE_OR_NEON: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]] +; +; SVE_TF-LABEL: define void @test_linear_with_vector +; SVE_TF-SAME: (ptr noalias 
[[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SVE_TF: [[DATA:%.*]] = call i32 @baz(i32 [[INPUT:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]] ; entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %gepb = getelementptr i64, ptr %b, i64 %indvars.iv - %call = call i64 @foo(ptr %gepb) #2 - %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv - store i64 %call, ptr %gepa + %gepc = getelementptr i32, ptr %c, i64 %indvars.iv + %input = load i32, ptr %gepc, align 8 + %gepb = getelementptr i32, ptr %b, i64 %indvars.iv + %data = call i32 @baz(i32 %input, ptr %gepb) #1 + %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + store i32 %data, ptr %gepa, align 8 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond, label %for.cond.cleanup, label %for.body @@ -73,36 +69,28 @@ for.cond.cleanup: ret void } -define void @test_linear_with_vector(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) #0 { -; CHECK-LABEL: define void @test_linear_with_vector -; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[GEPC:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[DATA:%.*]] = load i32, ptr [[GEPC]], align 8 -; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i32, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[CALL:%.*]] = call i32 @baz(i32 [[DATA]], ptr [[GEPB]]) #[[ATTR3:[0-9]+]] -; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i32 [[CALL]], ptr [[GEPA]], align 8 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq 
i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void +define void @test_linear_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) { +; NEON-LABEL: define void @test_linear_bad_stride +; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { +; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]] +; +; SVE_OR_NEON-LABEL: define void @test_linear_bad_stride +; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] +; +; SVE_TF-LABEL: define void @test_linear_bad_stride +; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] ; entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %gepc = getelementptr i32, ptr %c, i64 %indvars.iv - %data = load i32, ptr %gepc, align 8 - %gepb = getelementptr i32, ptr %b, i64 %indvars.iv - %call = call i32 @baz(i32 %data, ptr %gepb) #3 - %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv - store i32 %call, ptr %gepa, align 8 + %gepb = getelementptr i64, ptr %b, i64 %indvars.iv + %data = call i64 @foo(ptr %gepb) #2 + %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + store i64 %data, ptr %gepa %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond, label %for.cond.cleanup, label %for.body @@ -111,32 +99,29 @@ for.cond.cleanup: ret void } -define void @test_linear_bad_stride(ptr noalias %a, ptr readnone %b, i64 %n) #0 { -; CHECK-LABEL: define void @test_linear_bad_stride -; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; 
CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR4:[0-9]+]] -; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i64 [[CALL]], ptr [[GEPA]], align 8 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void +define void @test_linear_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) { +; NEON-LABEL: define void @test_linear_wide_stride +; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) { +; NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR2]] +; +; SVE_OR_NEON-LABEL: define void @test_linear_wide_stride +; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SVE_OR_NEON: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]] +; +; SVE_TF-LABEL: define void @test_linear_wide_stride +; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SVE_TF: [[DATA:%.*]] = call i64 @foo(ptr [[GEPB:%.*]]) #[[ATTR3]] ; entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %gepb = getelementptr i64, ptr %b, i64 %indvars.iv - %call = call i64 @foo(ptr %gepb) #4 + %double = mul i64 %indvars.iv, 2 + %gepb = getelementptr i64, ptr %b, i64 %double + %data = call i64 @foo(ptr %gepb) #2 %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv - store i64 %call, ptr %gepa + store i64 %data, ptr %gepa %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond, label 
%for.cond.cleanup, label %for.body @@ -145,34 +130,29 @@ for.cond.cleanup: ret void } -define void @test_linear_wide_stride(ptr noalias %a, ptr readnone %b, i64 %n) #0 { -; CHECK-LABEL: define void @test_linear_wide_stride -; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[DOUBLE:%.*]] = mul i64 [[INDVARS_IV]], 2 -; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[DOUBLE]] -; CHECK-NEXT: [[CALL:%.*]] = call i64 @foo(ptr [[GEPB]]) #[[ATTR4]] -; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i64 [[CALL]], ptr [[GEPA]], align 8 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void +define void @test_linear_mixed_types(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) { +; NEON-LABEL: define void @test_linear_mixed_types +; NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) { +; NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]] +; +; SVE_OR_NEON-LABEL: define void @test_linear_mixed_types +; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SVE_OR_NEON: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr [[GEPB:%.*]]) #[[ATTR4:[0-9]+]] +; +; SVE_TF-LABEL: define void @test_linear_mixed_types +; SVE_TF-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SVE_TF: [[DATA:%.*]] = call i32 @quux(ptr [[GEPC:%.*]], ptr 
[[GEPB:%.*]]) #[[ATTR4:[0-9]+]] ; entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %double = mul i64 %indvars.iv, 2 - %gepb = getelementptr i64, ptr %b, i64 %double - %call = call i64 @foo(ptr %gepb) #4 - %gepa = getelementptr inbounds i64, ptr %a, i64 %indvars.iv - store i64 %call, ptr %gepa + %gepc = getelementptr i32, ptr %c, i64 %indvars.iv + %gepb = getelementptr i64, ptr %b, i64 %indvars.iv + %data = call i32 @quux(ptr %gepc, ptr %gepb) #3 + %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + store i32 %data, ptr %gepa, align 8 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond, label %for.cond.cleanup, label %for.body @@ -181,34 +161,29 @@ for.cond.cleanup: ret void } -define void @test_linear_mixed_types(ptr noalias %a, ptr readnone %b, ptr readonly %c, i64 %n) #0 { -; CHECK-LABEL: define void @test_linear_mixed_types -; CHECK-SAME: (ptr noalias [[A:%.*]], ptr readnone [[B:%.*]], ptr readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[GEPC:%.*]] = getelementptr i32, ptr [[C]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[GEPB:%.*]] = getelementptr i64, ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[CALL:%.*]] = call i32 @quux(ptr [[GEPC]], ptr [[GEPB]]) #[[ATTR5:[0-9]+]] -; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store i32 [[CALL]], ptr [[GEPA]], align 8 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void +define void @test_linear_non_ptr(ptr noalias %a, 
i64 %n) { +; NEON-LABEL: define void @test_linear_non_ptr +; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) { +; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR4:[0-9]+]] +; +; SVE_OR_NEON-LABEL: define void @test_linear_non_ptr +; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]] +; +; SVE_TF-LABEL: define void @test_linear_non_ptr +; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[TREBLED:%.*]]) #[[ATTR5:[0-9]+]] ; entry: br label %for.body for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %gepc = getelementptr i32, ptr %c, i64 %indvars.iv - %gepb = getelementptr i64, ptr %b, i64 %indvars.iv - %call = call i32 @quux(ptr %gepc, ptr %gepb) #5 + %little.iv = trunc i64 %indvars.iv to i32 + %trebled = mul i32 %little.iv, 3 + %data = call i32 @bar(i32 %trebled) #4 %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv - store i32 %call, ptr %gepa, align 8 + store i32 %data, ptr %gepa %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond, label %for.cond.cleanup, label %for.body @@ -217,23 +192,18 @@ for.cond.cleanup: ret void } -define void @test_linear_non_ptr(ptr noalias %a, i64 %n) #0 { -; CHECK-LABEL: define void @test_linear_non_ptr -; CHECK-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[LITTLE_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[TREBLED:%.*]] = mul i32 [[LITTLE_IV]], 3 -; CHECK-NEXT: [[CALL:%.*]] = call i32 @bar(i32 [[TREBLED]]) #[[ATTR6:[0-9]+]] -; CHECK-NEXT: [[GEPA:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: 
store i32 [[CALL]], ptr [[GEPA]], align 4 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void +define void @test_linear_non_ptr_neg_stride(ptr noalias %a, i64 %n) { +; NEON-LABEL: define void @test_linear_non_ptr_neg_stride +; NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) { +; NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR5:[0-9]+]] +; +; SVE_OR_NEON-LABEL: define void @test_linear_non_ptr_neg_stride +; SVE_OR_NEON-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SVE_OR_NEON: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]] +; +; SVE_TF-LABEL: define void @test_linear_non_ptr_neg_stride +; SVE_TF-SAME: (ptr noalias [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SVE_TF: [[DATA:%.*]] = call i32 @bar(i32 [[NEGSTRIDE:%.*]]) #[[ATTR6:[0-9]+]] ; entry: br label %for.body @@ -241,10 +211,10 @@ entry: for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %little.iv = trunc i64 %indvars.iv to i32 - %trebled = mul i32 %little.iv, 3 - %call = call i32 @bar(i32 %trebled) #6 + %negstride = mul i32 %little.iv, -5 + %data = call i32 @bar(i32 %negstride) #5 %gepa = getelementptr inbounds i32, ptr %a, i64 %indvars.iv - store i32 %call, ptr %gepa + store i32 %data, ptr %gepa %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %n br i1 %exitcond, label %for.cond.cleanup, label %for.body @@ -258,18 +228,26 @@ declare i32 @baz(i32, ptr) declare i32 @quux(ptr, ptr) declare i32 @bar(i32) -; vector variants of foo -declare <vscale x 2 x i64> @foo_linear(ptr, <vscale x 2 x i1>) -declare <vscale x 2 x i64> @foo_linear_nomask(ptr) -declare <vscale x 2 x i64> @foo_linear_nomask_2x(ptr) -declare <vscale x 4 x i32> @baz_vector_and_linear(<vscale x 4 x i32>, ptr) -declare <vscale x 4 x i32> @quux_linear_mask(ptr, ptr, <vscale x 4 x i1>) -declare <vscale x 4 x i32> @bar_linear(i32) - -attributes #0 = { 
"target-features"="+sve" } -attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsNxl8_foo(foo_linear_nomask)" } -attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVsMxl8_foo(foo_linear)" } -attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsNxvl4_baz(baz_vector_and_linear)" } -attributes #4 = { nounwind "vector-function-abi-variant"="_ZGVsNxl16_foo(foo_linear_nomask_2x)" } -attributes #5 = { nounwind "vector-function-abi-variant"="_ZGVsMxl4l8_quux(quux_linear_mask)" } -attributes #6 = { nounwind "vector-function-abi-variant"="_ZGVsNxl3_bar(bar_linear)" } +; neon vector variants of foo +declare <2 x i64> @neon_foo_linear(ptr) +declare <2 x i64> @neon_foo_linear_2x(ptr) +declare <4 x i32> @neon_baz_vector_and_linear(<4 x i32>, ptr) +declare <4 x i32> @neon_quux_linear(ptr, ptr) +declare <4 x i32> @neon_bar_linear(i32) +declare <4 x i32> @neon_bar_neg_linear(i32) + +; scalable vector variants of foo +declare <vscale x 2 x i64> @sve_foo_linear(ptr, <vscale x 2 x i1>) +declare <vscale x 2 x i64> @sve_foo_linear_nomask(ptr) +declare <vscale x 2 x i64> @sve_foo_linear_nomask_2x(ptr) +declare <vscale x 4 x i32> @sve_baz_vector_and_linear(<vscale x 4 x i32>, ptr) +declare <vscale x 4 x i32> @sve_quux_linear_mask(ptr, ptr, <vscale x 4 x i1>) +declare <vscale x 4 x i32> @sve_bar_linear(i32) +declare <vscale x 4 x i32> @sve_bar_neg_linear(i32) + +attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVsNxl8_foo(sve_foo_linear_nomask),_ZGVsMxl8_foo(sve_foo_linear),_ZGVnN2l8_foo(neon_foo_linear)" } +attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsNxvl4_baz(sve_baz_vector_and_linear),_ZGVnN4vl4_baz(neon_baz_vector_and_linear)" } +attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVsNxl16_foo(sve_foo_linear_nomask_2x),_ZGVnN2l16_foo(neon_foo_linear_2x)" } +attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsMxl4l8_quux(sve_quux_linear_mask),_ZGVnN4l4l8_quux(neon_quux_linear)" } +attributes #4 = { nounwind "vector-function-abi-variant"="_ZGVsNxl3_bar(sve_bar_linear),_ZGVnN4l3_bar(neon_bar_linear)" } +attributes #5 = { nounwind 
"vector-function-abi-variant"="_ZGVsNxln5_bar(sve_bar_neg_linear),_ZGVnN4ln5_bar(neon_bar_neg_linear)" }