Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AArch64][SME2] Fix SME2 mla/mls tests #76711

Merged
merged 4 commits into from Jan 12, 2024
Merged

Conversation

MDevereau
Copy link
Contributor

The ACLE defines these builtins as svmla[_single]_za32[_f32]_vg1x2, which means the SVE_ACLE_FUNC macro should test the overloaded forms as

SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x2)

https://github.com/ARM-software/acle/blob/b88cbf7e9c104100bb5016c848763171494dee44/main/acle.md?plain=1#L10170-L10205

The ACLE defines these builtins as svmla[_single]_za32[_f32]_vg1x2,
which means the SVE_ACLE_FUNC macro should test the overloaded forms as

SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x2)

https://github.com/ARM-software/acle/blob/b88cbf7e9c104100bb5016c848763171494dee44/main/acle.md?plain=1#L10170-L10205
@llvmbot llvmbot added the clang Clang issues not falling into any other category label Jan 2, 2024
@llvmbot
Copy link
Collaborator

llvmbot commented Jan 2, 2024

@llvm/pr-subscribers-clang

Author: Matthew Devereau (MDevereau)

Changes

The ACLE defines these builtins as svmla[_single]_za32[_f32]_vg1x2, which means the SVE_ACLE_FUNC macro should test the overloaded forms as

SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x2)

https://github.com/ARM-software/acle/blob/b88cbf7e9c104100bb5016c848763171494dee44/main/acle.md?plain=1#L10170-L10205


Full diff: https://github.com/llvm/llvm-project/pull/76711.diff

2 Files Affected:

  • (modified) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c (+4-4)
  • (modified) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c (+4-4)
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c
index f52edd9888daa9..2679f9cc8dfd0c 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c
@@ -86,7 +86,7 @@ void test_svmla4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32x4_t zm) __
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svmla_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za {
-  SVE_ACLE_FUNC(svmla_single_za32,,_f32,,_vg1x2)(slice_base, zn, zm);
+  SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x2)(slice_base, zn, zm);
 }
 
 // CHECK-LABEL: @test_svmla_single4_f32(
@@ -108,7 +108,7 @@ void test_svmla_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t z
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svmla_single4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za {
-  SVE_ACLE_FUNC(svmla_single_za32,,_f32,,_vg1x4)(slice_base, zn, zm);
+  SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x4)(slice_base, zn, zm);
 }
 
 //
@@ -224,7 +224,7 @@ void test_svmla4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64x4_t zm) __
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svmla_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za {
-  SVE_ACLE_FUNC(svmla_single_za64,,_f64,,_vg1x2)(slice_base, zn, zm);
+  SVE_ACLE_FUNC(svmla,_single,_za64,_f64,_vg1x2)(slice_base, zn, zm);
 }
 
 // CHECK-LABEL: @test_svmla_single4_f64(
@@ -246,7 +246,7 @@ void test_svmla_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t z
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svmla_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za {
-  SVE_ACLE_FUNC(svmla_single_za64,,_f64,,_vg1x4)(slice_base, zn, zm);
+  SVE_ACLE_FUNC(svmla,_single,_za64,_f64,_vg1x4)(slice_base, zn, zm);
 }
 
 //
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c
index 6830a399e91d64..bb4cbbd5308c06 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c
@@ -86,7 +86,7 @@ void test_svmls4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32x4_t zm) __
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svmls_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za {
-  SVE_ACLE_FUNC(svmls_single_za32,,_f32,,_vg1x2)(slice_base, zn, zm);
+  SVE_ACLE_FUNC(svmls,_single,_za32,_f32,_vg1x2)(slice_base, zn, zm);
 }
 
 // CHECK-LABEL: @test_svmls_single4_f32(
@@ -108,7 +108,7 @@ void test_svmls_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t z
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svmls_single4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za {
-  SVE_ACLE_FUNC(svmls_single_za32,,_f32,,_vg1x4)(slice_base, zn, zm);
+  SVE_ACLE_FUNC(svmls,_single,_za32,_f32,_vg1x4)(slice_base, zn, zm);
 }
 
 //
@@ -224,7 +224,7 @@ void test_svmls4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64x4_t zm) __
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svmls_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za {
-  SVE_ACLE_FUNC(svmls_single_za64,,_f64,,_vg1x2)(slice_base, zn, zm);
+  SVE_ACLE_FUNC(svmls,_single,_za64,_f64,_vg1x2)(slice_base, zn, zm);
 }
 
 // CHECK-LABEL: @test_svmls_single4_f64(
@@ -246,7 +246,7 @@ void test_svmls_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t z
 // CPP-CHECK-NEXT:    ret void
 //
 void test_svmls_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za {
-  SVE_ACLE_FUNC(svmls_single_za64,,_f64,,_vg1x4)(slice_base, zn, zm);
+  SVE_ACLE_FUNC(svmls,_single,_za64,_f64,_vg1x4)(slice_base, zn, zm);
 }
 
 //

Copy link
Contributor

@kmclaughlin-arm kmclaughlin-arm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for fixing these tests @MDevereau!
There are also some tests in acle_sme2_mlal.c, acle_sme2_mlall.c & acle_sme2_mlsl.c which have a similar issue, could you please update them in this patch too?

@@ -246,7 +246,7 @@ void test_svmls_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t z
// CPP-CHECK-NEXT: ret void
//
void test_svmls_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za {
SVE_ACLE_FUNC(svmls_single_za64,,_f64,,_vg1x4)(slice_base, zn, zm);
SVE_ACLE_FUNC(svmls,_single,_za64,_f64,_vg1x4)(slice_base, zn, zm);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there are some svmls & svmls_lane tests that should be updated in this test file as well, for example on line 39:
SVE_ACLE_FUNC(svmls_za32,,_f32,,_vg1x2)(slice_base, zn, zm);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is SVE_ACLE_FUNC(svmls_za32,,_f32,,_vg1x2)(slice_base, zn, zm); Incorrect? Looking here it does not appear to be overloaded in the same manner

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is incorrect, for svmls_za32[_f32]_vg1x2 I would have expected something like SVE_ACLE_FUNC(svmls_za32,_f32,_vg1x2,,), since _f32 is the optional part of the name.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I see what you mean now. Some tests inside acle_sme2_mlsl.c were suffering from the same fate too. I've changed them now. Thanks for pointing this out.

Copy link
Contributor

@kmclaughlin-arm kmclaughlin-arm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for updating this @MDevereau, I think there are just a few more tests that should be included in this PR.

@@ -460,7 +460,7 @@ void test_svmla_single4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm)
//
void test_svmla_single4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_shared_za
{
SVE_ACLE_FUNC(svmla_single_za32,,_s16,,_vg2x4)(slice_base, zn, zm);
SVE_ACLE_FUNC(svmla,_single,_za32,_s16,_vg2x4)(slice_base, zn, zm);
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The tests for the svmla_lane builtins in this file also need to be updated.

Copy link
Contributor Author

@MDevereau MDevereau Jan 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since SVE_ACLE_FUNC is defined as SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5 The svmla_lane tests
SVE_ACLE_FUNC(svmla_lane_za32,,,_s16,_vg2x4)(slice_base, zn, zm, 7);
are functionally the same as
SVE_ACLE_FUNC(svmla_lane_za32,_s16,_vg2x4,,,)(slice_base, zn, zm, 7);
which is why I didn't update these tests earlier. Do we still want the commas to be pushed to the back?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was trivial to change them so I just went ahead and did it.

@@ -494,7 +494,7 @@ void test_svmls_lane1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) _
//
void test_svmls_lane1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za
{
SVE_ACLE_FUNC(svmls_lane_za32,,_bf16,,_vg2x1)(slice_base, zn, zm, 7);
SVE_ACLE_FUNC(svmls_lane_za32,_bf16,_vg2x1,,)(slice_base, zn, zm, 7);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is one extra svmls_lane test above this that also needs to be fixed :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops. Fixed.

Copy link
Contributor

@kmclaughlin-arm kmclaughlin-arm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM!

@MDevereau MDevereau merged commit 42fe3bc into llvm:main Jan 12, 2024
4 checks passed
@MDevereau MDevereau deleted the mla-mls-cleanup branch January 12, 2024 09:56
justinfargnoli pushed a commit to justinfargnoli/llvm-project that referenced this pull request Jan 28, 2024
The ACLE defines these builtins as svmla[_single]_za32[_f32]_vg1x2,
which means the SVE_ACLE_FUNC macro should test the overloaded forms as

SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x2)


https://github.com/ARM-software/acle/blob/b88cbf7e9c104100bb5016c848763171494dee44/main/acle.md?plain=1#L10170-L10205
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants