New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64][SME2] Fix SME2 mla/mls tests #76711
Conversation
The ACLE defines these builtins as svmla[_single]_za32[_f32]_vg1x2, which means the SVE_ACLE_FUNC macro should test the overloaded forms as SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x2) https://github.com/ARM-software/acle/blob/b88cbf7e9c104100bb5016c848763171494dee44/main/acle.md?plain=1#L10170-L10205
@llvm/pr-subscribers-clang Author: Matthew Devereau (MDevereau) ChangesThe ACLE defines these builtins as svmla[_single]_za32[_f32]_vg1x2, which means the SVE_ACLE_FUNC macro should test the overloaded forms as SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x2) Full diff: https://github.com/llvm/llvm-project/pull/76711.diff 2 Files Affected:
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c
index f52edd9888daa9..2679f9cc8dfd0c 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c
@@ -86,7 +86,7 @@ void test_svmla4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32x4_t zm) __
// CPP-CHECK-NEXT: ret void
//
void test_svmla_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za {
- SVE_ACLE_FUNC(svmla_single_za32,,_f32,,_vg1x2)(slice_base, zn, zm);
+ SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x2)(slice_base, zn, zm);
}
// CHECK-LABEL: @test_svmla_single4_f32(
@@ -108,7 +108,7 @@ void test_svmla_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t z
// CPP-CHECK-NEXT: ret void
//
void test_svmla_single4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za {
- SVE_ACLE_FUNC(svmla_single_za32,,_f32,,_vg1x4)(slice_base, zn, zm);
+ SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x4)(slice_base, zn, zm);
}
//
@@ -224,7 +224,7 @@ void test_svmla4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64x4_t zm) __
// CPP-CHECK-NEXT: ret void
//
void test_svmla_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za {
- SVE_ACLE_FUNC(svmla_single_za64,,_f64,,_vg1x2)(slice_base, zn, zm);
+ SVE_ACLE_FUNC(svmla,_single,_za64,_f64,_vg1x2)(slice_base, zn, zm);
}
// CHECK-LABEL: @test_svmla_single4_f64(
@@ -246,7 +246,7 @@ void test_svmla_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t z
// CPP-CHECK-NEXT: ret void
//
void test_svmla_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za {
- SVE_ACLE_FUNC(svmla_single_za64,,_f64,,_vg1x4)(slice_base, zn, zm);
+ SVE_ACLE_FUNC(svmla,_single,_za64,_f64,_vg1x4)(slice_base, zn, zm);
}
//
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c
index 6830a399e91d64..bb4cbbd5308c06 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c
@@ -86,7 +86,7 @@ void test_svmls4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32x4_t zm) __
// CPP-CHECK-NEXT: ret void
//
void test_svmls_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za {
- SVE_ACLE_FUNC(svmls_single_za32,,_f32,,_vg1x2)(slice_base, zn, zm);
+ SVE_ACLE_FUNC(svmls,_single,_za32,_f32,_vg1x2)(slice_base, zn, zm);
}
// CHECK-LABEL: @test_svmls_single4_f32(
@@ -108,7 +108,7 @@ void test_svmls_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t z
// CPP-CHECK-NEXT: ret void
//
void test_svmls_single4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) __arm_streaming __arm_shared_za {
- SVE_ACLE_FUNC(svmls_single_za32,,_f32,,_vg1x4)(slice_base, zn, zm);
+ SVE_ACLE_FUNC(svmls,_single,_za32,_f32,_vg1x4)(slice_base, zn, zm);
}
//
@@ -224,7 +224,7 @@ void test_svmls4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64x4_t zm) __
// CPP-CHECK-NEXT: ret void
//
void test_svmls_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za {
- SVE_ACLE_FUNC(svmls_single_za64,,_f64,,_vg1x2)(slice_base, zn, zm);
+ SVE_ACLE_FUNC(svmls,_single,_za64,_f64,_vg1x2)(slice_base, zn, zm);
}
// CHECK-LABEL: @test_svmls_single4_f64(
@@ -246,7 +246,7 @@ void test_svmls_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t z
// CPP-CHECK-NEXT: ret void
//
void test_svmls_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za {
- SVE_ACLE_FUNC(svmls_single_za64,,_f64,,_vg1x4)(slice_base, zn, zm);
+ SVE_ACLE_FUNC(svmls,_single,_za64,_f64,_vg1x4)(slice_base, zn, zm);
}
//
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for fixing these tests @MDevereau!
There are also some tests in acle_sme2_mlal.c, acle_sme2_mlall.c & acle_sme2_mlsl.c which have a similar issue, could you please update them in this patch too?
@@ -246,7 +246,7 @@ void test_svmls_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t z | |||
// CPP-CHECK-NEXT: ret void | |||
// | |||
void test_svmls_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) __arm_streaming __arm_shared_za { | |||
SVE_ACLE_FUNC(svmls_single_za64,,_f64,,_vg1x4)(slice_base, zn, zm); | |||
SVE_ACLE_FUNC(svmls,_single,_za64,_f64,_vg1x4)(slice_base, zn, zm); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think there are some svmls
& svmls_lane
tests that should be updated in this test file as well, for example on line 39:
SVE_ACLE_FUNC(svmls_za32,,_f32,,_vg1x2)(slice_base, zn, zm);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is SVE_ACLE_FUNC(svmls_za32,,_f32,,_vg1x2)(slice_base, zn, zm);
Incorrect? Looking here it does not appear to be overloaded in the same manner
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it is incorrect, for svmls_za32[_f32]_vg1x2
I would have expected something like SVE_ACLE_FUNC(svmls_za32,_f32,_vg1x2,,)
, since _f32
is the optional part of the name.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah I see what you mean now. Some tests inside acle_sme2_mlsl.c
were suffering from the same fate too. I've changed them now. Thanks for pointing this out.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for updating this @MDevereau, I think there are just a few more tests that should be included in this PR.
@@ -460,7 +460,7 @@ void test_svmla_single4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) | |||
// | |||
void test_svmla_single4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) __arm_streaming __arm_shared_za | |||
{ | |||
SVE_ACLE_FUNC(svmla_single_za32,,_s16,,_vg2x4)(slice_base, zn, zm); | |||
SVE_ACLE_FUNC(svmla,_single,_za32,_s16,_vg2x4)(slice_base, zn, zm); | |||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The tests for the svmla_lane
builtins in this file also need to be updated.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since SVE_ACLE_FUNC
is defined as SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5
The svmla_lane tests
SVE_ACLE_FUNC(svmla_lane_za32,,,_s16,_vg2x4)(slice_base, zn, zm, 7);
are functionally the same as
SVE_ACLE_FUNC(svmla_lane_za32,_s16,_vg2x4,,,)(slice_base, zn, zm, 7);
which is why I didn't update these tests earlier. Do we still want the commas to be pushed to the back?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was trivial to change them so I just went ahead and did it.
@@ -494,7 +494,7 @@ void test_svmls_lane1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) _ | |||
// | |||
void test_svmls_lane1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_shared_za | |||
{ | |||
SVE_ACLE_FUNC(svmls_lane_za32,,_bf16,,_vg2x1)(slice_base, zn, zm, 7); | |||
SVE_ACLE_FUNC(svmls_lane_za32,_bf16,_vg2x1,,)(slice_base, zn, zm, 7); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is one extra svmls_lane
test above this that also needs to be fixed :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oops. Fixed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM!
The ACLE defines these builtins as svmla[_single]_za32[_f32]_vg1x2, which means the SVE_ACLE_FUNC macro should test the overloaded forms as SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x2) https://github.com/ARM-software/acle/blob/b88cbf7e9c104100bb5016c848763171494dee44/main/acle.md?plain=1#L10170-L10205
The ACLE defines these builtins as svmla[_single]_za32[_f32]_vg1x2, which means the SVE_ACLE_FUNC macro should test the overloaded forms as
SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x2)
https://github.com/ARM-software/acle/blob/b88cbf7e9c104100bb5016c848763171494dee44/main/acle.md?plain=1#L10170-L10205