Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AArch64][SME] Fix definition of uclamp/sclamp instructions. #77619

Merged
sdesmalen-arm merged 1 commit into llvm:main from fix-sme-clamp-instructions on Jan 10, 2024

Conversation

sdesmalen-arm
Copy link
Collaborator

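For some reason the input operands of the `sve2_clamp` instruction class were declared in the wrong order: `$_Zd` was listed last, after `$Zn` and `$Zm`, instead of first. With the order corrected, the sclamp/uclamp intrinsic tests no longer need an extra `mov` after the clamp instruction.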

For some reason the arguments were in the wrong order.
@llvmbot
Copy link
Collaborator

llvmbot commented Jan 10, 2024

@llvm/pr-subscribers-backend-aarch64

Author: Sander de Smalen (sdesmalen-arm)

Changes

For some reason the arguments were in the wrong order.


Full diff: https://github.com/llvm/llvm-project/pull/77619.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+1-1)
  • (modified) llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll (+8-8)
  • (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll (+4-8)
  • (modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll (+4-8)
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 70f3c2c99f0f05..44d9a8ac7cb677 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -1268,7 +1268,7 @@ multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
 }
 
 class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
-    : I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd),
+    : I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm),
         asm, "\t$Zd, $Zn, $Zm", "", []>,
       Sched<[]> {
   bits<5> Zm;
diff --git a/llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll b/llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll
index 28ec430aff3d3d..365fd534548495 100644
--- a/llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll
@@ -3,7 +3,7 @@
 
 ; Replace pattern min(max(v1,v2),v3) by clamp
 
-define <vscale x 16 x i8> @uclampi8(<vscale x 16 x i8> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+define <vscale x 16 x i8> @uclampi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: uclampi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uclamp z0.b, z1.b, z2.b
@@ -13,7 +13,7 @@ define <vscale x 16 x i8> @uclampi8(<vscale x 16 x i8> %c, <vscale x 16 x i8> %a
   ret <vscale x 16 x i8> %res
 }
 
-define <vscale x 8 x i16> @uclampi16(<vscale x 8 x i16> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+define <vscale x 8 x i16> @uclampi16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: uclampi16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uclamp z0.h, z1.h, z2.h
@@ -23,7 +23,7 @@ define <vscale x 8 x i16> @uclampi16(<vscale x 8 x i16> %c, <vscale x 8 x i16> %
   ret <vscale x 8 x i16> %res
 }
 
-define <vscale x 4 x i32> @uclampi32(<vscale x 4 x i32> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+define <vscale x 4 x i32> @uclampi32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: uclampi32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uclamp z0.s, z1.s, z2.s
@@ -33,7 +33,7 @@ define <vscale x 4 x i32> @uclampi32(<vscale x 4 x i32> %c, <vscale x 4 x i32> %
   ret <vscale x 4 x i32> %res
 }
 
-define <vscale x 2 x i64> @uclampi64(<vscale x 2 x i64> %c, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+define <vscale x 2 x i64> @uclampi64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: uclampi64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uclamp z0.d, z1.d, z2.d
@@ -43,7 +43,7 @@ define <vscale x 2 x i64> @uclampi64(<vscale x 2 x i64> %c, <vscale x 2 x i64> %
   ret <vscale x 2 x i64> %res
 }
 
-define <vscale x 16 x i8> @sclampi8(<vscale x 16 x i8> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+define <vscale x 16 x i8> @sclampi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: sclampi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sclamp z0.b, z1.b, z2.b
@@ -53,7 +53,7 @@ define <vscale x 16 x i8> @sclampi8(<vscale x 16 x i8> %c, <vscale x 16 x i8> %a
   ret <vscale x 16 x i8> %res
 }
 
-define <vscale x 8 x i16> @sclampi16(<vscale x 8 x i16> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+define <vscale x 8 x i16> @sclampi16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: sclampi16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sclamp z0.h, z1.h, z2.h
@@ -63,7 +63,7 @@ define <vscale x 8 x i16> @sclampi16(<vscale x 8 x i16> %c, <vscale x 8 x i16> %
   ret <vscale x 8 x i16> %res
 }
 
-define <vscale x 4 x i32> @sclampi32(<vscale x 4 x i32> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+define <vscale x 4 x i32> @sclampi32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: sclampi32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sclamp z0.s, z1.s, z2.s
@@ -73,7 +73,7 @@ define <vscale x 4 x i32> @sclampi32(<vscale x 4 x i32> %c, <vscale x 4 x i32> %
   ret <vscale x 4 x i32> %res
 }
 
-define <vscale x 2 x i64> @sclampi64(<vscale x 2 x i64> %c, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+define <vscale x 2 x i64> @sclampi64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: sclampi64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sclamp z0.d, z1.d, z2.d
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
index cf59036d42dbf5..912d5d853aa8d5 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
@@ -6,8 +6,7 @@ target triple = "aarch64-linux-gnu"
 define <vscale x 16 x i8> @test_sclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
 ; CHECK-LABEL: test_sclamp_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sclamp z2.b, z0.b, z1.b
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    sclamp z0.b, z1.b, z2.b
 ; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sclamp.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
   ret <vscale x 16 x i8> %res
@@ -16,8 +15,7 @@ define <vscale x 16 x i8> @test_sclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x
 define <vscale x 8 x i16> @test_sclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
 ; CHECK-LABEL: test_sclamp_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sclamp z2.h, z0.h, z1.h
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    sclamp z0.h, z1.h, z2.h
 ; CHECK-NEXT:    ret
   %res = call <vscale x  8 x i16> @llvm.aarch64.sve.sclamp.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
   ret <vscale x 8 x i16> %res
@@ -26,8 +24,7 @@ define <vscale x 8 x i16> @test_sclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x
 define <vscale x 4 x i32> @test_sclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
 ; CHECK-LABEL: test_sclamp_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sclamp z2.s, z0.s, z1.s
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    sclamp z0.s, z1.s, z2.s
 ; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sclamp.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
   ret <vscale x 4 x i32> %res
@@ -36,8 +33,7 @@ define <vscale x 4 x i32> @test_sclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x
 define <vscale x 2 x i64> @test_sclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
 ; CHECK-LABEL: test_sclamp_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sclamp z2.d, z0.d, z1.d
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    sclamp z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sclamp.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
   ret <vscale x 2 x i64> %res
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
index 81a34e82d8450e..de1695162c98eb 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
@@ -6,8 +6,7 @@ target triple = "aarch64-linux-gnu"
 define <vscale x 16 x i8> @test_uclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
 ; CHECK-LABEL: test_uclamp_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uclamp z2.b, z0.b, z1.b
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    uclamp z0.b, z1.b, z2.b
 ; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.uclamp.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
   ret <vscale x 16 x i8> %res
@@ -16,8 +15,7 @@ define <vscale x 16 x i8> @test_uclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x
 define <vscale x 8 x i16> @test_uclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
 ; CHECK-LABEL: test_uclamp_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uclamp z2.h, z0.h, z1.h
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    uclamp z0.h, z1.h, z2.h
 ; CHECK-NEXT:    ret
   %res = call <vscale x  8 x i16> @llvm.aarch64.sve.uclamp.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
   ret <vscale x 8 x i16> %res
@@ -26,8 +24,7 @@ define <vscale x 8 x i16> @test_uclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x
 define <vscale x 4 x i32> @test_uclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
 ; CHECK-LABEL: test_uclamp_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uclamp z2.s, z0.s, z1.s
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    uclamp z0.s, z1.s, z2.s
 ; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uclamp.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
   ret <vscale x 4 x i32> %res
@@ -36,8 +33,7 @@ define <vscale x 4 x i32> @test_uclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x
 define <vscale x 2 x i64> @test_uclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
 ; CHECK-LABEL: test_uclamp_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uclamp z2.d, z0.d, z1.d
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    uclamp z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uclamp.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
   ret <vscale x 2 x i64> %res

Copy link
Collaborator

@momchil-velikov momchil-velikov left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@sdesmalen-arm sdesmalen-arm merged commit d7ac412 into llvm:main Jan 10, 2024
4 of 5 checks passed
justinfargnoli pushed a commit to justinfargnoli/llvm-project that referenced this pull request Jan 28, 2024
[AArch64][SME] Fix definition of uclamp/sclamp instructions. (llvm#77619)

For some reason the arguments were in the wrong order.
@sdesmalen-arm sdesmalen-arm deleted the fix-sme-clamp-instructions branch February 23, 2024 11:35
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants