[AArch64][SME] Fix definition of uclamp/sclamp instructions. #77619

sdesmalen-arm · 2024-01-10T16:36:10Z

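For some reason the input operands of the `sve2_clamp` instruction class were in the wrong order: they were declared as `($Zn, $Zm, $_Zd)` instead of `($_Zd, $Zn, $Zm)`. As a result, the first argument of the sclamp/uclamp intrinsics was not the one placed in the destination register, and codegen emitted e.g. `sclamp z2.b, z0.b, z1.b` followed by an extra `mov z0.d, z2.d` instead of a single `sclamp z0.b, z1.b, z2.b`.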

llvmbot · 2024-01-10T16:36:48Z

@llvm/pr-subscribers-backend-aarch64

Author: Sander de Smalen (sdesmalen-arm)

Changes

For some reason the arguments were in the wrong order.

Full diff: https://github.com/llvm/llvm-project/pull/77619.diff

4 Files Affected:

(modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+1-1)
(modified) llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll (+8-8)
(modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll (+4-8)
(modified) llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll (+4-8)

diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 70f3c2c99f0f05..44d9a8ac7cb677 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -1268,7 +1268,7 @@ multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
 }
 
 class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
-    : I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd),
+    : I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm),
         asm, "\t$Zd, $Zn, $Zm", "", []>,
       Sched<[]> {
   bits<5> Zm;
diff --git a/llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll b/llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll
index 28ec430aff3d3d..365fd534548495 100644
--- a/llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll
@@ -3,7 +3,7 @@
 
 ; Replace pattern min(max(v1,v2),v3) by clamp
 
-define <vscale x 16 x i8> @uclampi8(<vscale x 16 x i8> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+define <vscale x 16 x i8> @uclampi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: uclampi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uclamp z0.b, z1.b, z2.b
@@ -13,7 +13,7 @@ define <vscale x 16 x i8> @uclampi8(<vscale x 16 x i8> %c, <vscale x 16 x i8> %a
   ret <vscale x 16 x i8> %res
 }
 
-define <vscale x 8 x i16> @uclampi16(<vscale x 8 x i16> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+define <vscale x 8 x i16> @uclampi16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: uclampi16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uclamp z0.h, z1.h, z2.h
@@ -23,7 +23,7 @@ define <vscale x 8 x i16> @uclampi16(<vscale x 8 x i16> %c, <vscale x 8 x i16> %
   ret <vscale x 8 x i16> %res
 }
 
-define <vscale x 4 x i32> @uclampi32(<vscale x 4 x i32> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+define <vscale x 4 x i32> @uclampi32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: uclampi32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uclamp z0.s, z1.s, z2.s
@@ -33,7 +33,7 @@ define <vscale x 4 x i32> @uclampi32(<vscale x 4 x i32> %c, <vscale x 4 x i32> %
   ret <vscale x 4 x i32> %res
 }
 
-define <vscale x 2 x i64> @uclampi64(<vscale x 2 x i64> %c, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+define <vscale x 2 x i64> @uclampi64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: uclampi64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    uclamp z0.d, z1.d, z2.d
@@ -43,7 +43,7 @@ define <vscale x 2 x i64> @uclampi64(<vscale x 2 x i64> %c, <vscale x 2 x i64> %
   ret <vscale x 2 x i64> %res
 }
 
-define <vscale x 16 x i8> @sclampi8(<vscale x 16 x i8> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+define <vscale x 16 x i8> @sclampi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
 ; CHECK-LABEL: sclampi8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sclamp z0.b, z1.b, z2.b
@@ -53,7 +53,7 @@ define <vscale x 16 x i8> @sclampi8(<vscale x 16 x i8> %c, <vscale x 16 x i8> %a
   ret <vscale x 16 x i8> %res
 }
 
-define <vscale x 8 x i16> @sclampi16(<vscale x 8 x i16> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+define <vscale x 8 x i16> @sclampi16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
 ; CHECK-LABEL: sclampi16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sclamp z0.h, z1.h, z2.h
@@ -63,7 +63,7 @@ define <vscale x 8 x i16> @sclampi16(<vscale x 8 x i16> %c, <vscale x 8 x i16> %
   ret <vscale x 8 x i16> %res
 }
 
-define <vscale x 4 x i32> @sclampi32(<vscale x 4 x i32> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+define <vscale x 4 x i32> @sclampi32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
 ; CHECK-LABEL: sclampi32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sclamp z0.s, z1.s, z2.s
@@ -73,7 +73,7 @@ define <vscale x 4 x i32> @sclampi32(<vscale x 4 x i32> %c, <vscale x 4 x i32> %
   ret <vscale x 4 x i32> %res
 }
 
-define <vscale x 2 x i64> @sclampi64(<vscale x 2 x i64> %c, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+define <vscale x 2 x i64> @sclampi64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
 ; CHECK-LABEL: sclampi64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sclamp z0.d, z1.d, z2.d
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
index cf59036d42dbf5..912d5d853aa8d5 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
@@ -6,8 +6,7 @@ target triple = "aarch64-linux-gnu"
 define <vscale x 16 x i8> @test_sclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
 ; CHECK-LABEL: test_sclamp_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sclamp z2.b, z0.b, z1.b
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    sclamp z0.b, z1.b, z2.b
 ; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sclamp.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
   ret <vscale x 16 x i8> %res
@@ -16,8 +15,7 @@ define <vscale x 16 x i8> @test_sclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x
 define <vscale x 8 x i16> @test_sclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
 ; CHECK-LABEL: test_sclamp_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sclamp z2.h, z0.h, z1.h
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    sclamp z0.h, z1.h, z2.h
 ; CHECK-NEXT:    ret
   %res = call <vscale x  8 x i16> @llvm.aarch64.sve.sclamp.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
   ret <vscale x 8 x i16> %res
@@ -26,8 +24,7 @@ define <vscale x 8 x i16> @test_sclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x
 define <vscale x 4 x i32> @test_sclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
 ; CHECK-LABEL: test_sclamp_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sclamp z2.s, z0.s, z1.s
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    sclamp z0.s, z1.s, z2.s
 ; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sclamp.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
   ret <vscale x 4 x i32> %res
@@ -36,8 +33,7 @@ define <vscale x 4 x i32> @test_sclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x
 define <vscale x 2 x i64> @test_sclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
 ; CHECK-LABEL: test_sclamp_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sclamp z2.d, z0.d, z1.d
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    sclamp z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sclamp.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
   ret <vscale x 2 x i64> %res
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
index 81a34e82d8450e..de1695162c98eb 100644
--- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
@@ -6,8 +6,7 @@ target triple = "aarch64-linux-gnu"
 define <vscale x 16 x i8> @test_uclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
 ; CHECK-LABEL: test_uclamp_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uclamp z2.b, z0.b, z1.b
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    uclamp z0.b, z1.b, z2.b
 ; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.uclamp.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
   ret <vscale x 16 x i8> %res
@@ -16,8 +15,7 @@ define <vscale x 16 x i8> @test_uclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x
 define <vscale x 8 x i16> @test_uclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
 ; CHECK-LABEL: test_uclamp_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uclamp z2.h, z0.h, z1.h
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    uclamp z0.h, z1.h, z2.h
 ; CHECK-NEXT:    ret
   %res = call <vscale x  8 x i16> @llvm.aarch64.sve.uclamp.nxv8i16(<vscale x  8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
   ret <vscale x 8 x i16> %res
@@ -26,8 +24,7 @@ define <vscale x 8 x i16> @test_uclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x
 define <vscale x 4 x i32> @test_uclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
 ; CHECK-LABEL: test_uclamp_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uclamp z2.s, z0.s, z1.s
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    uclamp z0.s, z1.s, z2.s
 ; CHECK-NEXT:    ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uclamp.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
   ret <vscale x 4 x i32> %res
@@ -36,8 +33,7 @@ define <vscale x 4 x i32> @test_uclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x
 define <vscale x 2 x i64> @test_uclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
 ; CHECK-LABEL: test_uclamp_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    uclamp z2.d, z0.d, z1.d
-; CHECK-NEXT:    mov z0.d, z2.d
+; CHECK-NEXT:    uclamp z0.d, z1.d, z2.d
 ; CHECK-NEXT:    ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.uclamp.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
   ret <vscale x 2 x i64> %res

momchil-velikov

LGTM

For some reason the arguments were in the wrong order.

[AArch64][SME] Fix definition of uclamp/sclamp instructions.

4f79c77

For some reason the arguments were in the wrong order.

sdesmalen-arm requested review from momchil-velikov, hassnaaHamdi and david-arm January 10, 2024 16:36

llvmbot added the backend:AArch64 label Jan 10, 2024

momchil-velikov approved these changes Jan 10, 2024

View reviewed changes

sdesmalen-arm merged commit d7ac412 into llvm:main Jan 10, 2024
4 of 5 checks passed

justinfargnoli pushed a commit to justinfargnoli/llvm-project that referenced this pull request Jan 28, 2024

[AArch64][SME] Fix definition of uclamp/sclamp instructions. (llvm#77619)

53a3b35

For some reason the arguments were in the wrong order.

sdesmalen-arm deleted the fix-sme-clamp-instructions branch February 23, 2024 11:35

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AArch64][SME] Fix definition of uclamp/sclamp instructions. #77619

[AArch64][SME] Fix definition of uclamp/sclamp instructions. #77619

sdesmalen-arm commented Jan 10, 2024

llvmbot commented Jan 10, 2024

momchil-velikov left a comment

[AArch64][SME] Fix definition of uclamp/sclamp instructions. #77619

[AArch64][SME] Fix definition of uclamp/sclamp instructions. #77619

Conversation

sdesmalen-arm commented Jan 10, 2024

llvmbot commented Jan 10, 2024

momchil-velikov left a comment

Choose a reason for hiding this comment