Skip to content

Commit

Permalink
[AArch64][SME] Fix definition of uclamp/sclamp instructions. (#77619)
Browse files Browse the repository at this point in the history
For some reason the arguments were in the wrong order.
  • Loading branch information
sdesmalen-arm committed Jan 10, 2024
1 parent af78e5d commit d7ac412
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 25 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/SMEInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -1268,7 +1268,7 @@ multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
}

class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
-: I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd),
+: I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm),
asm, "\t$Zd, $Zn, $Zm", "", []>,
Sched<[]> {
bits<5> Zm;
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AArch64/sve2-min-max-clamp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

; Replace pattern min(max(v1,v2),v3) by clamp

-define <vscale x 16 x i8> @uclampi8(<vscale x 16 x i8> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+define <vscale x 16 x i8> @uclampi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: uclampi8:
; CHECK: // %bb.0:
; CHECK-NEXT: uclamp z0.b, z1.b, z2.b
Expand All @@ -13,7 +13,7 @@ define <vscale x 16 x i8> @uclampi8(<vscale x 16 x i8> %c, <vscale x 16 x i8> %a
ret <vscale x 16 x i8> %res
}

-define <vscale x 8 x i16> @uclampi16(<vscale x 8 x i16> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+define <vscale x 8 x i16> @uclampi16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: uclampi16:
; CHECK: // %bb.0:
; CHECK-NEXT: uclamp z0.h, z1.h, z2.h
Expand All @@ -23,7 +23,7 @@ define <vscale x 8 x i16> @uclampi16(<vscale x 8 x i16> %c, <vscale x 8 x i16> %
ret <vscale x 8 x i16> %res
}

-define <vscale x 4 x i32> @uclampi32(<vscale x 4 x i32> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+define <vscale x 4 x i32> @uclampi32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: uclampi32:
; CHECK: // %bb.0:
; CHECK-NEXT: uclamp z0.s, z1.s, z2.s
Expand All @@ -33,7 +33,7 @@ define <vscale x 4 x i32> @uclampi32(<vscale x 4 x i32> %c, <vscale x 4 x i32> %
ret <vscale x 4 x i32> %res
}

-define <vscale x 2 x i64> @uclampi64(<vscale x 2 x i64> %c, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+define <vscale x 2 x i64> @uclampi64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: uclampi64:
; CHECK: // %bb.0:
; CHECK-NEXT: uclamp z0.d, z1.d, z2.d
Expand All @@ -43,7 +43,7 @@ define <vscale x 2 x i64> @uclampi64(<vscale x 2 x i64> %c, <vscale x 2 x i64> %
ret <vscale x 2 x i64> %res
}

-define <vscale x 16 x i8> @sclampi8(<vscale x 16 x i8> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+define <vscale x 16 x i8> @sclampi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sclampi8:
; CHECK: // %bb.0:
; CHECK-NEXT: sclamp z0.b, z1.b, z2.b
Expand All @@ -53,7 +53,7 @@ define <vscale x 16 x i8> @sclampi8(<vscale x 16 x i8> %c, <vscale x 16 x i8> %a
ret <vscale x 16 x i8> %res
}

-define <vscale x 8 x i16> @sclampi16(<vscale x 8 x i16> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+define <vscale x 8 x i16> @sclampi16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sclampi16:
; CHECK: // %bb.0:
; CHECK-NEXT: sclamp z0.h, z1.h, z2.h
Expand All @@ -63,7 +63,7 @@ define <vscale x 8 x i16> @sclampi16(<vscale x 8 x i16> %c, <vscale x 8 x i16> %
ret <vscale x 8 x i16> %res
}

-define <vscale x 4 x i32> @sclampi32(<vscale x 4 x i32> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+define <vscale x 4 x i32> @sclampi32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sclampi32:
; CHECK: // %bb.0:
; CHECK-NEXT: sclamp z0.s, z1.s, z2.s
Expand All @@ -73,7 +73,7 @@ define <vscale x 4 x i32> @sclampi32(<vscale x 4 x i32> %c, <vscale x 4 x i32> %
ret <vscale x 4 x i32> %res
}

-define <vscale x 2 x i64> @sclampi64(<vscale x 2 x i64> %c, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+define <vscale x 2 x i64> @sclampi64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: sclampi64:
; CHECK: // %bb.0:
; CHECK-NEXT: sclamp z0.d, z1.d, z2.d
Expand Down
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ target triple = "aarch64-linux-gnu"
define <vscale x 16 x i8> @test_sclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
; CHECK-LABEL: test_sclamp_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sclamp z2.b, z0.b, z1.b
-; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: sclamp z0.b, z1.b, z2.b
; CHECK-NEXT: ret
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.sclamp.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
ret <vscale x 16 x i8> %res
Expand All @@ -16,8 +15,7 @@ define <vscale x 16 x i8> @test_sclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x
define <vscale x 8 x i16> @test_sclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
; CHECK-LABEL: test_sclamp_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sclamp z2.h, z0.h, z1.h
-; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: sclamp z0.h, z1.h, z2.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.sclamp.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
ret <vscale x 8 x i16> %res
Expand All @@ -26,8 +24,7 @@ define <vscale x 8 x i16> @test_sclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x
define <vscale x 4 x i32> @test_sclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
; CHECK-LABEL: test_sclamp_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: sclamp z2.s, z0.s, z1.s
-; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: sclamp z0.s, z1.s, z2.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.sclamp.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
ret <vscale x 4 x i32> %res
Expand All @@ -36,8 +33,7 @@ define <vscale x 4 x i32> @test_sclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x
define <vscale x 2 x i64> @test_sclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
; CHECK-LABEL: test_sclamp_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: sclamp z2.d, z0.d, z1.d
-; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: sclamp z0.d, z1.d, z2.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.sclamp.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
ret <vscale x 2 x i64> %res
Expand Down
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@ target triple = "aarch64-linux-gnu"
define <vscale x 16 x i8> @test_uclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
; CHECK-LABEL: test_uclamp_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: uclamp z2.b, z0.b, z1.b
-; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: uclamp z0.b, z1.b, z2.b
; CHECK-NEXT: ret
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.uclamp.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
ret <vscale x 16 x i8> %res
Expand All @@ -16,8 +15,7 @@ define <vscale x 16 x i8> @test_uclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x
define <vscale x 8 x i16> @test_uclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
; CHECK-LABEL: test_uclamp_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: uclamp z2.h, z0.h, z1.h
-; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: uclamp z0.h, z1.h, z2.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.uclamp.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
ret <vscale x 8 x i16> %res
Expand All @@ -26,8 +24,7 @@ define <vscale x 8 x i16> @test_uclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x
define <vscale x 4 x i32> @test_uclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
; CHECK-LABEL: test_uclamp_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: uclamp z2.s, z0.s, z1.s
-; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: uclamp z0.s, z1.s, z2.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.uclamp.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
ret <vscale x 4 x i32> %res
Expand All @@ -36,8 +33,7 @@ define <vscale x 4 x i32> @test_uclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x
define <vscale x 2 x i64> @test_uclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
; CHECK-LABEL: test_uclamp_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: uclamp z2.d, z0.d, z1.d
-; CHECK-NEXT: mov z0.d, z2.d
+; CHECK-NEXT: uclamp z0.d, z1.d, z2.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.uclamp.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
ret <vscale x 2 x i64> %res
Expand Down

0 comments on commit d7ac412

Please sign in to comment.