Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AArch64][SME] Add SVE2 psel, uclamp, sclamp and revd IR intrinsics
When the SME feature is enabled we also gain access to a few extra SVE2 instructions. This patch adds LLVM IR intrinsics to make use of these new instructions: @llvm.aarch64.sve.psel, @llvm.aarch64.sve.revd, @llvm.aarch64.sve.sclamp and @llvm.aarch64.sve.uclamp. Differential Revision: https://reviews.llvm.org/D128332
- Loading branch information
Showing
10 changed files
with
303 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s | ||
|
||
; Calls the psel intrinsic on <vscale x 16 x i1> predicates with %idx passed directly as the index. | ||
; The checks expect w0 to be copied into w12, then a .b-element psel with immediate 0. | ||
define <vscale x 16 x i1> @psel_b(<vscale x 16 x i1> %p1, <vscale x 16 x i1> %p2, i32 %idx) { | ||
; CHECK-LABEL: psel_b: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: mov w12, w0 | ||
; CHECK-NEXT: psel p0, p0, p1.b[w12, 0] | ||
; CHECK-NEXT: ret | ||
%res = call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1> %p1, <vscale x 16 x i1> %p2, i32 %idx) | ||
ret <vscale x 16 x i1> %res | ||
} | ||
|
||
; Calls the psel intrinsic on <vscale x 16 x i1> predicates with index %idx + 15. | ||
; The checks expect no separate add: the 15 appears as the immediate of the .b-element psel. | ||
define <vscale x 16 x i1> @psel_b_imm(<vscale x 16 x i1> %p1, <vscale x 16 x i1> %p2, i32 %idx) { | ||
; CHECK-LABEL: psel_b_imm: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: mov w12, w0 | ||
; CHECK-NEXT: psel p0, p0, p1.b[w12, 15] | ||
; CHECK-NEXT: ret | ||
%add = add i32 %idx, 15 | ||
%res = call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1> %p1, <vscale x 16 x i1> %p2, i32 %add) | ||
ret <vscale x 16 x i1> %res | ||
} | ||
|
||
; Calls the psel intrinsic on <vscale x 8 x i1> predicates with %idx passed directly as the index. | ||
; The checks expect w0 to be copied into w12, then a .h-element psel with immediate 0. | ||
define <vscale x 8 x i1> @psel_h(<vscale x 8 x i1> %p1, <vscale x 8 x i1> %p2, i32 %idx) { | ||
; CHECK-LABEL: psel_h: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: mov w12, w0 | ||
; CHECK-NEXT: psel p0, p0, p1.h[w12, 0] | ||
; CHECK-NEXT: ret | ||
%res = call <vscale x 8 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 8 x i1> %p1, <vscale x 8 x i1> %p2, i32 %idx) | ||
ret <vscale x 8 x i1> %res | ||
} | ||
|
||
; Calls the psel intrinsic on <vscale x 8 x i1> predicates with index %idx + 7. | ||
; The checks expect no separate add: the 7 appears as the immediate of the .h-element psel. | ||
define <vscale x 8 x i1> @psel_h_imm(<vscale x 8 x i1> %p1, <vscale x 8 x i1> %p2, i32 %idx) { | ||
; CHECK-LABEL: psel_h_imm: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: mov w12, w0 | ||
; CHECK-NEXT: psel p0, p0, p1.h[w12, 7] | ||
; CHECK-NEXT: ret | ||
%add = add i32 %idx, 7 | ||
%res = call <vscale x 8 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 8 x i1> %p1, <vscale x 8 x i1> %p2, i32 %add) | ||
ret <vscale x 8 x i1> %res | ||
} | ||
|
||
; Calls the psel intrinsic on <vscale x 4 x i1> predicates with %idx passed directly as the index. | ||
; The checks expect w0 to be copied into w12, then a .s-element psel with immediate 0. | ||
define <vscale x 4 x i1> @psel_s(<vscale x 4 x i1> %p1, <vscale x 4 x i1> %p2, i32 %idx) { | ||
; CHECK-LABEL: psel_s: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: mov w12, w0 | ||
; CHECK-NEXT: psel p0, p0, p1.s[w12, 0] | ||
; CHECK-NEXT: ret | ||
%res = call <vscale x 4 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 4 x i1> %p1, <vscale x 4 x i1> %p2, i32 %idx) | ||
ret <vscale x 4 x i1> %res | ||
} | ||
|
||
; Calls the psel intrinsic on <vscale x 4 x i1> predicates with index %idx + 3. | ||
; The checks expect no separate add: the 3 appears as the immediate of the .s-element psel. | ||
define <vscale x 4 x i1> @psel_s_imm(<vscale x 4 x i1> %p1, <vscale x 4 x i1> %p2, i32 %idx) { | ||
; CHECK-LABEL: psel_s_imm: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: mov w12, w0 | ||
; CHECK-NEXT: psel p0, p0, p1.s[w12, 3] | ||
; CHECK-NEXT: ret | ||
%add = add i32 %idx, 3 | ||
%res = call <vscale x 4 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 4 x i1> %p1, <vscale x 4 x i1> %p2, i32 %add) | ||
ret <vscale x 4 x i1> %res | ||
} | ||
|
||
; Calls the psel intrinsic on <vscale x 2 x i1> predicates with %idx passed directly as the index. | ||
; The checks expect w0 to be copied into w12, then a .d-element psel with immediate 0. | ||
define <vscale x 2 x i1> @psel_d(<vscale x 2 x i1> %p1, <vscale x 2 x i1> %p2, i32 %idx) { | ||
; CHECK-LABEL: psel_d: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: mov w12, w0 | ||
; CHECK-NEXT: psel p0, p0, p1.d[w12, 0] | ||
; CHECK-NEXT: ret | ||
%res = call <vscale x 2 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 2 x i1> %p1, <vscale x 2 x i1> %p2, i32 %idx) | ||
ret <vscale x 2 x i1> %res | ||
} | ||
|
||
; Calls the psel intrinsic on <vscale x 2 x i1> predicates with index %idx + 1. | ||
; The checks expect no separate add: the 1 appears as the immediate of the .d-element psel. | ||
define <vscale x 2 x i1> @psel_d_imm(<vscale x 2 x i1> %p1, <vscale x 2 x i1> %p2, i32 %idx) { | ||
; CHECK-LABEL: psel_d_imm: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: mov w12, w0 | ||
; CHECK-NEXT: psel p0, p0, p1.d[w12, 1] | ||
; CHECK-NEXT: ret | ||
%add = add i32 %idx, 1 | ||
%res = call <vscale x 2 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 2 x i1> %p1, <vscale x 2 x i1> %p2, i32 %add) | ||
ret <vscale x 2 x i1> %res | ||
} | ||
|
||
; Declarations of the psel intrinsic overloads called by the tests in this file: | ||
; two predicate operands of the same type plus an i32 index, returning that predicate type. | ||
declare <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, i32) | ||
declare <vscale x 8 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, i32) | ||
declare <vscale x 4 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, i32) | ||
declare <vscale x 2 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, i32) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s | ||
|
||
; Calls the revd intrinsic on <vscale x 16 x i8> data with operands %a, predicate %pg and %b. | ||
; The checks expect a single `revd z0.q, p0/m, z1.q` followed by ret. | ||
define <vscale x 16 x i8> @test_revd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) { | ||
; CHECK-LABEL: test_revd_i8: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: revd z0.q, p0/m, z1.q | ||
; CHECK-NEXT: ret | ||
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) | ||
ret <vscale x 16 x i8> %res | ||
} | ||
|
||
; Calls the revd intrinsic on <vscale x 8 x i16> data with operands %a, predicate %pg and %b. | ||
; The checks expect a single `revd z0.q, p0/m, z1.q` followed by ret. | ||
define <vscale x 8 x i16> @test_revd_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) { | ||
; CHECK-LABEL: test_revd_i16: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: revd z0.q, p0/m, z1.q | ||
; CHECK-NEXT: ret | ||
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.revd.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) | ||
ret <vscale x 8 x i16> %res | ||
} | ||
|
||
; Calls the revd intrinsic on <vscale x 4 x i32> data with operands %a, predicate %pg and %b. | ||
; The checks expect a single `revd z0.q, p0/m, z1.q` followed by ret. | ||
define <vscale x 4 x i32> @test_revd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) { | ||
; CHECK-LABEL: test_revd_i32: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: revd z0.q, p0/m, z1.q | ||
; CHECK-NEXT: ret | ||
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) | ||
ret <vscale x 4 x i32> %res | ||
} | ||
|
||
; Calls the revd intrinsic on <vscale x 2 x i64> data with operands %a, predicate %pg and %b. | ||
; The checks expect a single `revd z0.q, p0/m, z1.q` followed by ret. | ||
define <vscale x 2 x i64> @test_revd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) { | ||
; CHECK-LABEL: test_revd_i64: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: revd z0.q, p0/m, z1.q | ||
; CHECK-NEXT: ret | ||
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.revd.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) | ||
ret <vscale x 2 x i64> %res | ||
} | ||
|
||
; Declarations of the revd intrinsic overloads called by the tests in this file: | ||
; a data vector, a predicate with the same element count, and a second data vector of the same type. | ||
declare <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>) | ||
declare <vscale x 8 x i16> @llvm.aarch64.sve.revd.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>) | ||
declare <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>) | ||
declare <vscale x 2 x i64> @llvm.aarch64.sve.revd.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s | ||
|
||
; Calls the sclamp intrinsic on <vscale x 16 x i8> operands %a, %b and %c. | ||
; The checks expect `sclamp z2.b, z0.b, z1.b`, then the result moved from z2 into z0. | ||
define <vscale x 16 x i8> @test_sclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { | ||
; CHECK-LABEL: test_sclamp_i8: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: sclamp z2.b, z0.b, z1.b | ||
; CHECK-NEXT: mov z0.d, z2.d | ||
; CHECK-NEXT: ret | ||
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.sclamp.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) | ||
ret <vscale x 16 x i8> %res | ||
} | ||
|
||
; Calls the sclamp intrinsic on <vscale x 8 x i16> operands %a, %b and %c. | ||
; The checks expect `sclamp z2.h, z0.h, z1.h`, then the result moved from z2 into z0. | ||
define <vscale x 8 x i16> @test_sclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) { | ||
; CHECK-LABEL: test_sclamp_i16: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: sclamp z2.h, z0.h, z1.h | ||
; CHECK-NEXT: mov z0.d, z2.d | ||
; CHECK-NEXT: ret | ||
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.sclamp.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) | ||
ret <vscale x 8 x i16> %res | ||
} | ||
|
||
; Calls the sclamp intrinsic on <vscale x 4 x i32> operands %a, %b and %c. | ||
; The checks expect `sclamp z2.s, z0.s, z1.s`, then the result moved from z2 into z0. | ||
define <vscale x 4 x i32> @test_sclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) { | ||
; CHECK-LABEL: test_sclamp_i32: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: sclamp z2.s, z0.s, z1.s | ||
; CHECK-NEXT: mov z0.d, z2.d | ||
; CHECK-NEXT: ret | ||
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.sclamp.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) | ||
ret <vscale x 4 x i32> %res | ||
} | ||
|
||
; Calls the sclamp intrinsic on <vscale x 2 x i64> operands %a, %b and %c. | ||
; The checks expect `sclamp z2.d, z0.d, z1.d`, then the result moved from z2 into z0. | ||
define <vscale x 2 x i64> @test_sclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) { | ||
; CHECK-LABEL: test_sclamp_i64: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: sclamp z2.d, z0.d, z1.d | ||
; CHECK-NEXT: mov z0.d, z2.d | ||
; CHECK-NEXT: ret | ||
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.sclamp.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) | ||
ret <vscale x 2 x i64> %res | ||
} | ||
|
||
; Declarations of the sclamp intrinsic overloads called by the tests in this file: | ||
; three vector operands of the same type, returning that vector type. | ||
declare <vscale x 16 x i8> @llvm.aarch64.sve.sclamp.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>) | ||
declare <vscale x 8 x i16> @llvm.aarch64.sve.sclamp.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>) | ||
declare <vscale x 4 x i32> @llvm.aarch64.sve.sclamp.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) | ||
declare <vscale x 2 x i64> @llvm.aarch64.sve.sclamp.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s | ||
|
||
; Calls the uclamp intrinsic on <vscale x 16 x i8> operands %a, %b and %c. | ||
; The checks expect `uclamp z2.b, z0.b, z1.b`, then the result moved from z2 into z0. | ||
define <vscale x 16 x i8> @test_uclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) { | ||
; CHECK-LABEL: test_uclamp_i8: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: uclamp z2.b, z0.b, z1.b | ||
; CHECK-NEXT: mov z0.d, z2.d | ||
; CHECK-NEXT: ret | ||
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.uclamp.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) | ||
ret <vscale x 16 x i8> %res | ||
} | ||
|
||
; Calls the uclamp intrinsic on <vscale x 8 x i16> operands %a, %b and %c. | ||
; The checks expect `uclamp z2.h, z0.h, z1.h`, then the result moved from z2 into z0. | ||
define <vscale x 8 x i16> @test_uclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) { | ||
; CHECK-LABEL: test_uclamp_i16: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: uclamp z2.h, z0.h, z1.h | ||
; CHECK-NEXT: mov z0.d, z2.d | ||
; CHECK-NEXT: ret | ||
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.uclamp.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) | ||
ret <vscale x 8 x i16> %res | ||
} | ||
|
||
; Calls the uclamp intrinsic on <vscale x 4 x i32> operands %a, %b and %c. | ||
; The checks expect `uclamp z2.s, z0.s, z1.s`, then the result moved from z2 into z0. | ||
define <vscale x 4 x i32> @test_uclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) { | ||
; CHECK-LABEL: test_uclamp_i32: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: uclamp z2.s, z0.s, z1.s | ||
; CHECK-NEXT: mov z0.d, z2.d | ||
; CHECK-NEXT: ret | ||
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.uclamp.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) | ||
ret <vscale x 4 x i32> %res | ||
} | ||
|
||
; Calls the uclamp intrinsic on <vscale x 2 x i64> operands %a, %b and %c. | ||
; The checks expect `uclamp z2.d, z0.d, z1.d`, then the result moved from z2 into z0. | ||
define <vscale x 2 x i64> @test_uclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) { | ||
; CHECK-LABEL: test_uclamp_i64: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: uclamp z2.d, z0.d, z1.d | ||
; CHECK-NEXT: mov z0.d, z2.d | ||
; CHECK-NEXT: ret | ||
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.uclamp.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) | ||
ret <vscale x 2 x i64> %res | ||
} | ||
|
||
; Declarations of the uclamp intrinsic overloads called by the tests in this file: | ||
; three vector operands of the same type, returning that vector type. | ||
declare <vscale x 16 x i8> @llvm.aarch64.sve.uclamp.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>) | ||
declare <vscale x 8 x i16> @llvm.aarch64.sve.uclamp.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>) | ||
declare <vscale x 4 x i32> @llvm.aarch64.sve.uclamp.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>) | ||
declare <vscale x 2 x i64> @llvm.aarch64.sve.uclamp.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>) |