Skip to content

Commit

Permalink
[AArch64][SME] Add SVE2 psel, uclamp, sclamp and revd IR intrinsics
Browse files Browse the repository at this point in the history
When the SME feature is enabled we also gain access to a few extra
SVE2 instructions. This patch adds LLVM IR intrinsics to make use
of these new instructions:

  @llvm.aarch64.sve.psel
  @llvm.aarch64.sve.revd
  @llvm.aarch64.sve.sclamp
  @llvm.aarch64.sve.uclamp

Differential Revision: https://reviews.llvm.org/D128332
  • Loading branch information
david-arm committed Jun 28, 2022
1 parent 7f5d7bc commit 054faac
Show file tree
Hide file tree
Showing 10 changed files with 303 additions and 6 deletions.
20 changes: 20 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
Expand Up @@ -2689,4 +2689,24 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sme_set_tpidr2
: DefaultAttrsIntrinsic<[], [llvm_i64_ty],
[IntrNoMem, IntrHasSideEffects]>;

//
// Clamp
//

// Signed (sclamp) and unsigned (uclamp) clamp intrinsics. Both reuse the
// generic three-vector-operand intrinsic class, so each is overloaded on a
// single vector type shared by the result and all three operands.
def int_aarch64_sve_sclamp : AdvSIMD_3VectorArg_Intrinsic;
def int_aarch64_sve_uclamp : AdvSIMD_3VectorArg_Intrinsic;

//
// Reversal
//

// Merging-predicated REVD. The IR-level operands are (vector, predicate,
// vector); instruction lowering rewrites the call to REVD_MERGE_PASSTHRU with
// the first operand moved to the last position.
def int_aarch64_sve_revd : AdvSIMD_Merged1VectorArg_Intrinsic;

//
// Predicate selection
//

// Predicate select: takes two predicate vectors of the overloaded type
// (operand 0 is an i1 vector with the same element count as the result,
// operand 1 matches the result type exactly) plus an i32 index.
//
// The operation only reads its operands and produces a predicate, so it is
// marked IntrNoMem. Without that property the intrinsic is conservatively
// treated as possibly reading or writing memory, which blocks CSE, hoisting
// and dead-code elimination of calls to it.
def int_aarch64_sve_psel
    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                            [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
                             LLVMMatchType<0>, llvm_i32_ty],
                            [IntrNoMem]>;
}
5 changes: 5 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Expand Up @@ -208,6 +208,7 @@ static bool isMergePassthruOpcode(unsigned Opc) {
case AArch64ISD::BSWAP_MERGE_PASSTHRU:
case AArch64ISD::REVH_MERGE_PASSTHRU:
case AArch64ISD::REVW_MERGE_PASSTHRU:
case AArch64ISD::REVD_MERGE_PASSTHRU:
case AArch64ISD::CTLZ_MERGE_PASSTHRU:
case AArch64ISD::CTPOP_MERGE_PASSTHRU:
case AArch64ISD::DUP_MERGE_PASSTHRU:
Expand Down Expand Up @@ -2251,6 +2252,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::REVH_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::REVW_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::REVD_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
Expand Down Expand Up @@ -4634,6 +4636,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_sve_revw:
return DAG.getNode(AArch64ISD::REVW_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_revd:
return DAG.getNode(AArch64ISD::REVD_MERGE_PASSTHRU, dl, Op.getValueType(),
Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
case Intrinsic::aarch64_sve_sxtb:
return DAG.getNode(
AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Expand Up @@ -408,6 +408,7 @@ enum NodeType : unsigned {

// SME
RDSVL,
REVD_MERGE_PASSTHRU,

// Asserts that a function argument (i32) is zero-extended to i8 by
// the caller
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
Expand Up @@ -148,11 +148,11 @@ def : Pat<(i64 (int_aarch64_sme_get_tpidr2)),
// SVE2 instructions
//===----------------------------------------------------------------------===//

def REVD_ZPmZ : sve2_int_perm_revd<"revd">;
defm REVD_ZPmZ : sve2_int_perm_revd<"revd", AArch64revd_mt>;

defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0>;
defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1>;
defm SCLAMP_ZZZ : sve2_clamp<"sclamp", 0b0, int_aarch64_sve_sclamp>;
defm UCLAMP_ZZZ : sve2_clamp<"uclamp", 0b1, int_aarch64_sve_uclamp>;

defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel">;
defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>;

} // End let Predicates = [HasSME]
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Expand Up @@ -233,6 +233,7 @@ def AArch64rbit_mt : SDNode<"AArch64ISD::BITREVERSE_MERGE_PASSTHRU", SDT_AArch
def AArch64revb_mt : SDNode<"AArch64ISD::BSWAP_MERGE_PASSTHRU", SDT_AArch64Arith>;
def AArch64revh_mt : SDNode<"AArch64ISD::REVH_MERGE_PASSTHRU", SDT_AArch64Arith>;
def AArch64revw_mt : SDNode<"AArch64ISD::REVW_MERGE_PASSTHRU", SDT_AArch64Arith>;
// Selection-DAG node for the merging REVD form; uses the same type profile as
// the other *_MERGE_PASSTHRU reversal nodes above.
def AArch64revd_mt : SDNode<"AArch64ISD::REVD_MERGE_PASSTHRU", SDT_AArch64Arith>;

// These are like the above but we don't yet have need for ISD nodes. They allow
// a single pattern to match intrinsic and ISD operand layouts.
Expand Down
46 changes: 44 additions & 2 deletions llvm/lib/Target/AArch64/SMEInstrFormats.td
Expand Up @@ -1037,6 +1037,15 @@ class sve2_int_perm_revd<string asm>
let ElementSize = ZPR128.ElementSize;
}

// Defines the REVD instruction and the patterns that select it.
//   asm - assembly mnemonic passed through to the instruction class.
//   op  - pattern operator (DAG node) that the patterns below match.
multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
// A single instruction record named after the defm itself (no size suffix).
def NAME : sve2_int_perm_revd<asm>;

// One passthru pattern per integer element type, each paired with the
// predicate type of matching element count. All of them select the same
// instruction record.
def : SVE_1_Op_Passthru_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME)>;
def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME)>;
def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME)>;
def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME)>;
}

class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
: I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd),
asm, "\t$Zd, $Zn, $Zm", "", []>,
Expand All @@ -1058,11 +1067,16 @@ class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
let ElementSize = zpr_ty.ElementSize;
}

multiclass sve2_clamp<string asm, bit U> {
// Defines the four element-size variants of a clamp instruction and the
// patterns that select them.
//   asm - assembly mnemonic.
//   U   - forwarded to the instruction class (0b0 is used for sclamp, 0b1 for
//         uclamp at the instantiation sites).
//   op  - pattern operator matched by the selection patterns below.
multiclass sve2_clamp<string asm, bit U, SDPatternOperator op> {
// _B/_H/_S/_D: one instruction per element size, with the 2-bit size field
// counting up from 0b00 (bytes) to 0b11 (doublewords).
def _B : sve2_clamp<asm, 0b00, U, ZPR8>;
def _H : sve2_clamp<asm, 0b01, U, ZPR16>;
def _S : sve2_clamp<asm, 0b10, U, ZPR32>;
def _D : sve2_clamp<asm, 0b11, U, ZPR64>;

// Three-operand patterns, one per element type, selecting the instruction of
// matching size.
// NOTE(review): the operands of `op` appear to be forwarded positionally to
// the instruction's inputs (Zn, Zm, _Zd), which would make the third operand
// the one that shares a register with the result (the tests show
// "sclamp z2, z0, z1" followed by a move into z0). Confirm this is the
// intended operand order for the intrinsic.
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}

class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
Expand All @@ -1085,7 +1099,7 @@ class sve2_int_perm_sel_p<string asm, PPRRegOp ppr_ty, Operand imm_ty>
let Inst{3-0} = Pd;
}

multiclass sve2_int_perm_sel_p<string asm> {
multiclass sve2_int_perm_sel_p<string asm, SDPatternOperator op> {
def _B : sve2_int_perm_sel_p<asm, PPR8, sme_elm_idx0_15> {
bits<4> imm;
let Inst{23-22} = imm{3-2};
Expand All @@ -1109,4 +1123,32 @@ multiclass sve2_int_perm_sel_p<string asm> {
let Inst{22} = 0b1;
let Inst{20-18} = 0b000;
}

// Base patterns: the index is a plain register operand, so the instruction is
// emitted with an immediate offset of 0. One pattern per predicate type,
// selecting the _B/_H/_S/_D instruction respectively.
def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm),
MatrixIndexGPR32Op12_15:$idx)),
(!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, 0)>;
def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm),
MatrixIndexGPR32Op12_15:$idx)),
(!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, 0)>;
def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm),
MatrixIndexGPR32Op12_15:$idx)),
(!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, 0)>;
def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm),
MatrixIndexGPR32Op12_15:$idx)),
(!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, 0)>;

// Index-plus-immediate patterns: when the index matches the tilesliceN
// operand (a register plus an immediate in the range allowed for that
// element size), the immediate is placed in the instruction instead of 0.
// AddedComplexity raises their priority so they are tried in preference to
// the base patterns above. The tests exercise this with "add i32 %idx, K".
// NOTE(review): tileslice8/16/32/64 are defined outside this view; the
// register-plus-immediate reading is inferred from their operands here.
let AddedComplexity = 1 in {
def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm),
(i32 (tileslice8 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm)))),
(!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, $imm)>;
def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm),
(i32 (tileslice16 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_7:$imm)))),
(!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, $imm)>;
def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm),
(i32 (tileslice32 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_3:$imm)))),
(!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, $imm)>;
def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm),
(i32 (tileslice64 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_1:$imm)))),
(!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, $imm)>;
}
}
91 changes: 91 additions & 0 deletions llvm/test/CodeGen/AArch64/sve2-intrinsics-psel.ll
@@ -0,0 +1,91 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s

; psel on byte-granular predicates with a plain register index: the index is
; expected to be copied into w12 and the immediate offset to be 0.
define <vscale x 16 x i1> @psel_b(<vscale x 16 x i1> %p1, <vscale x 16 x i1> %p2, i32 %idx) {
; CHECK-LABEL: psel_b:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, w0
; CHECK-NEXT: psel p0, p0, p1.b[w12, 0]
; CHECK-NEXT: ret
%res = call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1> %p1, <vscale x 16 x i1> %p2, i32 %idx)
ret <vscale x 16 x i1> %res
}

; psel on byte-granular predicates with index %idx + 15: the constant add is
; expected to fold into the instruction's immediate offset (15).
define <vscale x 16 x i1> @psel_b_imm(<vscale x 16 x i1> %p1, <vscale x 16 x i1> %p2, i32 %idx) {
; CHECK-LABEL: psel_b_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, w0
; CHECK-NEXT: psel p0, p0, p1.b[w12, 15]
; CHECK-NEXT: ret
%add = add i32 %idx, 15
%res = call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1> %p1, <vscale x 16 x i1> %p2, i32 %add)
ret <vscale x 16 x i1> %res
}

; psel on halfword-granular predicates with a plain register index: the index
; is expected to be copied into w12 and the immediate offset to be 0.
define <vscale x 8 x i1> @psel_h(<vscale x 8 x i1> %p1, <vscale x 8 x i1> %p2, i32 %idx) {
; CHECK-LABEL: psel_h:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, w0
; CHECK-NEXT: psel p0, p0, p1.h[w12, 0]
; CHECK-NEXT: ret
%res = call <vscale x 8 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 8 x i1> %p1, <vscale x 8 x i1> %p2, i32 %idx)
ret <vscale x 8 x i1> %res
}

; psel on halfword-granular predicates with index %idx + 7: the constant add
; is expected to fold into the instruction's immediate offset (7).
define <vscale x 8 x i1> @psel_h_imm(<vscale x 8 x i1> %p1, <vscale x 8 x i1> %p2, i32 %idx) {
; CHECK-LABEL: psel_h_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, w0
; CHECK-NEXT: psel p0, p0, p1.h[w12, 7]
; CHECK-NEXT: ret
%add = add i32 %idx, 7
%res = call <vscale x 8 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 8 x i1> %p1, <vscale x 8 x i1> %p2, i32 %add)
ret <vscale x 8 x i1> %res
}

; psel on word-granular predicates with a plain register index: the index is
; expected to be copied into w12 and the immediate offset to be 0.
define <vscale x 4 x i1> @psel_s(<vscale x 4 x i1> %p1, <vscale x 4 x i1> %p2, i32 %idx) {
; CHECK-LABEL: psel_s:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, w0
; CHECK-NEXT: psel p0, p0, p1.s[w12, 0]
; CHECK-NEXT: ret
%res = call <vscale x 4 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 4 x i1> %p1, <vscale x 4 x i1> %p2, i32 %idx)
ret <vscale x 4 x i1> %res
}

; psel on word-granular predicates with index %idx + 3: the constant add is
; expected to fold into the instruction's immediate offset (3).
define <vscale x 4 x i1> @psel_s_imm(<vscale x 4 x i1> %p1, <vscale x 4 x i1> %p2, i32 %idx) {
; CHECK-LABEL: psel_s_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, w0
; CHECK-NEXT: psel p0, p0, p1.s[w12, 3]
; CHECK-NEXT: ret
%add = add i32 %idx, 3
%res = call <vscale x 4 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 4 x i1> %p1, <vscale x 4 x i1> %p2, i32 %add)
ret <vscale x 4 x i1> %res
}

; psel on doubleword-granular predicates with a plain register index: the
; index is expected to be copied into w12 and the immediate offset to be 0.
define <vscale x 2 x i1> @psel_d(<vscale x 2 x i1> %p1, <vscale x 2 x i1> %p2, i32 %idx) {
; CHECK-LABEL: psel_d:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, w0
; CHECK-NEXT: psel p0, p0, p1.d[w12, 0]
; CHECK-NEXT: ret
%res = call <vscale x 2 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 2 x i1> %p1, <vscale x 2 x i1> %p2, i32 %idx)
ret <vscale x 2 x i1> %res
}

; psel on doubleword-granular predicates with index %idx + 1: the constant add
; is expected to fold into the instruction's immediate offset (1).
define <vscale x 2 x i1> @psel_d_imm(<vscale x 2 x i1> %p1, <vscale x 2 x i1> %p2, i32 %idx) {
; CHECK-LABEL: psel_d_imm:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w12, w0
; CHECK-NEXT: psel p0, p0, p1.d[w12, 1]
; CHECK-NEXT: ret
%add = add i32 %idx, 1
%res = call <vscale x 2 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 2 x i1> %p1, <vscale x 2 x i1> %p2, i32 %add)
ret <vscale x 2 x i1> %res
}

declare <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>, i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>, i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>, i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>, i32)
43 changes: 43 additions & 0 deletions llvm/test/CodeGen/AArch64/sve2-intrinsics-revd.ll
@@ -0,0 +1,43 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s

; revd with i8 elements: a single merging-predicated revd is expected, with %a
; (z0) as the merge destination, %pg (p0) as the predicate and %b (z1) as the
; source. The instruction uses the .q suffix regardless of the IR element type.
define <vscale x 16 x i8> @test_revd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b) {
; CHECK-LABEL: test_revd_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: revd z0.q, p0/m, z1.q
; CHECK-NEXT: ret
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %b)
ret <vscale x 16 x i8> %res
}

; revd with i16 elements: same expected output as the i8 case, a single
; merging-predicated revd on .q with %a as destination and %b as source.
define <vscale x 8 x i16> @test_revd_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b) {
; CHECK-LABEL: test_revd_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: revd z0.q, p0/m, z1.q
; CHECK-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.revd.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %b)
ret <vscale x 8 x i16> %res
}

; revd with i32 elements: a single merging-predicated revd on .q is expected,
; with %a as destination and %b as source.
define <vscale x 4 x i32> @test_revd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
; CHECK-LABEL: test_revd_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: revd z0.q, p0/m, z1.q
; CHECK-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %res
}

; revd with i64 elements: a single merging-predicated revd on .q is expected,
; with %a as destination and %b as source.
define <vscale x 2 x i64> @test_revd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b) {
; CHECK-LABEL: test_revd_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: revd z0.q, p0/m, z1.q
; CHECK-NEXT: ret
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.revd.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %res
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.revd.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.revd.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
47 changes: 47 additions & 0 deletions llvm/test/CodeGen/AArch64/sve2-intrinsics-sclamp.ll
@@ -0,0 +1,47 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s

; sclamp on i8 elements: %a and %b (z0, z1) are the two source registers and
; %c (z2) is used as the destination, so the result is moved to z0 for return.
define <vscale x 16 x i8> @test_sclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: test_sclamp_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sclamp z2.b, z0.b, z1.b
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.sclamp.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
ret <vscale x 16 x i8> %res
}

; sclamp on i16 elements: expects the .h form with %c (z2) as the destination,
; followed by a move of the result into z0.
define <vscale x 8 x i16> @test_sclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: test_sclamp_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sclamp z2.h, z0.h, z1.h
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.sclamp.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
ret <vscale x 8 x i16> %res
}

; sclamp on i32 elements: expects the .s form with %c (z2) as the destination,
; followed by a move of the result into z0.
define <vscale x 4 x i32> @test_sclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: test_sclamp_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sclamp z2.s, z0.s, z1.s
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.sclamp.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
ret <vscale x 4 x i32> %res
}

; sclamp on i64 elements: expects the .d form with %c (z2) as the destination,
; followed by a move of the result into z0.
define <vscale x 2 x i64> @test_sclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: test_sclamp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: sclamp z2.d, z0.d, z1.d
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.sclamp.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
ret <vscale x 2 x i64> %res
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.sclamp.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sclamp.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sclamp.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sclamp.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
47 changes: 47 additions & 0 deletions llvm/test/CodeGen/AArch64/sve2-intrinsics-uclamp.ll
@@ -0,0 +1,47 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s

; uclamp on i8 elements: %a and %b (z0, z1) are the two source registers and
; %c (z2) is used as the destination, so the result is moved to z0 for return.
define <vscale x 16 x i8> @test_uclamp_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: test_uclamp_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: uclamp z2.b, z0.b, z1.b
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.uclamp.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
ret <vscale x 16 x i8> %res
}

; uclamp on i16 elements: expects the .h form with %c (z2) as the destination,
; followed by a move of the result into z0.
define <vscale x 8 x i16> @test_uclamp_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: test_uclamp_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: uclamp z2.h, z0.h, z1.h
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.uclamp.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
ret <vscale x 8 x i16> %res
}

; uclamp on i32 elements: expects the .s form with %c (z2) as the destination,
; followed by a move of the result into z0.
define <vscale x 4 x i32> @test_uclamp_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: test_uclamp_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: uclamp z2.s, z0.s, z1.s
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.uclamp.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
ret <vscale x 4 x i32> %res
}

; uclamp on i64 elements: expects the .d form with %c (z2) as the destination,
; followed by a move of the result into z0.
define <vscale x 2 x i64> @test_uclamp_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: test_uclamp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: uclamp z2.d, z0.d, z1.d
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.uclamp.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
ret <vscale x 2 x i64> %res
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.uclamp.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uclamp.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uclamp.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uclamp.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

0 comments on commit 054faac

Please sign in to comment.