[AArch64][SVE] Implement shift intrinsics
Summary:
Adds the following intrinsics:
- asr & asrd
- insr
- lsl & lsr

This patch also adds a new AArch64ISD node (INSR) to represent the int_aarch64_sve_insr intrinsic.
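
A minimal IR sketch of how the new intrinsics are called (the function and
value names here are illustrative, not taken from this patch; the signatures
follow the TableGen definitions in the diff below):

  ; Predicated shift: active lanes of %a are shifted by the matching lanes of %b.
  declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
  ; asrd shifts right by an immediate, rounding towards zero (signed divide by a power of two).
  declare <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
  ; insr shifts the vector up by one element and inserts the scalar at lane 0.
  declare <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8>, i8)

  define <vscale x 16 x i8> @example(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a,
                                     <vscale x 16 x i8> %b, i8 %s) {
    %asr = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
    %div = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %asr, i32 2)
    %ins = call <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8> %div, i8 %s)
    ret <vscale x 16 x i8> %ins
  }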

Reviewers: huntergr, sdesmalen, dancgr, mgudim, rengolin, efriedma

Reviewed By: sdesmalen

Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cameron.mcinally, cfe-commits, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70437
kmclaughlin-arm committed Dec 3, 2019
1 parent 14f7673 commit 7483eb6
Showing 7 changed files with 482 additions and 33 deletions.
58 changes: 46 additions & 12 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -790,6 +790,21 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
llvm_i32_ty],
[IntrNoMem]>;

class AdvSIMD_Pred2VectorArg_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>,
LLVMMatchType<0>],
[IntrNoMem]>;

class AdvSIMD_Pred3VectorArg_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>,
LLVMMatchType<0>,
LLVMMatchType<0>],
[IntrNoMem]>;

class AdvSIMD_SVE_Compare_Intrinsic
: Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -817,6 +832,20 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
llvm_anyvector_ty],
[IntrNoMem]>;

class AdvSIMD_SVE_ShiftByImm_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>,
llvm_i32_ty],
[IntrNoMem]>;

class AdvSIMD_SVE_ShiftWide_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>,
llvm_nxv2i64_ty],
[IntrNoMem]>;

class AdvSIMD_SVE_Unpack_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMSubdivide2VectorType<0>],
@@ -867,6 +896,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
llvm_anyvector_ty],
[IntrNoMem]>;

class AdvSIMD_SVE_INSR_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
LLVMVectorElementType<0>],
[IntrNoMem]>;

class AdvSIMD_SVE_PUNPKHI_Intrinsic
: Intrinsic<[LLVMHalfElementsVectorType<0>],
[llvm_anyvector_ty],
@@ -919,18 +954,6 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".

let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".


class AdvSIMD_Pred2VectorArg_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;

class AdvSIMD_Pred3VectorArg_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;


//
// Integer arithmetic
//
@@ -975,6 +998,17 @@ def int_aarch64_sve_sdot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
def int_aarch64_sve_udot : AdvSIMD_SVE_DOT_Intrinsic;
def int_aarch64_sve_udot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;

// Shifts

def int_aarch64_sve_asr : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_asr_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
def int_aarch64_sve_asrd : AdvSIMD_SVE_ShiftByImm_Intrinsic;
def int_aarch64_sve_insr : AdvSIMD_SVE_INSR_Intrinsic;
def int_aarch64_sve_lsl : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_lsl_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
def int_aarch64_sve_lsr : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_lsr_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;

//
// Counting bits
//
13 changes: 13 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -828,6 +828,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
if (isTypeLegal(VT) && VT.getVectorElementType() != MVT::i1)
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
}
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
}

PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
@@ -1333,6 +1335,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::SUNPKLO: return "AArch64ISD::SUNPKLO";
case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI";
case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO";
case AArch64ISD::INSR: return "AArch64ISD::INSR";
}
return nullptr;
}
@@ -2884,6 +2887,16 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
Op.getOperand(1));

case Intrinsic::aarch64_sve_insr: {
SDValue Scalar = Op.getOperand(2);
EVT ScalarTy = Scalar.getValueType();
if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);

return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
Op.getOperand(1), Scalar);
}

case Intrinsic::localaddress: {
const auto &MF = DAG.getMachineFunction();
const auto *RegInfo = Subtarget->getRegisterInfo();
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -196,6 +196,8 @@ enum NodeType : unsigned {
UUNPKHI,
UUNPKLO,

INSR,

// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
LD3post,
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -214,6 +214,7 @@ def SDT_AArch64FCmp : SDTypeProfile<0, 2,
SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>]>;
@@ -401,6 +402,8 @@ def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;

def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;

def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
28 changes: 14 additions & 14 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -199,8 +199,8 @@ let Predicates = [HasSVE] in {

defm SPLICE_ZPZ : sve_int_perm_splice<"splice">;
defm COMPACT_ZPZ : sve_int_perm_compact<"compact">;
defm INSR_ZR : sve_int_perm_insrs<"insr">;
defm INSR_ZV : sve_int_perm_insrv<"insr">;
defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>;
defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>;
def EXT_ZZI : sve_int_perm_extract_i<"ext">;

defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit">;
@@ -876,18 +876,18 @@ let Predicates = [HasSVE] in {
defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0000, "asr">;
defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0001, "lsr">;
defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">;
defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd">;

defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr">;
defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr">;
defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl">;
defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr">;
defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr">;
defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr">;

defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr">;
defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">;
defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">;
defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", int_aarch64_sve_asrd>;

defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", int_aarch64_sve_asr>;
defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", int_aarch64_sve_lsr>;
defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", int_aarch64_sve_lsl>;
defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", null_frag>;
defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", null_frag>;
defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", null_frag>;

defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>;
defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>;
defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>;

defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, nxv8f16, nxv16i1, nxv4f32, ElementSizeS>;
defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, nxv4f32, nxv16i1, nxv8f16, ElementSizeS>;
44 changes: 37 additions & 7 deletions llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -304,6 +304,12 @@ class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, vt4:$Op4)),
(inst $Op1, $Op2, $Op3, $Op4)>;

class SVE_3_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, Operand ImmTy,
Instruction inst>
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, (vt3 ImmTy:$Op3))),
(inst $Op1, $Op2, ImmTy:$Op3)>;

def SVEDup0Undef : ComplexPattern<i64, 0, "SelectDupZeroOrUndef", []>;

//===----------------------------------------------------------------------===//
@@ -888,14 +894,18 @@ class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty,

let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = Destructive;
let ElementSize = ElementSizeNone;
}

multiclass sve_int_perm_insrs<string asm> {
multiclass sve_int_perm_insrs<string asm, SDPatternOperator op> {
def _B : sve_int_perm_insrs<0b00, asm, ZPR8, GPR32>;
def _H : sve_int_perm_insrs<0b01, asm, ZPR16, GPR32>;
def _S : sve_int_perm_insrs<0b10, asm, ZPR32, GPR32>;
def _D : sve_int_perm_insrs<0b11, asm, ZPR64, GPR64>;

def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, i32, !cast<Instruction>(NAME # _B)>;
def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, i32, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, i32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, i64, !cast<Instruction>(NAME # _D)>;
}

class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -914,14 +924,17 @@ class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,

let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = Destructive;
let ElementSize = ElementSizeNone;
}

multiclass sve_int_perm_insrv<string asm> {
multiclass sve_int_perm_insrv<string asm, SDPatternOperator op> {
def _B : sve_int_perm_insrv<0b00, asm, ZPR8, FPR8>;
def _H : sve_int_perm_insrv<0b01, asm, ZPR16, FPR16>;
def _S : sve_int_perm_insrv<0b10, asm, ZPR32, FPR32>;
def _D : sve_int_perm_insrv<0b11, asm, ZPR64, FPR64>;

def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, f16, !cast<Instruction>(NAME # _H)>;
def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, f32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, f64, !cast<Instruction>(NAME # _D)>;
}

//===----------------------------------------------------------------------===//
@@ -3929,7 +3942,8 @@ multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm> {
}
}

multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm> {
multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm,
SDPatternOperator op = null_frag> {
def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8,
ElementSizeB>;
def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16,
@@ -3945,6 +3959,11 @@ multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm> {
let Inst{22} = imm{5};
let Inst{9-8} = imm{4-3};
}

def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
}

class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
@@ -3971,17 +3990,28 @@ class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
let ElementSize = zprty.ElementSize;
}

multiclass sve_int_bin_pred_shift<bits<3> opc, string asm> {
multiclass sve_int_bin_pred_shift<bits<3> opc, string asm,
SDPatternOperator op> {
def _B : sve_int_bin_pred_shift<0b00, 0b0, opc, asm, ZPR8, ZPR8>;
def _H : sve_int_bin_pred_shift<0b01, 0b0, opc, asm, ZPR16, ZPR16>;
def _S : sve_int_bin_pred_shift<0b10, 0b0, opc, asm, ZPR32, ZPR32>;
def _D : sve_int_bin_pred_shift<0b11, 0b0, opc, asm, ZPR64, ZPR64>;

def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}

multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm> {
multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm,
SDPatternOperator op> {
def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>;
def _H : sve_int_bin_pred_shift<0b01, 0b1, opc, asm, ZPR16, ZPR64>;
def _S : sve_int_bin_pred_shift<0b10, 0b1, opc, asm, ZPR32, ZPR64>;

def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv2i64, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv2i64, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv2i64, !cast<Instruction>(NAME # _S)>;
}

//===----------------------------------------------------------------------===//
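As an end-to-end sketch of what the new patterns enable (the test file added
by this patch is not shown above, so the function name, register assignments,
and expected assembly below are assumptions based on the definitions in the
diff), a wide shift should now select to a single predicated instruction:

  ; Each i16 lane of %a is shifted by the 64-bit element of %b it overlaps.
  define <vscale x 8 x i16> @lsl_wide_example(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
    %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b)
    ret <vscale x 8 x i16> %out
  }
  declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)

  ; Expected selection (illustrative):
  ;   lsl z0.h, p0/m, z0.h, z1.d
  ;   ret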
