Skip to content

Commit

Permalink
[ARM] MVE Vector Shifts
Browse files Browse the repository at this point in the history
This adds basic lowering for MVE shifts. There are many shifts in MVE, but the
instructions handled here are:
 VSHL (imm)
 VSHRu (imm)
 VSHRs (imm)
 VSHL (vector)
 VSHL (register)

MVE, like NEON before it, doesn't have shift right by a vector (or register).
We instead have to negate the amount and shift in the opposite direction. This
means we have to convert any SHR's into a form of SHL (that is still signed or
unsigned) with a negated condition and selecting from there. MVE still does
have shifting by an immediate for SHL, ASR and LSR.

This adds lowering for these and for register forms, which work well for shift
lefts but may require an extra fold of neg(vdup(x)) -> vdup(neg(x)) to potentially
work optimally for right shifts.

Differential Revision: https://reviews.llvm.org/D64212

llvm-svn: 366056
  • Loading branch information
davemgreen committed Jul 15, 2019
1 parent 0bf0b8f commit 6e89887
Show file tree
Hide file tree
Showing 5 changed files with 508 additions and 60 deletions.
13 changes: 8 additions & 5 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Expand Up @@ -250,6 +250,9 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SMIN, VT, Legal);
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMIN, VT, Legal);
Expand Down Expand Up @@ -5718,10 +5721,11 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
return SDValue();

// We essentially have two forms here. Shift by an immediate and shift by a
// vector register. We cannot easily match shift by an immediate in tablegen
// so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM. For shifting
// by a vector, we don't have VSHR, only VSHL (which can be signed or
// unsigned, and a negative shift indicates a shift right).
// vector register (there are also shift by a gpr, but that is just handled
// with a tablegen pattern). We cannot easily match shift by an immediate in
// tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
// For shifting by a vector, we don't have VSHR, only VSHL (which can be
// signed or unsigned, and a negative shift indicates a shift right).
if (N->getOpcode() == ISD::SHL) {
if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
Expand Down Expand Up @@ -12852,7 +12856,6 @@ static SDValue PerformShiftCombine(SDNode *N,
if (!VT.isVector() || !TLI.isTypeLegal(VT))
return SDValue();

assert(ST->hasNEON() && "unexpected vector shift");
int64_t Cnt;

switch (N->getOpcode()) {
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/ARM/ARMInstrInfo.td
Expand Up @@ -254,6 +254,17 @@ def ARMvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
def ARMvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
def ARMvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;


def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,]>;
def ARMvshlImm : SDNode<"ARMISD::VSHLIMM", SDTARMVSHIMM>;
def ARMvshrsImm : SDNode<"ARMISD::VSHRsIMM", SDTARMVSHIMM>;
def ARMvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>;
def ARMvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
def ARMvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>;

def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMWhileLoop,
[SDNPHasChain]>;

Expand Down
55 changes: 55 additions & 0 deletions llvm/lib/Target/ARM/ARMInstrMVE.td
Expand Up @@ -2119,6 +2119,22 @@ defm MVE_VQSHL_by_vec : mve_shift_by_vec_multi<"vqshl", 0b1, 0b0>;
defm MVE_VQRSHL_by_vec : mve_shift_by_vec_multi<"vqrshl", 0b1, 0b1>;
defm MVE_VRSHL_by_vec : mve_shift_by_vec_multi<"vrshl", 0b0, 0b1>;

let Predicates = [HasMVEInt] in {
def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
(v4i32 (MVE_VSHL_by_vecu32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
(v8i16 (MVE_VSHL_by_vecu16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
(v16i8 (MVE_VSHL_by_vecu8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;

def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
(v4i32 (MVE_VSHL_by_vecs32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
(v8i16 (MVE_VSHL_by_vecs16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
(v16i8 (MVE_VSHL_by_vecs8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;
}

class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
string ops, vpred_ops vpred, string cstr,
list<dag> pattern=[]>
Expand Down Expand Up @@ -2344,6 +2360,29 @@ def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> {
let Inst{21} = 0b1;
}

let Predicates = [HasMVEInt] in {
def : Pat<(v4i32 (ARMvshlImm (v4i32 MQPR:$src), imm0_31:$imm)),
(v4i32 (MVE_VSHL_immi32 (v4i32 MQPR:$src), imm0_31:$imm))>;
def : Pat<(v8i16 (ARMvshlImm (v8i16 MQPR:$src), imm0_15:$imm)),
(v8i16 (MVE_VSHL_immi16 (v8i16 MQPR:$src), imm0_15:$imm))>;
def : Pat<(v16i8 (ARMvshlImm (v16i8 MQPR:$src), imm0_7:$imm)),
(v16i8 (MVE_VSHL_immi8 (v16i8 MQPR:$src), imm0_7:$imm))>;

def : Pat<(v4i32 (ARMvshruImm (v4i32 MQPR:$src), imm0_31:$imm)),
(v4i32 (MVE_VSHR_immu32 (v4i32 MQPR:$src), imm0_31:$imm))>;
def : Pat<(v8i16 (ARMvshruImm (v8i16 MQPR:$src), imm0_15:$imm)),
(v8i16 (MVE_VSHR_immu16 (v8i16 MQPR:$src), imm0_15:$imm))>;
def : Pat<(v16i8 (ARMvshruImm (v16i8 MQPR:$src), imm0_7:$imm)),
(v16i8 (MVE_VSHR_immu8 (v16i8 MQPR:$src), imm0_7:$imm))>;

def : Pat<(v4i32 (ARMvshrsImm (v4i32 MQPR:$src), imm0_31:$imm)),
(v4i32 (MVE_VSHR_imms32 (v4i32 MQPR:$src), imm0_31:$imm))>;
def : Pat<(v8i16 (ARMvshrsImm (v8i16 MQPR:$src), imm0_15:$imm)),
(v8i16 (MVE_VSHR_imms16 (v8i16 MQPR:$src), imm0_15:$imm))>;
def : Pat<(v16i8 (ARMvshrsImm (v16i8 MQPR:$src), imm0_7:$imm)),
(v16i8 (MVE_VSHR_imms8 (v16i8 MQPR:$src), imm0_7:$imm))>;
}

// end of mve_shift instructions

// start of MVE Floating Point instructions
Expand Down Expand Up @@ -3353,6 +3392,22 @@ defm MVE_VRSHL_qr : MVE_VxSHL_qr_types<"vrshl", 0b0, 0b1>;
defm MVE_VQSHL_qr : MVE_VxSHL_qr_types<"vqshl", 0b1, 0b0>;
defm MVE_VQRSHL_qr : MVE_VxSHL_qr_types<"vqrshl", 0b1, 0b1>;

let Predicates = [HasMVEInt] in {
def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))),
(v4i32 (MVE_VSHL_qru32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))),
(v8i16 (MVE_VSHL_qru16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))),
(v16i8 (MVE_VSHL_qru8 (v16i8 MQPR:$Qm), GPR:$Rm))>;

def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 (ARMvdup GPR:$Rm)))),
(v4i32 (MVE_VSHL_qrs32 (v4i32 MQPR:$Qm), GPR:$Rm))>;
def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 (ARMvdup GPR:$Rm)))),
(v8i16 (MVE_VSHL_qrs16 (v8i16 MQPR:$Qm), GPR:$Rm))>;
def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 (ARMvdup GPR:$Rm)))),
(v16i8 (MVE_VSHL_qrs8 (v16i8 MQPR:$Qm), GPR:$Rm))>;
}

class MVE_VBRSR<string iname, string suffix, bits<2> size, list<dag> pattern=[]>
: MVE_qDest_rSrc<iname, suffix, pattern> {

Expand Down
98 changes: 43 additions & 55 deletions llvm/lib/Target/ARM/ARMInstrNEON.td
Expand Up @@ -493,26 +493,14 @@ def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>;

// Vector Shifts
def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,]>;

def NEONvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
def NEONvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>;

// Types for vector shift by immediates. The "SHX" version is for long and
// narrow operations where the source and destination vectors have different
// types. The "SHINS" version is for shift and insert operations.
def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
SDTCisVT<2, i32>]>;
def SDTARMVSHXIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
SDTCisVT<2, i32>]>;
def SDTARMVSHINSIMM : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;

def NEONvshlImm : SDNode<"ARMISD::VSHLIMM", SDTARMVSHIMM>;
def NEONvshrsImm : SDNode<"ARMISD::VSHRsIMM", SDTARMVSHIMM>;
def NEONvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>;
def NEONvshrnImm : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>;

def NEONvrshrsImm : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>;
Expand Down Expand Up @@ -4269,11 +4257,11 @@ defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
int_arm_neon_vraddhn, 1>;

let Predicates = [HasNEON] in {
def : Pat<(v8i8 (trunc (NEONvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
def : Pat<(v8i8 (trunc (ARMvshruImm (add (v8i16 QPR:$Vn), QPR:$Vm), 8))),
(VADDHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (NEONvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
def : Pat<(v4i16 (trunc (ARMvshruImm (add (v4i32 QPR:$Vn), QPR:$Vm), 16))),
(VADDHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (NEONvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
def : Pat<(v2i32 (trunc (ARMvshruImm (add (v2i64 QPR:$Vn), QPR:$Vm), 32))),
(VADDHNv2i32 QPR:$Vn, QPR:$Vm)>;
}

Expand Down Expand Up @@ -5027,11 +5015,11 @@ defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
int_arm_neon_vrsubhn, 0>;

let Predicates = [HasNEON] in {
def : Pat<(v8i8 (trunc (NEONvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
def : Pat<(v8i8 (trunc (ARMvshruImm (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))),
(VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v4i16 (trunc (NEONvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
def : Pat<(v4i16 (trunc (ARMvshruImm (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))),
(VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>;
def : Pat<(v2i32 (trunc (NEONvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
def : Pat<(v2i32 (trunc (ARMvshruImm (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))),
(VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>;
}

Expand Down Expand Up @@ -5522,7 +5510,7 @@ def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),

def abd_shr :
PatFrag<(ops node:$in1, node:$in2, node:$shift),
(NEONvshrsImm (sub (zext node:$in1),
(ARMvshrsImm (sub (zext node:$in1),
(zext node:$in2)), (i32 $shift))>;

let Predicates = [HasNEON] in {
Expand Down Expand Up @@ -5790,56 +5778,56 @@ defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
"vshl", "u", int_arm_neon_vshiftu>;

let Predicates = [HasNEON] in {
def : Pat<(v8i8 (NEONvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
def : Pat<(v8i8 (ARMvshls (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
(VSHLsv8i8 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v4i16 (NEONvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
def : Pat<(v4i16 (ARMvshls (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
(VSHLsv4i16 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v2i32 (NEONvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
def : Pat<(v2i32 (ARMvshls (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
(VSHLsv2i32 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v1i64 (NEONvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
def : Pat<(v1i64 (ARMvshls (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
(VSHLsv1i64 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v16i8 (NEONvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
def : Pat<(v16i8 (ARMvshls (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
(VSHLsv16i8 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v8i16 (NEONvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
def : Pat<(v8i16 (ARMvshls (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
(VSHLsv8i16 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v4i32 (NEONvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
def : Pat<(v4i32 (ARMvshls (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
(VSHLsv4i32 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v2i64 (NEONvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
def : Pat<(v2i64 (ARMvshls (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
(VSHLsv2i64 QPR:$Dn, QPR:$Dm)>;

def : Pat<(v8i8 (NEONvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
def : Pat<(v8i8 (ARMvshlu (v8i8 DPR:$Dn), (v8i8 DPR:$Dm))),
(VSHLuv8i8 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v4i16 (NEONvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
def : Pat<(v4i16 (ARMvshlu (v4i16 DPR:$Dn), (v4i16 DPR:$Dm))),
(VSHLuv4i16 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v2i32 (NEONvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
def : Pat<(v2i32 (ARMvshlu (v2i32 DPR:$Dn), (v2i32 DPR:$Dm))),
(VSHLuv2i32 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v1i64 (NEONvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
def : Pat<(v1i64 (ARMvshlu (v1i64 DPR:$Dn), (v1i64 DPR:$Dm))),
(VSHLuv1i64 DPR:$Dn, DPR:$Dm)>;
def : Pat<(v16i8 (NEONvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
def : Pat<(v16i8 (ARMvshlu (v16i8 QPR:$Dn), (v16i8 QPR:$Dm))),
(VSHLuv16i8 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v8i16 (NEONvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
def : Pat<(v8i16 (ARMvshlu (v8i16 QPR:$Dn), (v8i16 QPR:$Dm))),
(VSHLuv8i16 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v4i32 (NEONvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
def : Pat<(v4i32 (ARMvshlu (v4i32 QPR:$Dn), (v4i32 QPR:$Dm))),
(VSHLuv4i32 QPR:$Dn, QPR:$Dm)>;
def : Pat<(v2i64 (NEONvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
def : Pat<(v2i64 (ARMvshlu (v2i64 QPR:$Dn), (v2i64 QPR:$Dm))),
(VSHLuv2i64 QPR:$Dn, QPR:$Dm)>;

}

// VSHL : Vector Shift Left (Immediate)
defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshlImm>;
defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", ARMvshlImm>;

// VSHR : Vector Shift Right (Immediate)
defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", "VSHRs",
NEONvshrsImm>;
ARMvshrsImm>;
defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", "VSHRu",
NEONvshruImm>;
ARMvshruImm>;

// VSHLL : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s",
PatFrag<(ops node:$LHS, node:$RHS), (NEONvshlImm (sext node:$LHS), node:$RHS)>>;
PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (sext node:$LHS), node:$RHS)>>;
defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u",
PatFrag<(ops node:$LHS, node:$RHS), (NEONvshlImm (zext node:$LHS), node:$RHS)>>;
PatFrag<(ops node:$LHS, node:$RHS), (ARMvshlImm (zext node:$LHS), node:$RHS)>>;

// VSHLL : Vector Shift Left Long (with maximum shift count)
class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
Expand All @@ -5858,37 +5846,37 @@ def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
v2i64, v2i32, imm32>;

let Predicates = [HasNEON] in {
def : Pat<(v8i16 (NEONvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
def : Pat<(v8i16 (ARMvshlImm (zext (v8i8 DPR:$Rn)), (i32 8))),
(VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
def : Pat<(v4i32 (ARMvshlImm (zext (v4i16 DPR:$Rn)), (i32 16))),
(VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
def : Pat<(v2i64 (ARMvshlImm (zext (v2i32 DPR:$Rn)), (i32 32))),
(VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (NEONvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
def : Pat<(v8i16 (ARMvshlImm (sext (v8i8 DPR:$Rn)), (i32 8))),
(VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
def : Pat<(v4i32 (ARMvshlImm (sext (v4i16 DPR:$Rn)), (i32 16))),
(VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
def : Pat<(v2i64 (ARMvshlImm (sext (v2i32 DPR:$Rn)), (i32 32))),
(VSHLLi32 DPR:$Rn, 32)>;
def : Pat<(v8i16 (NEONvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
def : Pat<(v8i16 (ARMvshlImm (anyext (v8i8 DPR:$Rn)), (i32 8))),
(VSHLLi8 DPR:$Rn, 8)>;
def : Pat<(v4i32 (NEONvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
def : Pat<(v4i32 (ARMvshlImm (anyext (v4i16 DPR:$Rn)), (i32 16))),
(VSHLLi16 DPR:$Rn, 16)>;
def : Pat<(v2i64 (NEONvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
def : Pat<(v2i64 (ARMvshlImm (anyext (v2i32 DPR:$Rn)), (i32 32))),
(VSHLLi32 DPR:$Rn, 32)>;
}

// VSHRN : Vector Shift Right and Narrow
defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
PatFrag<(ops node:$Rn, node:$amt),
(trunc (NEONvshrsImm node:$Rn, node:$amt))>>;
(trunc (ARMvshrsImm node:$Rn, node:$amt))>>;

let Predicates = [HasNEON] in {
def : Pat<(v8i8 (trunc (NEONvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
def : Pat<(v8i8 (trunc (ARMvshruImm (v8i16 QPR:$Vn), shr_imm8:$amt))),
(VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
def : Pat<(v4i16 (trunc (NEONvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
def : Pat<(v4i16 (trunc (ARMvshruImm (v4i32 QPR:$Vn), shr_imm16:$amt))),
(VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
def : Pat<(v2i32 (trunc (NEONvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
def : Pat<(v2i32 (trunc (ARMvshruImm (v2i64 QPR:$Vn), shr_imm32:$amt))),
(VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;
}

Expand Down Expand Up @@ -5952,8 +5940,8 @@ defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s",
NEONvqrshrnsuImm>;

// VSRA : Vector Shift Right and Accumulate
defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrsImm>;
defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshruImm>;
defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", ARMvshrsImm>;
defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", ARMvshruImm>;
// VRSRA : Vector Rounding Shift Right and Accumulate
defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrsImm>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshruImm>;
Expand Down

0 comments on commit 6e89887

Please sign in to comment.