Skip to content

Commit

Permalink
[X86] Use SADDSAT/SSUBSAT instead of ADDS/SUBS
Browse files Browse the repository at this point in the history
Migrate the X86 backend from X86ISD opcodes ADDS and SUBS to generic
ISD opcodes SADDSAT and SSUBSAT. This also improves scodegen for
@llvm.sadd.sat() and @llvm.ssub.sat() intrinsics.

This is a followup to D55787 and part of PR40056.

Differential Revision: https://reviews.llvm.org/D55833

llvm-svn: 349520
  • Loading branch information
nikic committed Dec 18, 2018
1 parent 20a6db5 commit f6058ff
Show file tree
Hide file tree
Showing 8 changed files with 538 additions and 38,098 deletions.
40 changes: 30 additions & 10 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -830,14 +830,20 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}

setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
// Use widening instead of promotion.
for (auto VT : { MVT::v8i8, MVT::v4i8, MVT::v2i8,
MVT::v4i16, MVT::v2i16 }) {
setOperationAction(ISD::UADDSAT, VT, Custom);
setOperationAction(ISD::USUBSAT, VT, Custom);
setOperationAction(ISD::UADDSAT, VT, Custom);
setOperationAction(ISD::SADDSAT, VT, Custom);
setOperationAction(ISD::USUBSAT, VT, Custom);
setOperationAction(ISD::SSUBSAT, VT, Custom);
}

setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
Expand Down Expand Up @@ -1212,9 +1218,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, MVT::v4i64, Custom);

setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);

for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
Expand Down Expand Up @@ -1334,7 +1344,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::UADDSAT, VT, Custom);
setOperationAction(ISD::SADDSAT, VT, Custom);
setOperationAction(ISD::USUBSAT, VT, Custom);
setOperationAction(ISD::SSUBSAT, VT, Custom);

setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
Expand Down Expand Up @@ -1596,7 +1608,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::UADDSAT, VT, Custom);
setOperationAction(ISD::SADDSAT, VT, Custom);
setOperationAction(ISD::USUBSAT, VT, Custom);
setOperationAction(ISD::SSUBSAT, VT, Custom);

setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
Expand Down Expand Up @@ -1678,7 +1692,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
setOperationAction(ISD::SSUBSAT, VT, Legal);

// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
Expand Down Expand Up @@ -23388,15 +23404,17 @@ static SDValue LowerADD_SUB(SDValue Op, SelectionDAG &DAG) {
return split256IntArith(Op, DAG);
}

static SDValue LowerUADDSAT_USUBSAT(SDValue Op, SelectionDAG &DAG) {
static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
if (VT.getScalarType() == MVT::i1) {
SDLoc dl(Op);
switch (Op.getOpcode()) {
default: llvm_unreachable("Expected saturated arithmetic opcode");
case ISD::UADDSAT:
case ISD::SADDSAT:
return DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1));
case ISD::USUBSAT:
case ISD::SSUBSAT:
return DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
DAG.getNOT(dl, Op.getOperand(1), VT));
}
Expand Down Expand Up @@ -26194,7 +26212,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADD:
case ISD::SUB: return LowerADD_SUB(Op, DAG);
case ISD::UADDSAT:
case ISD::USUBSAT: return LowerUADDSAT_USUBSAT(Op, DAG);
case ISD::SADDSAT:
case ISD::USUBSAT:
case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, DAG);
case ISD::SMAX:
case ISD::SMIN:
case ISD::UMAX:
Expand Down Expand Up @@ -26277,11 +26297,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::UADDSAT:
case ISD::SADDSAT:
case ISD::USUBSAT:
case ISD::SSUBSAT:
case X86ISD::VPMADDWD:
case X86ISD::AVG: {
// Legalize types for ISD::UADDSAT/USUBSAT and X86ISD::AVG/VPMADDWD
// by widening.
// Legalize types for ISD::UADDSAT/SADDSAT/USUBSAT/SSUBSAT and
// X86ISD::AVG/VPMADDWD by widening.
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");

EVT VT = N->getValueType(0);
Expand Down Expand Up @@ -27228,8 +27250,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FGETEXPS_RND: return "X86ISD::FGETEXPS_RND";
case X86ISD::SCALEF: return "X86ISD::SCALEF";
case X86ISD::SCALEFS: return "X86ISD::SCALEFS";
case X86ISD::ADDS: return "X86ISD::ADDS";
case X86ISD::SUBS: return "X86ISD::SUBS";
case X86ISD::AVG: return "X86ISD::AVG";
case X86ISD::MULHRS: return "X86ISD::MULHRS";
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
Expand Down
4 changes: 0 additions & 4 deletions llvm/lib/Target/X86/X86ISelLowering.h
Expand Up @@ -226,10 +226,6 @@ namespace llvm {
SCALEF,
SCALEFS,

// Integer add/sub with signed saturation.
ADDS,
SUBS,

// Unsigned Integer average.
AVG,

Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Expand Up @@ -4830,9 +4830,9 @@ defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add,
SchedWriteVecALU, 1>;
defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub,
SchedWriteVecALU, 0>;
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds,
defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", saddsat,
SchedWriteVecALU, HasBWI, 1>;
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs,
defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", ssubsat,
SchedWriteVecALU, HasBWI, 0>;
defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", uaddsat,
SchedWriteVecALU, HasBWI, 1>;
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
Expand Up @@ -227,8 +227,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;

def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>;
def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>;
def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp, [SDNPCommutative]>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/X86/X86InstrSSE.td
Expand Up @@ -3623,9 +3623,9 @@ defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
SchedWriteVecALU, 1, NoVLX>;
defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
SchedWriteVecALU, 1, NoVLX>;
defm PADDSB : PDI_binop_all<0xEC, "paddsb", X86adds, v16i8, v32i8,
defm PADDSB : PDI_binop_all<0xEC, "paddsb", saddsat, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDSW : PDI_binop_all<0xED, "paddsw", X86adds, v8i16, v16i16,
defm PADDSW : PDI_binop_all<0xED, "paddsw", saddsat, v8i16, v16i16,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
defm PADDUSB : PDI_binop_all<0xDC, "paddusb", uaddsat, v16i8, v32i8,
SchedWriteVecALU, 1, NoVLX_Or_NoBWI>;
Expand All @@ -3645,9 +3645,9 @@ defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
SchedWriteVecALU, 0, NoVLX>;
defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
SchedWriteVecALU, 0, NoVLX>;
defm PSUBSB : PDI_binop_all<0xE8, "psubsb", X86subs, v16i8, v32i8,
defm PSUBSB : PDI_binop_all<0xE8, "psubsb", ssubsat, v16i8, v32i8,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBSW : PDI_binop_all<0xE9, "psubsw", X86subs, v8i16, v16i16,
defm PSUBSW : PDI_binop_all<0xE9, "psubsw", ssubsat, v8i16, v16i16,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", usubsat, v16i8, v32i8,
SchedWriteVecALU, 0, NoVLX_Or_NoBWI>;
Expand Down
24 changes: 12 additions & 12 deletions llvm/lib/Target/X86/X86IntrinsicsInfo.h
Expand Up @@ -319,8 +319,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, ISD::SADDSAT, 0),
X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, ISD::SADDSAT, 0),
X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
Expand Down Expand Up @@ -361,8 +361,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
Expand Down Expand Up @@ -920,8 +920,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_padds_b_512, INTR_TYPE_2OP, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx512_padds_w_512, INTR_TYPE_2OP, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx512_padds_b_512, INTR_TYPE_2OP, ISD::SADDSAT, 0),
X86_INTRINSIC_DATA(avx512_padds_w_512, INTR_TYPE_2OP, ISD::SADDSAT, 0),
X86_INTRINSIC_DATA(avx512_permvar_df_256, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_permvar_df_512, VPERM_2OP, X86ISD::VPERMV, 0),
X86_INTRINSIC_DATA(avx512_permvar_di_256, VPERM_2OP, X86ISD::VPERMV, 0),
Expand Down Expand Up @@ -1004,8 +1004,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psubs_b_512, INTR_TYPE_2OP, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(avx512_psubs_w_512, INTR_TYPE_2OP, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(avx512_psubs_b_512, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
X86_INTRINSIC_DATA(avx512_psubs_w_512, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
Expand Down Expand Up @@ -1168,8 +1168,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, ISD::SADDSAT, 0),
X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, ISD::SADDSAT, 0),
X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
Expand All @@ -1191,8 +1191,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(sse2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(sse2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
X86_INTRINSIC_DATA(sse2_ucomieq_sd, COMI, X86ISD::UCOMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse2_ucomige_sd, COMI, X86ISD::UCOMI, ISD::SETGE),
X86_INTRINSIC_DATA(sse2_ucomigt_sd, COMI, X86ISD::UCOMI, ISD::SETGT),
Expand Down

0 comments on commit f6058ff

Please sign in to comment.