Skip to content

Commit

Permalink
[X86][SSE] Use the general SMAX/SMIN/UMAX/UMIN opcodes and remove the…
Browse files Browse the repository at this point in the history
… X86 implementation

With the completion of D9746 there is now a common implementation of integer signed/unsigned min/max nodes, removing the need for the equivalent X86 specific implementations.

This patch removes the old X86ISD nodes, legalizes the relevant SSE2/SSE41/AVX2/AVX512 instructions for the ISD versions and converts the small amount of existing X86 code.

Differential Revision: http://reviews.llvm.org/D10947

llvm-svn: 241506
  • Loading branch information
RKSimon committed Jul 6, 2015
1 parent 40dd510 commit 8b75659
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 138 deletions.
78 changes: 64 additions & 14 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -825,6 +825,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);

setOperationAction(ISD::SMAX, MVT::v8i16, Legal);
setOperationAction(ISD::UMAX, MVT::v16i8, Legal);
setOperationAction(ISD::SMIN, MVT::v8i16, Legal);
setOperationAction(ISD::UMIN, MVT::v16i8, Legal);

setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
Expand Down Expand Up @@ -944,6 +949,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
}

setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
setOperationAction(ISD::UMIN, MVT::v4i32, Legal);

// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);

Expand Down Expand Up @@ -1141,6 +1155,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MULHU, MVT::v16i16, Legal);
setOperationAction(ISD::MULHS, MVT::v16i16, Legal);

setOperationAction(ISD::SMAX, MVT::v32i8, Legal);
setOperationAction(ISD::SMAX, MVT::v16i16, Legal);
setOperationAction(ISD::SMAX, MVT::v8i32, Legal);
setOperationAction(ISD::UMAX, MVT::v32i8, Legal);
setOperationAction(ISD::UMAX, MVT::v16i16, Legal);
setOperationAction(ISD::UMAX, MVT::v8i32, Legal);
setOperationAction(ISD::SMIN, MVT::v32i8, Legal);
setOperationAction(ISD::SMIN, MVT::v16i16, Legal);
setOperationAction(ISD::SMIN, MVT::v8i32, Legal);
setOperationAction(ISD::UMIN, MVT::v32i8, Legal);
setOperationAction(ISD::UMIN, MVT::v16i16, Legal);
setOperationAction(ISD::UMIN, MVT::v8i32, Legal);

// The custom lowering for UINT_TO_FP for v8i32 becomes interesting
// when we have a 256bit-wide blend with immediate.
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
Expand Down Expand Up @@ -1376,6 +1403,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::v16i1, Custom);
setOperationAction(ISD::SELECT, MVT::v8i1, Custom);

setOperationAction(ISD::SMAX, MVT::v16i32, Legal);
setOperationAction(ISD::SMAX, MVT::v8i64, Legal);
setOperationAction(ISD::UMAX, MVT::v16i32, Legal);
setOperationAction(ISD::UMAX, MVT::v8i64, Legal);
setOperationAction(ISD::SMIN, MVT::v16i32, Legal);
setOperationAction(ISD::SMIN, MVT::v8i64, Legal);
setOperationAction(ISD::UMIN, MVT::v16i32, Legal);
setOperationAction(ISD::UMIN, MVT::v8i64, Legal);

setOperationAction(ISD::ADD, MVT::v8i64, Legal);
setOperationAction(ISD::ADD, MVT::v16i32, Legal);

Expand Down Expand Up @@ -1494,6 +1530,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);

setOperationAction(ISD::SMAX, MVT::v64i8, Legal);
setOperationAction(ISD::SMAX, MVT::v32i16, Legal);
setOperationAction(ISD::UMAX, MVT::v64i8, Legal);
setOperationAction(ISD::UMAX, MVT::v32i16, Legal);
setOperationAction(ISD::SMIN, MVT::v64i8, Legal);
setOperationAction(ISD::SMIN, MVT::v32i16, Legal);
setOperationAction(ISD::UMIN, MVT::v64i8, Legal);
setOperationAction(ISD::UMIN, MVT::v32i16, Legal);

for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
const MVT VT = (MVT::SimpleValueType)i;

Expand Down Expand Up @@ -1533,6 +1578,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::XOR, MVT::v4i32, Legal);
setOperationAction(ISD::SRA, MVT::v2i64, Custom);
setOperationAction(ISD::SRA, MVT::v4i64, Custom);

setOperationAction(ISD::SMAX, MVT::v2i64, Legal);
setOperationAction(ISD::SMAX, MVT::v4i64, Legal);
setOperationAction(ISD::UMAX, MVT::v2i64, Legal);
setOperationAction(ISD::UMAX, MVT::v4i64, Legal);
setOperationAction(ISD::SMIN, MVT::v2i64, Legal);
setOperationAction(ISD::SMIN, MVT::v4i64, Legal);
setOperationAction(ISD::UMIN, MVT::v2i64, Legal);
setOperationAction(ISD::UMIN, MVT::v4i64, Legal);
}

// We want to custom lower some of our intrinsics.
Expand Down Expand Up @@ -13355,8 +13409,8 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
if (hasMinMax) {
switch (SetCCOpcode) {
default: break;
case ISD::SETULE: Opc = X86ISD::UMIN; MinMax = true; break;
case ISD::SETUGE: Opc = X86ISD::UMAX; MinMax = true; break;
case ISD::SETULE: Opc = ISD::UMIN; MinMax = true; break;
case ISD::SETUGE: Opc = ISD::UMAX; MinMax = true; break;
}

if (MinMax) { Swap = false; Invert = false; FlipSigns = false; }
Expand Down Expand Up @@ -18446,10 +18500,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::HSUB: return "X86ISD::HSUB";
case X86ISD::FHADD: return "X86ISD::FHADD";
case X86ISD::FHSUB: return "X86ISD::FHSUB";
case X86ISD::UMAX: return "X86ISD::UMAX";
case X86ISD::UMIN: return "X86ISD::UMIN";
case X86ISD::SMAX: return "X86ISD::SMAX";
case X86ISD::SMIN: return "X86ISD::SMIN";
case X86ISD::ABS: return "X86ISD::ABS";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND";
Expand Down Expand Up @@ -21626,16 +21676,16 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
default: break;
case ISD::SETULT:
case ISD::SETULE:
Opc = hasUnsigned ? X86ISD::UMIN : 0u; break;
Opc = hasUnsigned ? ISD::UMIN : 0u; break;
case ISD::SETUGT:
case ISD::SETUGE:
Opc = hasUnsigned ? X86ISD::UMAX : 0u; break;
Opc = hasUnsigned ? ISD::UMAX : 0u; break;
case ISD::SETLT:
case ISD::SETLE:
Opc = hasSigned ? X86ISD::SMIN : 0u; break;
Opc = hasSigned ? ISD::SMIN : 0u; break;
case ISD::SETGT:
case ISD::SETGE:
Opc = hasSigned ? X86ISD::SMAX : 0u; break;
Opc = hasSigned ? ISD::SMAX : 0u; break;
}
// Check for x CC y ? y : x -- a min/max with reversed arms.
} else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
Expand All @@ -21644,16 +21694,16 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
default: break;
case ISD::SETULT:
case ISD::SETULE:
Opc = hasUnsigned ? X86ISD::UMAX : 0u; break;
Opc = hasUnsigned ? ISD::UMAX : 0u; break;
case ISD::SETUGT:
case ISD::SETUGE:
Opc = hasUnsigned ? X86ISD::UMIN : 0u; break;
Opc = hasUnsigned ? ISD::UMIN : 0u; break;
case ISD::SETLT:
case ISD::SETLE:
Opc = hasSigned ? X86ISD::SMAX : 0u; break;
Opc = hasSigned ? ISD::SMAX : 0u; break;
case ISD::SETGT:
case ISD::SETGE:
Opc = hasSigned ? X86ISD::SMIN : 0u; break;
Opc = hasSigned ? ISD::SMIN : 0u; break;
}
}

Expand Down
6 changes: 0 additions & 6 deletions llvm/lib/Target/X86/X86ISelLowering.h
Expand Up @@ -233,12 +233,6 @@ namespace llvm {
/// Floating point horizontal sub.
FHSUB,

/// Unsigned integer max and min.
UMAX, UMIN,

/// Signed integer max and min.
SMAX, SMIN,

// Integer absolute value
ABS,

Expand Down
26 changes: 13 additions & 13 deletions llvm/lib/Target/X86/X86InstrAVX512.td
Expand Up @@ -3236,32 +3236,32 @@ let Predicates = [HasBWI] in {
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase, VEX_W;
}

defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", X86smax,
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", smax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", X86smax,
defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", smax,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", X86smax,
defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", X86umax,
defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", umax,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", X86umax,
defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", umax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", X86umax,
defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", X86smin,
defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", smin,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", X86smin,
defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", smin,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", X86smin,
defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", X86umin,
defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", umin,
SSE_INTALU_ITINS_P, HasBWI, 1>;
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", X86umin,
defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", umin,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", X86umin,
defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -4041,7 +4041,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src1,
_.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
_.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
AVX512FMA3Base, EVEX_B;
}
}
Expand Down
5 changes: 0 additions & 5 deletions llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
Expand Up @@ -39,11 +39,6 @@ def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
SDTCisFP<1>, SDTCisVT<3, i8>,
SDTCisVec<1>]>;

def X86umin : SDNode<"X86ISD::UMIN", SDTIntBinOp>;
def X86umax : SDNode<"X86ISD::UMAX", SDTIntBinOp>;
def X86smin : SDNode<"X86ISD::SMIN", SDTIntBinOp>;
def X86smax : SDNode<"X86ISD::SMAX", SDTIntBinOp>;

def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;

Expand Down
56 changes: 28 additions & 28 deletions llvm/lib/Target/X86/X86InstrSSE.td
Expand Up @@ -4035,13 +4035,13 @@ defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
SSE_INTALU_ITINS_P, 0>;
defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
SSE_INTALU_ITINS_P, 0>;
defm PMINUB : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8,
defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8,
SSE_INTALU_ITINS_P, 1>;
defm PMINSW : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16,
defm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16,
SSE_INTALU_ITINS_P, 1>;
defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8,
defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8,
SSE_INTALU_ITINS_P, 1>;
defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16,
defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16,
SSE_INTALU_ITINS_P, 1>;

// Intrinsic forms
Expand Down Expand Up @@ -6835,28 +6835,28 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,

let Predicates = [HasAVX, NoVLX] in {
let isCommutable = 0 in
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128,
defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128,
defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128,
defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128,
defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128,
defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128,
defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128,
defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
defm VPMULDQ : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v2i64, v4i32,
Expand All @@ -6866,28 +6866,28 @@ let Predicates = [HasAVX, NoVLX] in {

let Predicates = [HasAVX2, NoVLX] in {
let isCommutable = 0 in
defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256,
defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256,
defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256,
defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256,
defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256,
defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256,
defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
defm VPMULDQY : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v4i64, v8i32,
Expand All @@ -6897,21 +6897,21 @@ let Predicates = [HasAVX2, NoVLX] in {

let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in
defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
defm PMINSB : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
defm PMINSD : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128,
defm PMINUD : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128,
defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128,
defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128,
defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128,
defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMULDQ : SS48I_binop_rm2<0x28, "pmuldq", X86pmuldq, v2i64, v4i32,
VR128, memopv2i64, i128mem,
Expand Down

0 comments on commit 8b75659

Please sign in to comment.