From 47c4de66e16373b57971e700374e83b8ff97b799 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 16 Nov 2024 01:02:48 +0300 Subject: [PATCH 1/4] X86 --- llvm/lib/Target/X86/CMakeLists.txt | 1 + llvm/lib/Target/X86/X86ISelLowering.cpp | 478 +-------- llvm/lib/Target/X86/X86ISelLowering.h | 982 +------------------ llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 6 +- llvm/lib/Target/X86/X86InstrFragments.td | 11 +- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 52 +- llvm/lib/Target/X86/X86SelectionDAGInfo.cpp | 77 +- llvm/lib/Target/X86/X86SelectionDAGInfo.h | 56 +- 8 files changed, 185 insertions(+), 1478 deletions(-) diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt index f9bd233cf8ecf..407772197b24a 100644 --- a/llvm/lib/Target/X86/CMakeLists.txt +++ b/llvm/lib/Target/X86/CMakeLists.txt @@ -17,6 +17,7 @@ tablegen(LLVM X86GenInstrInfo.inc -gen-instr-info tablegen(LLVM X86GenMnemonicTables.inc -gen-x86-mnemonic-tables -asmwriternum=1) tablegen(LLVM X86GenRegisterBank.inc -gen-register-bank) tablegen(LLVM X86GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM X86GenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM X86GenFoldTables.inc -gen-x86-fold-tables -asmwriternum=1) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 621f1868d3311..7db2cda6eba46 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19499,9 +19499,9 @@ static SDValue GetTLSADDR(SelectionDAG &DAG, GlobalAddressSDNode *GA, } if (!Ret) { - X86ISD::NodeType CallType = UseTLSDESC ? X86ISD::TLSDESC - : LocalDynamic ? X86ISD::TLSBASEADDR - : X86ISD::TLSADDR; + unsigned CallType = UseTLSDESC ? X86ISD::TLSDESC + : LocalDynamic ? 
X86ISD::TLSBASEADDR + : X86ISD::TLSADDR; Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl); if (LoadGlobalBaseReg) { @@ -29386,7 +29386,7 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget, APInt PreferredZero = APInt::getZero(SizeInBits); APInt OppositeZero = PreferredZero; EVT IVT = VT.changeTypeToInteger(); - X86ISD::NodeType MinMaxOp; + unsigned MinMaxOp; if (IsMaxOp) { MinMaxOp = X86ISD::FMAX; OppositeZero.setSignBit(); @@ -34914,476 +34914,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, } } -const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { - switch ((X86ISD::NodeType)Opcode) { - case X86ISD::FIRST_NUMBER: break; -#define NODE_NAME_CASE(NODE) case X86ISD::NODE: return "X86ISD::" #NODE; - NODE_NAME_CASE(BSF) - NODE_NAME_CASE(BSR) - NODE_NAME_CASE(FSHL) - NODE_NAME_CASE(FSHR) - NODE_NAME_CASE(FAND) - NODE_NAME_CASE(FANDN) - NODE_NAME_CASE(FOR) - NODE_NAME_CASE(FXOR) - NODE_NAME_CASE(FILD) - NODE_NAME_CASE(FIST) - NODE_NAME_CASE(FP_TO_INT_IN_MEM) - NODE_NAME_CASE(FLD) - NODE_NAME_CASE(FST) - NODE_NAME_CASE(CALL) - NODE_NAME_CASE(CALL_RVMARKER) - NODE_NAME_CASE(IMP_CALL) - NODE_NAME_CASE(BT) - NODE_NAME_CASE(CMP) - NODE_NAME_CASE(FCMP) - NODE_NAME_CASE(STRICT_FCMP) - NODE_NAME_CASE(STRICT_FCMPS) - NODE_NAME_CASE(COMI) - NODE_NAME_CASE(UCOMI) - NODE_NAME_CASE(COMX) - NODE_NAME_CASE(UCOMX) - NODE_NAME_CASE(CMPM) - NODE_NAME_CASE(CMPMM) - NODE_NAME_CASE(STRICT_CMPM) - NODE_NAME_CASE(CMPMM_SAE) - NODE_NAME_CASE(SETCC) - NODE_NAME_CASE(SETCC_CARRY) - NODE_NAME_CASE(FSETCC) - NODE_NAME_CASE(FSETCCM) - NODE_NAME_CASE(FSETCCM_SAE) - NODE_NAME_CASE(CMOV) - NODE_NAME_CASE(BRCOND) - NODE_NAME_CASE(RET_GLUE) - NODE_NAME_CASE(IRET) - NODE_NAME_CASE(REP_STOS) - NODE_NAME_CASE(REP_MOVS) - NODE_NAME_CASE(GlobalBaseReg) - NODE_NAME_CASE(Wrapper) - NODE_NAME_CASE(WrapperRIP) - NODE_NAME_CASE(MOVQ2DQ) - NODE_NAME_CASE(MOVDQ2Q) - NODE_NAME_CASE(MMX_MOVD2W) - NODE_NAME_CASE(MMX_MOVW2D) - NODE_NAME_CASE(PEXTRB) - 
NODE_NAME_CASE(PEXTRW) - NODE_NAME_CASE(INSERTPS) - NODE_NAME_CASE(PINSRB) - NODE_NAME_CASE(PINSRW) - NODE_NAME_CASE(PSHUFB) - NODE_NAME_CASE(ANDNP) - NODE_NAME_CASE(BLENDI) - NODE_NAME_CASE(BLENDV) - NODE_NAME_CASE(HADD) - NODE_NAME_CASE(HSUB) - NODE_NAME_CASE(FHADD) - NODE_NAME_CASE(FHSUB) - NODE_NAME_CASE(CONFLICT) - NODE_NAME_CASE(FMAX) - NODE_NAME_CASE(FMAXS) - NODE_NAME_CASE(FMAX_SAE) - NODE_NAME_CASE(FMAXS_SAE) - NODE_NAME_CASE(STRICT_FMAX) - NODE_NAME_CASE(FMIN) - NODE_NAME_CASE(FMINS) - NODE_NAME_CASE(FMIN_SAE) - NODE_NAME_CASE(FMINS_SAE) - NODE_NAME_CASE(STRICT_FMIN) - NODE_NAME_CASE(FMAXC) - NODE_NAME_CASE(FMINC) - NODE_NAME_CASE(FRSQRT) - NODE_NAME_CASE(FRCP) - NODE_NAME_CASE(EXTRQI) - NODE_NAME_CASE(INSERTQI) - NODE_NAME_CASE(TLSADDR) - NODE_NAME_CASE(TLSBASEADDR) - NODE_NAME_CASE(TLSCALL) - NODE_NAME_CASE(TLSDESC) - NODE_NAME_CASE(EH_SJLJ_SETJMP) - NODE_NAME_CASE(EH_SJLJ_LONGJMP) - NODE_NAME_CASE(EH_SJLJ_SETUP_DISPATCH) - NODE_NAME_CASE(EH_RETURN) - NODE_NAME_CASE(TC_RETURN) - NODE_NAME_CASE(FNSTCW16m) - NODE_NAME_CASE(FLDCW16m) - NODE_NAME_CASE(FNSTENVm) - NODE_NAME_CASE(FLDENVm) - NODE_NAME_CASE(LCMPXCHG_DAG) - NODE_NAME_CASE(LCMPXCHG8_DAG) - NODE_NAME_CASE(LCMPXCHG16_DAG) - NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG) - NODE_NAME_CASE(LADD) - NODE_NAME_CASE(LSUB) - NODE_NAME_CASE(LOR) - NODE_NAME_CASE(LXOR) - NODE_NAME_CASE(LAND) - NODE_NAME_CASE(LBTS) - NODE_NAME_CASE(LBTC) - NODE_NAME_CASE(LBTR) - NODE_NAME_CASE(LBTS_RM) - NODE_NAME_CASE(LBTC_RM) - NODE_NAME_CASE(LBTR_RM) - NODE_NAME_CASE(AADD) - NODE_NAME_CASE(AOR) - NODE_NAME_CASE(AXOR) - NODE_NAME_CASE(AAND) - NODE_NAME_CASE(VZEXT_MOVL) - NODE_NAME_CASE(VZEXT_LOAD) - NODE_NAME_CASE(VEXTRACT_STORE) - NODE_NAME_CASE(VTRUNC) - NODE_NAME_CASE(VTRUNCS) - NODE_NAME_CASE(VTRUNCUS) - NODE_NAME_CASE(VMTRUNC) - NODE_NAME_CASE(VMTRUNCS) - NODE_NAME_CASE(VMTRUNCUS) - NODE_NAME_CASE(VTRUNCSTORES) - NODE_NAME_CASE(VTRUNCSTOREUS) - NODE_NAME_CASE(VMTRUNCSTORES) - NODE_NAME_CASE(VMTRUNCSTOREUS) - 
NODE_NAME_CASE(VFPEXT) - NODE_NAME_CASE(STRICT_VFPEXT) - NODE_NAME_CASE(VFPEXT_SAE) - NODE_NAME_CASE(VFPEXTS) - NODE_NAME_CASE(VFPEXTS_SAE) - NODE_NAME_CASE(VFPROUND) - NODE_NAME_CASE(VFPROUND2) - NODE_NAME_CASE(VFPROUND2_RND) - NODE_NAME_CASE(STRICT_VFPROUND) - NODE_NAME_CASE(VMFPROUND) - NODE_NAME_CASE(VFPROUND_RND) - NODE_NAME_CASE(VFPROUNDS) - NODE_NAME_CASE(VFPROUNDS_RND) - NODE_NAME_CASE(VSHLDQ) - NODE_NAME_CASE(VSRLDQ) - NODE_NAME_CASE(VSHL) - NODE_NAME_CASE(VSRL) - NODE_NAME_CASE(VSRA) - NODE_NAME_CASE(VSHLI) - NODE_NAME_CASE(VSRLI) - NODE_NAME_CASE(VSRAI) - NODE_NAME_CASE(VSHLV) - NODE_NAME_CASE(VSRLV) - NODE_NAME_CASE(VSRAV) - NODE_NAME_CASE(VROTLI) - NODE_NAME_CASE(VROTRI) - NODE_NAME_CASE(VPPERM) - NODE_NAME_CASE(CMPP) - NODE_NAME_CASE(STRICT_CMPP) - NODE_NAME_CASE(PCMPEQ) - NODE_NAME_CASE(PCMPGT) - NODE_NAME_CASE(PHMINPOS) - NODE_NAME_CASE(ADD) - NODE_NAME_CASE(SUB) - NODE_NAME_CASE(ADC) - NODE_NAME_CASE(SBB) - NODE_NAME_CASE(SMUL) - NODE_NAME_CASE(UMUL) - NODE_NAME_CASE(OR) - NODE_NAME_CASE(XOR) - NODE_NAME_CASE(AND) - NODE_NAME_CASE(BEXTR) - NODE_NAME_CASE(BEXTRI) - NODE_NAME_CASE(BZHI) - NODE_NAME_CASE(PDEP) - NODE_NAME_CASE(PEXT) - NODE_NAME_CASE(MUL_IMM) - NODE_NAME_CASE(MOVMSK) - NODE_NAME_CASE(PTEST) - NODE_NAME_CASE(TESTP) - NODE_NAME_CASE(KORTEST) - NODE_NAME_CASE(KTEST) - NODE_NAME_CASE(KADD) - NODE_NAME_CASE(KSHIFTL) - NODE_NAME_CASE(KSHIFTR) - NODE_NAME_CASE(PACKSS) - NODE_NAME_CASE(PACKUS) - NODE_NAME_CASE(PALIGNR) - NODE_NAME_CASE(VALIGN) - NODE_NAME_CASE(VSHLD) - NODE_NAME_CASE(VSHRD) - NODE_NAME_CASE(PSHUFD) - NODE_NAME_CASE(PSHUFHW) - NODE_NAME_CASE(PSHUFLW) - NODE_NAME_CASE(SHUFP) - NODE_NAME_CASE(SHUF128) - NODE_NAME_CASE(MOVLHPS) - NODE_NAME_CASE(MOVHLPS) - NODE_NAME_CASE(MOVDDUP) - NODE_NAME_CASE(MOVSHDUP) - NODE_NAME_CASE(MOVSLDUP) - NODE_NAME_CASE(MOVSD) - NODE_NAME_CASE(MOVSS) - NODE_NAME_CASE(MOVSH) - NODE_NAME_CASE(UNPCKL) - NODE_NAME_CASE(UNPCKH) - NODE_NAME_CASE(VBROADCAST) - NODE_NAME_CASE(VBROADCAST_LOAD) - 
NODE_NAME_CASE(VBROADCASTM) - NODE_NAME_CASE(SUBV_BROADCAST_LOAD) - NODE_NAME_CASE(VPERMILPV) - NODE_NAME_CASE(VPERMILPI) - NODE_NAME_CASE(VPERM2X128) - NODE_NAME_CASE(VPERMV) - NODE_NAME_CASE(VPERMV3) - NODE_NAME_CASE(VPERMI) - NODE_NAME_CASE(VPTERNLOG) - NODE_NAME_CASE(FP_TO_SINT_SAT) - NODE_NAME_CASE(FP_TO_UINT_SAT) - NODE_NAME_CASE(VFIXUPIMM) - NODE_NAME_CASE(VFIXUPIMM_SAE) - NODE_NAME_CASE(VFIXUPIMMS) - NODE_NAME_CASE(VFIXUPIMMS_SAE) - NODE_NAME_CASE(VRANGE) - NODE_NAME_CASE(VRANGE_SAE) - NODE_NAME_CASE(VRANGES) - NODE_NAME_CASE(VRANGES_SAE) - NODE_NAME_CASE(PMULUDQ) - NODE_NAME_CASE(PMULDQ) - NODE_NAME_CASE(PSADBW) - NODE_NAME_CASE(DBPSADBW) - NODE_NAME_CASE(VASTART_SAVE_XMM_REGS) - NODE_NAME_CASE(VAARG_64) - NODE_NAME_CASE(VAARG_X32) - NODE_NAME_CASE(DYN_ALLOCA) - NODE_NAME_CASE(MFENCE) - NODE_NAME_CASE(SEG_ALLOCA) - NODE_NAME_CASE(PROBED_ALLOCA) - NODE_NAME_CASE(RDRAND) - NODE_NAME_CASE(RDSEED) - NODE_NAME_CASE(RDPKRU) - NODE_NAME_CASE(WRPKRU) - NODE_NAME_CASE(VPMADDUBSW) - NODE_NAME_CASE(VPMADDWD) - NODE_NAME_CASE(VPSHA) - NODE_NAME_CASE(VPSHL) - NODE_NAME_CASE(VPCOM) - NODE_NAME_CASE(VPCOMU) - NODE_NAME_CASE(VPERMIL2) - NODE_NAME_CASE(FMSUB) - NODE_NAME_CASE(STRICT_FMSUB) - NODE_NAME_CASE(FNMADD) - NODE_NAME_CASE(STRICT_FNMADD) - NODE_NAME_CASE(FNMSUB) - NODE_NAME_CASE(STRICT_FNMSUB) - NODE_NAME_CASE(FMADDSUB) - NODE_NAME_CASE(FMSUBADD) - NODE_NAME_CASE(FMADD_RND) - NODE_NAME_CASE(FNMADD_RND) - NODE_NAME_CASE(FMSUB_RND) - NODE_NAME_CASE(FNMSUB_RND) - NODE_NAME_CASE(FMADDSUB_RND) - NODE_NAME_CASE(FMSUBADD_RND) - NODE_NAME_CASE(VFMADDC) - NODE_NAME_CASE(VFMADDC_RND) - NODE_NAME_CASE(VFCMADDC) - NODE_NAME_CASE(VFCMADDC_RND) - NODE_NAME_CASE(VFMULC) - NODE_NAME_CASE(VFMULC_RND) - NODE_NAME_CASE(VFCMULC) - NODE_NAME_CASE(VFCMULC_RND) - NODE_NAME_CASE(VFMULCSH) - NODE_NAME_CASE(VFMULCSH_RND) - NODE_NAME_CASE(VFCMULCSH) - NODE_NAME_CASE(VFCMULCSH_RND) - NODE_NAME_CASE(VFMADDCSH) - NODE_NAME_CASE(VFMADDCSH_RND) - NODE_NAME_CASE(VFCMADDCSH) - 
NODE_NAME_CASE(VFCMADDCSH_RND) - NODE_NAME_CASE(VPMADD52H) - NODE_NAME_CASE(VPMADD52L) - NODE_NAME_CASE(VRNDSCALE) - NODE_NAME_CASE(STRICT_VRNDSCALE) - NODE_NAME_CASE(VRNDSCALE_SAE) - NODE_NAME_CASE(VRNDSCALES) - NODE_NAME_CASE(VRNDSCALES_SAE) - NODE_NAME_CASE(VREDUCE) - NODE_NAME_CASE(VREDUCE_SAE) - NODE_NAME_CASE(VREDUCES) - NODE_NAME_CASE(VREDUCES_SAE) - NODE_NAME_CASE(VGETMANT) - NODE_NAME_CASE(VGETMANT_SAE) - NODE_NAME_CASE(VGETMANTS) - NODE_NAME_CASE(VGETMANTS_SAE) - NODE_NAME_CASE(PCMPESTR) - NODE_NAME_CASE(PCMPISTR) - NODE_NAME_CASE(XTEST) - NODE_NAME_CASE(COMPRESS) - NODE_NAME_CASE(EXPAND) - NODE_NAME_CASE(SELECTS) - NODE_NAME_CASE(ADDSUB) - NODE_NAME_CASE(RCP14) - NODE_NAME_CASE(RCP14S) - NODE_NAME_CASE(RSQRT14) - NODE_NAME_CASE(RSQRT14S) - NODE_NAME_CASE(FADD_RND) - NODE_NAME_CASE(FADDS) - NODE_NAME_CASE(FADDS_RND) - NODE_NAME_CASE(FSUB_RND) - NODE_NAME_CASE(FSUBS) - NODE_NAME_CASE(FSUBS_RND) - NODE_NAME_CASE(FMUL_RND) - NODE_NAME_CASE(FMULS) - NODE_NAME_CASE(FMULS_RND) - NODE_NAME_CASE(FDIV_RND) - NODE_NAME_CASE(FDIVS) - NODE_NAME_CASE(FDIVS_RND) - NODE_NAME_CASE(FSQRT_RND) - NODE_NAME_CASE(FSQRTS) - NODE_NAME_CASE(FSQRTS_RND) - NODE_NAME_CASE(FGETEXP) - NODE_NAME_CASE(FGETEXP_SAE) - NODE_NAME_CASE(FGETEXPS) - NODE_NAME_CASE(FGETEXPS_SAE) - NODE_NAME_CASE(SCALEF) - NODE_NAME_CASE(SCALEF_RND) - NODE_NAME_CASE(SCALEFS) - NODE_NAME_CASE(SCALEFS_RND) - NODE_NAME_CASE(MULHRS) - NODE_NAME_CASE(SINT_TO_FP_RND) - NODE_NAME_CASE(UINT_TO_FP_RND) - NODE_NAME_CASE(CVTTP2SI) - NODE_NAME_CASE(CVTTP2UI) - NODE_NAME_CASE(STRICT_CVTTP2SI) - NODE_NAME_CASE(STRICT_CVTTP2UI) - NODE_NAME_CASE(MCVTTP2SI) - NODE_NAME_CASE(MCVTTP2UI) - NODE_NAME_CASE(CVTTP2SI_SAE) - NODE_NAME_CASE(CVTTP2UI_SAE) - NODE_NAME_CASE(CVTTS2SI) - NODE_NAME_CASE(CVTTS2UI) - NODE_NAME_CASE(CVTTS2SI_SAE) - NODE_NAME_CASE(CVTTS2UI_SAE) - NODE_NAME_CASE(CVTSI2P) - NODE_NAME_CASE(CVTUI2P) - NODE_NAME_CASE(STRICT_CVTSI2P) - NODE_NAME_CASE(STRICT_CVTUI2P) - NODE_NAME_CASE(MCVTSI2P) - NODE_NAME_CASE(MCVTUI2P) 
- NODE_NAME_CASE(VFPCLASS) - NODE_NAME_CASE(VFPCLASSS) - NODE_NAME_CASE(MULTISHIFT) - NODE_NAME_CASE(SCALAR_SINT_TO_FP) - NODE_NAME_CASE(SCALAR_SINT_TO_FP_RND) - NODE_NAME_CASE(SCALAR_UINT_TO_FP) - NODE_NAME_CASE(SCALAR_UINT_TO_FP_RND) - NODE_NAME_CASE(CVTPS2PH) - NODE_NAME_CASE(STRICT_CVTPS2PH) - NODE_NAME_CASE(CVTPS2PH_SAE) - NODE_NAME_CASE(MCVTPS2PH) - NODE_NAME_CASE(MCVTPS2PH_SAE) - NODE_NAME_CASE(CVTPH2PS) - NODE_NAME_CASE(STRICT_CVTPH2PS) - NODE_NAME_CASE(CVTPH2PS_SAE) - NODE_NAME_CASE(CVTP2SI) - NODE_NAME_CASE(CVTP2UI) - NODE_NAME_CASE(MCVTP2SI) - NODE_NAME_CASE(MCVTP2UI) - NODE_NAME_CASE(CVTP2SI_RND) - NODE_NAME_CASE(CVTP2UI_RND) - NODE_NAME_CASE(CVTS2SI) - NODE_NAME_CASE(CVTS2UI) - NODE_NAME_CASE(CVTS2SI_RND) - NODE_NAME_CASE(CVTS2UI_RND) - NODE_NAME_CASE(CVTNEPS2BF16) - NODE_NAME_CASE(MCVTNEPS2BF16) - NODE_NAME_CASE(DPBF16PS) - NODE_NAME_CASE(DPFP16PS) - NODE_NAME_CASE(MPSADBW) - NODE_NAME_CASE(LWPINS) - NODE_NAME_CASE(MGATHER) - NODE_NAME_CASE(MSCATTER) - NODE_NAME_CASE(VPDPBUSD) - NODE_NAME_CASE(VPDPBUSDS) - NODE_NAME_CASE(VPDPWSSD) - NODE_NAME_CASE(VPDPWSSDS) - NODE_NAME_CASE(VPSHUFBITQMB) - NODE_NAME_CASE(GF2P8MULB) - NODE_NAME_CASE(GF2P8AFFINEQB) - NODE_NAME_CASE(GF2P8AFFINEINVQB) - NODE_NAME_CASE(NT_CALL) - NODE_NAME_CASE(NT_BRIND) - NODE_NAME_CASE(UMWAIT) - NODE_NAME_CASE(TPAUSE) - NODE_NAME_CASE(ENQCMD) - NODE_NAME_CASE(ENQCMDS) - NODE_NAME_CASE(VP2INTERSECT) - NODE_NAME_CASE(VPDPBSUD) - NODE_NAME_CASE(VPDPBSUDS) - NODE_NAME_CASE(VPDPBUUD) - NODE_NAME_CASE(VPDPBUUDS) - NODE_NAME_CASE(VPDPBSSD) - NODE_NAME_CASE(VPDPBSSDS) - NODE_NAME_CASE(VPDPWSUD) - NODE_NAME_CASE(VPDPWSUDS) - NODE_NAME_CASE(VPDPWUSD) - NODE_NAME_CASE(VPDPWUSDS) - NODE_NAME_CASE(VPDPWUUD) - NODE_NAME_CASE(VPDPWUUDS) - NODE_NAME_CASE(VMINMAX) - NODE_NAME_CASE(VMINMAX_SAE) - NODE_NAME_CASE(VMINMAXS) - NODE_NAME_CASE(VMINMAXS_SAE) - NODE_NAME_CASE(CVTP2IBS) - NODE_NAME_CASE(CVTP2IUBS) - NODE_NAME_CASE(CVTP2IBS_RND) - NODE_NAME_CASE(CVTP2IUBS_RND) - NODE_NAME_CASE(CVTTP2IBS) - 
NODE_NAME_CASE(CVTTP2IUBS) - NODE_NAME_CASE(CVTTP2IBS_SAE) - NODE_NAME_CASE(CVTTP2IUBS_SAE) - NODE_NAME_CASE(VCVT2PH2BF8) - NODE_NAME_CASE(VCVT2PH2BF8S) - NODE_NAME_CASE(VCVT2PH2HF8) - NODE_NAME_CASE(VCVT2PH2HF8S) - NODE_NAME_CASE(VCVTBIASPH2BF8) - NODE_NAME_CASE(VCVTBIASPH2BF8S) - NODE_NAME_CASE(VCVTBIASPH2HF8) - NODE_NAME_CASE(VCVTBIASPH2HF8S) - NODE_NAME_CASE(VCVTPH2BF8) - NODE_NAME_CASE(VCVTPH2BF8S) - NODE_NAME_CASE(VCVTPH2HF8) - NODE_NAME_CASE(VCVTPH2HF8S) - NODE_NAME_CASE(VMCVTBIASPH2BF8) - NODE_NAME_CASE(VMCVTBIASPH2BF8S) - NODE_NAME_CASE(VMCVTBIASPH2HF8) - NODE_NAME_CASE(VMCVTBIASPH2HF8S) - NODE_NAME_CASE(VMCVTPH2BF8) - NODE_NAME_CASE(VMCVTPH2BF8S) - NODE_NAME_CASE(VMCVTPH2HF8) - NODE_NAME_CASE(VMCVTPH2HF8S) - NODE_NAME_CASE(VCVTHF82PH) - NODE_NAME_CASE(AESENC128KL) - NODE_NAME_CASE(AESDEC128KL) - NODE_NAME_CASE(AESENC256KL) - NODE_NAME_CASE(AESDEC256KL) - NODE_NAME_CASE(AESENCWIDE128KL) - NODE_NAME_CASE(AESDECWIDE128KL) - NODE_NAME_CASE(AESENCWIDE256KL) - NODE_NAME_CASE(AESDECWIDE256KL) - NODE_NAME_CASE(CMPCCXADD) - NODE_NAME_CASE(TESTUI) - NODE_NAME_CASE(FP80_ADD) - NODE_NAME_CASE(STRICT_FP80_ADD) - NODE_NAME_CASE(CCMP) - NODE_NAME_CASE(CTEST) - NODE_NAME_CASE(CLOAD) - NODE_NAME_CASE(CSTORE) - NODE_NAME_CASE(CVTTS2SIS) - NODE_NAME_CASE(CVTTS2UIS) - NODE_NAME_CASE(CVTTS2SIS_SAE) - NODE_NAME_CASE(CVTTS2UIS_SAE) - NODE_NAME_CASE(CVTTP2SIS) - NODE_NAME_CASE(MCVTTP2SIS) - NODE_NAME_CASE(CVTTP2UIS_SAE) - NODE_NAME_CASE(CVTTP2SIS_SAE) - NODE_NAME_CASE(CVTTP2UIS) - NODE_NAME_CASE(MCVTTP2UIS) - NODE_NAME_CASE(POP_FROM_X87_REG) - } - return nullptr; -#undef NODE_NAME_CASE -} - /// Return true if the addressing mode represented by AM is legal for this /// target, for a load/store of the specified type. 
bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL, diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index b7151f65942b4..4365bc0075fdc 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -14,6 +14,7 @@ #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H +#include "X86SelectionDAGInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/TargetLowering.h" @@ -21,984 +22,6 @@ namespace llvm { class X86Subtarget; class X86TargetMachine; - namespace X86ISD { - // X86 Specific DAG Nodes - enum NodeType : unsigned { - // Start the numbering where the builtin ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - /// Bit scan forward. - BSF, - /// Bit scan reverse. - BSR, - - /// X86 funnel/double shift i16 instructions. These correspond to - /// X86::SHLDW and X86::SHRDW instructions which have different amt - /// modulo rules to generic funnel shifts. - /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD. - FSHL, - FSHR, - - /// Bitwise logical AND of floating point values. This corresponds - /// to X86::ANDPS or X86::ANDPD. - FAND, - - /// Bitwise logical OR of floating point values. This corresponds - /// to X86::ORPS or X86::ORPD. - FOR, - - /// Bitwise logical XOR of floating point values. This corresponds - /// to X86::XORPS or X86::XORPD. - FXOR, - - /// Bitwise logical ANDNOT of floating point values. This - /// corresponds to X86::ANDNPS or X86::ANDNPD. - FANDN, - - /// These operations represent an abstract X86 call - /// instruction, which includes a bunch of information. In particular the - /// operands of these node are: - /// - /// #0 - The incoming token chain - /// #1 - The callee - /// #2 - The number of arg bytes the caller pushes on the stack. - /// #3 - The number of arg bytes the callee pops off the stack. 
- /// #4 - The value to pass in AL/AX/EAX (optional) - /// #5 - The value to pass in DL/DX/EDX (optional) - /// - /// The result values of these nodes are: - /// - /// #0 - The outgoing token chain - /// #1 - The first register result value (optional) - /// #2 - The second register result value (optional) - /// - CALL, - - /// Same as call except it adds the NoTrack prefix. - NT_CALL, - - // Pseudo for a OBJC call that gets emitted together with a special - // marker instruction. - CALL_RVMARKER, - - /// The same as ISD::CopyFromReg except that this node makes it explicit - /// that it may lower to an x87 FPU stack pop. Optimizations should be more - /// cautious when handling this node than a normal CopyFromReg to avoid - /// removing a required FPU stack pop. A key requirement is optimizations - /// should not optimize any users of a chain that contains a - /// POP_FROM_X87_REG to use a chain from a point earlier than the - /// POP_FROM_X87_REG (which may remove a required FPU stack pop). - POP_FROM_X87_REG, - - // Pseudo for a call to an imported function to ensure the correct machine - // instruction is emitted for Import Call Optimization. - IMP_CALL, - - /// X86 compare and logical compare instructions. - CMP, - FCMP, - COMI, - UCOMI, - - // X86 compare with Intrinsics similar to COMI. - COMX, - UCOMX, - - /// X86 bit-test instructions. - BT, - - /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS - /// operand, usually produced by a CMP instruction. - SETCC, - - /// X86 Select - SELECTS, - - // Same as SETCC except it's materialized with a sbb and the value is all - // one's or all zero's. - SETCC_CARRY, // R = carry_bit ? ~0 : 0 - - /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. - /// Operands are two FP values to compare; result is a mask of - /// 0s or 1s. Generally DTRT for C/C++ with NaNs. - FSETCC, - - /// X86 FP SETCC, similar to above, but with output as an i1 mask and - /// and a version with SAE. 
- FSETCCM, - FSETCCM_SAE, - - /// X86 conditional moves. Operand 0 and operand 1 are the two values - /// to select from. Operand 2 is the condition code, and operand 3 is the - /// flag operand produced by a CMP or TEST instruction. - CMOV, - - /// X86 conditional branches. Operand 0 is the chain operand, operand 1 - /// is the block to branch if condition is true, operand 2 is the - /// condition code, and operand 3 is the flag operand produced by a CMP - /// or TEST instruction. - BRCOND, - - /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and - /// operand 1 is the target address. - NT_BRIND, - - /// Return with a glue operand. Operand 0 is the chain operand, operand - /// 1 is the number of bytes of stack to pop. - RET_GLUE, - - /// Return from interrupt. Operand 0 is the number of bytes to pop. - IRET, - - /// Repeat fill, corresponds to X86::REP_STOSx. - REP_STOS, - - /// Repeat move, corresponds to X86::REP_MOVSx. - REP_MOVS, - - /// On Darwin, this node represents the result of the popl - /// at function entry, used for PIC code. - GlobalBaseReg, - - /// A wrapper node for TargetConstantPool, TargetJumpTable, - /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, - /// MCSymbol and TargetBlockAddress. - Wrapper, - - /// Special wrapper used under X86-64 PIC mode for RIP - /// relative displacements. - WrapperRIP, - - /// Copies a 64-bit value from an MMX vector to the low word - /// of an XMM vector, with the high word zero filled. - MOVQ2DQ, - - /// Copies a 64-bit value from the low word of an XMM vector - /// to an MMX vector. - MOVDQ2Q, - - /// Copies a 32-bit value from the low word of a MMX - /// vector to a GPR. - MMX_MOVD2W, - - /// Copies a GPR into the low 32-bit word of a MMX vector - /// and zero out the high word. - MMX_MOVW2D, - - /// Extract an 8-bit value from a vector and zero extend it to - /// i32, corresponds to X86::PEXTRB. 
- PEXTRB, - - /// Extract a 16-bit value from a vector and zero extend it to - /// i32, corresponds to X86::PEXTRW. - PEXTRW, - - /// Insert any element of a 4 x float vector into any element - /// of a destination 4 x floatvector. - INSERTPS, - - /// Insert the lower 8-bits of a 32-bit value to a vector, - /// corresponds to X86::PINSRB. - PINSRB, - - /// Insert the lower 16-bits of a 32-bit value to a vector, - /// corresponds to X86::PINSRW. - PINSRW, - - /// Shuffle 16 8-bit values within a vector. - PSHUFB, - - /// Compute Sum of Absolute Differences. - PSADBW, - /// Compute Double Block Packed Sum-Absolute-Differences - DBPSADBW, - - /// Bitwise Logical AND NOT of Packed FP values. - ANDNP, - - /// Blend where the selector is an immediate. - BLENDI, - - /// Dynamic (non-constant condition) vector blend where only the sign bits - /// of the condition elements are used. This is used to enforce that the - /// condition mask is not valid for generic VSELECT optimizations. This - /// is also used to implement the intrinsics. - /// Operands are in VSELECT order: MASK, TRUE, FALSE - BLENDV, - - /// Combined add and sub on an FP vector. - ADDSUB, - - // FP vector ops with rounding mode. - FADD_RND, - FADDS, - FADDS_RND, - FSUB_RND, - FSUBS, - FSUBS_RND, - FMUL_RND, - FMULS, - FMULS_RND, - FDIV_RND, - FDIVS, - FDIVS_RND, - FMAX_SAE, - FMAXS_SAE, - FMIN_SAE, - FMINS_SAE, - FSQRT_RND, - FSQRTS, - FSQRTS_RND, - - // FP vector get exponent. - FGETEXP, - FGETEXP_SAE, - FGETEXPS, - FGETEXPS_SAE, - // Extract Normalized Mantissas. - VGETMANT, - VGETMANT_SAE, - VGETMANTS, - VGETMANTS_SAE, - // FP Scale. - SCALEF, - SCALEF_RND, - SCALEFS, - SCALEFS_RND, - - /// Integer horizontal add/sub. - HADD, - HSUB, - - /// Floating point horizontal add/sub. - FHADD, - FHSUB, - - // Detect Conflicts Within a Vector - CONFLICT, - - /// Floating point max and min. - FMAX, - FMIN, - - /// Commutative FMIN and FMAX. - FMAXC, - FMINC, - - /// Scalar intrinsic floating point max and min. 
- FMAXS, - FMINS, - - /// Floating point reciprocal-sqrt and reciprocal approximation. - /// Note that these typically require refinement - /// in order to obtain suitable precision. - FRSQRT, - FRCP, - - // AVX-512 reciprocal approximations with a little more precision. - RSQRT14, - RSQRT14S, - RCP14, - RCP14S, - - // Thread Local Storage. - TLSADDR, - - // Thread Local Storage. A call to get the start address - // of the TLS block for the current module. - TLSBASEADDR, - - // Thread Local Storage. When calling to an OS provided - // thunk at the address from an earlier relocation. - TLSCALL, - - // Thread Local Storage. A descriptor containing pointer to - // code and to argument to get the TLS offset for the symbol. - TLSDESC, - - // Exception Handling helpers. - EH_RETURN, - - // SjLj exception handling setjmp. - EH_SJLJ_SETJMP, - - // SjLj exception handling longjmp. - EH_SJLJ_LONGJMP, - - // SjLj exception handling dispatch. - EH_SJLJ_SETUP_DISPATCH, - - /// Tail call return. See X86TargetLowering::LowerCall for - /// the list of operands. - TC_RETURN, - - // Vector move to low scalar and zero higher vector elements. - VZEXT_MOVL, - - // Vector integer truncate. - VTRUNC, - // Vector integer truncate with unsigned/signed saturation. - VTRUNCUS, - VTRUNCS, - - // Masked version of the above. Used when less than a 128-bit result is - // produced since the mask only applies to the lower elements and can't - // be represented by a select. - // SRC, PASSTHRU, MASK - VMTRUNC, - VMTRUNCUS, - VMTRUNCS, - - // Vector FP extend. - VFPEXT, - VFPEXT_SAE, - VFPEXTS, - VFPEXTS_SAE, - - // Vector FP round. - VFPROUND, - // Convert TWO packed single data to one packed data - VFPROUND2, - VFPROUND2_RND, - VFPROUND_RND, - VFPROUNDS, - VFPROUNDS_RND, - - // Masked version of above. Used for v2f64->v4f32. 
- // SRC, PASSTHRU, MASK - VMFPROUND, - - // 128-bit vector logical left / right shift - VSHLDQ, - VSRLDQ, - - // Vector shift elements - VSHL, - VSRL, - VSRA, - - // Vector variable shift - VSHLV, - VSRLV, - VSRAV, - - // Vector shift elements by immediate - VSHLI, - VSRLI, - VSRAI, - - // Shifts of mask registers. - KSHIFTL, - KSHIFTR, - - // Bit rotate by immediate - VROTLI, - VROTRI, - - // Vector packed double/float comparison. - CMPP, - - // Vector integer comparisons. - PCMPEQ, - PCMPGT, - - // v8i16 Horizontal minimum and position. - PHMINPOS, - - MULTISHIFT, - - /// Vector comparison generating mask bits for fp and - /// integer signed and unsigned data types. - CMPM, - // Vector mask comparison generating mask bits for FP values. - CMPMM, - // Vector mask comparison with SAE for FP values. - CMPMM_SAE, - - // Arithmetic operations with FLAGS results. - ADD, - SUB, - ADC, - SBB, - SMUL, - UMUL, - OR, - XOR, - AND, - - // Bit field extract. - BEXTR, - BEXTRI, - - // Zero High Bits Starting with Specified Bit Position. - BZHI, - - // Parallel extract and deposit. - PDEP, - PEXT, - - // X86-specific multiply by immediate. - MUL_IMM, - - // Vector sign bit extraction. - MOVMSK, - - // Vector bitwise comparisons. - PTEST, - - // Vector packed fp sign bitwise comparisons. - TESTP, - - // OR/AND test for masks. - KORTEST, - KTEST, - - // ADD for masks. - KADD, - - // Several flavors of instructions with vector shuffle behaviors. - // Saturated signed/unnsigned packing. - PACKSS, - PACKUS, - // Intra-lane alignr. - PALIGNR, - // AVX512 inter-lane alignr. - VALIGN, - PSHUFD, - PSHUFHW, - PSHUFLW, - SHUFP, - // VBMI2 Concat & Shift. - VSHLD, - VSHRD, - - // Shuffle Packed Values at 128-bit granularity. - SHUF128, - MOVDDUP, - MOVSHDUP, - MOVSLDUP, - MOVLHPS, - MOVHLPS, - MOVSD, - MOVSS, - MOVSH, - UNPCKL, - UNPCKH, - VPERMILPV, - VPERMILPI, - VPERMI, - VPERM2X128, - - // Variable Permute (VPERM). 
- // Res = VPERMV MaskV, V0 - VPERMV, - - // 3-op Variable Permute (VPERMT2). - // Res = VPERMV3 V0, MaskV, V1 - VPERMV3, - - // Bitwise ternary logic. - VPTERNLOG, - // Fix Up Special Packed Float32/64 values. - VFIXUPIMM, - VFIXUPIMM_SAE, - VFIXUPIMMS, - VFIXUPIMMS_SAE, - // Range Restriction Calculation For Packed Pairs of Float32/64 values. - VRANGE, - VRANGE_SAE, - VRANGES, - VRANGES_SAE, - // Reduce - Perform Reduction Transformation on scalar\packed FP. - VREDUCE, - VREDUCE_SAE, - VREDUCES, - VREDUCES_SAE, - // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. - // Also used by the legacy (V)ROUND intrinsics where we mask out the - // scaling part of the immediate. - VRNDSCALE, - VRNDSCALE_SAE, - VRNDSCALES, - VRNDSCALES_SAE, - // Tests Types Of a FP Values for packed types. - VFPCLASS, - // Tests Types Of a FP Values for scalar types. - VFPCLASSS, - - // Broadcast (splat) scalar or element 0 of a vector. If the operand is - // a vector, this node may change the vector length as part of the splat. - VBROADCAST, - // Broadcast mask to vector. - VBROADCASTM, - - /// SSE4A Extraction and Insertion. - EXTRQI, - INSERTQI, - - // XOP arithmetic/logical shifts. - VPSHA, - VPSHL, - // XOP signed/unsigned integer comparisons. - VPCOM, - VPCOMU, - // XOP packed permute bytes. - VPPERM, - // XOP two source permutation. - VPERMIL2, - - // Vector multiply packed unsigned doubleword integers. - PMULUDQ, - // Vector multiply packed signed doubleword integers. - PMULDQ, - // Vector Multiply Packed UnsignedIntegers with Round and Scale. - MULHRS, - - // Multiply and Add Packed Integers. - VPMADDUBSW, - VPMADDWD, - - // AVX512IFMA multiply and add. - // NOTE: These are different than the instruction and perform - // op0 x op1 + op2. - VPMADD52L, - VPMADD52H, - - // VNNI - VPDPBUSD, - VPDPBUSDS, - VPDPWSSD, - VPDPWSSDS, - - // FMA nodes. - // We use the target independent ISD::FMA for the non-inverted case. 
- FNMADD, - FMSUB, - FNMSUB, - FMADDSUB, - FMSUBADD, - - // FMA with rounding mode. - FMADD_RND, - FNMADD_RND, - FMSUB_RND, - FNMSUB_RND, - FMADDSUB_RND, - FMSUBADD_RND, - - // AVX512-FP16 complex addition and multiplication. - VFMADDC, - VFMADDC_RND, - VFCMADDC, - VFCMADDC_RND, - - VFMULC, - VFMULC_RND, - VFCMULC, - VFCMULC_RND, - - VFMADDCSH, - VFMADDCSH_RND, - VFCMADDCSH, - VFCMADDCSH_RND, - - VFMULCSH, - VFMULCSH_RND, - VFCMULCSH, - VFCMULCSH_RND, - - VPDPBSUD, - VPDPBSUDS, - VPDPBUUD, - VPDPBUUDS, - VPDPBSSD, - VPDPBSSDS, - - VPDPWSUD, - VPDPWSUDS, - VPDPWUSD, - VPDPWUSDS, - VPDPWUUD, - VPDPWUUDS, - - VMINMAX, - VMINMAX_SAE, - VMINMAXS, - VMINMAXS_SAE, - - CVTP2IBS, - CVTP2IUBS, - CVTP2IBS_RND, - CVTP2IUBS_RND, - CVTTP2IBS, - CVTTP2IUBS, - CVTTP2IBS_SAE, - CVTTP2IUBS_SAE, - - MPSADBW, - - VCVT2PH2BF8, - VCVT2PH2BF8S, - VCVT2PH2HF8, - VCVT2PH2HF8S, - VCVTBIASPH2BF8, - VCVTBIASPH2BF8S, - VCVTBIASPH2HF8, - VCVTBIASPH2HF8S, - VCVTPH2BF8, - VCVTPH2BF8S, - VCVTPH2HF8, - VCVTPH2HF8S, - VMCVTBIASPH2BF8, - VMCVTBIASPH2BF8S, - VMCVTBIASPH2HF8, - VMCVTBIASPH2HF8S, - VMCVTPH2BF8, - VMCVTPH2BF8S, - VMCVTPH2HF8, - VMCVTPH2HF8S, - VCVTHF82PH, - - // Compress and expand. - COMPRESS, - EXPAND, - - // Bits shuffle - VPSHUFBITQMB, - - // Convert Unsigned/Integer to Floating-Point Value with rounding mode. - SINT_TO_FP_RND, - UINT_TO_FP_RND, - SCALAR_SINT_TO_FP, - SCALAR_UINT_TO_FP, - SCALAR_SINT_TO_FP_RND, - SCALAR_UINT_TO_FP_RND, - - // Vector float/double to signed/unsigned integer. - CVTP2SI, - CVTP2UI, - CVTP2SI_RND, - CVTP2UI_RND, - // Scalar float/double to signed/unsigned integer. - CVTS2SI, - CVTS2UI, - CVTS2SI_RND, - CVTS2UI_RND, - - // Vector float/double to signed/unsigned integer with truncation. - CVTTP2SI, - CVTTP2UI, - CVTTP2SI_SAE, - CVTTP2UI_SAE, - - // Saturation enabled Vector float/double to signed/unsigned - // integer with truncation. - CVTTP2SIS, - CVTTP2UIS, - CVTTP2SIS_SAE, - CVTTP2UIS_SAE, - // Masked versions of above. Used for v2f64 to v4i32. 
- // SRC, PASSTHRU, MASK - MCVTTP2SIS, - MCVTTP2UIS, - - // Scalar float/double to signed/unsigned integer with truncation. - CVTTS2SI, - CVTTS2UI, - CVTTS2SI_SAE, - CVTTS2UI_SAE, - - // Vector signed/unsigned integer to float/double. - CVTSI2P, - CVTUI2P, - - // Scalar float/double to signed/unsigned integer with saturation. - CVTTS2SIS, - CVTTS2UIS, - CVTTS2SIS_SAE, - CVTTS2UIS_SAE, - - // Masked versions of above. Used for v2f64->v4f32. - // SRC, PASSTHRU, MASK - MCVTP2SI, - MCVTP2UI, - MCVTTP2SI, - MCVTTP2UI, - MCVTSI2P, - MCVTUI2P, - - // Custom handling for FP_TO_xINT_SAT - FP_TO_SINT_SAT, - FP_TO_UINT_SAT, - - // Vector float to bfloat16. - // Convert packed single data to packed BF16 data - CVTNEPS2BF16, - // Masked version of above. - // SRC, PASSTHRU, MASK - MCVTNEPS2BF16, - - // Dot product of BF16/FP16 pairs to accumulated into - // packed single precision. - DPBF16PS, - DPFP16PS, - - // A stack checking function call. On Windows it's _chkstk call. - DYN_ALLOCA, - - // For allocating variable amounts of stack space when using - // segmented stacks. Check if the current stacklet has enough space, and - // falls back to heap allocation if not. - SEG_ALLOCA, - - // For allocating stack space when using stack clash protector. - // Allocation is performed by block, and each block is probed. - PROBED_ALLOCA, - - // Memory barriers. - MFENCE, - - // Get a random integer and indicate whether it is valid in CF. - RDRAND, - - // Get a NIST SP800-90B & C compliant random integer and - // indicate whether it is valid in CF. - RDSEED, - - // Protection keys - // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX. - // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is - // value for ECX. - RDPKRU, - WRPKRU, - - // SSE42 string comparisons. - // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG - // will emit one or two instructions based on which results are used. 
If - // flags and index/mask this allows us to use a single instruction since - // we won't have to pick and opcode for flags. Instead we can rely on the - // DAG to CSE everything and decide at isel. - PCMPISTR, - PCMPESTR, - - // Test if in transactional execution. - XTEST, - - // Conversions between float and half-float. - CVTPS2PH, - CVTPS2PH_SAE, - CVTPH2PS, - CVTPH2PS_SAE, - - // Masked version of above. - // SRC, RND, PASSTHRU, MASK - MCVTPS2PH, - MCVTPS2PH_SAE, - - // Galois Field Arithmetic Instructions - GF2P8AFFINEINVQB, - GF2P8AFFINEQB, - GF2P8MULB, - - // LWP insert record. - LWPINS, - - // User level wait - UMWAIT, - TPAUSE, - - // Enqueue Stores Instructions - ENQCMD, - ENQCMDS, - - // For avx512-vp2intersect - VP2INTERSECT, - - // User level interrupts - testui - TESTUI, - - // Perform an FP80 add after changing precision control in FPCW. - FP80_ADD, - - // Conditional compare instructions - CCMP, - CTEST, - - /// X86 strict FP compare instructions. - FIRST_STRICTFP_OPCODE, - STRICT_FCMP = FIRST_STRICTFP_OPCODE, - STRICT_FCMPS, - - // Vector packed double/float comparison. - STRICT_CMPP, - - /// Vector comparison generating mask bits for fp and - /// integer signed and unsigned data types. - STRICT_CMPM, - - // Vector float/double to signed/unsigned integer with truncation. - STRICT_CVTTP2SI, - STRICT_CVTTP2UI, - - // Vector FP extend. - STRICT_VFPEXT, - - // Vector FP round. - STRICT_VFPROUND, - - // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. - // Also used by the legacy (V)ROUND intrinsics where we mask out the - // scaling part of the immediate. - STRICT_VRNDSCALE, - - // Vector signed/unsigned integer to float/double. - STRICT_CVTSI2P, - STRICT_CVTUI2P, - - // Strict FMA nodes. - STRICT_FNMADD, - STRICT_FMSUB, - STRICT_FNMSUB, - - // Conversions between float and half-float. - STRICT_CVTPS2PH, - STRICT_CVTPH2PS, - - // Perform an FP80 add after changing precision control in FPCW. 
- STRICT_FP80_ADD, - - /// Floating point max and min. - STRICT_FMAX, - STRICT_FMIN, - LAST_STRICTFP_OPCODE = STRICT_FMIN, - - // Compare and swap. - FIRST_MEMORY_OPCODE, - LCMPXCHG_DAG = FIRST_MEMORY_OPCODE, - LCMPXCHG8_DAG, - LCMPXCHG16_DAG, - LCMPXCHG16_SAVE_RBX_DAG, - - /// LOCK-prefixed arithmetic read-modify-write instructions. - /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS) - LADD, - LSUB, - LOR, - LXOR, - LAND, - LBTS, - LBTC, - LBTR, - LBTS_RM, - LBTC_RM, - LBTR_RM, - - /// RAO arithmetic instructions. - /// OUTCHAIN = AADD(INCHAIN, PTR, RHS) - AADD, - AOR, - AXOR, - AAND, - - // Load, scalar_to_vector, and zero extend. - VZEXT_LOAD, - - // extract_vector_elt, store. - VEXTRACT_STORE, - - // scalar broadcast from memory. - VBROADCAST_LOAD, - - // subvector broadcast from memory. - SUBV_BROADCAST_LOAD, - - // Store FP control word into i16 memory. - FNSTCW16m, - - // Load FP control word from i16 memory. - FLDCW16m, - - // Store x87 FPU environment into memory. - FNSTENVm, - - // Load x87 FPU environment from memory. - FLDENVm, - - /// This instruction implements FP_TO_SINT with the - /// integer destination in memory and a FP reg source. This corresponds - /// to the X86::FIST*m instructions and the rounding mode change stuff. It - /// has two inputs (token chain and address) and two outputs (int value - /// and token chain). Memory VT specifies the type to store to. - FP_TO_INT_IN_MEM, - - /// This instruction implements SINT_TO_FP with the - /// integer source in memory and FP reg result. This corresponds to the - /// X86::FILD*m instructions. It has two inputs (token chain and address) - /// and two outputs (FP value and token chain). The integer source type is - /// specified by the memory VT. - FILD, - - /// This instruction implements a fp->int store from FP stack - /// slots. This corresponds to the fist instruction. It takes a - /// chain operand, value to store, address, and glue. The memory VT - /// specifies the type to store as. 
- FIST, - - /// This instruction implements an extending load to FP stack slots. - /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain - /// operand, and ptr to load from. The memory VT specifies the type to - /// load from. - FLD, - - /// This instruction implements a truncating store from FP stack - /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a - /// chain operand, value to store, address, and glue. The memory VT - /// specifies the type to store as. - FST, - - /// These instructions grab the address of the next argument - /// from a va_list. (reads and modifies the va_list in memory) - VAARG_64, - VAARG_X32, - - // Vector truncating store with unsigned/signed saturation - VTRUNCSTOREUS, - VTRUNCSTORES, - // Vector truncating masked store with unsigned/signed saturation - VMTRUNCSTOREUS, - VMTRUNCSTORES, - - // X86 specific gather and scatter - MGATHER, - MSCATTER, - - // Key locker nodes that produce flags. - AESENC128KL, - AESDEC128KL, - AESENC256KL, - AESDEC256KL, - AESENCWIDE128KL, - AESDECWIDE128KL, - AESENCWIDE256KL, - AESDECWIDE256KL, - - /// Compare and Add if Condition is Met. Compare value in operand 2 with - /// value in memory of operand 1. If condition of operand 4 is met, add - /// value operand 3 to m32 and write new value in operand 1. Operand 2 is - /// always updated with the original value from operand 1. - CMPCCXADD, - - // Save xmm argument registers to the stack, according to %al. An operator - // is needed so that this can be expanded with control flow. - VASTART_SAVE_XMM_REGS, - - // Conditional load/store instructions - CLOAD, - CSTORE, - LAST_MEMORY_OPCODE = CSTORE, - }; - } // end namespace X86ISD - namespace X86 { /// Current rounding mode is represented in bits 11:10 of FPSR. 
These /// values are same as corresponding constants for rounding mode used @@ -1186,9 +209,6 @@ namespace llvm { EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; - /// This method returns the name of a target specific DAG node. - const char *getTargetNodeName(unsigned Opcode) const override; - /// Do not merge vector stores after legalization because that may conflict /// with x86-specific store splitting optimizations. bool mergeStoresAfterLegalization(EVT MemVT) const override { diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index a61bbe56d9c26..4e3119f542c4c 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -942,10 +942,10 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, if (Glue.getNode()) RetOps.push_back(Glue); - X86ISD::NodeType opcode = X86ISD::RET_GLUE; + unsigned RetOpcode = X86ISD::RET_GLUE; if (CallConv == CallingConv::X86_INTR) - opcode = X86ISD::IRET; - return DAG.getNode(opcode, dl, MVT::Other, RetOps); + RetOpcode = X86ISD::IRET; + return DAG.getNode(RetOpcode, dl, MVT::Other, RetOps); } bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td index 116986a0fffea..fcf791b1a1f0f 100644 --- a/llvm/lib/Target/X86/X86InstrFragments.td +++ b/llvm/lib/Target/X86/X86InstrFragments.td @@ -141,8 +141,12 @@ def X86fshr : SDNode<"X86ISD::FSHR", SDTIntShiftDOp>; def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>; def X86fcmp : SDNode<"X86ISD::FCMP", SDTX86FCmp>; -def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86FCmp, [SDNPHasChain]>; -def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86FCmp, [SDNPHasChain]>; + +let IsStrictFP = true in { + def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86FCmp, [SDNPHasChain]>; + def X86strict_fcmps : 
SDNode<"X86ISD::STRICT_FCMPS", SDTX86FCmp, [SDNPHasChain]>; +} + def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>; def X86ccmp : SDNode<"X86ISD::CCMP", SDTX86Ccmp>; @@ -790,8 +794,11 @@ def SDTX86CwdLoad : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDTX86FPEnv : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def X86fp80_add : SDNode<"X86ISD::FP80_ADD", SDTFPBinOp, [SDNPCommutative]>; + +let IsStrictFP = true in def X86strict_fp80_add : SDNode<"X86ISD::STRICT_FP80_ADD", SDTFPBinOp, [SDNPHasChain,SDNPCommutative]>; + def any_X86fp80_add : PatFrags<(ops node:$lhs, node:$rhs), [(X86strict_fp80_add node:$lhs, node:$rhs), (X86fp80_add node:$lhs, node:$rhs)]>; diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 5321ecf0c1b2c..35c4d89a1b231 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -46,10 +46,12 @@ def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp, def X86fmaxc : SDNode<"X86ISD::FMAXC", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; -def X86strict_fmin : SDNode<"X86ISD::STRICT_FMIN", SDTFPBinOp, - [SDNPHasChain]>; -def X86strict_fmax : SDNode<"X86ISD::STRICT_FMAX", SDTFPBinOp, - [SDNPHasChain]>; +let IsStrictFP = true in { + def X86strict_fmin : SDNode<"X86ISD::STRICT_FMIN", SDTFPBinOp, + [SDNPHasChain]>; + def X86strict_fmax : SDNode<"X86ISD::STRICT_FMAX", SDTFPBinOp, + [SDNPHasChain]>; +} def X86any_fmin : PatFrags<(ops node:$src1, node:$src2), [(X86strict_fmin node:$src1, node:$src2), @@ -146,6 +148,7 @@ def X86vfpext : SDNode<"X86ISD::VFPEXT", SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisFP<1>, SDTCisVec<1>]>>; +let IsStrictFP = true in def X86strict_vfpext : SDNode<"X86ISD::STRICT_VFPEXT", SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisFP<1>, SDTCisVec<1>]>, @@ -165,6 +168,7 @@ def X86vfpround2 : SDNode<"X86ISD::VFPROUND2", SDTCisSameAs<1, 2>, SDTCisOpSmallerThanOp<0, 1>]>>; +let IsStrictFP = true in def X86strict_vfpround: 
SDNode<"X86ISD::STRICT_VFPROUND", SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisFP<1>, SDTCisVec<1>, @@ -215,7 +219,10 @@ def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>; def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>; def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>; + +let IsStrictFP = true in def X86strict_cmpp : SDNode<"X86ISD::STRICT_CMPP", SDTX86VFCMP, [SDNPHasChain]>; + def X86any_cmpp : PatFrags<(ops node:$src1, node:$src2, node:$src3), [(X86strict_cmpp node:$src1, node:$src2, node:$src3), (X86cmpp node:$src1, node:$src2, node:$src3)]>; @@ -235,7 +242,10 @@ def X86CmpMaskCCScalar : def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>; def X86cmpmm : SDNode<"X86ISD::CMPMM", X86MaskCmpMaskCC>; + +let IsStrictFP = true in def X86strict_cmpm : SDNode<"X86ISD::STRICT_CMPM", X86CmpMaskCC, [SDNPHasChain]>; + def X86any_cmpm : PatFrags<(ops node:$src1, node:$src2, node:$src3), [(X86strict_cmpm node:$src1, node:$src2, node:$src3), (X86cmpm node:$src1, node:$src2, node:$src3)]>; @@ -494,8 +504,11 @@ def X86VRangeSAE : SDNode<"X86ISD::VRANGE_SAE", SDTFPBinOpImm>; def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImm>; def X86VReduceSAE : SDNode<"X86ISD::VREDUCE_SAE", SDTFPUnaryOpImm>; def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImm>; + +let IsStrictFP = true in def X86strict_VRndScale : SDNode<"X86ISD::STRICT_VRNDSCALE", SDTFPUnaryOpImm, [SDNPHasChain]>; + def X86any_VRndScale : PatFrags<(ops node:$src1, node:$src2), [(X86strict_VRndScale node:$src1, node:$src2), (X86VRndScale node:$src1, node:$src2)]>; @@ -554,17 +567,26 @@ def X86fgetexps : SDNode<"X86ISD::FGETEXPS", SDTFPBinOp>; def X86fgetexpSAEs : SDNode<"X86ISD::FGETEXPS_SAE", SDTFPBinOp>; def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>; + +let IsStrictFP = true in def X86strict_Fnmadd : SDNode<"X86ISD::STRICT_FNMADD", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>; + def X86any_Fnmadd : PatFrags<(ops node:$src1, node:$src2, 
node:$src3), [(X86strict_Fnmadd node:$src1, node:$src2, node:$src3), (X86Fnmadd node:$src1, node:$src2, node:$src3)]>; def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp, [SDNPCommutative]>; + +let IsStrictFP = true in def X86strict_Fmsub : SDNode<"X86ISD::STRICT_FMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>; + def X86any_Fmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3), [(X86strict_Fmsub node:$src1, node:$src2, node:$src3), (X86Fmsub node:$src1, node:$src2, node:$src3)]>; def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp, [SDNPCommutative]>; + +let IsStrictFP = true in def X86strict_Fnmsub : SDNode<"X86ISD::STRICT_FNMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>; + def X86any_Fnmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3), [(X86strict_Fnmsub node:$src1, node:$src2, node:$src3), (X86Fnmsub node:$src1, node:$src2, node:$src3)]>; @@ -709,8 +731,12 @@ def X86cvtp2UIntRnd : SDNode<"X86ISD::CVTP2UI_RND", SDTFloatToIntRnd>; // cvtt fp-to-int staff def X86cvttp2si : SDNode<"X86ISD::CVTTP2SI", SDTFloatToInt>; def X86cvttp2ui : SDNode<"X86ISD::CVTTP2UI", SDTFloatToInt>; -def X86strict_cvttp2si : SDNode<"X86ISD::STRICT_CVTTP2SI", SDTFloatToInt, [SDNPHasChain]>; -def X86strict_cvttp2ui : SDNode<"X86ISD::STRICT_CVTTP2UI", SDTFloatToInt, [SDNPHasChain]>; + +let IsStrictFP = true in { + def X86strict_cvttp2si : SDNode<"X86ISD::STRICT_CVTTP2SI", SDTFloatToInt, [SDNPHasChain]>; + def X86strict_cvttp2ui : SDNode<"X86ISD::STRICT_CVTTP2UI", SDTFloatToInt, [SDNPHasChain]>; +} + def X86any_cvttp2si : PatFrags<(ops node:$src), [(X86strict_cvttp2si node:$src), (X86cvttp2si node:$src)]>; @@ -720,8 +746,12 @@ def X86any_cvttp2ui : PatFrags<(ops node:$src), def X86VSintToFP : SDNode<"X86ISD::CVTSI2P", SDTVintToFP>; def X86VUintToFP : SDNode<"X86ISD::CVTUI2P", SDTVintToFP>; -def X86strict_VSintToFP : SDNode<"X86ISD::STRICT_CVTSI2P", SDTVintToFP, [SDNPHasChain]>; -def X86strict_VUintToFP : SDNode<"X86ISD::STRICT_CVTUI2P", SDTVintToFP, 
[SDNPHasChain]>; + +let IsStrictFP = true in { + def X86strict_VSintToFP : SDNode<"X86ISD::STRICT_CVTSI2P", SDTVintToFP, [SDNPHasChain]>; + def X86strict_VUintToFP : SDNode<"X86ISD::STRICT_CVTUI2P", SDTVintToFP, [SDNPHasChain]>; +} + def X86any_VSintToFP : PatFrags<(ops node:$src), [(X86strict_VSintToFP node:$src), (X86VSintToFP node:$src)]>; @@ -761,8 +791,11 @@ def X86mcvttp2uis : SDNode<"X86ISD::MCVTTP2UIS", SDTMFloatToInt>; def SDTcvtph2ps : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>, SDTCVecEltisVT<1, i16>]>; def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS", SDTcvtph2ps>; + +let IsStrictFP = true in def X86strict_cvtph2ps : SDNode<"X86ISD::STRICT_CVTPH2PS", SDTcvtph2ps, [SDNPHasChain]>; + def X86any_cvtph2ps : PatFrags<(ops node:$src), [(X86strict_cvtph2ps node:$src), (X86cvtph2ps node:$src)]>; @@ -773,8 +806,11 @@ def SDTcvtps2ph : SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>, SDTCVecEltisVT<1, f32>, SDTCisVT<2, i32>]>; def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH", SDTcvtps2ph>; + +let IsStrictFP = true in def X86strict_cvtps2ph : SDNode<"X86ISD::STRICT_CVTPS2PH", SDTcvtps2ph, [SDNPHasChain]>; + def X86any_cvtps2ph : PatFrags<(ops node:$src1, node:$src2), [(X86strict_cvtps2ph node:$src1, node:$src2), (X86cvtps2ph node:$src1, node:$src2)]>; diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp index aba62c36546f9..11e45616653ac 100644 --- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "X86SelectionDAGInfo.h" -#include "X86ISelLowering.h" #include "X86InstrInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" @@ -19,6 +18,9 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLowering.h" +#define GET_SDNODE_DESC +#include "X86GenSDNodeInfo.inc" + using namespace llvm; #define DEBUG_TYPE "x86-selectiondag-info" @@ -27,14 +29,77 @@ static 
cl::opt UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false), cl::desc("Use fast short rep mov in memcpy lowering")); +X86SelectionDAGInfo::X86SelectionDAGInfo() + : SelectionDAGGenTargetInfo(X86GenSDNodeInfo) {} + +const char *X86SelectionDAGInfo::getTargetNodeName(unsigned Opcode) const { +#define NODE_NAME_CASE(NODE) \ + case X86ISD::NODE: \ + return "X86ISD::" #NODE; + + // These nodes don't have corresponding entries in *.td files yet. + switch (static_cast(Opcode)) { + NODE_NAME_CASE(POP_FROM_X87_REG) + NODE_NAME_CASE(GlobalBaseReg) + NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG) + NODE_NAME_CASE(PCMPESTR) + NODE_NAME_CASE(PCMPISTR) + NODE_NAME_CASE(MGATHER) + NODE_NAME_CASE(MSCATTER) + NODE_NAME_CASE(AESENCWIDE128KL) + NODE_NAME_CASE(AESDECWIDE128KL) + NODE_NAME_CASE(AESENCWIDE256KL) + NODE_NAME_CASE(AESDECWIDE256KL) + } +#undef NODE_NAME_CASE + + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); +} + bool X86SelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const { - return Opcode >= X86ISD::FIRST_MEMORY_OPCODE && - Opcode <= X86ISD::LAST_MEMORY_OPCODE; + // These nodes don't have corresponding entries in *.td files yet. 
+ if (Opcode >= X86ISD::FIRST_MEMORY_OPCODE && + Opcode <= X86ISD::LAST_MEMORY_OPCODE) + return true; + + return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode); } -bool X86SelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const { - return Opcode >= X86ISD::FIRST_STRICTFP_OPCODE && - Opcode <= X86ISD::LAST_STRICTFP_OPCODE; +void X86SelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const { + switch (N->getOpcode()) { + default: + break; + case X86ISD::VP2INTERSECT: + // invalid number of results; expected 1, got 2 + case X86ISD::VTRUNCSTOREUS: + case X86ISD::VTRUNCSTORES: + case X86ISD::FSETCCM_SAE: + // invalid number of operands; expected 3, got 4 + case X86ISD::CVTPH2PS: + case X86ISD::CVTTP2SI_SAE: + case X86ISD::CVTTP2UI_SAE: + case X86ISD::CVTTP2IBS_SAE: + // invalid number of operands; expected 1, got 2 + case X86ISD::CMPMM_SAE: + // invalid number of operands; expected 4, got 5 + case X86ISD::CMPMM: + case X86ISD::FSETCCM: + // operand #2 must have type i8, but has type i32 + case X86ISD::CALL: + case X86ISD::NT_BRIND: + // operand #1 must have type i32 (iPTR), but has type i64 + case X86ISD::ADD: + // result #1 must have type i32, but has type i128 + case X86ISD::INSERTQI: + case X86ISD::EXTRQI: + // result #0 must have type v2i64, but has type v16i8/v8i16 + case X86ISD::CMPCCXADD: + // operand #4 must have type i8, but has type i32 + return; + } + + SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); } /// Returns the best type to use with repmovs/repstos depending on alignment. 
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.h b/llvm/lib/Target/X86/X86SelectionDAGInfo.h index e77e16bab830d..19c5986982614 100644 --- a/llvm/lib/Target/X86/X86SelectionDAGInfo.h +++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.h @@ -15,20 +15,68 @@ #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM +#include "X86GenSDNodeInfo.inc" + namespace llvm { +namespace X86ISD { + +enum NodeType : unsigned { + /// The same as ISD::CopyFromReg except that this node makes it explicit + /// that it may lower to an x87 FPU stack pop. Optimizations should be more + /// cautious when handling this node than a normal CopyFromReg to avoid + /// removing a required FPU stack pop. A key requirement is optimizations + /// should not optimize any users of a chain that contains a + /// POP_FROM_X87_REG to use a chain from a point earlier than the + /// POP_FROM_X87_REG (which may remove a required FPU stack pop). + POP_FROM_X87_REG = X86ISD::GENERATED_OPCODE_END, + + /// On Darwin, this node represents the result of the popl + /// at function entry, used for PIC code. + GlobalBaseReg, + + // SSE42 string comparisons. + // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG + // will emit one or two instructions based on which results are used. If + // flags and index/mask this allows us to use a single instruction since + // we won't have to pick and opcode for flags. Instead we can rely on the + // DAG to CSE everything and decide at isel. + PCMPISTR, + PCMPESTR, + + // Compare and swap. + FIRST_MEMORY_OPCODE, + LCMPXCHG16_SAVE_RBX_DAG = FIRST_MEMORY_OPCODE, -class X86SelectionDAGInfo : public SelectionDAGTargetInfo { + // X86 specific gather and scatter + MGATHER, + MSCATTER, + + // Key locker nodes that produce flags. 
+ AESENCWIDE128KL, + AESDECWIDE128KL, + AESENCWIDE256KL, + AESDECWIDE256KL, + LAST_MEMORY_OPCODE = AESDECWIDE256KL, +}; + +} // namespace X86ISD + +class X86SelectionDAGInfo : public SelectionDAGGenTargetInfo { /// Returns true if it is possible for the base register to conflict with the /// given set of clobbers for a memory intrinsic. bool isBaseRegConflictPossible(SelectionDAG &DAG, ArrayRef ClobberSet) const; public: - explicit X86SelectionDAGInfo() = default; + X86SelectionDAGInfo(); + + const char *getTargetNodeName(unsigned Opcode) const override; bool isTargetMemoryOpcode(unsigned Opcode) const override; - bool isTargetStrictFPOpcode(unsigned Opcode) const override; + void verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const override; SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, @@ -44,6 +92,6 @@ class X86SelectionDAGInfo : public SelectionDAGTargetInfo { MachinePointerInfo SrcPtrInfo) const override; }; -} +} // namespace llvm #endif From 8f06fa392ab8a0b3c637d3c7589254863d8282be Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Thu, 12 Dec 2024 01:59:14 +0300 Subject: [PATCH 2/4] Generic bugs --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 3 +-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c2b4c19846316..cd0137532aaa5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1955,7 +1955,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, SDValue SelectionDAG::getJumpTableDebugInfo(int JTI, SDValue Chain, const SDLoc &DL) { EVT PTy = getTargetLoweringInfo().getPointerTy(getDataLayout()); - return getNode(ISD::JUMP_TABLE_DEBUG_INFO, DL, MVT::Glue, Chain, + 
return getNode(ISD::JUMP_TABLE_DEBUG_INFO, DL, MVT::Other, Chain, getTargetConstant(static_cast(JTI), DL, PTy, true)); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4f13f3b128ea4..42f308f77f76a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3186,8 +3186,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, Guard, GuardVal, ISD::SETNE); // If the guard/stackslot do not equal, branch to failure MBB. - SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, - MVT::Other, GuardVal.getOperand(0), + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); // Otherwise branch to success MBB. SDValue Br = DAG.getNode(ISD::BR, dl, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 5bed32db528d6..7fc14d592d21e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2717,7 +2717,7 @@ getSimpleVT(const unsigned char *MatcherTable, unsigned &MatcherIndex) { void SelectionDAGISel::Select_JUMP_TABLE_DEBUG_INFO(SDNode *N) { SDLoc dl(N); - CurDAG->SelectNodeTo(N, TargetOpcode::JUMP_TABLE_DEBUG_INFO, MVT::Glue, + CurDAG->SelectNodeTo(N, TargetOpcode::JUMP_TABLE_DEBUG_INFO, MVT::Other, CurDAG->getTargetConstant(N->getConstantOperandVal(1), dl, MVT::i64, true)); } From 08a591b3e03748ffb1b0201402dd72e858ea7298 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Mon, 17 Nov 2025 21:26:05 +0300 Subject: [PATCH 3/4] Move comments --- llvm/lib/Target/X86/X86InstrCompiler.td | 3 +- llvm/lib/Target/X86/X86InstrFragments.td | 155 ++++++++- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 335 +++++++++++++++---- llvm/lib/Target/X86/X86InstrMMX.td | 3 + llvm/lib/Target/X86/X86InstrRAOINT.td | 2 + 
llvm/lib/Target/X86/X86InstrSSE.td | 2 + llvm/lib/Target/X86/X86InstrTSX.td | 1 + 7 files changed, 436 insertions(+), 65 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index ec31675731b79..3a707c8d0ae99 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -870,7 +870,8 @@ let Predicates = [UseIncDec, In64BitMode] in { def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>; } -// Atomic bit test. +/// LOCK-prefixed arithmetic read-modify-write instructions. +/// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS) def X86LBTest : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, i8>, SDTCisVT<3, i32>]>; def x86bts : SDNode<"X86ISD::LBTS", X86LBTest, diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td index fcf791b1a1f0f..254509c191979 100644 --- a/llvm/lib/Target/X86/X86InstrFragments.td +++ b/llvm/lib/Target/X86/X86InstrFragments.td @@ -131,47 +131,82 @@ def SDTX86Cmpccxadd : SDTypeProfile<1, 4, [SDTCisSameAs<0, 2>, SDTCisPtrTy<1>, SDTCisSameAs<2, 3>, SDTCisVT<4, i8>]>; +// Memory barriers. def X86MFence : SDNode<"X86ISD::MFENCE", SDTNone, [SDNPHasChain]>; - +// Bit scan forward. def X86bsf : SDNode<"X86ISD::BSF", SDTBinaryArithWithFlags>; + +// Bit scan reverse. def X86bsr : SDNode<"X86ISD::BSR", SDTBinaryArithWithFlags>; + +// X86 funnel/double shift i16 instructions. These correspond to +// X86::SHLDW and X86::SHRDW instructions which have different amt +// modulo rules to generic funnel shifts. +// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD. def X86fshl : SDNode<"X86ISD::FSHL", SDTIntShiftDOp>; def X86fshr : SDNode<"X86ISD::FSHR", SDTIntShiftDOp>; +// X86 compare and logical compare instructions. def X86cmp : SDNode<"X86ISD::CMP" , SDTX86CmpTest>; def X86fcmp : SDNode<"X86ISD::FCMP", SDTX86FCmp>; let IsStrictFP = true in { + /// X86 strict FP compare instructions. 
def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86FCmp, [SDNPHasChain]>; def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86FCmp, [SDNPHasChain]>; } +// X86 bit-test instructions. def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>; +// Conditional compare instructions def X86ccmp : SDNode<"X86ISD::CCMP", SDTX86Ccmp>; def X86ctest : SDNode<"X86ISD::CTEST", SDTX86Ccmp>; +// Conditional load/store instructions def X86cload : SDNode<"X86ISD::CLOAD", SDTX86Cload, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def X86cstore : SDNode<"X86ISD::CSTORE", SDTX86Cstore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +// X86 conditional moves. Operand 0 and operand 1 are the two values +// to select from. Operand 2 is the condition code, and operand 3 is the +// flag operand produced by a CMP or TEST instruction. def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>; + +// X86 conditional branches. Operand 0 is the chain operand, operand 1 +// is the block to branch if condition is true, operand 2 is the +// condition code, and operand 3 is the flag operand produced by a CMP +// or TEST instruction. def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond, [SDNPHasChain]>; + +// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS +// operand, usually produced by a CMP instruction. def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>; + +// Same as SETCC except it's materialized with a sbb and the value is all +// one's or all zero's. def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>; +// Get a random integer and indicate whether it is valid in CF. def X86rdrand : SDNode<"X86ISD::RDRAND", SDTX86rdrand, [SDNPHasChain, SDNPSideEffect]>; +// Get a NIST SP800-90B & C compliant random integer and +// indicate whether it is valid in CF. def X86rdseed : SDNode<"X86ISD::RDSEED", SDTX86rdrand, [SDNPHasChain, SDNPSideEffect]>; +// Protection keys +// RDPKRU - Operand 0 is chain. Operand 1 is value for ECX. +// WRPKRU - Operand 0 is chain. 
Operand 1 is value for EDX. Operand 2 is +// value for ECX. def X86rdpkru : SDNode<"X86ISD::RDPKRU", SDTX86rdpkru, [SDNPHasChain, SDNPSideEffect]>; def X86wrpkru : SDNode<"X86ISD::WRPKRU", SDTX86wrpkru, [SDNPHasChain, SDNPSideEffect]>; +// Compare and swap. def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; @@ -182,15 +217,24 @@ def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86cas16pair, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +// Return with a glue operand. Operand 0 is the chain operand, operand +// 1 is the number of bytes of stack to pop. def X86retglue : SDNode<"X86ISD::RET_GLUE", SDTX86Ret, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +// Return from interrupt. Operand 0 is the number of bytes to pop. def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret, [SDNPHasChain, SDNPOptInGlue]>; +// Save xmm argument registers to the stack, according to %al. An operator +// is needed so that this can be expanded with control flow. def X86vastart_save_xmm_regs : SDNode<"X86ISD::VASTART_SAVE_XMM_REGS", SDT_X86VASTART_SAVE_XMM_REGS, [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPVariadic]>; + +// These instructions grab the address of the next argument +// from a va_list. (reads and modifies the va_list in memory) def X86vaarg64 : SDNode<"X86ISD::VAARG_64", SDT_X86VAARG, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, @@ -199,6 +243,7 @@ def X86vaargx32 : SDNode<"X86ISD::VAARG_X32", SDT_X86VAARG, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; + def X86callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart, [SDNPHasChain, SDNPOutGlue]>; @@ -206,63 +251,111 @@ def X86callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_X86CallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +// These operations represent an abstract X86 call +// instruction, which includes a bunch of information. 
In particular the +// operands of these node are: +// +// #0 - The incoming token chain +// #1 - The callee +// #2 - The number of arg bytes the caller pushes on the stack. +// #3 - The number of arg bytes the callee pops off the stack. +// #4 - The value to pass in AL/AX/EAX (optional) +// #5 - The value to pass in DL/DX/EDX (optional) +// +// The result values of these nodes are: +// +// #0 - The outgoing token chain +// #1 - The first register result value (optional) +// #2 - The second register result value (optional) +// def X86call : SDNode<"X86ISD::CALL", SDT_X86Call, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; +// Pseudo for a OBJC call that gets emitted together with a special +// marker instruction. def X86call_rvmarker : SDNode<"X86ISD::CALL_RVMARKER", SDT_X86Call, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; +// Pseudo for a call to an imported function to ensure the correct machine +// instruction is emitted for Import Call Optimization. def X86imp_call : SDNode<"X86ISD::IMP_CALL", SDT_X86Call, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; +// Same as call except it adds the NoTrack prefix. def X86NoTrackCall : SDNode<"X86ISD::NT_CALL", SDT_X86Call, [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; + +// BRIND node with NoTrack prefix. Operand 0 is the chain operand and +// operand 1 is the target address. def X86NoTrackBrind : SDNode<"X86ISD::NT_BRIND", SDT_X86NtBrind, [SDNPHasChain]>; +// Repeat fill, corresponds to X86::REP_STOSx. def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore]>; + +// Repeat move, corresponds to X86::REP_MOVSx. def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad]>; +// A wrapper node for TargetConstantPool, TargetJumpTable, +// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress, +// MCSymbol and TargetBlockAddress. 
def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>; + +// Special wrapper used under X86-64 PIC mode for RIP +// relative displacements. def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>; def X86RecoverFrameAlloc : SDNode<"ISD::LOCAL_RECOVER", SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisInt<1>]>>; +// Thread Local Storage. def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +// Thread Local Storage. A call to get the start address +// of the TLS block for the current module. def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +// Thread Local Storage. A descriptor containing pointer to +// code and to argument to get the TLS offset for the symbol. def X86tlsdesc : SDNode<"X86ISD::TLSDESC", SDT_X86TLSADDR, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +// Exception Handling helpers. def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, [SDNPHasChain]>; +// SjLj exception handling setjmp. def X86eh_sjlj_setjmp : SDNode<"X86ISD::EH_SJLJ_SETJMP", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>, [SDNPHasChain, SDNPSideEffect]>; + +// SjLj exception handling longjmp. def X86eh_sjlj_longjmp : SDNode<"X86ISD::EH_SJLJ_LONGJMP", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPSideEffect]>; + +// SjLj exception handling dispatch. def X86eh_sjlj_setup_dispatch : SDNode<"X86ISD::EH_SJLJ_SETUP_DISPATCH", SDTypeProfile<0, 0, []>, [SDNPHasChain, SDNPSideEffect]>; +// Tail call return. See X86TargetLowering::LowerCall for +// the list of operands. def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +// Arithmetic operations with FLAGS results. 
def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags, [SDNPCommutative]>; def X86sub_flag : SDNode<"X86ISD::SUB", SDTBinaryArithWithFlags>; @@ -280,6 +373,8 @@ def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags, def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags, [SDNPCommutative]>; +/// LOCK-prefixed arithmetic read-modify-write instructions. +/// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS) def X86lock_add : SDNode<"X86ISD::LADD", SDTLockBinaryArithWithFlags, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; @@ -296,33 +391,47 @@ def X86lock_and : SDNode<"X86ISD::LAND", SDTLockBinaryArithWithFlags, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +// Bit field extract. def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>; def X86bextri : SDNode<"X86ISD::BEXTRI", SDTIntBinOp>; +// Zero High Bits Starting with Specified Bit Position. def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntBinOp>; +// Parallel extract and deposit. def X86pdep : SDNode<"X86ISD::PDEP", SDTIntBinOp>; def X86pext : SDNode<"X86ISD::PEXT", SDTIntBinOp>; +// X86-specific multiply by immediate. def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; +// A stack checking function call. On Windows it's _chkstk call. def X86DynAlloca : SDNode<"X86ISD::DYN_ALLOCA", SDT_X86DYN_ALLOCA, [SDNPHasChain, SDNPOutGlue]>; +// For allocating variable amounts of stack space when using +// segmented stacks. Check if the current stacklet has enough space, and +// falls back to heap allocation if not. def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA, [SDNPHasChain]>; +// For allocating stack space when using stack clash protector. +// Allocation is performed by block, and each block is probed. def X86ProbedAlloca : SDNode<"X86ISD::PROBED_ALLOCA", SDT_X86PROBED_ALLOCA, [SDNPHasChain]>; +// Thread Local Storage. When calling to an OS provided +// thunk at the address from an earlier relocation. 
def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +// LWP insert record. def X86lwpins : SDNode<"X86ISD::LWPINS", SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPSideEffect]>; +// User level wait def X86umwait : SDNode<"X86ISD::UMWAIT", SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisInt<1>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>, @@ -333,14 +442,18 @@ def X86tpause : SDNode<"X86ISD::TPAUSE", SDTCisVT<2, i32>, SDTCisVT<3, i32>]>, [SDNPHasChain, SDNPSideEffect]>; +// Enqueue Stores Instructions def X86enqcmd : SDNode<"X86ISD::ENQCMD", SDT_X86ENQCMD, [SDNPHasChain, SDNPSideEffect]>; def X86enqcmds : SDNode<"X86ISD::ENQCMDS", SDT_X86ENQCMD, [SDNPHasChain, SDNPSideEffect]>; + +// User level interrupts - testui def X86testui : SDNode<"X86ISD::TESTUI", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>, [SDNPHasChain, SDNPSideEffect]>; +// Key locker nodes that produce flags. def X86aesenc128kl : SDNode<"X86ISD::AESENC128KL", SDT_X86AESENCDECKL, [SDNPHasChain, SDNPMayLoad, SDNPSideEffect, SDNPMemOperand]>; @@ -354,6 +467,10 @@ def X86aesdec256kl : SDNode<"X86ISD::AESDEC256KL", SDT_X86AESENCDECKL, [SDNPHasChain, SDNPMayLoad, SDNPSideEffect, SDNPMemOperand]>; +// Compare and Add if Condition is Met. Compare value in operand 2 with +// value in memory of operand 1. If condition of operand 4 is met, add +// value operand 3 to m32 and write new value in operand 1. Operand 2 is +// always updated with the original value from operand 1. def X86cmpccxadd : SDNode<"X86ISD::CMPCCXADD", SDTX86Cmpccxadd, [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>; @@ -793,8 +910,10 @@ def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDTX86CwdLoad : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDTX86FPEnv : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; +// Perform an FP80 add after changing precision control in FPCW. 
def X86fp80_add : SDNode<"X86ISD::FP80_ADD", SDTFPBinOp, [SDNPCommutative]>; +// Perform an FP80 add after changing precision control in FPCW. let IsStrictFP = true in def X86strict_fp80_add : SDNode<"X86ISD::STRICT_FP80_ADD", SDTFPBinOp, [SDNPHasChain,SDNPCommutative]>; @@ -803,25 +922,59 @@ def any_X86fp80_add : PatFrags<(ops node:$lhs, node:$rhs), [(X86strict_fp80_add node:$lhs, node:$rhs), (X86fp80_add node:$lhs, node:$rhs)]>; +// This instruction implements an extending load to FP stack slots. +// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain +// operand, and ptr to load from. The memory VT specifies the type to +// load from. def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// This instruction implements a truncating store from FP stack +// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a +// chain operand, value to store, address, and glue. The memory VT +// specifies the type to store as. def X86fst : SDNode<"X86ISD::FST", SDTX86Fst, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// This instruction implements SINT_TO_FP with the +// integer source in memory and FP reg result. This corresponds to the +// X86::FILD*m instructions. It has two inputs (token chain and address) +// and two outputs (FP value and token chain). The integer source type is +// specified by the memory VT. def X86fild : SDNode<"X86ISD::FILD", SDTX86Fild, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// This instruction implements a fp->int store from FP stack +// slots. This corresponds to the fist instruction. It takes a +// chain operand, value to store, address, and glue. The memory VT +// specifies the type to store as. def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// This instruction implements FP_TO_SINT with the +// integer destination in memory and a FP reg source. 
This corresponds +// to the X86::FIST*m instructions and the rounding mode change stuff. It +// has two inputs (token chain and address) and two outputs (int value +// and token chain). Memory VT specifies the type to store to. def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// Store FP control word into i16 memory. def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore, [SDNPHasChain, SDNPMayStore, SDNPSideEffect, SDNPMemOperand]>; + +// Load FP control word from i16 memory. def X86fp_cwd_set16 : SDNode<"X86ISD::FLDCW16m", SDTX86CwdLoad, [SDNPHasChain, SDNPMayLoad, SDNPSideEffect, SDNPMemOperand]>; + +// Store x87 FPU environment into memory. def X86fpenv_get : SDNode<"X86ISD::FNSTENVm", SDTX86FPEnv, [SDNPHasChain, SDNPMayStore, SDNPSideEffect, SDNPMemOperand]>; + +// Load x87 FPU environment from memory. def X86fpenv_set : SDNode<"X86ISD::FLDENVm", SDTX86FPEnv, [SDNPHasChain, SDNPMayLoad, SDNPSideEffect, SDNPMemOperand]>; diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 35c4d89a1b231..b4b0a37cdd228 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -14,10 +14,13 @@ // MMX specific DAG Nodes. //===----------------------------------------------------------------------===// -// Low word of MMX to GPR. +// Copies a 32-bit value from the low word of a MMX +// vector to a GPR. def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>; -// GPR to low word of MMX. + +// Copies a GPR into the low 32-bit word of a MMX vector +// and zero out the high word. 
def MMX_X86movw2d : SDNode<"X86ISD::MMX_MOVW2D", SDTypeProfile<1, 1, [SDTCisVT<0, x86mmx>, SDTCisVT<1, i32>]>>; @@ -35,8 +38,11 @@ def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; +/// Floating point max and min. def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; + +// Scalar intrinsic floating point max and min. def X86fmins : SDNode<"X86ISD::FMINS", SDTFPBinOp>; def X86fmaxs : SDNode<"X86ISD::FMAXS", SDTFPBinOp>; @@ -47,6 +53,7 @@ def X86fmaxc : SDNode<"X86ISD::FMAXC", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; let IsStrictFP = true in { + /// Floating point max and min. def X86strict_fmin : SDNode<"X86ISD::STRICT_FMIN", SDTFPBinOp, [SDNPHasChain]>; def X86strict_fmax : SDNode<"X86ISD::STRICT_FMAX", SDTFPBinOp, @@ -60,70 +67,128 @@ def X86any_fmax : PatFrags<(ops node:$src1, node:$src2), [(X86strict_fmax node:$src1, node:$src2), (X86fmax node:$src1, node:$src2)]>; +// Bitwise logical AND of floating point values. This corresponds +// to X86::ANDPS or X86::ANDPD. def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; + +// Bitwise logical OR of floating point values. This corresponds +// to X86::ORPS or X86::ORPD. def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; + +// Bitwise logical XOR of floating point values. This corresponds +// to X86::XORPS or X86::XORPD. def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; + +// Bitwise logical ANDNOT of floating point values. This +// corresponds to X86::ANDNPS or X86::ANDNPD. def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp>; + +// Floating point reciprocal-sqrt and reciprocal approximation. +// Note that these typically require refinement +// in order to obtain suitable precision. 
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; + +// Floating point horizontal add/sub. def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>; def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>; + +// Integer horizontal add/sub. def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>; def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>; + +// X86 compare and logical compare instructions. def X86comi : SDNode<"X86ISD::COMI", SDTX86FCmp>; def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86FCmp>; + +// X86 compare with Intrinsics similar to COMI. def X86comi512 : SDNode<"X86ISD::COMX", SDTX86FCmp>; def X86ucomi512 : SDNode<"X86ISD::UCOMX", SDTX86FCmp>; + def SDTX86Cmps : SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; + +// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD. +// Operands are two FP values to compare; result is a mask of +// 0s or 1s. Generally DTRT for C/C++ with NaNs. def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>; +// Shuffle 16 8-bit values within a vector. def X86pshufb : SDNode<"X86ISD::PSHUFB", SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; + +// Compute Sum of Absolute Differences. def X86psadbw : SDNode<"X86ISD::PSADBW", SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>, SDTCVecEltisVT<1, i8>, SDTCisSameSizeAs<0,1>, SDTCisSameAs<1,2>]>, [SDNPCommutative]>; + def SDTX86PSADBW : SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>, SDTCVecEltisVT<1, i8>, SDTCisSameSizeAs<0,1>, SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>; + +// Compute Double Block Packed Sum-Absolute-Differences def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW", SDTX86PSADBW>; + +// Bitwise Logical AND NOT of Packed FP values. 
def X86andnp : SDNode<"X86ISD::ANDNP", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; + def X86multishift : SDNode<"X86ISD::MULTISHIFT", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>>; def X86pextrb : SDNode<"X86ISD::PEXTRB", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v16i8>, SDTCisVT<2, i8>]>>; + +// Extract a 16-bit value from a vector and zero extend it to +// i32, corresponds to X86::PEXTRW. def X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v8i16>, SDTCisVT<2, i8>]>>; + +// Insert the lower 8-bits of a 32-bit value to a vector, +// corresponds to X86::PINSRB. def X86pinsrb : SDNode<"X86ISD::PINSRB", SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisVT<2, i32>, SDTCisVT<3, i8>]>>; + +// Insert the lower 16-bits of a 32-bit value to a vector, +// corresponds to X86::PINSRW. def X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>, SDTCisVT<2, i32>, SDTCisVT<3, i8>]>>; + +// Insert any element of a 4 x float vector into any element +// of a destination 4 x float vector. def X86insertps : SDNode<"X86ISD::INSERTPS", SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>, SDTCisVT<2, v4f32>, SDTCisVT<3, i8>]>>; + +// Vector move to low scalar and zero higher vector elements. def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; +// Load, scalar_to_vector, and zero extend. def X86vzld : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// extract_vector_elt, store. def X86vextractst : SDNode<"X86ISD::VEXTRACT_STORE", SDTStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// scalar broadcast from memory. def X86VBroadcastld : SDNode<"X86ISD::VBROADCAST_LOAD", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// subvector broadcast from memory. 
def X86SubVBroadcastld : SDNode<"X86ISD::SUBV_BROADCAST_LOAD", SDTLoad, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; @@ -137,17 +202,31 @@ def SDTVmtrunc : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCVecEltisVT<3, i1>, SDTCisSameNumEltsAs<1, 3>]>; +// Vector integer truncate. def X86vtrunc : SDNode<"X86ISD::VTRUNC", SDTVtrunc>; + +// Vector integer truncate with unsigned/signed saturation. def X86vtruncs : SDNode<"X86ISD::VTRUNCS", SDTVtrunc>; def X86vtruncus : SDNode<"X86ISD::VTRUNCUS", SDTVtrunc>; + +// Masked version of the above. Used when less than a 128-bit result is +// produced since the mask only applies to the lower elements and can't +// be represented by a select. +// SRC, PASSTHRU, MASK def X86vmtrunc : SDNode<"X86ISD::VMTRUNC", SDTVmtrunc>; def X86vmtruncs : SDNode<"X86ISD::VMTRUNCS", SDTVmtrunc>; def X86vmtruncus : SDNode<"X86ISD::VMTRUNCUS", SDTVmtrunc>; +// Vector FP extend. def X86vfpext : SDNode<"X86ISD::VFPEXT", SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisFP<1>, SDTCisVec<1>]>>; +def X86vfpextSAE : SDNode<"X86ISD::VFPEXT_SAE", + SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, + SDTCisFP<1>, SDTCisVec<1>, + SDTCisOpSmallerThanOp<1, 0>]>>; +// Vector FP extend. let IsStrictFP = true in def X86strict_vfpext : SDNode<"X86ISD::STRICT_VFPEXT", SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, @@ -158,16 +237,20 @@ def X86any_vfpext : PatFrags<(ops node:$src), [(X86strict_vfpext node:$src), (X86vfpext node:$src)]>; +// Vector FP round. def X86vfpround: SDNode<"X86ISD::VFPROUND", SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisFP<1>, SDTCisVec<1>, SDTCisOpSmallerThanOp<0, 1>]>>; + +// Convert TWO packed single data to one packed data def X86vfpround2 : SDNode<"X86ISD::VFPROUND2", SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>, SDTCisFP<1>, SDTCisVec<1>, SDTCisSameAs<1, 2>, SDTCisOpSmallerThanOp<0, 1>]>>; +// Vector FP round. 
let IsStrictFP = true in def X86strict_vfpround: SDNode<"X86ISD::STRICT_VFPROUND", SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, @@ -192,6 +275,7 @@ def X86froundsRnd: SDNode<"X86ISD::VFPROUNDS_RND", SDTCisSameSizeAs<0, 2>, SDTCisVT<3, i32>]>>; +// Vector FP extend. def X86fpexts : SDNode<"X86ISD::VFPEXTS", SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>, @@ -203,6 +287,8 @@ def X86fpextsSAE : SDNode<"X86ISD::VFPEXTS_SAE", SDTCisFP<2>, SDTCisVec<2>, SDTCisSameSizeAs<0, 2>]>>; +// Masked version of VFPROUND. Used for v2f64->v4f32. +// SRC, PASSTHRU, MASK def X86vmfpround: SDNode<"X86ISD::VMFPROUND", SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisVec<0>, SDTCisFP<1>, SDTCisVec<1>, @@ -213,13 +299,18 @@ def X86vmfpround: SDNode<"X86ISD::VMFPROUND", def X86vshiftimm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisVT<2, i8>, SDTCisInt<0>]>; +// 128-bit vector logical left / right shift def X86vshldq : SDNode<"X86ISD::VSHLDQ", X86vshiftimm>; def X86vshrdq : SDNode<"X86ISD::VSRLDQ", X86vshiftimm>; + +// Vector integer comparisons. def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>; def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>; +// Vector packed double/float comparison. def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>; +// Vector packed double/float comparison. let IsStrictFP = true in def X86strict_cmpp : SDNode<"X86ISD::STRICT_CMPP", SDTX86VFCMP, [SDNPHasChain]>; @@ -240,19 +331,31 @@ def X86CmpMaskCCScalar : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisFP<1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; +// Vector comparison generating mask bits for fp and +// integer signed and unsigned data types. def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>; + +// Vector mask comparison generating mask bits for FP values. def X86cmpmm : SDNode<"X86ISD::CMPMM", X86MaskCmpMaskCC>; +// Vector comparison generating mask bits for fp and +// integer signed and unsigned data types. 
let IsStrictFP = true in def X86strict_cmpm : SDNode<"X86ISD::STRICT_CMPM", X86CmpMaskCC, [SDNPHasChain]>; def X86any_cmpm : PatFrags<(ops node:$src1, node:$src2, node:$src3), [(X86strict_cmpm node:$src1, node:$src2, node:$src3), (X86cmpm node:$src1, node:$src2, node:$src3)]>; + +// Vector mask comparison with SAE for FP values. def X86cmpmmSAE : SDNode<"X86ISD::CMPMM_SAE", X86MaskCmpMaskCC>; + +// X86 FP SETCC, similar to FSETCC, but with output as an i1 mask +// and a version with SAE. def X86cmpms : SDNode<"X86ISD::FSETCCM", X86CmpMaskCCScalar>; def X86cmpmsSAE : SDNode<"X86ISD::FSETCCM_SAE", X86CmpMaskCCScalar>; +// v8i16 Horizontal minimum and position. def X86phminpos: SDNode<"X86ISD::PHMINPOS", SDTypeProfile<1, 1, [SDTCisVT<0, v8i16>, SDTCisVT<1, v8i16>]>>; @@ -260,6 +363,7 @@ def X86vshiftuniform : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisVec<2>, SDTCisInt<0>, SDTCisInt<2>]>; +// Vector shift elements def X86vshl : SDNode<"X86ISD::VSHL", X86vshiftuniform>; def X86vsrl : SDNode<"X86ISD::VSRL", X86vshiftuniform>; def X86vsra : SDNode<"X86ISD::VSRA", X86vshiftuniform>; @@ -267,14 +371,17 @@ def X86vsra : SDNode<"X86ISD::VSRA", X86vshiftuniform>; def X86vshiftvariable : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<0>]>; +// Vector variable shift def X86vshlv : SDNode<"X86ISD::VSHLV", X86vshiftvariable>; def X86vsrlv : SDNode<"X86ISD::VSRLV", X86vshiftvariable>; def X86vsrav : SDNode<"X86ISD::VSRAV", X86vshiftvariable>; +// Vector shift elements by immediate def X86vshli : SDNode<"X86ISD::VSHLI", X86vshiftimm>; def X86vsrli : SDNode<"X86ISD::VSRLI", X86vshiftimm>; def X86vsrai : SDNode<"X86ISD::VSRAI", X86vshiftimm>; +// Shifts of mask registers. def X86kshiftl : SDNode<"X86ISD::KSHIFTL", SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i1>, SDTCisSameAs<0, 1>, @@ -284,14 +391,18 @@ def X86kshiftr : SDNode<"X86ISD::KSHIFTR", SDTCisSameAs<0, 1>, SDTCisVT<2, i8>]>>; +// ADD for masks. 
def X86kadd : SDNode<"X86ISD::KADD", SDTIntBinOp, [SDNPCommutative]>; +// Bit rotate by immediate def X86vrotli : SDNode<"X86ISD::VROTLI", X86vshiftimm>; def X86vrotri : SDNode<"X86ISD::VROTRI", X86vshiftimm>; +// XOP arithmetic/logical shifts. def X86vpshl : SDNode<"X86ISD::VPSHL", X86vshiftvariable>; def X86vpsha : SDNode<"X86ISD::VPSHA", X86vshiftvariable>; +// XOP signed/unsigned integer comparisons. def X86vpcom : SDNode<"X86ISD::VPCOM", SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, @@ -300,6 +411,8 @@ def X86vpcomu : SDNode<"X86ISD::VPCOMU", SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisVT<3, i8>, SDTCisInt<0>]>>; + +// XOP two source permutation. def X86vpermil2 : SDNode<"X86ISD::VPERMIL2", SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, @@ -307,6 +420,8 @@ def X86vpermil2 : SDNode<"X86ISD::VPERMIL2", SDTCisSameNumEltsAs<0, 3>, SDTCisSameSizeAs<0,3>, SDTCisVT<4, i8>]>>; + +// XOP packed permute bytes. def X86vpperm : SDNode<"X86ISD::VPPERM", SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisSameAs<0, 3>]>>; @@ -315,31 +430,44 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; +// Vector Multiply Packed Unsigned Integers with Round and Scale. def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>; + +// Vector bitwise comparisons. def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; + +// Vector packed fp sign bitwise comparisons. def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; + +// OR/AND test for masks. def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>; def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>; +// Vector sign bit extraction. 
def X86movmsk : SDNode<"X86ISD::MOVMSK", SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVec<1>]>>; +// X86 Select def X86selects : SDNode<"X86ISD::SELECTS", SDTypeProfile<1, 3, [SDTCisVT<1, v1i1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>>; +// Vector multiply packed unsigned doubleword integers. def X86pmuludq : SDNode<"X86ISD::PMULUDQ", SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>, SDTCisSameAs<0,1>, SDTCisSameAs<1,2>]>, [SDNPCommutative]>; + +// Vector multiply packed signed doubleword integers. def X86pmuldq : SDNode<"X86ISD::PMULDQ", SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>, SDTCisSameAs<0,1>, SDTCisSameAs<1,2>]>, [SDNPCommutative]>; +/// SSE4A Extraction and Insertion. def X86extrqi : SDNode<"X86ISD::EXTRQI", SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, SDTCisVT<2, i8>, SDTCisVT<3, i8>]>>; @@ -404,19 +532,25 @@ def SDTFPToxIntSatOp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCisFP<1>]>; +// Custom handling for FP_TO_xINT_SAT def X86fp2sisat : SDNode<"X86ISD::FP_TO_SINT_SAT", SDTFPToxIntSatOp>; def X86fp2uisat : SDNode<"X86ISD::FP_TO_UINT_SAT", SDTFPToxIntSatOp>; +// Intra-lane alignr. def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>>; + +// AVX512 inter-lane alignr. def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>; +// VBMI2 Concat & Shift. def X86VShld : SDNode<"X86ISD::VSHLD", SDTShuff3OpI>; def X86VShrd : SDNode<"X86ISD::VSHRD", SDTShuff3OpI>; +// Detect Conflicts Within a Vector def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>; def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; @@ -424,6 +558,8 @@ def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>; def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>; + +// Shuffle Packed Values at 128-bit granularity. 
def X86Shuf128 : SDNode<"X86ISD::SHUF128", SDTShuff3OpI>; def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>; @@ -458,12 +594,16 @@ def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>, SDTCisSameSizeAs<0,1>, SDTCisSameAs<1,2>, SDTCisOpSmallerThanOp<0, 1>]>; + +// Several flavors of instructions with vector shuffle behaviors. +// Saturated signed/unsigned packing. def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>; def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>; def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>; def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>; +// Multiply and Add Packed Integers. def X86vpmaddubsw : SDNode<"X86ISD::VPMADDUBSW", SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>, SDTCVecEltisVT<1, i8>, @@ -478,12 +618,19 @@ def X86vpmaddwd : SDNode<"X86ISD::VPMADDWD", def X86VPermilpv : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>; def X86VPermilpi : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>; + +// Variable Permute (VPERM). +// Res = VPERMV MaskV, V0 def X86VPermv : SDNode<"X86ISD::VPERMV", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<1>, SDTCisSameNumEltsAs<0,1>, SDTCisSameSizeAs<0,1>, SDTCisSameAs<0,2>]>>; + def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>; + +// 3-op Variable Permute (VPERMT2). +// Res = VPERMV3 V0, MaskV, V1 def X86VPermt2 : SDNode<"X86ISD::VPERMV3", SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisInt<2>, @@ -491,20 +638,40 @@ def X86VPermt2 : SDNode<"X86ISD::VPERMV3", SDTCisSameSizeAs<0,2>, SDTCisSameAs<0,3>]>, []>; +// Bitwise ternary logic. def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>; def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; +// Fix Up Special Packed Float32/64 values. 
def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPTernaryOpImm>; def X86VFixupimmSAE : SDNode<"X86ISD::VFIXUPIMM_SAE", SDTFPTernaryOpImm>; def X86VFixupimms : SDNode<"X86ISD::VFIXUPIMMS", SDTFPTernaryOpImm>; def X86VFixupimmSAEs : SDNode<"X86ISD::VFIXUPIMMS_SAE", SDTFPTernaryOpImm>; + +// Range Restriction Calculation For Packed Pairs of Float32/64 values. def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImm>; def X86VRangeSAE : SDNode<"X86ISD::VRANGE_SAE", SDTFPBinOpImm>; +def X86Ranges : SDNode<"X86ISD::VRANGES", SDTFPBinOpImm>; +def X86RangesSAE : SDNode<"X86ISD::VRANGES_SAE", SDTFPBinOpImm>; + +// Reduce - Perform Reduction Transformation on scalar\packed FP. def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImm>; def X86VReduceSAE : SDNode<"X86ISD::VREDUCE_SAE", SDTFPUnaryOpImm>; +def X86Reduces : SDNode<"X86ISD::VREDUCES", SDTFPBinOpImm>; +def X86ReducesSAE : SDNode<"X86ISD::VREDUCES_SAE", SDTFPBinOpImm>; + +// RndScale - Round FP Values To Include A Given Number Of Fraction Bits. +// Also used by the legacy (V)ROUND intrinsics where we mask out the +// scaling part of the immediate. def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImm>; +def X86VRndScaleSAE: SDNode<"X86ISD::VRNDSCALE_SAE", SDTFPUnaryOpImm>; +def X86RndScales : SDNode<"X86ISD::VRNDSCALES", SDTFPBinOpImm>; +def X86RndScalesSAE: SDNode<"X86ISD::VRNDSCALES_SAE",SDTFPBinOpImm>; +// RndScale - Round FP Values To Include A Given Number Of Fraction Bits. +// Also used by the legacy (V)ROUND intrinsics where we mask out the +// scaling part of the immediate. let IsStrictFP = true in def X86strict_VRndScale : SDNode<"X86ISD::STRICT_VRNDSCALE", SDTFPUnaryOpImm, [SDNPHasChain]>; @@ -513,22 +680,39 @@ def X86any_VRndScale : PatFrags<(ops node:$src1, node:$src2), [(X86strict_VRndScale node:$src1, node:$src2), (X86VRndScale node:$src1, node:$src2)]>; -def X86VRndScaleSAE: SDNode<"X86ISD::VRNDSCALE_SAE", SDTFPUnaryOpImm>; +// Extract Normalized Mantissas. 
def X86VGetMant : SDNode<"X86ISD::VGETMANT", SDTFPUnaryOpImm>; def X86VGetMantSAE : SDNode<"X86ISD::VGETMANT_SAE", SDTFPUnaryOpImm>; +def X86GetMants : SDNode<"X86ISD::VGETMANTS", SDTFPBinOpImm>; +def X86GetMantsSAE : SDNode<"X86ISD::VGETMANTS_SAE", SDTFPBinOpImm>; + +// Tests Types Of a FP Values for packed types. def X86Vfpclass : SDNode<"X86ISD::VFPCLASS", SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i1>, SDTCisFP<1>, SDTCisSameNumEltsAs<0,1>, SDTCisVT<2, i32>]>, []>; + +// Tests Types Of a FP Values for scalar types. def X86Vfpclasss : SDNode<"X86ISD::VFPCLASSS", SDTypeProfile<1, 2, [SDTCisVT<0, v1i1>, SDTCisFP<1>, SDTCisVT<2, i32>]>,[]>; +// Broadcast (splat) scalar or element 0 of a vector. If the operand is +// a vector, this node may change the vector length as part of the splat. def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; + +// Broadcast mask to vector. def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>; +// Blend where the selector is an immediate. def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>; + +// Dynamic (non-constant condition) vector blend where only the sign bits +// of the condition elements are used. This is used to enforce that the +// condition mask is not valid for generic VSELECT optimizations. This +// is also used to implement the intrinsics. +// Operands are in VSELECT order: MASK, TRUE, FALSE def X86Blendv : SDNode<"X86ISD::BLENDV", SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, @@ -536,8 +720,10 @@ def X86Blendv : SDNode<"X86ISD::BLENDV", SDTCisSameNumEltsAs<0, 1>, SDTCisSameSizeAs<0, 1>]>>; +// Combined add and sub on an FP vector. def X86Addsub : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>; +// FP vector ops with rounding mode. 
def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>; def X86fadds : SDNode<"X86ISD::FADDS", SDTFPBinOp>; def X86faddRnds : SDNode<"X86ISD::FADDS_RND", SDTFPBinOpRound>; @@ -554,45 +740,52 @@ def X86fmaxSAE : SDNode<"X86ISD::FMAX_SAE", SDTFPBinOp>; def X86fmaxSAEs : SDNode<"X86ISD::FMAXS_SAE", SDTFPBinOp>; def X86fminSAE : SDNode<"X86ISD::FMIN_SAE", SDTFPBinOp>; def X86fminSAEs : SDNode<"X86ISD::FMINS_SAE", SDTFPBinOp>; + +// FP Scale. def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOp>; def X86scalefRnd : SDNode<"X86ISD::SCALEF_RND", SDTFPBinOpRound>; def X86scalefs : SDNode<"X86ISD::SCALEFS", SDTFPBinOp>; def X86scalefsRnd: SDNode<"X86ISD::SCALEFS_RND", SDTFPBinOpRound>; + +// FP vector ops with rounding mode. def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>; def X86fsqrts : SDNode<"X86ISD::FSQRTS", SDTFPBinOp>; def X86fsqrtRnds : SDNode<"X86ISD::FSQRTS_RND", SDTFPBinOpRound>; + +// FP vector get exponent. def X86fgetexp : SDNode<"X86ISD::FGETEXP", SDTFPUnaryOp>; def X86fgetexpSAE : SDNode<"X86ISD::FGETEXP_SAE", SDTFPUnaryOp>; def X86fgetexps : SDNode<"X86ISD::FGETEXPS", SDTFPBinOp>; def X86fgetexpSAEs : SDNode<"X86ISD::FGETEXPS_SAE", SDTFPBinOp>; +// FMA nodes. +// We use the target independent ISD::FMA for the non-inverted case. def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>; +def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp, [SDNPCommutative]>; +def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp, [SDNPCommutative]>; +def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFPTernaryOp, [SDNPCommutative]>; +def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFPTernaryOp, [SDNPCommutative]>; -let IsStrictFP = true in -def X86strict_Fnmadd : SDNode<"X86ISD::STRICT_FNMADD", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>; +let IsStrictFP = true in { + // Strict FMA nodes. 
+ def X86strict_Fnmadd : SDNode<"X86ISD::STRICT_FNMADD", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>; + def X86strict_Fmsub : SDNode<"X86ISD::STRICT_FMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>; + def X86strict_Fnmsub : SDNode<"X86ISD::STRICT_FNMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>; +} def X86any_Fnmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3), [(X86strict_Fnmadd node:$src1, node:$src2, node:$src3), (X86Fnmadd node:$src1, node:$src2, node:$src3)]>; -def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp, [SDNPCommutative]>; - -let IsStrictFP = true in -def X86strict_Fmsub : SDNode<"X86ISD::STRICT_FMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>; def X86any_Fmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3), [(X86strict_Fmsub node:$src1, node:$src2, node:$src3), (X86Fmsub node:$src1, node:$src2, node:$src3)]>; -def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp, [SDNPCommutative]>; - -let IsStrictFP = true in -def X86strict_Fnmsub : SDNode<"X86ISD::STRICT_FNMSUB", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>; def X86any_Fnmsub : PatFrags<(ops node:$src1, node:$src2, node:$src3), [(X86strict_Fnmsub node:$src1, node:$src2, node:$src3), (X86Fnmsub node:$src1, node:$src2, node:$src3)]>; -def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFPTernaryOp, [SDNPCommutative]>; -def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFPTernaryOp, [SDNPCommutative]>; +// FMA with rounding mode. 
def X86FmaddRnd : SDNode<"X86ISD::FMADD_RND", SDTFmaRound, [SDNPCommutative]>; def X86FnmaddRnd : SDNode<"X86ISD::FNMADD_RND", SDTFmaRound, [SDNPCommutative]>; def X86FmsubRnd : SDNode<"X86ISD::FMSUB_RND", SDTFmaRound, [SDNPCommutative]>; @@ -600,15 +793,21 @@ def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound, [SDNPCommutat def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound, [SDNPCommutative]>; def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound, [SDNPCommutative]>; +// For avx512-vp2intersect def X86vp2intersect : SDNode<"X86ISD::VP2INTERSECT", SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisVec<1>, SDTCisSameAs<1, 2>]>>; def SDTIFma : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; + +// AVX512IFMA multiply and add. +// NOTE: These are different than the instruction and perform +// op0 x op1 + op2. def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTIFma, [SDNPCommutative]>; def x86vpmadd52h : SDNode<"X86ISD::VPMADD52H", SDTIFma, [SDNPCommutative]>; +// AVX512-FP16 complex addition and multiplication. def x86vfmaddc : SDNode<"X86ISD::VFMADDC", SDTFPTernaryOp, [SDNPCommutative]>; def x86vfmaddcRnd : SDNode<"X86ISD::VFMADDC_RND", SDTFmaRound, [SDNPCommutative]>; def x86vfcmaddc : SDNode<"X86ISD::VFCMADDC", SDTFPTernaryOp>; @@ -627,28 +826,23 @@ def x86vfcmaddcShRnd : SDNode<"X86ISD::VFCMADDCSH_RND",SDTFmaRound>; def x86vfmulcShRnd : SDNode<"X86ISD::VFMULCSH_RND", SDTFPBinOpRound, [SDNPCommutative]>; def x86vfcmulcShRnd : SDNode<"X86ISD::VFCMULCSH_RND", SDTFPBinOpRound>; +// AVX-512 reciprocal approximations with a little more precision. 
def X86rsqrt14 : SDNode<"X86ISD::RSQRT14", SDTFPUnaryOp>; +def X86rsqrt14s : SDNode<"X86ISD::RSQRT14S", SDTFPBinOp>; def X86rcp14 : SDNode<"X86ISD::RCP14", SDTFPUnaryOp>; +def X86rcp14s : SDNode<"X86ISD::RCP14S", SDTFPBinOp>; // VNNI def SDTVnni : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; + +// VNNI def X86Vpdpbusd : SDNode<"X86ISD::VPDPBUSD", SDTVnni>; def X86Vpdpbusds : SDNode<"X86ISD::VPDPBUSDS", SDTVnni>; def X86Vpdpwssd : SDNode<"X86ISD::VPDPWSSD", SDTVnni>; def X86Vpdpwssds : SDNode<"X86ISD::VPDPWSSDS", SDTVnni>; -def X86rsqrt14s : SDNode<"X86ISD::RSQRT14S", SDTFPBinOp>; -def X86rcp14s : SDNode<"X86ISD::RCP14S", SDTFPBinOp>; -def X86Ranges : SDNode<"X86ISD::VRANGES", SDTFPBinOpImm>; -def X86RndScales : SDNode<"X86ISD::VRNDSCALES", SDTFPBinOpImm>; -def X86Reduces : SDNode<"X86ISD::VREDUCES", SDTFPBinOpImm>; -def X86GetMants : SDNode<"X86ISD::VGETMANTS", SDTFPBinOpImm>; -def X86RangesSAE : SDNode<"X86ISD::VRANGES_SAE", SDTFPBinOpImm>; -def X86RndScalesSAE : SDNode<"X86ISD::VRNDSCALES_SAE", SDTFPBinOpImm>; -def X86ReducesSAE : SDNode<"X86ISD::VREDUCES_SAE", SDTFPBinOpImm>; -def X86GetMantsSAE : SDNode<"X86ISD::VGETMANTS_SAE", SDTFPBinOpImm>; - +// Compress and expand. def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisVec<1>, SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>, @@ -658,6 +852,7 @@ def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3, SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>, SDTCisSameNumEltsAs<0, 3>]>, []>; +// Bits shuffle // vpshufbitqmb def X86Vpshufbitqmb : SDNode<"X86ISD::VPSHUFBITQMB", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, @@ -687,22 +882,25 @@ def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisFP<0>, SDTCisInt<1>, SDTCisVT<2, i32>]>; -// Scalar +// Convert Unsigned/Integer to Floating-Point Value with rounding mode. 
def X86SintToFp : SDNode<"X86ISD::SCALAR_SINT_TO_FP", SDTintToFP>; def X86SintToFpRnd : SDNode<"X86ISD::SCALAR_SINT_TO_FP_RND", SDTintToFPRound>; def X86UintToFp : SDNode<"X86ISD::SCALAR_UINT_TO_FP", SDTintToFP>; def X86UintToFpRnd : SDNode<"X86ISD::SCALAR_UINT_TO_FP_RND", SDTintToFPRound>; +// Scalar float/double to signed/unsigned integer with truncation. def X86cvtts2Int : SDNode<"X86ISD::CVTTS2SI", SDTSFloatToInt>; def X86cvtts2UInt : SDNode<"X86ISD::CVTTS2UI", SDTSFloatToInt>; def X86cvtts2IntSAE : SDNode<"X86ISD::CVTTS2SI_SAE", SDTSFloatToInt>; def X86cvtts2UIntSAE : SDNode<"X86ISD::CVTTS2UI_SAE", SDTSFloatToInt>; +// Scalar float/double to signed/unsigned integer. def X86cvts2si : SDNode<"X86ISD::CVTS2SI", SDTSFloatToInt>; def X86cvts2usi : SDNode<"X86ISD::CVTS2UI", SDTSFloatToInt>; def X86cvts2siRnd : SDNode<"X86ISD::CVTS2SI_RND", SDTSFloatToIntRnd>; def X86cvts2usiRnd : SDNode<"X86ISD::CVTS2UI_RND", SDTSFloatToIntRnd>; +// Scalar float/double to signed/unsigned integer with saturation. def X86cvttss2Int : SDNode<"X86ISD::CVTTS2SIS", SDTSFloatToInt>; def X86cvttss2UInt : SDNode<"X86ISD::CVTTS2UIS", SDTSFloatToInt>; def X86cvttss2IntSAE : SDNode<"X86ISD::CVTTS2SIS_SAE", SDTSFloatToInt>; @@ -710,29 +908,27 @@ def X86cvttss2UIntSAE : SDNode<"X86ISD::CVTTS2UIS_SAE", SDTSFloatToInt>; // Vector with rounding mode -// cvtt fp-to-int staff -def X86cvttp2siSAE : SDNode<"X86ISD::CVTTP2SI_SAE", SDTFloatToInt>; -def X86cvttp2uiSAE : SDNode<"X86ISD::CVTTP2UI_SAE", SDTFloatToInt>; - +// Convert Unsigned/Integer to Floating-Point Value with rounding mode. def X86VSintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTVintToFPRound>; def X86VUintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTVintToFPRound>; +// Saturation enabled Vector float/double to signed/unsigned +// integer with truncation. 
def X86cvttp2sisSAE : SDNode<"X86ISD::CVTTP2SIS_SAE", SDTFloatToInt>; def X86cvttp2uisSAE : SDNode<"X86ISD::CVTTP2UIS_SAE", SDTFloatToInt>; def X86cvttp2sis : SDNode<"X86ISD::CVTTP2SIS", SDTFloatToInt>; def X86cvttp2uis : SDNode<"X86ISD::CVTTP2UIS", SDTFloatToInt>; -// cvt fp-to-int staff -def X86cvtp2IntRnd : SDNode<"X86ISD::CVTP2SI_RND", SDTFloatToIntRnd>; -def X86cvtp2UIntRnd : SDNode<"X86ISD::CVTP2UI_RND", SDTFloatToIntRnd>; - // Vector without rounding mode -// cvtt fp-to-int staff +// Vector float/double to signed/unsigned integer with truncation. def X86cvttp2si : SDNode<"X86ISD::CVTTP2SI", SDTFloatToInt>; def X86cvttp2ui : SDNode<"X86ISD::CVTTP2UI", SDTFloatToInt>; +def X86cvttp2siSAE : SDNode<"X86ISD::CVTTP2SI_SAE", SDTFloatToInt>; +def X86cvttp2uiSAE : SDNode<"X86ISD::CVTTP2UI_SAE", SDTFloatToInt>; let IsStrictFP = true in { + // Vector float/double to signed/unsigned integer with truncation. def X86strict_cvttp2si : SDNode<"X86ISD::STRICT_CVTTP2SI", SDTFloatToInt, [SDNPHasChain]>; def X86strict_cvttp2ui : SDNode<"X86ISD::STRICT_CVTTP2UI", SDTFloatToInt, [SDNPHasChain]>; } @@ -744,10 +940,12 @@ def X86any_cvttp2ui : PatFrags<(ops node:$src), [(X86strict_cvttp2ui node:$src), (X86cvttp2ui node:$src)]>; +// Vector signed/unsigned integer to float/double. def X86VSintToFP : SDNode<"X86ISD::CVTSI2P", SDTVintToFP>; def X86VUintToFP : SDNode<"X86ISD::CVTUI2P", SDTVintToFP>; let IsStrictFP = true in { + // Vector signed/unsigned integer to float/double. def X86strict_VSintToFP : SDNode<"X86ISD::STRICT_CVTSI2P", SDTVintToFP, [SDNPHasChain]>; def X86strict_VUintToFP : SDNode<"X86ISD::STRICT_CVTUI2P", SDTVintToFP, [SDNPHasChain]>; } @@ -760,10 +958,11 @@ def X86any_VUintToFP : PatFrags<(ops node:$src), (X86VUintToFP node:$src)]>; -// cvt int-to-fp staff +// Vector float/double to signed/unsigned integer. 
def X86cvtp2Int : SDNode<"X86ISD::CVTP2SI", SDTFloatToInt>; def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>; - +def X86cvtp2IntRnd : SDNode<"X86ISD::CVTP2SI_RND", SDTFloatToIntRnd>; +def X86cvtp2UIntRnd : SDNode<"X86ISD::CVTP2UI_RND", SDTFloatToIntRnd>; // Masked versions of above def SDTMVintToFP: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, @@ -781,55 +980,59 @@ def SDTMFloatToInt: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, def X86VMSintToFP : SDNode<"X86ISD::MCVTSI2P", SDTMVintToFP>; def X86VMUintToFP : SDNode<"X86ISD::MCVTUI2P", SDTMVintToFP>; +// Masked versions of CVTTS2[US]IS. Used for v2f64->v4f32. +// SRC, PASSTHRU, MASK def X86mcvtp2Int : SDNode<"X86ISD::MCVTP2SI", SDTMFloatToInt>; def X86mcvtp2UInt : SDNode<"X86ISD::MCVTP2UI", SDTMFloatToInt>; def X86mcvttp2si : SDNode<"X86ISD::MCVTTP2SI", SDTMFloatToInt>; def X86mcvttp2ui : SDNode<"X86ISD::MCVTTP2UI", SDTMFloatToInt>; + +// Masked versions of CVTP2[SU]IS. Used for v2f64 to v4i32. +// SRC, PASSTHRU, MASK def X86mcvttp2sis : SDNode<"X86ISD::MCVTTP2SIS", SDTMFloatToInt>; def X86mcvttp2uis : SDNode<"X86ISD::MCVTTP2UIS", SDTMFloatToInt>; def SDTcvtph2ps : SDTypeProfile<1, 1, [SDTCVecEltisVT<0, f32>, SDTCVecEltisVT<1, i16>]>; -def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS", SDTcvtph2ps>; - -let IsStrictFP = true in -def X86strict_cvtph2ps : SDNode<"X86ISD::STRICT_CVTPH2PS", SDTcvtph2ps, - [SDNPHasChain]>; - -def X86any_cvtph2ps : PatFrags<(ops node:$src), - [(X86strict_cvtph2ps node:$src), - (X86cvtph2ps node:$src)]>; - -def X86cvtph2psSAE : SDNode<"X86ISD::CVTPH2PS_SAE", SDTcvtph2ps>; def SDTcvtps2ph : SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>, SDTCVecEltisVT<1, f32>, SDTCisVT<2, i32>]>; + +// Conversions between float and half-float. 
def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH", SDTcvtps2ph>; +def X86cvtps2phSAE : SDNode<"X86ISD::CVTPS2PH_SAE", SDTcvtps2ph>; +def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS", SDTcvtph2ps>; +def X86cvtph2psSAE : SDNode<"X86ISD::CVTPH2PS_SAE", SDTcvtph2ps>; -let IsStrictFP = true in -def X86strict_cvtps2ph : SDNode<"X86ISD::STRICT_CVTPS2PH", SDTcvtps2ph, - [SDNPHasChain]>; +let IsStrictFP = true in { + // Conversions between float and half-float. + def X86strict_cvtps2ph : SDNode<"X86ISD::STRICT_CVTPS2PH", SDTcvtps2ph, + [SDNPHasChain]>; + def X86strict_cvtph2ps : SDNode<"X86ISD::STRICT_CVTPH2PS", SDTcvtph2ps, + [SDNPHasChain]>; +} + +def X86any_cvtph2ps : PatFrags<(ops node:$src), + [(X86strict_cvtph2ps node:$src), + (X86cvtph2ps node:$src)]>; def X86any_cvtps2ph : PatFrags<(ops node:$src1, node:$src2), [(X86strict_cvtps2ph node:$src1, node:$src2), (X86cvtps2ph node:$src1, node:$src2)]>; -def X86cvtps2phSAE : SDNode<"X86ISD::CVTPS2PH_SAE", SDTcvtps2ph>; - def SDTmcvtps2ph : SDTypeProfile<1, 4, [SDTCVecEltisVT<0, i16>, SDTCVecEltisVT<1, f32>, SDTCisVT<2, i32>, SDTCisSameAs<0, 3>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<1, 4>]>; + +// Masked version of CVTPS2PH. +// SRC, RND, PASSTHRU, MASK def X86mcvtps2ph : SDNode<"X86ISD::MCVTPS2PH", SDTmcvtps2ph>; def X86mcvtps2phSAE : SDNode<"X86ISD::MCVTPS2PH_SAE", SDTmcvtps2ph>; -def X86vfpextSAE : SDNode<"X86ISD::VFPEXT_SAE", - SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, - SDTCisFP<1>, SDTCisVec<1>, - SDTCisOpSmallerThanOp<1, 0>]>>; def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND", SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisVec<0>, SDTCisFP<1>, SDTCisVec<1>, @@ -846,16 +1049,23 @@ def X86vminmaxs : SDNode<"X86ISD::VMINMAXS", SDTypeProfile<1, 3, [SDTCisSameAs<0 def X86vminmaxsSae : SDNode<"X86ISD::VMINMAXS_SAE", SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>]>>; -// cvt fp to bfloat16 +// Masked version of CVTNEPS2BF16. 
+// SRC, PASSTHRU, MASK def X86mcvtneps2bf16 : SDNode<"X86ISD::MCVTNEPS2BF16", SDTypeProfile<1, 3, [SDTCVecEltisVT<0, bf16>, SDTCVecEltisVT<1, f32>, SDTCisSameAs<0, 2>, SDTCVecEltisVT<3, i1>, SDTCisSameNumEltsAs<1, 3>]>>; + +// Vector float to bfloat16. +// Convert packed single data to packed BF16 data def X86cvtneps2bf16 : SDNode<"X86ISD::CVTNEPS2BF16", SDTypeProfile<1, 1, [SDTCVecEltisVT<0, bf16>, SDTCVecEltisVT<1, f32>]>>; + +// Dot product of BF16/FP16 pairs to accumulated into +// packed single precision. def X86dpbf16ps : SDNode<"X86ISD::DPBF16PS", SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>, SDTCisSameAs<0,1>, @@ -867,7 +1077,7 @@ def X86dpfp16ps : SDNode<"X86ISD::DPFP16PS", SDTCVecEltisVT<2, f16>, SDTCisSameAs<2,3>]>>; -// galois field arithmetic +// Galois Field Arithmetic Instructions def X86GF2P8affineinvqb : SDNode<"X86ISD::GF2P8AFFINEINVQB", SDTBlend>; def X86GF2P8affineqb : SDNode<"X86ISD::GF2P8AFFINEQB", SDTBlend>; def X86GF2P8mulb : SDNode<"X86ISD::GF2P8MULB", SDTIntBinOp>; @@ -1368,15 +1578,15 @@ def masked_truncstorevi32 : return cast(N)->getMemoryVT().getScalarType() == MVT::i32; }]>; +// Vector truncating store with unsigned/signed saturation def X86TruncSStore : SDNode<"X86ISD::VTRUNCSTORES", SDTStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; - def X86TruncUSStore : SDNode<"X86ISD::VTRUNCSTOREUS", SDTStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +// Vector truncating masked store with unsigned/signed saturation def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES", SDTX86MaskedStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; - def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTX86MaskedStore, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; @@ -1548,4 +1758,3 @@ def X86vpmaddwd_su : PatFrag<(ops node:$lhs, node:$rhs), (X86vpmaddwd node:$lhs, node:$rhs), [{ return N->hasOneUse(); }]>; - diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td index 644d6d0b92dfc..1cdf7f9093c17 100644 --- 
a/llvm/lib/Target/X86/X86InstrMMX.td +++ b/llvm/lib/Target/X86/X86InstrMMX.td @@ -226,6 +226,9 @@ def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), def MMX_X86movdq2q : SDNode<"X86ISD::MOVDQ2Q", SDTypeProfile<1, 1, [SDTCisVT<0, x86mmx>, SDTCisVT<1, v2i64>]>>; + +// Copies a 64-bit value from the low word of an XMM vector +// to an MMX vector. def MMX_X86movq2dq : SDNode<"X86ISD::MOVQ2DQ", SDTypeProfile<1, 1, [SDTCisVT<0, v2i64>, SDTCisVT<1, x86mmx>]>>; diff --git a/llvm/lib/Target/X86/X86InstrRAOINT.td b/llvm/lib/Target/X86/X86InstrRAOINT.td index b1a7fc6e901de..fd966dc7c1248 100644 --- a/llvm/lib/Target/X86/X86InstrRAOINT.td +++ b/llvm/lib/Target/X86/X86InstrRAOINT.td @@ -16,6 +16,8 @@ def SDTRAOBinaryArith : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; +/// RAO arithmetic instructions. +/// OUTCHAIN = AADD(INCHAIN, PTR, RHS) def X86rao_add : SDNode<"X86ISD::AADD", SDTRAOBinaryArith, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def X86rao_or : SDNode<"X86ISD::AOR", SDTRAOBinaryArith, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 806b02b9f9359..4be9e26cffa79 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5265,6 +5265,8 @@ multiclass SS41I_extract8 opc, string OpcodeStr> { let Predicates = [HasAVX, NoBWI] in defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX, WIG; +// Extract an 8-bit value from a vector and zero extend it to +// i32, corresponds to X86::PEXTRB. defm PEXTRB : SS41I_extract8<0x14, "pextrb">; diff --git a/llvm/lib/Target/X86/X86InstrTSX.td b/llvm/lib/Target/X86/X86InstrTSX.td index 57604b682d54e..63792e7666a2e 100644 --- a/llvm/lib/Target/X86/X86InstrTSX.td +++ b/llvm/lib/Target/X86/X86InstrTSX.td @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// // TSX instructions +// Test if in transactional execution. 
def X86xtest: SDNode<"X86ISD::XTEST", SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>, [SDNPHasChain, SDNPSideEffect]>; From 8733785600b7388a1c419d6c448683dc3cc1b677 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Tue, 18 Nov 2025 14:59:18 +0300 Subject: [PATCH 4/4] Move misplaced comment --- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 3 +++ llvm/lib/Target/X86/X86InstrSSE.td | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index b4b0a37cdd228..223c81de7b997 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -144,6 +144,9 @@ def X86andnp : SDNode<"X86ISD::ANDNP", def X86multishift : SDNode<"X86ISD::MULTISHIFT", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>]>>; + +// Extract an 8-bit value from a vector and zero extend it to +// i32, corresponds to X86::PEXTRB. def X86pextrb : SDNode<"X86ISD::PEXTRB", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v16i8>, SDTCisVT<2, i8>]>>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 4be9e26cffa79..806b02b9f9359 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5265,8 +5265,6 @@ multiclass SS41I_extract8 opc, string OpcodeStr> { let Predicates = [HasAVX, NoBWI] in defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX, WIG; -// Extract an 8-bit value from a vector and zero extend it to -// i32, corresponds to X86::PEXTRB. defm PEXTRB : SS41I_extract8<0x14, "pextrb">;