diff --git a/llvm/lib/Target/PowerPC/CMakeLists.txt b/llvm/lib/Target/PowerPC/CMakeLists.txt index 2182039e0eef8..53d565013c4bc 100644 --- a/llvm/lib/Target/PowerPC/CMakeLists.txt +++ b/llvm/lib/Target/PowerPC/CMakeLists.txt @@ -11,6 +11,7 @@ tablegen(LLVM PPCGenFastISel.inc -gen-fast-isel) tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info) tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM PPCGenSDNodeInfo.inc -gen-sd-node-info) tablegen(LLVM PPCGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM PPCGenExegesis.inc -gen-exegesis) tablegen(LLVM PPCGenRegisterBank.inc -gen-register-bank) diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp index ea4e597d0fd7d..ca3fe18273ff5 100644 --- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp +++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp @@ -17,6 +17,7 @@ #include "PPCCallingConv.h" #include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" +#include "PPCSelectionDAGInfo.h" #include "PPCSubtarget.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 89165fa8f8fdb..dd537c204cec1 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -16,6 +16,7 @@ #include "PPC.h" #include "PPCISelLowering.h" #include "PPCMachineFunctionInfo.h" +#include "PPCSelectionDAGInfo.h" #include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/APInt.h" diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f55336bafd251..220010c4d3d34 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -20,6 +20,7 @@ #include "PPCMachineFunctionInfo.h" #include "PPCPerfectShuffle.h" #include "PPCRegisterInfo.h" +#include "PPCSelectionDAGInfo.h" 
#include "PPCSubtarget.h" #include "PPCTargetMachine.h" #include "llvm/ADT/APFloat.h" @@ -1678,190 +1679,6 @@ bool PPCTargetLowering::shallExtractConstSplatVectorElementToStore( return false; } -const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch ((PPCISD::NodeType)Opcode) { - case PPCISD::FIRST_NUMBER: break; - case PPCISD::FSEL: return "PPCISD::FSEL"; - case PPCISD::XSMAXC: return "PPCISD::XSMAXC"; - case PPCISD::XSMINC: return "PPCISD::XSMINC"; - case PPCISD::FCFID: return "PPCISD::FCFID"; - case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; - case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; - case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS"; - case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; - case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; - case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ"; - case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ"; - case PPCISD::FRE: return "PPCISD::FRE"; - case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE"; - case PPCISD::FTSQRT: - return "PPCISD::FTSQRT"; - case PPCISD::FSQRT: - return "PPCISD::FSQRT"; - case PPCISD::STFIWX: return "PPCISD::STFIWX"; - case PPCISD::VPERM: return "PPCISD::VPERM"; - case PPCISD::XXSPLT: return "PPCISD::XXSPLT"; - case PPCISD::XXSPLTI_SP_TO_DP: - return "PPCISD::XXSPLTI_SP_TO_DP"; - case PPCISD::XXSPLTI32DX: - return "PPCISD::XXSPLTI32DX"; - case PPCISD::VECINSERT: return "PPCISD::VECINSERT"; - case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI"; - case PPCISD::XXPERM: - return "PPCISD::XXPERM"; - case PPCISD::VECSHL: return "PPCISD::VECSHL"; - case PPCISD::VSRQ: - return "PPCISD::VSRQ"; - case PPCISD::CMPB: return "PPCISD::CMPB"; - case PPCISD::Hi: return "PPCISD::Hi"; - case PPCISD::Lo: return "PPCISD::Lo"; - case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; - case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8"; - case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16"; - case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; - case PPCISD::DYNAREAOFFSET: return 
"PPCISD::DYNAREAOFFSET"; - case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA"; - case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; - case PPCISD::SRL: return "PPCISD::SRL"; - case PPCISD::SRA: return "PPCISD::SRA"; - case PPCISD::SHL: return "PPCISD::SHL"; - case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE"; - case PPCISD::CALL: return "PPCISD::CALL"; - case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP"; - case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC"; - case PPCISD::CALL_RM: - return "PPCISD::CALL_RM"; - case PPCISD::CALL_NOP_RM: - return "PPCISD::CALL_NOP_RM"; - case PPCISD::CALL_NOTOC_RM: - return "PPCISD::CALL_NOTOC_RM"; - case PPCISD::MTCTR: return "PPCISD::MTCTR"; - case PPCISD::BCTRL: return "PPCISD::BCTRL"; - case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; - case PPCISD::BCTRL_RM: - return "PPCISD::BCTRL_RM"; - case PPCISD::BCTRL_LOAD_TOC_RM: - return "PPCISD::BCTRL_LOAD_TOC_RM"; - case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE"; - case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE"; - case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; - case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP"; - case PPCISD::MFOCRF: return "PPCISD::MFOCRF"; - case PPCISD::MFVSR: return "PPCISD::MFVSR"; - case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; - case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; - case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP"; - case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP"; - case PPCISD::SCALAR_TO_VECTOR_PERMUTED: - return "PPCISD::SCALAR_TO_VECTOR_PERMUTED"; - case PPCISD::ANDI_rec_1_EQ_BIT: - return "PPCISD::ANDI_rec_1_EQ_BIT"; - case PPCISD::ANDI_rec_1_GT_BIT: - return "PPCISD::ANDI_rec_1_GT_BIT"; - case PPCISD::VCMP: return "PPCISD::VCMP"; - case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec"; - case PPCISD::LBRX: return "PPCISD::LBRX"; - case PPCISD::STBRX: return "PPCISD::STBRX"; - case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; - case PPCISD::LFIWZX: return 
"PPCISD::LFIWZX"; - case PPCISD::LXSIZX: return "PPCISD::LXSIZX"; - case PPCISD::STXSIX: return "PPCISD::STXSIX"; - case PPCISD::VEXTS: return "PPCISD::VEXTS"; - case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; - case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; - case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE"; - case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE"; - case PPCISD::ST_VSR_SCAL_INT: - return "PPCISD::ST_VSR_SCAL_INT"; - case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; - case PPCISD::BDNZ: return "PPCISD::BDNZ"; - case PPCISD::BDZ: return "PPCISD::BDZ"; - case PPCISD::MFFS: return "PPCISD::MFFS"; - case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; - case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; - case PPCISD::CR6SET: return "PPCISD::CR6SET"; - case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET"; - case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT"; - case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT"; - case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA"; - case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L"; - case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS"; - case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA"; - case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L"; - case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR"; - case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX"; - case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER"; - case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR"; - case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX"; - case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX"; - case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA"; - case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L"; - case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR"; - case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR"; - case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA"; - case PPCISD::ADDI_DTPREL_L: return 
"PPCISD::ADDI_DTPREL_L"; - case PPCISD::PADDI_DTPREL: - return "PPCISD::PADDI_DTPREL"; - case PPCISD::VADD_SPLAT: - return "PPCISD::VADD_SPLAT"; - case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD"; - case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN"; - case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; - case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64"; - case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE"; - case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; - case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH"; - case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF"; - case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR"; - case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR: - return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR"; - case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR: - return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR"; - case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD"; - case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD"; - case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG"; - case PPCISD::XXMFACC: return "PPCISD::XXMFACC"; - case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; - case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT"; - case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT"; - case PPCISD::FNMSUB: return "PPCISD::FNMSUB"; - case PPCISD::STRICT_FADDRTZ: - return "PPCISD::STRICT_FADDRTZ"; - case PPCISD::STRICT_FCTIDZ: - return "PPCISD::STRICT_FCTIDZ"; - case PPCISD::STRICT_FCTIWZ: - return "PPCISD::STRICT_FCTIWZ"; - case PPCISD::STRICT_FCTIDUZ: - return "PPCISD::STRICT_FCTIDUZ"; - case PPCISD::STRICT_FCTIWUZ: - return "PPCISD::STRICT_FCTIWUZ"; - case PPCISD::STRICT_FCFID: - return "PPCISD::STRICT_FCFID"; - case PPCISD::STRICT_FCFIDU: - return "PPCISD::STRICT_FCFIDU"; - case PPCISD::STRICT_FCFIDS: - return "PPCISD::STRICT_FCFIDS"; - case PPCISD::STRICT_FCFIDUS: - return "PPCISD::STRICT_FCFIDUS"; - case PPCISD::LXVRZX: return "PPCISD::LXVRZX"; - case PPCISD::STORE_COND: - return "PPCISD::STORE_COND"; - case PPCISD::SETBC: 
- return "PPCISD::SETBC"; - case PPCISD::SETBCR: - return "PPCISD::SETBCR"; - case PPCISD::ADDC: - return "PPCISD::ADDC"; - case PPCISD::ADDE: - return "PPCISD::ADDE"; - case PPCISD::SUBC: - return "PPCISD::SUBC"; - case PPCISD::SUBE: - return "PPCISD::SUBE"; - } - return nullptr; -} - EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C, EVT VT) const { if (!VT.isVector()) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index d967018982734..680b529b4e2e5 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -34,580 +34,6 @@ namespace llvm { - namespace PPCISD { - - // When adding a NEW PPCISD node please add it to the correct position in - // the enum. The order of elements in this enum matters! - // Values that are added between FIRST_MEMORY_OPCODE and LAST_MEMORY_OPCODE - // are considered memory opcodes and are treated differently than other - // entries. - enum NodeType : unsigned { - // Start the numbering where the builtin ops and target ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - /// FSEL - Traditional three-operand fsel node. - /// - FSEL, - - /// XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions. - XSMAXC, - XSMINC, - - /// FCFID - The FCFID instruction, taking an f64 operand and producing - /// and f64 value containing the FP representation of the integer that - /// was temporarily in the f64 operand. - FCFID, - - /// Newer FCFID[US] integer-to-floating-point conversion instructions for - /// unsigned integers and single-precision outputs. - FCFIDU, - FCFIDS, - FCFIDUS, - - /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 - /// operand, producing an f64 value containing the integer representation - /// of that FP value. - FCTIDZ, - FCTIWZ, - - /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for - /// unsigned integers with round toward zero. 
- FCTIDUZ, - FCTIWUZ, - - /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in - /// VSFRC that is sign-extended from ByteWidth to a 64-byte integer. - VEXTS, - - /// Reciprocal estimate instructions (unary FP ops). - FRE, - FRSQRTE, - - /// Test instruction for software square root. - FTSQRT, - - /// Square root instruction. - FSQRT, - - /// VPERM - The PPC VPERM Instruction. - /// - VPERM, - - /// XXSPLT - The PPC VSX splat instructions - /// - XXSPLT, - - /// XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for - /// converting immediate single precision numbers to double precision - /// vector or scalar. - XXSPLTI_SP_TO_DP, - - /// XXSPLTI32DX - The PPC XXSPLTI32DX instruction. - /// - XXSPLTI32DX, - - /// VECINSERT - The PPC vector insert instruction - /// - VECINSERT, - - /// VECSHL - The PPC vector shift left instruction - /// - VECSHL, - - /// XXPERMDI - The PPC XXPERMDI instruction - /// - XXPERMDI, - XXPERM, - - /// The CMPB instruction (takes two operands of i32 or i64). - CMPB, - - /// Hi/Lo - These represent the high and low 16-bit parts of a global - /// address respectively. These nodes have two operands, the first of - /// which must be a TargetGlobalAddress, and the second of which must be a - /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C', - /// though these are usually folded into other nodes. - Hi, - Lo, - - /// The following two target-specific nodes are used for calls through - /// function pointers in the 64-bit SVR4 ABI. - - /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) - /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to - /// compute an allocation on the stack. - DYNALLOC, - - /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to - /// compute an offset from native SP to the address of the most recent - /// dynamic alloca. 
- DYNAREAOFFSET, - - /// To avoid stack clash, allocation is performed by block and each block is - /// probed. - PROBED_ALLOCA, - - /// The result of the mflr at function entry, used for PIC code. - GlobalBaseReg, - - /// These nodes represent PPC shifts. - /// - /// For scalar types, only the last `n + 1` bits of the shift amounts - /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc. - /// for exact behaviors. - /// - /// For vector types, only the last n bits are used. See vsld. - SRL, - SRA, - SHL, - - /// These nodes represent PPC arithmetic operations with carry. - ADDC, - ADDE, - SUBC, - SUBE, - - /// FNMSUB - Negated multiply-subtract instruction. - FNMSUB, - - /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign - /// word and shift left immediate. - EXTSWSLI, - - /// The combination of sra[wd]i and addze used to implemented signed - /// integer division by a power of 2. The first operand is the dividend, - /// and the second is the constant shift amount (representing the - /// divisor). - SRA_ADDZE, - - /// CALL - A direct function call. - /// CALL_NOP is a call with the special NOP which follows 64-bit - /// CALL_NOTOC the caller does not use the TOC. - /// SVR4 calls and 32-bit/64-bit AIX calls. - CALL, - CALL_NOP, - CALL_NOTOC, - - /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a - /// MTCTR instruction. - MTCTR, - - /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a - /// BCTRL instruction. - BCTRL, - - /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl - /// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX - /// and 64-bit AIX. - BCTRL_LOAD_TOC, - - /// The variants that implicitly define rounding mode for calls with - /// strictfp semantics. 
- CALL_RM, - CALL_NOP_RM, - CALL_NOTOC_RM, - BCTRL_RM, - BCTRL_LOAD_TOC_RM, - - /// Return with a glue operand, matched by 'blr' - RET_GLUE, - - /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction. - /// This copies the bits corresponding to the specified CRREG into the - /// resultant GPR. Bits corresponding to other CR regs are undefined. - MFOCRF, - - /// Direct move from a VSX register to a GPR - MFVSR, - - /// Direct move from a GPR to a VSX register (algebraic) - MTVSRA, - - /// Direct move from a GPR to a VSX register (zero) - MTVSRZ, - - /// Direct move of 2 consecutive GPR to a VSX register. - BUILD_FP128, - - /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and - /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is - /// unsupported for this target. - /// Merge 2 GPRs to a single SPE register. - BUILD_SPE64, - - /// Extract SPE register component, second argument is high or low. - EXTRACT_SPE, - - /// Extract a subvector from signed integer vector and convert to FP. - /// It is primarily used to convert a (widened) illegal integer vector - /// type to a legal floating point vector type. - /// For example v2i32 -> widened to v4i32 -> v2f64 - SINT_VEC_TO_FP, - - /// Extract a subvector from unsigned integer vector and convert to FP. - /// As with SINT_VEC_TO_FP, used for converting illegal types. - UINT_VEC_TO_FP, - - /// PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to - /// place the value into the least significant element of the most - /// significant doubleword in the vector. This is not element zero for - /// anything smaller than a doubleword on either endianness. This node has - /// the same semantics as SCALAR_TO_VECTOR except that the value remains in - /// the aforementioned location in the vector register. 
- SCALAR_TO_VECTOR_PERMUTED, - - // FIXME: Remove these once the ANDI glue bug is fixed: - /// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the - /// eq or gt bit of CR0 after executing andi. x, 1. This is used to - /// implement truncation of i32 or i64 to i1. - ANDI_rec_1_EQ_BIT, - ANDI_rec_1_GT_BIT, - - // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit - // target (returns (Lo, Hi)). It takes a chain operand. - READ_TIME_BASE, - - // EH_SJLJ_SETJMP - SjLj exception handling setjmp. - EH_SJLJ_SETJMP, - - // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. - EH_SJLJ_LONGJMP, - - /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* - /// instructions. For lack of better number, we use the opcode number - /// encoding for the OPC field to identify the compare. For example, 838 - /// is VCMPGTSH. - VCMP, - - /// RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the - /// altivec VCMP*_rec instructions. For lack of better number, we use the - /// opcode number encoding for the OPC field to identify the compare. For - /// example, 838 is VCMPGTSH. - VCMP_rec, - - /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This - /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the - /// condition register to branch on, OPC is the branch opcode to use (e.g. - /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is - /// an optional input flag argument. - COND_BRANCH, - - /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based - /// loops. - BDNZ, - BDZ, - - /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding - /// towards zero. Used only as part of the long double-to-int - /// conversion sequence. - FADDRTZ, - - /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register. - MFFS, - - /// TC_RETURN - A tail call return. 
- /// operand #0 chain - /// operand #1 callee (register or absolute) - /// operand #2 stack adjustment - /// operand #3 optional in flag - TC_RETURN, - - /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls - CR6SET, - CR6UNSET, - - /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS - /// for non-position independent code on PPC32. - PPC32_GOT, - - /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and - /// local dynamic TLS and position indendepent code on PPC32. - PPC32_PICGOT, - - /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec - /// TLS model, produces an ADDIS8 instruction that adds the GOT - /// base to sym\@got\@tprel\@ha. - ADDIS_GOT_TPREL_HA, - - /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec - /// TLS model, produces a LD instruction with base register G8RReg - /// and offset sym\@got\@tprel\@l. This completes the addition that - /// finds the offset of "sym" relative to the thread pointer. - LD_GOT_TPREL_L, - - /// G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec - /// and local-exec TLS models, produces an ADD instruction that adds - /// the contents of G8RReg to the thread pointer. Symbol contains a - /// relocation sym\@tls which is to be replaced by the thread pointer - /// and identifies to the linker that the instruction is part of a - /// TLS sequence. - ADD_TLS, - - /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS - /// model, produces an ADDIS8 instruction that adds the GOT base - /// register to sym\@got\@tlsgd\@ha. - ADDIS_TLSGD_HA, - - /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS - /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by - /// ADDIS_TLSGD_L_ADDR until after register assignment. 
- ADDI_TLSGD_L, - - /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS - /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by - /// ADDIS_TLSGD_L_ADDR until after register assignment. - GET_TLS_ADDR, - - /// %x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on - /// 32-bit AIX, produces a call to .__get_tpointer to retrieve the thread - /// pointer. At the end of the call, the thread pointer is found in R3. - GET_TPOINTER, - - /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that - /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following - /// register assignment. - ADDI_TLSGD_L_ADDR, - - /// GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY - /// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY - /// Op that combines two register copies of TOC entries - /// (region handle into R3 and variable offset into R4) followed by a - /// GET_TLS_ADDR node which will be expanded to a call to .__tls_get_addr. - /// This node is used in 64-bit mode as well (in which case the result is - /// G8RC and inputs are X3/X4). - TLSGD_AIX, - - /// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, - /// produces a call to .__tls_get_mod(_$TLSML\@ml). - GET_TLS_MOD_AIX, - - /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) - /// Op that requires a single input of the module handle TOC entry in R3, - /// and generates a GET_TLS_MOD_AIX node which will be expanded into a call - /// to .__tls_get_mod. This node is used in both 32-bit and 64-bit modes. - /// The only difference is the register class. - TLSLD_AIX, - - /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS - /// model, produces an ADDIS8 instruction that adds the GOT base - /// register to sym\@got\@tlsld\@ha. - ADDIS_TLSLD_HA, - - /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS - /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym\@got\@tlsld\@l and stores the result in X3. 
Hidden by - /// ADDIS_TLSLD_L_ADDR until after register assignment. - ADDI_TLSLD_L, - - /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS - /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by - /// ADDIS_TLSLD_L_ADDR until after register assignment. - GET_TLSLD_ADDR, - - /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that - /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion - /// following register assignment. - ADDI_TLSLD_L_ADDR, - - /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS - /// model, produces an ADDIS8 instruction that adds X3 to - /// sym\@dtprel\@ha. - ADDIS_DTPREL_HA, - - /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS - /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym\@got\@dtprel\@l. - ADDI_DTPREL_L, - - /// G8RC = PADDI_DTPREL %x3, Symbol - For the pc-rel based local-dynamic TLS - /// model, produces a PADDI8 instruction that adds X3 to sym\@dtprel. - PADDI_DTPREL, - - /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded - /// during instruction selection to optimize a BUILD_VECTOR into - /// operations on splats. This is necessary to avoid losing these - /// optimizations due to constant folding. - VADD_SPLAT, - - /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little - /// endian. Maps to an xxswapd instruction that corrects an lxvd2x - /// or stxvd2x instruction. The chain is necessary because the - /// sequence replaces a load and needs to provide the same number - /// of outputs. - XXSWAPD, - - /// An SDNode for swaps that are not associated with any loads/stores - /// and thereby have no chain. - SWAP_NO_CHAIN, - - /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or - /// lower (IDX=1) half of v4f32 to v2f64. - FP_EXTEND_HALF, - - /// MAT_PCREL_ADDR = Materialize a PC Relative address. This can be done - /// either through an add like PADDI or through a PC Relative load like - /// PLD. 
- MAT_PCREL_ADDR, - - /// TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for - /// TLS global address when using dynamic access models. This can be done - /// through an add like PADDI. - TLS_DYNAMIC_MAT_PCREL_ADDR, - - /// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address - /// when using local exec access models, and when prefixed instructions are - /// available. This is used with ADD_TLS to produce an add like PADDI. - TLS_LOCAL_EXEC_MAT_ADDR, - - /// ACC_BUILD = Build an accumulator register from 4 VSX registers. - ACC_BUILD, - - /// PAIR_BUILD = Build a vector pair register from 2 VSX registers. - PAIR_BUILD, - - /// EXTRACT_VSX_REG = Extract one of the underlying vsx registers of - /// an accumulator or pair register. This node is needed because - /// EXTRACT_SUBVECTOR expects the input and output vectors to have the same - /// element type. - EXTRACT_VSX_REG, - - /// XXMFACC = This corresponds to the xxmfacc instruction. - XXMFACC, - - // Constrained conversion from floating point to int - FIRST_STRICTFP_OPCODE, - STRICT_FCTIDZ = FIRST_STRICTFP_OPCODE, - STRICT_FCTIWZ, - STRICT_FCTIDUZ, - STRICT_FCTIWUZ, - - /// Constrained integer-to-floating-point conversion instructions. - STRICT_FCFID, - STRICT_FCFIDU, - STRICT_FCFIDS, - STRICT_FCFIDUS, - - /// Constrained floating point add in round-to-zero mode. - STRICT_FADDRTZ, - LAST_STRICTFP_OPCODE = STRICT_FADDRTZ, - - /// SETBC - The ISA 3.1 (P10) SETBC instruction. - SETBC, - - /// SETBCR - The ISA 3.1 (P10) SETBCR instruction. - SETBCR, - - /// VSRQ - The ISA 3.1 (P10) Vector Shift right quadword instruction - VSRQ, - - // NOTE: The nodes below may require PC-Rel specific patterns if the - // address could be PC-Relative. When adding new nodes below, consider - // whether or not the address can be PC-Relative and add the corresponding - // PC-relative patterns and tests. - - /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a - /// byte-swapping store instruction. 
It byte-swaps the low "Type" bits of - /// the GPRC input, then stores it through Ptr. Type can be either i16 or - /// i32. - FIRST_MEMORY_OPCODE, - STBRX = FIRST_MEMORY_OPCODE, - - /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a - /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, - /// then puts it in the bottom bits of the GPRC. TYPE can be either i16 - /// or i32. - LBRX, - - /// STFIWX - The STFIWX instruction. The first operand is an input token - /// chain, then an f64 value to store, then an address to store it to. - STFIWX, - - /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point - /// load which sign-extends from a 32-bit integer value into the - /// destination 64-bit register. - LFIWAX, - - /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point - /// load which zero-extends from a 32-bit integer value into the - /// destination 64-bit register. - LFIWZX, - - /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an - /// integer smaller than 64 bits into a VSR. The integer is zero-extended. - /// This can be used for converting loaded integers to floating point. - LXSIZX, - - /// STXSIX - The STXSI[bh]X instruction. The first operand is an input - /// chain, then an f64 value to store, then an address to store it to, - /// followed by a byte-width for the store. - STXSIX, - - /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian. - /// Maps directly to an lxvd2x instruction that will be followed by - /// an xxswapd. - LXVD2X, - - /// LXVRZX - Load VSX Vector Rightmost and Zero Extend - /// This node represents v1i128 BUILD_VECTOR of a zero extending load - /// instruction from to i128. - /// Allows utilization of the Load VSX Vector Rightmost Instructions. - LXVRZX, - - /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian. - /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on - /// the vector type to load vector in big-endian element order. 
- LOAD_VEC_BE, - - /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a - /// v2f32 value into the lower half of a VSR register. - LD_VSX_LH, - - /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory - /// instructions such as LXVDSX, LXVWSX. - LD_SPLAT, - - /// VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory - /// that zero-extends. - ZEXT_LD_SPLAT, - - /// VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory - /// that sign-extends. - SEXT_LD_SPLAT, - - /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian. - /// Maps directly to an stxvd2x instruction that will be preceded by - /// an xxswapd. - STXVD2X, - - /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian. - /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on - /// the vector type to store vector in big-endian element order. - STORE_VEC_BE, - - /// Store scalar integers from VSR. - ST_VSR_SCAL_INT, - - /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes - /// except they ensure that the compare input is zero-extended for - /// sub-word versions because the atomic loads zero-extend. - ATOMIC_CMP_SWAP_8, - ATOMIC_CMP_SWAP_16, - - /// CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr - /// The store conditional instruction ST[BHWD]ARX that produces a glue - /// result to attach it to a conditional branch. - STORE_COND, - - /// GPRC = TOC_ENTRY GA, TOC - /// Loads the entry for GA from the TOC, where the TOC base is given by - /// the last operand. - TOC_ENTRY, - LAST_MEMORY_OPCODE = TOC_ENTRY, - }; - - } // end namespace PPCISD - /// Define some predicates that are used for node matching. namespace PPC { @@ -752,10 +178,6 @@ namespace llvm { explicit PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI); - /// getTargetNodeName() - This method returns the name of a target specific - /// DAG node. 
- const char *getTargetNodeName(unsigned Opcode) const override; - bool isSelectSupported(SelectSupportKind Kind) const override { // PowerPC does not support scalar condition selects on vectors. return (Kind != SelectSupportKind::ScalarCondVectorVal); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index f3998113ddd52..3ecc58c04e378 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -149,28 +149,49 @@ def SDT_PPCBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, [ def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>; def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>; + +// Square root instruction. def PPCfsqrt : SDNode<"PPCISD::FSQRT", SDTFPUnaryOp, []>; + +// Test instruction for software square root. def PPCftsqrt : SDNode<"PPCISD::FTSQRT", SDT_PPCFtsqrt,[]>; +// FCFID - The FCFID instruction, taking an f64 operand and producing +// an f64 value containing the FP representation of the integer that +// was temporarily in the f64 operand. def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>; + +// Newer FCFID[US] integer-to-floating-point conversion instructions for +// unsigned integers and single-precision outputs. def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>; def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>; def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>; + +// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 +// operand, producing an f64 value containing the integer representation +// of that FP value. def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>; def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; + +// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for +// unsigned integers with round toward zero.
def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>; def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>; +// VSRQ - The ISA 3.1 (P10) Vector Shift right quadword instruction def PPCvsrq: SDNode<"PPCISD::VSRQ", SDT_PPCVecShiftQuad, []>; -def PPCstrict_fcfid : SDNode<"PPCISD::STRICT_FCFID", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fcfidu : SDNode<"PPCISD::STRICT_FCFIDU", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fcfids : SDNode<"PPCISD::STRICT_FCFIDS", - SDTFPRoundOp, [SDNPHasChain]>; -def PPCstrict_fcfidus : SDNode<"PPCISD::STRICT_FCFIDUS", - SDTFPRoundOp, [SDNPHasChain]>; +// Constrained integer-to-floating-point conversion instructions. +let IsStrictFP = true in { + def PPCstrict_fcfid : SDNode<"PPCISD::STRICT_FCFID", + SDTFPUnaryOp, [SDNPHasChain]>; + def PPCstrict_fcfidu : SDNode<"PPCISD::STRICT_FCFIDU", + SDTFPUnaryOp, [SDNPHasChain]>; + def PPCstrict_fcfids : SDNode<"PPCISD::STRICT_FCFIDS", + SDTFPRoundOp, [SDNPHasChain]>; + def PPCstrict_fcfidus : SDNode<"PPCISD::STRICT_FCFIDUS", + SDTFPRoundOp, [SDNPHasChain]>; +} def PPCany_fcfid : PatFrags<(ops node:$op), [(PPCfcfid node:$op), @@ -185,28 +206,56 @@ def PPCany_fcfidus : PatFrags<(ops node:$op), [(PPCfcfidus node:$op), (PPCstrict_fcfidus node:$op)]>; +// Store scalar integers from VSR. def PPCstore_scal_int_from_vsr: SDNode<"PPCISD::ST_VSR_SCAL_INT", SDT_PPCstore_scal_int_from_vsr, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// STFIWX - The STFIWX instruction. The first operand is an input token +// chain, then an f64 value to store, then an address to store it to. def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point +// load which sign-extends from a 32-bit integer value into the +// destination 64-bit register. 
def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point +// load which zero-extends from a 32-bit integer value into the +// destination 64-bit register. def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an +// integer smaller than 64 bits into a VSR. The integer is zero-extended. +// This can be used for converting loaded integers to floating point. def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// STXSIX - The STXSI[bh]X instruction. The first operand is an input +// chain, then an f64 value to store, then an address to store it to, +// followed by a byte-width for the store. def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in +// VSFRC that is sign-extended from ByteWidth to a 64-bit integer. def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>; -// Extract FPSCR (not modeled at the DAG level). +// F8RC = MFFS - This moves the FPSCR (not modeled) into the register. def PPCmffs : SDNode<"PPCISD::MFFS", SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, [SDNPHasChain]>; -// Perform FADD in round-to-zero mode. +// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding +// towards zero. Used only as part of the long double-to-int +// conversion sequence. def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>; + +// Constrained floating point add in round-to-zero mode.
+let IsStrictFP = true in def PPCstrict_faddrtz: SDNode<"PPCISD::STRICT_FADDRTZ", SDTFPBinOp, [SDNPHasChain]>; @@ -214,72 +263,194 @@ def PPCany_faddrtz: PatFrags<(ops node:$lhs, node:$rhs), [(PPCfaddrtz node:$lhs, node:$rhs), (PPCstrict_faddrtz node:$lhs, node:$rhs)]>; +// FSEL - Traditional three-operand fsel node. def PPCfsel : SDNode<"PPCISD::FSEL", // Type constraint for fsel. SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0>, SDTCisVT<1, f64>]>, []>; + +// XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions. def PPCxsmaxc : SDNode<"PPCISD::XSMAXC", SDT_PPCFPMinMax, []>; def PPCxsminc : SDNode<"PPCISD::XSMINC", SDT_PPCFPMinMax, []>; + +// Hi/Lo - These represent the high and low 16-bit parts of a global +// address respectively. These nodes have two operands, the first of +// which must be a TargetGlobalAddress, and the second of which must be a +// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C', +// though these are usually folded into other nodes. def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>; def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>; + +// GPRC = TOC_ENTRY GA, TOC +// Loads the entry for GA from the TOC, where the TOC base is given by +// the last operand. def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad, SDNPMemOperand]>; +// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS +// for non-position independent code on PPC32. def PPCppc32GOT : SDNode<"PPCISD::PPC32_GOT", SDTIntLeaf, []>; +// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec +// TLS model, produces an ADDIS8 instruction that adds the GOT +// base to sym\@got\@tprel\@ha. def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>; + +// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec +// TLS model, produces a LD instruction with base register G8RReg +// and offset sym\@got\@tprel\@l. 
This completes the addition that +// finds the offset of "sym" relative to the thread pointer. def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp, [SDNPMayLoad]>; + +// G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec +// and local-exec TLS models, produces an ADD instruction that adds +// the contents of G8RReg to the thread pointer. Symbol contains a +// relocation sym\@tls which is to be replaced by the thread pointer +// and identifies to the linker that the instruction is part of a +// TLS sequence. def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>; + +// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS +// model, produces an ADDIS8 instruction that adds the GOT base +// register to sym\@got\@tlsgd\@ha. def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>; + +// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS +// model, produces an ADDI8 instruction that adds G8RReg to +// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by +// ADDI_TLSGD_L_ADDR until after register assignment. def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>; + +// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS +// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by +// ADDI_TLSGD_L_ADDR until after register assignment. def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>; + +// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, +// produces a call to .__tls_get_mod(_$TLSML\@ml). def PPCgetTlsMod : SDNode<"PPCISD::GET_TLS_MOD_AIX", SDTIntUnaryOp>; + +// %x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on +// 32-bit AIX, produces a call to .__get_tpointer to retrieve the thread +// pointer. At the end of the call, the thread pointer is found in R3.
def PPCgetTpointer : SDNode<"PPCISD::GET_TPOINTER", SDTIntLeaf, []>; + +// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that +// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following +// register assignment. def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR", SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>; + +// GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY +// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY +// Op that combines two register copies of TOC entries +// (region handle into R3 and variable offset into R4) followed by a +// GET_TLS_ADDR node which will be expanded to a call to .__tls_get_addr. +// This node is used in 64-bit mode as well (in which case the result is +// G8RC and inputs are X3/X4). def PPCTlsgdAIX : SDNode<"PPCISD::TLSGD_AIX", SDTIntBinOp>; + +// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) +// Op that requires a single input of the module handle TOC entry in R3, +// and generates a GET_TLS_MOD_AIX node which will be expanded into a call +// to .__tls_get_mod. This node is used in both 32-bit and 64-bit modes. +// The only difference is the register class. def PPCTlsldAIX : SDNode<"PPCISD::TLSLD_AIX", SDTIntUnaryOp>; + +// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS +// model, produces an ADDIS8 instruction that adds the GOT base +// register to sym\@got\@tlsld\@ha. def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>; + +// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS +// model, produces an ADDI8 instruction that adds G8RReg to +// sym\@got\@tlsld\@l and stores the result in X3. Hidden by +// ADDI_TLSLD_L_ADDR until after register assignment. def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>; + +// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS +// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by +// ADDI_TLSLD_L_ADDR until after register assignment.
def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>; + +// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that +// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion +// following register assignment. def PPCaddiTlsldLAddr : SDNode<"PPCISD::ADDI_TLSLD_L_ADDR", SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>; + +// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS +// model, produces an ADDIS8 instruction that adds X3 to +// sym\@dtprel\@ha. def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>; + +// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS +// model, produces an ADDI8 instruction that adds G8RReg to +// sym\@got\@dtprel\@l. def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>; + +// G8RC = PADDI_DTPREL %x3, Symbol - For the pc-rel based local-dynamic TLS +// model, produces a PADDI8 instruction that adds X3 to sym\@dtprel. def PPCpaddiDtprel : SDNode<"PPCISD::PADDI_DTPREL", SDTIntBinOp>; +// VPERM - The PPC VPERM Instruction. def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>; + +// XXSPLT - The PPC VSX splat instructions def PPCxxsplt : SDNode<"PPCISD::XXSPLT", SDT_PPCVecSplat, []>; + +// XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for +// converting immediate single precision numbers to double precision +// vector or scalar. def PPCxxspltidp : SDNode<"PPCISD::XXSPLTI_SP_TO_DP", SDT_PPCSpToDp, []>; + +// VECINSERT - The PPC vector insert instruction def PPCvecinsert : SDNode<"PPCISD::VECINSERT", SDT_PPCVecInsert, []>; + +// XXPERMDI - The PPC XXPERMDI instruction def PPCxxpermdi : SDNode<"PPCISD::XXPERMDI", SDT_PPCxxpermdi, []>; + +// VECSHL - The PPC vector shift left instruction def PPCvecshl : SDNode<"PPCISD::VECSHL", SDT_PPCVecShift, []>; +// The CMPB instruction (takes two operands of i32 or i64). 
def PPCcmpb : SDNode<"PPCISD::CMPB", SDTIntBinOp, []>; // These nodes represent the 32-bit PPC shifts that operate on 6-bit shift // amounts. These nodes are generated by the multi-precision shift code. +// +// For scalar types, only the last `n + 1` bits of the shift amounts +// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc. +// for exact behaviors. +// +// For vector types, only the last n bits are used. See vsld. def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>; def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>; def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>; +// FNMSUB - Negated multiply-subtract instruction. def PPCfnmsub : SDNode<"PPCISD::FNMSUB" , SDTFPTernaryOp>; +// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign +// word and shift left immediate. def PPCextswsli : SDNode<"PPCISD::EXTSWSLI" , SDT_PPCextswsli>; -def PPCstrict_fctidz : SDNode<"PPCISD::STRICT_FCTIDZ", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fctiwz : SDNode<"PPCISD::STRICT_FCTIWZ", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fctiduz : SDNode<"PPCISD::STRICT_FCTIDUZ", - SDTFPUnaryOp, [SDNPHasChain]>; -def PPCstrict_fctiwuz : SDNode<"PPCISD::STRICT_FCTIWUZ", +// Constrained conversion from floating point to int +let IsStrictFP = true in { + def PPCstrict_fctidz : SDNode<"PPCISD::STRICT_FCTIDZ", SDTFPUnaryOp, [SDNPHasChain]>; + def PPCstrict_fctiwz : SDNode<"PPCISD::STRICT_FCTIWZ", + SDTFPUnaryOp, [SDNPHasChain]>; + def PPCstrict_fctiduz : SDNode<"PPCISD::STRICT_FCTIDUZ", + SDTFPUnaryOp, [SDNPHasChain]>; + def PPCstrict_fctiwuz : SDNode<"PPCISD::STRICT_FCTIWUZ", + SDTFPUnaryOp, [SDNPHasChain]>; +} def PPCany_fctidz : PatFrags<(ops node:$op), [(PPCstrict_fctidz node:$op), @@ -294,19 +465,24 @@ def PPCany_fctiwuz : PatFrags<(ops node:$op), [(PPCstrict_fctiwuz node:$op), (PPCfctiwuz node:$op)]>; -// Move 2 i64 values into a VSX register +// Direct move of 2 consecutive GPR to a VSX register. 
def PPCbuild_fp128: SDNode<"PPCISD::BUILD_FP128", SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisSameSizeAs<1,2>, SDTCisSameAs<1,2>]>, []>; +// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and +// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is +// unsupported for this target. +// Merge 2 GPRs to a single SPE register. def PPCbuild_spe64: SDNode<"PPCISD::BUILD_SPE64", SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1,i32>, SDTCisVT<1,i32>]>, []>; +// Extract SPE register component, second argument is high or low. def PPCextract_spe : SDNode<"PPCISD::EXTRACT_SPE", SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, f64>, @@ -320,6 +496,11 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; + +// CALL - A direct function call. +// CALL_NOP is a call with the special NOP which follows 64-bit +// SVR4 calls and 32-bit/64-bit AIX calls. +// CALL_NOTOC - the caller does not use the TOC. def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; @@ -329,17 +510,28 @@ def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall, def PPCcall_notoc : SDNode<"PPCISD::CALL_NOTOC", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a +// MTCTR instruction. def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a +// BCTRL instruction. def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; + +// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl +// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX +// and 64-bit AIX.
def PPCbctrl_load_toc : SDNode<"PPCISD::BCTRL_LOAD_TOC", SDTypeProfile<0, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -// Call nodes for strictfp calls (that define RM). +// The variants that implicitly define rounding mode for calls with +// strictfp semantics. def PPCcall_rm : SDNode<"PPCISD::CALL_RM", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; @@ -357,42 +549,81 @@ def PPCbctrl_load_toc_rm : SDNode<"PPCISD::BCTRL_LOAD_TOC_RM", [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +// Return with a glue operand, matched by 'blr' def PPCretglue : SDNode<"PPCISD::RET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +// TC_RETURN - A tail call return. +// operand #0 chain +// operand #1 callee (register or absolute) +// operand #2 stack adjustment +// operand #3 optional in flag def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +// EH_SJLJ_SETJMP - SjLj exception handling setjmp. def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>, [SDNPHasChain, SDNPSideEffect]>; + +// EH_SJLJ_LONGJMP - SjLj exception handling longjmp. def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPSideEffect]>; +// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* +// instructions. For lack of better number, we use the opcode number +// encoding for the OPC field to identify the compare. For example, 838 +// is VCMPGTSH. def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; + +// RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the +// altivec VCMP*_rec instructions. For lack of better number, we use the +// opcode number encoding for the OPC field to identify the compare. For +// example, 838 is VCMPGTSH. 
def PPCvcmp_rec : SDNode<"PPCISD::VCMP_rec", SDT_PPCvcmp, [SDNPOutGlue]>; +// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This +// corresponds to the COND_BRANCH pseudo instruction. CRRC is the +// condition register to branch on, OPC is the branch opcode to use (e.g. +// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is +// an optional input flag argument. def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr, [SDNPHasChain, SDNPOptInGlue]>; -// PPC-specific atomic operations. +// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes +// except they ensure that the compare input is zero-extended for +// sub-word versions because the atomic loads zero-extend. def PPCatomicCmpSwap_8 : SDNode<"PPCISD::ATOMIC_CMP_SWAP_8", SDTAtomic3, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def PPCatomicCmpSwap_16 : SDNode<"PPCISD::ATOMIC_CMP_SWAP_16", SDTAtomic3, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; + +// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a +// byte-swapping load instruction. It loads "Type" bits, byte swaps it, +// then puts it in the bottom bits of the GPRC. TYPE can be either i16 +// or i32. def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a +// byte-swapping store instruction. It byte-swaps the low "Type" bits of +// the GPRC input, then stores it through Ptr. Type can be either i16 or +// i32. def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr +// The store conditional instruction ST[BHWD]ARX that produces a glue +// result to attach it to a conditional branch. 
def PPCStoreCond : SDNode<"PPCISD::STORE_COND", SDT_StoreCond, [SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPOutGlue]>; -// Instructions to set/unset CR bit 6 for SVR4 vararg calls +// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, @@ -401,17 +632,44 @@ def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, // Instructions to support dynamic alloca. def SDTDynOp : SDTypeProfile<1, 2, []>; def SDTDynAreaOp : SDTypeProfile<1, 1, []>; + +// The following two target-specific nodes are used for calls through +// function pointers in the 64-bit SVR4 ABI. + +// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) +// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to +// compute an allocation on the stack. def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>; + +// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to +// compute an offset from native SP to the address of the most recent +// dynamic alloca. def PPCdynareaoffset : SDNode<"PPCISD::DYNAREAOFFSET", SDTDynAreaOp, [SDNPHasChain]>; + +// To avoid stack clash, allocation is performed by block and each block is +// probed. def PPCprobedalloca : SDNode<"PPCISD::PROBED_ALLOCA", SDTDynOp, [SDNPHasChain]>; // PC Relative Specific Nodes + +// MAT_PCREL_ADDR = Materialize a PC Relative address. This can be done +// either through an add like PADDI or through a PC Relative load like +// PLD. def PPCmatpcreladdr : SDNode<"PPCISD::MAT_PCREL_ADDR", SDTIntUnaryOp, []>; + +// TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for +// TLS global address when using dynamic access models. This can be done +// through an add like PADDI. 
def PPCtlsdynamatpcreladdr : SDNode<"PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR", SDTIntUnaryOp, []>; + +// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address +// when using local exec access models, and when prefixed instructions are +// available. This is used with ADD_TLS to produce an add like PADDI. def PPCtlslocalexecmataddr : SDNode<"PPCISD::TLS_LOCAL_EXEC_MAT_ADDR", SDTIntUnaryOp, []>; +// These nodes represent PPC arithmetic operations with carry. def PPCaddc : SDNode<"PPCISD::ADDC", SDT_PPCBinaryArithWithFlagsOut, [SDNPCommutative]>; def PPCadde : SDNode<"PPCISD::ADDE", SDT_PPCBinaryArithWithFlagsInOut, @@ -2535,6 +2793,7 @@ defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$RST), (ins f8rc:$RA, f8rc:$RB), // Reciprocal estimates. let mayRaiseFPException = 1 in { +// Reciprocal estimate instructions (unary FP ops). defm FRE : XForm_26r<63, 24, (outs f8rc:$RST), (ins f8rc:$RB), "fre", "$RST, $RB", IIC_FPGeneral, [(set f64:$RST, (PPCfre f64:$RB))]>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td index 2d8c633b9fef6..bd9a999237c09 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrP10.td +++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td @@ -83,15 +83,31 @@ def SDT_PPCsetbc : SDTypeProfile<1, 1, [ // ISA 3.1 specific PPCISD nodes. // +// XXSPLTI32DX - The PPC XXSPLTI32DX instruction. def PPCxxsplti32dx : SDNode<"PPCISD::XXSPLTI32DX", SDT_PPCSplat32, []>; + +// ACC_BUILD = Build an accumulator register from 4 VSX registers. def PPCAccBuild : SDNode<"PPCISD::ACC_BUILD", SDT_PPCAccBuild, []>; + +// PAIR_BUILD = Build a vector pair register from 2 VSX registers. def PPCPairBuild : SDNode<"PPCISD::PAIR_BUILD", SDT_PPCPairBuild, []>; + +// EXTRACT_VSX_REG = Extract one of the underlying vsx registers of +// an accumulator or pair register. This node is needed because +// EXTRACT_SUBVECTOR expects the input and output vectors to have the same +// element type. 
def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx, []>; def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx, []>; + +// XXMFACC = This corresponds to the xxmfacc instruction. def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>; + +// SETBC - The ISA 3.1 (P10) SETBC instruction. def PPCsetbc : SDNode<"PPCISD::SETBC", SDT_PPCsetbc, []>; + +// SETBCR - The ISA 3.1 (P10) SETBCR instruction. def PPCsetbcr : SDNode<"PPCISD::SETBCR", SDT_PPCsetbc, []>; //===----------------------------------------------------------------------===// @@ -105,7 +121,10 @@ def SDT_PPCLXVRZX : SDTypeProfile<1, 2, [ SDTCisVT<0, v1i128>, SDTCisPtrTy<1>, SDTCisPtrTy<2> ]>; -// PPC Specific DAG Nodes. +// LXVRZX - Load VSX Vector Rightmost and Zero Extend +// This node represents a v1i128 BUILD_VECTOR of a zero-extending load +// instruction from a byte, halfword, word, or doubleword to i128. +// Allows utilization of the Load VSX Vector Rightmost Instructions. def PPClxvrzx : SDNode<"PPCISD::LXVRZX", SDT_PPCLXVRZX, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index 885bed670e319..d72201df5b002 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -87,31 +87,91 @@ def SDT_PPCxxperm : SDTypeProfile<1, 3, [ SDTCisVT<0, v2f64>, SDTCisVT<1, v2f64>, SDTCisVT<2, v2f64>, SDTCisVT<3, v4i32>]>; //--------------------------- Custom PPC nodes -------------------------------// + +// VSRC, CHAIN = LXVD2X CHAIN, Ptr - Occurs only for little endian. +// Maps directly to an lxvd2x instruction that will be followed by +// an xxswapd. def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian. +// Maps directly to an stxvd2x instruction that will be preceded by +// an xxswapd.
def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian. +// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on +// the vector type to load vector in big-endian element order. def PPCld_vec_be : SDNode<"PPCISD::LOAD_VEC_BE", SDT_PPCld_vec_be, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian. +// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on +// the vector type to store vector in big-endian element order. def PPCst_vec_be : SDNode<"PPCISD::STORE_VEC_BE", SDT_PPCst_vec_be, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little +// endian. Maps to an xxswapd instruction that corrects an lxvd2x +// or stxvd2x instruction. The chain is necessary because the +// sequence replaces a load and needs to provide the same number +// of outputs. def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>; + +// Direct move from a VSX register to a GPR def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>; + +// Direct move from a GPR to a VSX register (algebraic) def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; + +// Direct move from a GPR to a VSX register (zero) def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>; + +// Extract a subvector from signed integer vector and convert to FP. +// It is primarily used to convert a (widened) illegal integer vector +// type to a legal floating point vector type. +// For example v2i32 -> widened to v4i32 -> v2f64 def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>; + +// Extract a subvector from unsigned integer vector and convert to FP. +// As with SINT_VEC_TO_FP, used for converting illegal types. 
def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>; + +// An SDNode for swaps that are not associated with any loads/stores +// and thereby have no chain. def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>; +// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or +// lower (IDX=1) half of v4f32 to v2f64. def PPCfpexth : SDNode<"PPCISD::FP_EXTEND_HALF", SDT_PPCfpexth, []>; + +// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a +// v2f32 value into the lower half of a VSR register. def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory +// instructions such as LXVDSX, LXVWSX. def PPCldsplat : SDNode<"PPCISD::LD_SPLAT", SDT_PPCldsplat, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory +// that zero-extends. def PPCzextldsplat : SDNode<"PPCISD::ZEXT_LD_SPLAT", SDT_PPCldsplat, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory +// that sign-extends. def PPCsextldsplat : SDNode<"PPCISD::SEXT_LD_SPLAT", SDT_PPCldsplat, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to +// place the value into the least significant element of the most +// significant doubleword in the vector. This is not element zero for +// anything smaller than a doubleword on either endianness. This node has +// the same semantics as SCALAR_TO_VECTOR except that the value remains in +// the aforementioned location in the vector register. 
def PPCSToV : SDNode<"PPCISD::SCALAR_TO_VECTOR_PERMUTED", SDTypeProfile<1, 1, []>, []>; diff --git a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp index 93a4693c50168..80aa1122167df 100644 --- a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp @@ -7,20 +7,72 @@ //===----------------------------------------------------------------------===// #include "PPCSelectionDAGInfo.h" -#include "PPCISelLowering.h" +#include "llvm/CodeGen/SelectionDAG.h" + +#define GET_SDNODE_DESC +#include "PPCGenSDNodeInfo.inc" using namespace llvm; +PPCSelectionDAGInfo::PPCSelectionDAGInfo() + : SelectionDAGGenTargetInfo(PPCGenSDNodeInfo) {} + PPCSelectionDAGInfo::~PPCSelectionDAGInfo() = default; -bool PPCSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const { - return Opcode >= PPCISD::FIRST_MEMORY_OPCODE && - Opcode <= PPCISD::LAST_MEMORY_OPCODE; +const char *PPCSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const { + switch (static_cast(Opcode)) { + case PPCISD::GlobalBaseReg: + return "PPCISD::GlobalBaseReg"; + case PPCISD::SRA_ADDZE: + return "PPCISD::SRA_ADDZE"; + case PPCISD::READ_TIME_BASE: + return "PPCISD::READ_TIME_BASE"; + case PPCISD::MFOCRF: + return "PPCISD::MFOCRF"; + case PPCISD::ANDI_rec_1_EQ_BIT: + return "PPCISD::ANDI_rec_1_EQ_BIT"; + case PPCISD::ANDI_rec_1_GT_BIT: + return "PPCISD::ANDI_rec_1_GT_BIT"; + case PPCISD::BDNZ: + return "PPCISD::BDNZ"; + case PPCISD::BDZ: + return "PPCISD::BDZ"; + case PPCISD::PPC32_PICGOT: + return "PPCISD::PPC32_PICGOT"; + case PPCISD::VADD_SPLAT: + return "PPCISD::VADD_SPLAT"; + } + + return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode); } -bool PPCSelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const { - return Opcode >= PPCISD::FIRST_STRICTFP_OPCODE && - Opcode <= PPCISD::LAST_STRICTFP_OPCODE; +void PPCSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const { + switch 
(N->getOpcode()) { + default: + break; + case PPCISD::DYNAREAOFFSET: + // invalid number of results; expected 2, got 1 + case PPCISD::TOC_ENTRY: + // invalid number of results; expected 1, got 2 + case PPCISD::STORE_COND: + // invalid number of results; expected 2, got 3 + case PPCISD::LD_SPLAT: + case PPCISD::SEXT_LD_SPLAT: + case PPCISD::ZEXT_LD_SPLAT: + // invalid number of operands; expected 2, got 3 + case PPCISD::ST_VSR_SCAL_INT: + // invalid number of operands; expected 4, got 5 + case PPCISD::XXPERM: + // operand #1 must have type v2f64, but has type v16i8 + case PPCISD::ACC_BUILD: + // operand #3 must have type v4i32, but has type v16i8 + case PPCISD::PAIR_BUILD: + // operand #1 must have type v4i32, but has type v16i8 + return; + } + + SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N); } std::pair PPCSelectionDAGInfo::EmitTargetCodeForMemcmp( diff --git a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h index f962a7a5321aa..ffe8982ce1af4 100644 --- a/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h +++ b/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h @@ -11,15 +11,66 @@ #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#define GET_SDNODE_ENUM +#include "PPCGenSDNodeInfo.inc" + namespace llvm { +namespace PPCISD { + +enum NodeType : unsigned { + /// The result of the mflr at function entry, used for PIC code. + GlobalBaseReg = GENERATED_OPCODE_END, + + /// The combination of sra[wd]i and addze used to implement signed + /// integer division by a power of 2. The first operand is the dividend, + /// and the second is the constant shift amount (representing the + /// divisor). + SRA_ADDZE, + + /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction. + /// This copies the bits corresponding to the specified CRREG into the + /// resultant GPR. Bits corresponding to other CR regs are undefined.
+ MFOCRF, + + // FIXME: Remove these once the ANDI glue bug is fixed: + /// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the + /// eq or gt bit of CR0 after executing andi. x, 1. This is used to + /// implement truncation of i32 or i64 to i1. + ANDI_rec_1_EQ_BIT, + ANDI_rec_1_GT_BIT, + + // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit + // target (returns (Lo, Hi)). It takes a chain operand. + READ_TIME_BASE, -class PPCSelectionDAGInfo : public SelectionDAGTargetInfo { + /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based + /// loops. + BDNZ, + BDZ, + + /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and + /// local dynamic TLS and position independent code on PPC32. + PPC32_PICGOT, + + /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded + /// during instruction selection to optimize a BUILD_VECTOR into + /// operations on splats. This is necessary to avoid losing these + /// optimizations due to constant folding. + VADD_SPLAT, +}; + +} // namespace PPCISD + +class PPCSelectionDAGInfo : public SelectionDAGGenTargetInfo { public: + PPCSelectionDAGInfo(); + ~PPCSelectionDAGInfo() override; - bool isTargetMemoryOpcode(unsigned Opcode) const override; + const char *getTargetNodeName(unsigned Opcode) const override; - bool isTargetStrictFPOpcode(unsigned Opcode) const override; + void verifyTargetNode(const SelectionDAG &DAG, + const SDNode *N) const override; std::pair EmitTargetCodeForMemcmp(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain,