diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt
index 693f0d0b35edc..f9c24750c4836 100644
--- a/llvm/lib/Target/NVPTX/CMakeLists.txt
+++ b/llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -6,6 +6,7 @@ tablegen(LLVM NVPTXGenAsmWriter.inc -gen-asm-writer)
 tablegen(LLVM NVPTXGenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM NVPTXGenInstrInfo.inc -gen-instr-info)
 tablegen(LLVM NVPTXGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM NVPTXGenSDNodeInfo.inc -gen-sd-node-info)
 tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)
 
 add_public_tablegen_target(NVPTXCommonTableGen)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index d525531766ddf..055f1ff47306d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -17,6 +17,7 @@
 #include "NVPTX.h"
 #include "NVPTXISelLowering.h"
 #include "NVPTXRegisterInfo.h"
+#include "NVPTXSelectionDAGInfo.h"
 #include "NVPTXTargetMachine.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 3e44e47c56ad7..8fc3a68de6c79 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -15,6 +15,7 @@
 #include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "NVPTX.h"
 #include "NVPTXISelDAGToDAG.h"
+#include "NVPTXSelectionDAGInfo.h"
 #include "NVPTXSubtarget.h"
 #include "NVPTXTargetMachine.h"
 #include "NVPTXTargetObjectFile.h"
@@ -1107,97 +1108,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
                      {MVT::i32, MVT::i128, MVT::v4f32, MVT::Other}, Custom);
 }
 
-const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
-
-#define MAKE_CASE(V)                                                           \
-  case V:                                                                      \
-    return #V;
-
-  switch ((NVPTXISD::NodeType)Opcode) {
-  case NVPTXISD::FIRST_NUMBER:
-    break;
-
-    MAKE_CASE(NVPTXISD::ATOMIC_CMP_SWAP_B128)
-    MAKE_CASE(NVPTXISD::ATOMIC_SWAP_B128)
-    MAKE_CASE(NVPTXISD::RET_GLUE)
-    MAKE_CASE(NVPTXISD::DeclareArrayParam)
-    MAKE_CASE(NVPTXISD::DeclareScalarParam)
-    MAKE_CASE(NVPTXISD::CALL)
-    MAKE_CASE(NVPTXISD::MoveParam)
-    MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
-    MAKE_CASE(NVPTXISD::BUILD_VECTOR)
-    MAKE_CASE(NVPTXISD::CallPrototype)
-    MAKE_CASE(NVPTXISD::ProxyReg)
-    MAKE_CASE(NVPTXISD::LoadV2)
-    MAKE_CASE(NVPTXISD::LoadV4)
-    MAKE_CASE(NVPTXISD::LoadV8)
-    MAKE_CASE(NVPTXISD::LDUV2)
-    MAKE_CASE(NVPTXISD::LDUV4)
-    MAKE_CASE(NVPTXISD::StoreV2)
-    MAKE_CASE(NVPTXISD::StoreV4)
-    MAKE_CASE(NVPTXISD::StoreV8)
-    MAKE_CASE(NVPTXISD::FSHL_CLAMP)
-    MAKE_CASE(NVPTXISD::FSHR_CLAMP)
-    MAKE_CASE(NVPTXISD::BFI)
-    MAKE_CASE(NVPTXISD::PRMT)
-    MAKE_CASE(NVPTXISD::FCOPYSIGN)
-    MAKE_CASE(NVPTXISD::FMAXNUM3)
-    MAKE_CASE(NVPTXISD::FMINNUM3)
-    MAKE_CASE(NVPTXISD::FMAXIMUM3)
-    MAKE_CASE(NVPTXISD::FMINIMUM3)
-    MAKE_CASE(NVPTXISD::DYNAMIC_STACKALLOC)
-    MAKE_CASE(NVPTXISD::STACKRESTORE)
-    MAKE_CASE(NVPTXISD::STACKSAVE)
-    MAKE_CASE(NVPTXISD::SETP_F16X2)
-    MAKE_CASE(NVPTXISD::SETP_BF16X2)
-    MAKE_CASE(NVPTXISD::MUL_WIDE_SIGNED)
-    MAKE_CASE(NVPTXISD::MUL_WIDE_UNSIGNED)
-    MAKE_CASE(NVPTXISD::BrxEnd)
-    MAKE_CASE(NVPTXISD::BrxItem)
-    MAKE_CASE(NVPTXISD::BrxStart)
-    MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED)
-    MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X)
-    MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y)
-    MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
-    MAKE_CASE(
-        NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
-    MAKE_CASE(
-        NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
-    MAKE_CASE(
-        NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
-    MAKE_CASE(
-        NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
-    MAKE_CASE(NVPTXISD::CVT_E4M3X4_F32X4_RS_SF)
-    MAKE_CASE(NVPTXISD::CVT_E5M2X4_F32X4_RS_SF)
-    MAKE_CASE(NVPTXISD::CVT_E2M3X4_F32X4_RS_SF)
-    MAKE_CASE(NVPTXISD::CVT_E3M2X4_F32X4_RS_SF)
-    MAKE_CASE(NVPTXISD::CVT_E2M1X4_F32X4_RS_SF)
-  }
-  return nullptr;
-
-#undef MAKE_CASE
-}
-
 TargetLoweringBase::LegalizeTypeAction
 NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
   if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
@@ -2032,7 +1942,7 @@ static ISD::NodeType getScalarOpcodeForReduction(unsigned ReductionOpcode) {
 }
 
 /// Get 3-input scalar reduction opcode
-static std::optional<NVPTXISD::NodeType>
+static std::optional<unsigned>
 getScalar3OpcodeForReduction(unsigned ReductionOpcode) {
   switch (ReductionOpcode) {
   case ISD::VECREDUCE_FMAX:
@@ -2931,7 +2841,7 @@ static SDValue lowerCvtRSIntrinsics(SDValue Op, SelectionDAG &DAG) {
   using NVPTX::PTXCvtMode::CvtMode;
 
   auto [OpCode, RetTy, CvtModeFlag] =
-      [&]() -> std::tuple<NVPTXISD::NodeType, MVT, unsigned> {
+      [&]() -> std::tuple<unsigned, MVT, unsigned> {
     switch (IntrinsicID) {
     case Intrinsic::nvvm_f32x4_to_e4m3x4_rs_relu_satfinite:
       return {NVPTXISD::CVT_E4M3X4_F32X4_RS_SF, MVT::v4i8,
@@ -3314,7 +3224,7 @@ SDValue NVPTXTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
   // Generate BrxEnd nodes
   SDValue EndOps[] = {Chain.getValue(0), DAG.getBasicBlock(MBBs.back()), Index,
                       IdV, Chain.getValue(1)};
-  SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, VTs, EndOps);
+  SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, MVT::Other, EndOps);
   return BrxEnd;
 }
 
@@ -5457,7 +5367,7 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
   SDLoc DL(LD);
 
   // the new opcode after we double the number of operands
-  NVPTXISD::NodeType Opcode;
+  unsigned Opcode;
   SmallVector<SDValue> Operands(LD->ops());
   unsigned OldNumOutputs; // non-glue, non-chain outputs
   switch (LD->getOpcode()) {
@@ -5540,7 +5450,7 @@ static SDValue combinePackingMovIntoStore(SDNode *N,
   auto *ST = cast<MemSDNode>(N);
 
   // The new opcode after we double the number of operands.
-  NVPTXISD::NodeType Opcode;
+  unsigned Opcode;
   switch (N->getOpcode()) {
   case ISD::STORE:
     // Any packed type is legal, so the legalizer will not have lowered
@@ -5675,7 +5585,7 @@ static SDValue PerformFADDCombine(SDNode *N,
 }
 
 /// Get 3-input version of a 2-input min/max opcode
-static NVPTXISD::NodeType getMinMax3Opcode(unsigned MinMax2Opcode) {
+static unsigned getMinMax3Opcode(unsigned MinMax2Opcode) {
   switch (MinMax2Opcode) {
   case ISD::FMAXNUM:
   case ISD::FMAXIMUMNUM:
@@ -5706,7 +5616,7 @@ static SDValue PerformFMinMaxCombine(SDNode *N,
   SDValue Op0 = N->getOperand(0);
   SDValue Op1 = N->getOperand(1);
   unsigned MinMaxOp2 = N->getOpcode();
-  NVPTXISD::NodeType MinMaxOp3 = getMinMax3Opcode(MinMaxOp2);
+  unsigned MinMaxOp3 = getMinMax3Opcode(MinMaxOp2);
 
   if (Op0.getOpcode() == MinMaxOp2 && Op0.hasOneUse()) {
     // (maxnum (maxnum a, b), c) -> (maxnum3 a, b, c)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 63fa0bb9159ff..d71a86fd463f6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -20,118 +20,6 @@
 #include "llvm/Support/AtomicOrdering.h"
 
 namespace llvm {
-namespace NVPTXISD {
-enum NodeType : unsigned {
-  // Start the numbering from where ISD NodeType finishes.
-  FIRST_NUMBER = ISD::BUILTIN_OP_END,
-  RET_GLUE,
-
-  /// These nodes represent a parameter declaration. In PTX this will look like:
-  ///   .param .align 16 .b8 param0[1024];
-  ///   .param .b32 retval0;
-  ///
-  /// DeclareArrayParam(Chain, Externalsym, Align, Size, Glue)
-  /// DeclareScalarParam(Chain, Externalsym, Size, Glue)
-  DeclareScalarParam,
-  DeclareArrayParam,
-
-  /// This node represents a PTX call instruction. It's operands are as follows:
-  ///
-  /// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
-  ///      NumParams, Callee, Proto)
-  CALL,
-
-  MoveParam,
-  CallPrototype,
-  ProxyReg,
-  FSHL_CLAMP,
-  FSHR_CLAMP,
-  MUL_WIDE_SIGNED,
-  MUL_WIDE_UNSIGNED,
-  SETP_F16X2,
-  SETP_BF16X2,
-  BFI,
-  PRMT,
-
-  /// This node is similar to ISD::BUILD_VECTOR except that the output may be
-  /// implicitly bitcast to a scalar. This allows for the representation of
-  /// packing move instructions for vector types which are not legal i.e. v2i32
-  BUILD_VECTOR,
-
-  /// This node is the inverse of NVPTX::BUILD_VECTOR. It takes a single value
-  /// which may be a scalar and unpacks it into multiple values by implicitly
-  /// converting it to a vector.
-  UNPACK_VECTOR,
-
-  FCOPYSIGN,
-  FMAXNUM3,
-  FMINNUM3,
-  FMAXIMUM3,
-  FMINIMUM3,
-
-  DYNAMIC_STACKALLOC,
-  STACKRESTORE,
-  STACKSAVE,
-  BrxStart,
-  BrxItem,
-  BrxEnd,
-  CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED,
-  CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X,
-  CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y,
-  CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z,
-  CVT_E4M3X4_F32X4_RS_SF,
-  CVT_E5M2X4_F32X4_RS_SF,
-  CVT_E2M3X4_F32X4_RS_SF,
-  CVT_E3M2X4_F32X4_RS_SF,
-  CVT_E2M1X4_F32X4_RS_SF,
-
-  FIRST_MEMORY_OPCODE,
-
-  /// These nodes are used to lower atomic instructions with i128 type. They are
-  /// similar to the generic nodes, but the input and output values are split
-  /// into two 64-bit values.
-  /// ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP_B128(INCHAIN, ptr, cmpLo, cmpHi,
-  ///                                               swapLo, swapHi)
-  /// ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP_B128(INCHAIN, ptr, amtLo, amtHi)
-  ATOMIC_CMP_SWAP_B128 = FIRST_MEMORY_OPCODE,
-  ATOMIC_SWAP_B128,
-
-  LoadV2,
-  LoadV4,
-  LoadV8,
-  LDUV2, // LDU.v2
-  LDUV4, // LDU.v4
-  StoreV2,
-  StoreV4,
-  StoreV8,
-  TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1,
-  TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2,
-  TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
-  TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
-  TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1,
-  TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2,
-  TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
-  TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
-  TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
-  TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
-  TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
-  TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
-  TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1,
-  TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2,
-  TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
-  TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
-  TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1,
-  TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2,
-  TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
-  TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
-  TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
-  TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
-  TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
-  TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
-  LAST_MEMORY_OPCODE =
-      TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
-};
-}
 
 class NVPTXSubtarget;
 
@@ -144,8 +32,6 @@ class NVPTXTargetLowering : public TargetLowering {
                       const NVPTXSubtarget &STI);
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 
-  const char *getTargetNodeName(unsigned Opcode) const override;
-
   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                          MachineFunction &MF,
                          unsigned Intrinsic) const override;
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index ff9d9723dddea..18b3a5ce914e8 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1686,13 +1686,19 @@ def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<0, 1>]
 def SDTProxyReg : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>]>;
 
-
+// These nodes represent a parameter declaration. In PTX this will look like:
+//   .param .align 16 .b8 param0[1024];
+//   .param .b32 retval0;
+//
+// DeclareArrayParam(Chain, Externalsym, Align, Size, Glue)
+// DeclareScalarParam(Chain, Externalsym, Size, Glue)
 def declare_array_param :
   SDNode<"NVPTXISD::DeclareArrayParam", SDTDeclareArrayParam,
          [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
 
 def declare_scalar_param :
   SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParam,
         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+
 def MoveParam : SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
 
 def proxy_reg :
@@ -1754,7 +1760,7 @@ def : Pat<(declare_scalar_param externalsym:$a, imm:$size),
 
 def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 def CallPrototype :
   SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
-         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+         [SDNPHasChain, SDNPSideEffect]>;
 def ProtoIdent : Operand<i32> { let PrintMethod = "printProtoIdent"; }
 def CALL_PROTOTYPE : NVPTXInst<(outs), (ins ProtoIdent:$ident),
@@ -2181,6 +2187,9 @@ foreach vt = [v2f32, v2i32] in {
 
 def: Pat<(v2i16 (scalar_to_vector i16:$a)), (CVT_u32_u16 $a, CvtNONE)>;
 
+// This node is similar to ISD::BUILD_VECTOR except that the output may be
+// implicitly bitcast to a scalar. This allows for the representation of
+// packing move instructions for vector types which are not legal i.e. v2i32
 def nvptx_build_vector :
   SDNode<"NVPTXISD::BUILD_VECTOR", SDTypeProfile<1, 2, []>, []>;
 def : Pat<(i64 (nvptx_build_vector i32:$a, i32:$b)),
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index ea69a54e6db37..bcdb46eca9744 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -5625,7 +5625,7 @@ class Tcgen05MMADisableOutputLaneSDNode
-              [SDNPHasChain, SDNPSideEffect]>;
+              [SDNPHasChain, SDNPSideEffect, SDNPMemOperand]>;
 
 class Tcgen05MMADisableOutputLaneInst
diff --git a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp
--- a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp
+const char *NVPTXSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
+#define MAKE_CASE(V)                                                           \
+  case V:                                                                      \
+    return #V;
+
+  switch (static_cast<NVPTXISD::NodeType>(Opcode)) {
+    MAKE_CASE(NVPTXISD::ATOMIC_CMP_SWAP_B128)
+    MAKE_CASE(NVPTXISD::ATOMIC_SWAP_B128)
+    MAKE_CASE(NVPTXISD::LoadV2)
+    MAKE_CASE(NVPTXISD::LoadV4)
+    MAKE_CASE(NVPTXISD::LoadV8)
+    MAKE_CASE(NVPTXISD::LDUV2)
+    MAKE_CASE(NVPTXISD::LDUV4)
+    MAKE_CASE(NVPTXISD::StoreV2)
+    MAKE_CASE(NVPTXISD::StoreV4)
+    MAKE_CASE(NVPTXISD::StoreV8)
+    MAKE_CASE(NVPTXISD::SETP_F16X2)
+    MAKE_CASE(NVPTXISD::SETP_BF16X2)
+    MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
+  }
+#undef MAKE_CASE
+
+  return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
+}
+
 bool NVPTXSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
-  return Opcode >= NVPTXISD::FIRST_MEMORY_OPCODE &&
-         Opcode <= NVPTXISD::LAST_MEMORY_OPCODE;
+  // These nodes don't have corresponding entries in *.td files.
+  if (Opcode >= NVPTXISD::FIRST_MEMORY_OPCODE &&
+      Opcode <= NVPTXISD::LAST_MEMORY_OPCODE)
+    return true;
+
+  return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode);
+}
+
+void NVPTXSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
+                                             const SDNode *N) const {
+  switch (N->getOpcode()) {
+  default:
+    break;
+  case NVPTXISD::ProxyReg:
+    // invalid number of results; expected 2, got 1
+    return;
+  }
+
+  return SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h
index 9d69f48026c79..07c130baeaa4f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h
@@ -11,13 +11,53 @@
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 
+#define GET_SDNODE_ENUM
+#include "NVPTXGenSDNodeInfo.inc"
+
 namespace llvm {
+namespace NVPTXISD {
+
+enum NodeType : unsigned {
+  SETP_F16X2 = GENERATED_OPCODE_END,
+  SETP_BF16X2,
+  UNPACK_VECTOR,
+
+  FIRST_MEMORY_OPCODE,
+
+  /// These nodes are used to lower atomic instructions with i128 type. They are
+  /// similar to the generic nodes, but the input and output values are split
+  /// into two 64-bit values.
+  /// ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP_B128(INCHAIN, ptr, cmpLo, cmpHi,
+  ///                                               swapLo, swapHi)
+  /// ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP_B128(INCHAIN, ptr, amtLo, amtHi)
+  ATOMIC_CMP_SWAP_B128 = FIRST_MEMORY_OPCODE,
+  ATOMIC_SWAP_B128,
+
+  LoadV2,
+  LoadV4,
+  LoadV8,
+  LDUV2, // LDU.v2
+  LDUV4, // LDU.v4
+  StoreV2,
+  StoreV4,
+  StoreV8,
+  LAST_MEMORY_OPCODE = StoreV8,
+};
+
+} // namespace NVPTXISD
+
-class NVPTXSelectionDAGInfo : public SelectionDAGTargetInfo {
+class NVPTXSelectionDAGInfo : public SelectionDAGGenTargetInfo {
 public:
+  NVPTXSelectionDAGInfo();
+  ~NVPTXSelectionDAGInfo() override;
+  const char *getTargetNodeName(unsigned Opcode) const override;
+  bool isTargetMemoryOpcode(unsigned Opcode) const override;
+
+  void verifyTargetNode(const SelectionDAG &DAG,
+                        const SDNode *N) const override;
 };
 
 } // namespace llvm
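
For context only (not part of the patch above): targets that adopt the TableGen-emitted SDNode descriptions usually wire the generated table into the SelectionDAGGenTargetInfo base from the target's SelectionDAGInfo .cpp file, which is also where the constructor and destructor declared in the new header get defined. A minimal sketch of that wiring follows, assuming the generated table is exposed as NVPTXGenSDNodeInfo under GET_SDNODE_DESC, following the pattern used by other in-tree targets; the exact symbol names are not confirmed by this diff.

// NVPTXSelectionDAGInfo.cpp -- illustrative sketch, not taken from the patch.
#include "NVPTXSelectionDAGInfo.h"

using namespace llvm;

// Pull in the SDNodeDesc table emitted by `tablegen ... -gen-sd-node-info`.
// The table name below (NVPTXGenSDNodeInfo) is an assumption based on the
// naming convention other targets use for this generated symbol.
#define GET_SDNODE_DESC
#include "NVPTXGenSDNodeInfo.inc"

// SelectionDAGGenTargetInfo stores a reference to the generated node table and
// provides the default getTargetNodeName/isTargetMemoryOpcode/verifyTargetNode
// behavior that the hand-written overrides in this file fall back to for the
// opcodes that do have .td entries.
NVPTXSelectionDAGInfo::NVPTXSelectionDAGInfo()
    : SelectionDAGGenTargetInfo(NVPTXGenSDNodeInfo) {}

NVPTXSelectionDAGInfo::~NVPTXSelectionDAGInfo() = default;

The split mirrors the overrides in the patch: opcodes described in .td files are named and verified from the generated table, while the hand-maintained NVPTXISD enum (numbered from GENERATED_OPCODE_END) keeps only the nodes that have no .td counterpart.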