Skip to content

Conversation

@s-barannikov
Copy link
Contributor

This allows SDNodes to be validated against their expected type profiles
and reduces the number of changes required to add a new node.

The verification functionality detected a few issues, two of them were fixed (missing SDNPMemOperand property on TCGEN05_MMA nodes and extra glue operand/result on CallPrototype), the one remaining is with ProxyReg node, see NVPTXSelectionDAGInfo::verifyTargetNode().

Part of #119709.

@llvmbot
Copy link
Member

llvmbot commented Nov 17, 2025

@llvm/pr-subscribers-backend-nvptx

Author: Sergei Barannikov (s-barannikov)

Changes

This allows SDNodes to be validated against their expected type profiles
and reduces the number of changes required to add a new node.

The verification functionality detected a few issues, two of them were fixed (missing SDNPMemOperand property on TCGEN05_MMA nodes and extra glue operand/result on CallPrototype), the one remaining is with ProxyReg node, see NVPTXSelectionDAGInfo::verifyTargetNode().

Part of #119709.


Patch is 20.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168367.diff

8 Files Affected:

  • (modified) llvm/lib/Target/NVPTX/CMakeLists.txt (+1)
  • (modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h (+1)
  • (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp (+8-98)
  • (modified) llvm/lib/Target/NVPTX/NVPTXISelLowering.h (-114)
  • (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+11-2)
  • (modified) llvm/lib/Target/NVPTX/NVPTXIntrinsics.td (+1-1)
  • (modified) llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp (+51-3)
  • (modified) llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h (+41-1)
diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt
index 693f0d0b35edc..f9c24750c4836 100644
--- a/llvm/lib/Target/NVPTX/CMakeLists.txt
+++ b/llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -6,6 +6,7 @@ tablegen(LLVM NVPTXGenAsmWriter.inc -gen-asm-writer)
 tablegen(LLVM NVPTXGenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM NVPTXGenInstrInfo.inc -gen-instr-info)
 tablegen(LLVM NVPTXGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM NVPTXGenSDNodeInfo.inc -gen-sd-node-info)
 tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)
 
 add_public_tablegen_target(NVPTXCommonTableGen)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index d525531766ddf..055f1ff47306d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -17,6 +17,7 @@
 #include "NVPTX.h"
 #include "NVPTXISelLowering.h"
 #include "NVPTXRegisterInfo.h"
+#include "NVPTXSelectionDAGInfo.h"
 #include "NVPTXTargetMachine.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 3e44e47c56ad7..8fc3a68de6c79 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -15,6 +15,7 @@
 #include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "NVPTX.h"
 #include "NVPTXISelDAGToDAG.h"
+#include "NVPTXSelectionDAGInfo.h"
 #include "NVPTXSubtarget.h"
 #include "NVPTXTargetMachine.h"
 #include "NVPTXTargetObjectFile.h"
@@ -1107,97 +1108,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
                      {MVT::i32, MVT::i128, MVT::v4f32, MVT::Other}, Custom);
 }
 
-const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
-
-#define MAKE_CASE(V)                                                           \
-  case V:                                                                      \
-    return #V;
-
-  switch ((NVPTXISD::NodeType)Opcode) {
-  case NVPTXISD::FIRST_NUMBER:
-    break;
-
-    MAKE_CASE(NVPTXISD::ATOMIC_CMP_SWAP_B128)
-    MAKE_CASE(NVPTXISD::ATOMIC_SWAP_B128)
-    MAKE_CASE(NVPTXISD::RET_GLUE)
-    MAKE_CASE(NVPTXISD::DeclareArrayParam)
-    MAKE_CASE(NVPTXISD::DeclareScalarParam)
-    MAKE_CASE(NVPTXISD::CALL)
-    MAKE_CASE(NVPTXISD::MoveParam)
-    MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
-    MAKE_CASE(NVPTXISD::BUILD_VECTOR)
-    MAKE_CASE(NVPTXISD::CallPrototype)
-    MAKE_CASE(NVPTXISD::ProxyReg)
-    MAKE_CASE(NVPTXISD::LoadV2)
-    MAKE_CASE(NVPTXISD::LoadV4)
-    MAKE_CASE(NVPTXISD::LoadV8)
-    MAKE_CASE(NVPTXISD::LDUV2)
-    MAKE_CASE(NVPTXISD::LDUV4)
-    MAKE_CASE(NVPTXISD::StoreV2)
-    MAKE_CASE(NVPTXISD::StoreV4)
-    MAKE_CASE(NVPTXISD::StoreV8)
-    MAKE_CASE(NVPTXISD::FSHL_CLAMP)
-    MAKE_CASE(NVPTXISD::FSHR_CLAMP)
-    MAKE_CASE(NVPTXISD::BFI)
-    MAKE_CASE(NVPTXISD::PRMT)
-    MAKE_CASE(NVPTXISD::FCOPYSIGN)
-    MAKE_CASE(NVPTXISD::FMAXNUM3)
-    MAKE_CASE(NVPTXISD::FMINNUM3)
-    MAKE_CASE(NVPTXISD::FMAXIMUM3)
-    MAKE_CASE(NVPTXISD::FMINIMUM3)
-    MAKE_CASE(NVPTXISD::DYNAMIC_STACKALLOC)
-    MAKE_CASE(NVPTXISD::STACKRESTORE)
-    MAKE_CASE(NVPTXISD::STACKSAVE)
-    MAKE_CASE(NVPTXISD::SETP_F16X2)
-    MAKE_CASE(NVPTXISD::SETP_BF16X2)
-    MAKE_CASE(NVPTXISD::MUL_WIDE_SIGNED)
-    MAKE_CASE(NVPTXISD::MUL_WIDE_UNSIGNED)
-    MAKE_CASE(NVPTXISD::BrxEnd)
-    MAKE_CASE(NVPTXISD::BrxItem)
-    MAKE_CASE(NVPTXISD::BrxStart)
-    MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED)
-    MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X)
-    MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y)
-    MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
-    MAKE_CASE(
-        NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
-    MAKE_CASE(
-        NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
-    MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
-    MAKE_CASE(
-        NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
-    MAKE_CASE(
-        NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
-    MAKE_CASE(NVPTXISD::CVT_E4M3X4_F32X4_RS_SF)
-    MAKE_CASE(NVPTXISD::CVT_E5M2X4_F32X4_RS_SF)
-    MAKE_CASE(NVPTXISD::CVT_E2M3X4_F32X4_RS_SF)
-    MAKE_CASE(NVPTXISD::CVT_E3M2X4_F32X4_RS_SF)
-    MAKE_CASE(NVPTXISD::CVT_E2M1X4_F32X4_RS_SF)
-  }
-  return nullptr;
-
-#undef MAKE_CASE
-}
-
 TargetLoweringBase::LegalizeTypeAction
 NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
   if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
@@ -2032,7 +1942,7 @@ static ISD::NodeType getScalarOpcodeForReduction(unsigned ReductionOpcode) {
 }
 
 /// Get 3-input scalar reduction opcode
-static std::optional<NVPTXISD::NodeType>
+static std::optional<unsigned>
 getScalar3OpcodeForReduction(unsigned ReductionOpcode) {
   switch (ReductionOpcode) {
   case ISD::VECREDUCE_FMAX:
@@ -2931,7 +2841,7 @@ static SDValue lowerCvtRSIntrinsics(SDValue Op, SelectionDAG &DAG) {
   using NVPTX::PTXCvtMode::CvtMode;
 
   auto [OpCode, RetTy, CvtModeFlag] =
-      [&]() -> std::tuple<NVPTXISD::NodeType, MVT::SimpleValueType, uint32_t> {
+      [&]() -> std::tuple<unsigned, MVT::SimpleValueType, uint32_t> {
     switch (IntrinsicID) {
     case Intrinsic::nvvm_f32x4_to_e4m3x4_rs_relu_satfinite:
       return {NVPTXISD::CVT_E4M3X4_F32X4_RS_SF, MVT::v4i8,
@@ -3314,7 +3224,7 @@ SDValue NVPTXTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
   // Generate BrxEnd nodes
   SDValue EndOps[] = {Chain.getValue(0), DAG.getBasicBlock(MBBs.back()), Index,
                       IdV, Chain.getValue(1)};
-  SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, VTs, EndOps);
+  SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, MVT::Other, EndOps);
 
   return BrxEnd;
 }
@@ -5457,7 +5367,7 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
   SDLoc DL(LD);
 
   // the new opcode after we double the number of operands
-  NVPTXISD::NodeType Opcode;
+  unsigned Opcode;
   SmallVector<SDValue> Operands(LD->ops());
   unsigned OldNumOutputs; // non-glue, non-chain outputs
   switch (LD->getOpcode()) {
@@ -5540,7 +5450,7 @@ static SDValue combinePackingMovIntoStore(SDNode *N,
   auto *ST = cast<MemSDNode>(N);
 
   // The new opcode after we double the number of operands.
-  NVPTXISD::NodeType Opcode;
+  unsigned Opcode;
   switch (N->getOpcode()) {
   case ISD::STORE:
     // Any packed type is legal, so the legalizer will not have lowered
@@ -5675,7 +5585,7 @@ static SDValue PerformFADDCombine(SDNode *N,
 }
 
 /// Get 3-input version of a 2-input min/max opcode
-static NVPTXISD::NodeType getMinMax3Opcode(unsigned MinMax2Opcode) {
+static unsigned getMinMax3Opcode(unsigned MinMax2Opcode) {
   switch (MinMax2Opcode) {
   case ISD::FMAXNUM:
   case ISD::FMAXIMUMNUM:
@@ -5706,7 +5616,7 @@ static SDValue PerformFMinMaxCombine(SDNode *N,
   SDValue Op0 = N->getOperand(0);
   SDValue Op1 = N->getOperand(1);
   unsigned MinMaxOp2 = N->getOpcode();
-  NVPTXISD::NodeType MinMaxOp3 = getMinMax3Opcode(MinMaxOp2);
+  unsigned MinMaxOp3 = getMinMax3Opcode(MinMaxOp2);
 
   if (Op0.getOpcode() == MinMaxOp2 && Op0.hasOneUse()) {
     // (maxnum (maxnum a, b), c) -> (maxnum3 a, b, c)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 63fa0bb9159ff..d71a86fd463f6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -20,118 +20,6 @@
 #include "llvm/Support/AtomicOrdering.h"
 
 namespace llvm {
-namespace NVPTXISD {
-enum NodeType : unsigned {
-  // Start the numbering from where ISD NodeType finishes.
-  FIRST_NUMBER = ISD::BUILTIN_OP_END,
-  RET_GLUE,
-
-  /// These nodes represent a parameter declaration. In PTX this will look like:
-  ///   .param .align 16 .b8 param0[1024];
-  ///   .param .b32 retval0;
-  ///
-  /// DeclareArrayParam(Chain, Externalsym, Align, Size, Glue)
-  /// DeclareScalarParam(Chain, Externalsym, Size, Glue)
-  DeclareScalarParam,
-  DeclareArrayParam,
-
-  /// This node represents a PTX call instruction. It's operands are as follows:
-  ///
-  /// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
-  ///      NumParams, Callee, Proto)
-  CALL,
-
-  MoveParam,
-  CallPrototype,
-  ProxyReg,
-  FSHL_CLAMP,
-  FSHR_CLAMP,
-  MUL_WIDE_SIGNED,
-  MUL_WIDE_UNSIGNED,
-  SETP_F16X2,
-  SETP_BF16X2,
-  BFI,
-  PRMT,
-
-  /// This node is similar to ISD::BUILD_VECTOR except that the output may be
-  /// implicitly bitcast to a scalar. This allows for the representation of
-  /// packing move instructions for vector types which are not legal i.e. v2i32
-  BUILD_VECTOR,
-
-  /// This node is the inverse of NVPTX::BUILD_VECTOR. It takes a single value
-  /// which may be a scalar and unpacks it into multiple values by implicitly
-  /// converting it to a vector.
-  UNPACK_VECTOR,
-
-  FCOPYSIGN,
-  FMAXNUM3,
-  FMINNUM3,
-  FMAXIMUM3,
-  FMINIMUM3,
-
-  DYNAMIC_STACKALLOC,
-  STACKRESTORE,
-  STACKSAVE,
-  BrxStart,
-  BrxItem,
-  BrxEnd,
-  CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED,
-  CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X,
-  CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y,
-  CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z,
-  CVT_E4M3X4_F32X4_RS_SF,
-  CVT_E5M2X4_F32X4_RS_SF,
-  CVT_E2M3X4_F32X4_RS_SF,
-  CVT_E3M2X4_F32X4_RS_SF,
-  CVT_E2M1X4_F32X4_RS_SF,
-
-  FIRST_MEMORY_OPCODE,
-
-  /// These nodes are used to lower atomic instructions with i128 type. They are
-  /// similar to the generic nodes, but the input and output values are split
-  /// into two 64-bit values.
-  /// ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP_B128(INCHAIN, ptr, cmpLo, cmpHi,
-  ///                                               swapLo, swapHi)
-  /// ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP_B128(INCHAIN, ptr, amtLo, amtHi)
-  ATOMIC_CMP_SWAP_B128 = FIRST_MEMORY_OPCODE,
-  ATOMIC_SWAP_B128,
-
-  LoadV2,
-  LoadV4,
-  LoadV8,
-  LDUV2, // LDU.v2
-  LDUV4, // LDU.v4
-  StoreV2,
-  StoreV4,
-  StoreV8,
-  TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1,
-  TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2,
-  TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
-  TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
-  TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1,
-  TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2,
-  TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
-  TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
-  TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
-  TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
-  TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
-  TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
-  TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1,
-  TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2,
-  TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
-  TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
-  TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1,
-  TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2,
-  TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
-  TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
-  TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
-  TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
-  TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
-  TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
-  LAST_MEMORY_OPCODE =
-      TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
-};
-}
 
 class NVPTXSubtarget;
 
@@ -144,8 +32,6 @@ class NVPTXTargetLowering : public TargetLowering {
                                const NVPTXSubtarget &STI);
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 
-  const char *getTargetNodeName(unsigned Opcode) const override;
-
   bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                           MachineFunction &MF,
                           unsigned Intrinsic) const override;
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index f0bdf472b96ed..b3d046dd05d51 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1686,13 +1686,19 @@ def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<0, 1>]
 
 def SDTProxyReg : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>]>;
 
-
+// These nodes represent a parameter declaration. In PTX this will look like:
+//   .param .align 16 .b8 param0[1024];
+//   .param .b32 retval0;
+//
+// DeclareArrayParam(Chain, Externalsym, Align, Size, Glue)
+// DeclareScalarParam(Chain, Externalsym, Size, Glue)
 def declare_array_param :
   SDNode<"NVPTXISD::DeclareArrayParam", SDTDeclareArrayParam,
          [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
 def declare_scalar_param :
   SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParam,
          [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+
 def MoveParam :
   SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
 def proxy_reg :
@@ -1754,7 +1760,7 @@ def : Pat<(declare_scalar_param externalsym:$a, imm:$size),
 def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 def CallPrototype :
   SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
-         [SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
+         [SDNPHasChain, SDNPSideEffect]>;
 def ProtoIdent : Operand<i32> { let PrintMethod = "printProtoIdent"; }
 def CALL_PROTOTYPE :
   NVPTXInst<(outs), (ins ProtoIdent:$ident),
@@ -2181,6 +2187,9 @@ foreach vt = [v2f32, v2i32] in {
 def: Pat<(v2i16 (scalar_to_vector i16:$a)),
          (CVT_u32_u16 $a, CvtNONE)>;
 
+// This node is similar to ISD::BUILD_VECTOR except that the output may be
+// implicitly bitcast to a scalar. This allows for the representation of
+// packing move instructions for vector types which are not legal i.e. v2i32
 def nvptx_build_vector : SDNode<"NVPTXISD::BUILD_VECTOR", SDTypeProfile<1, 2, []>, []>;
 
 def : Pat<(i64 (nvptx_build_vector i32:$a, i32:$b)),
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index ea69a54e6db37..bcdb46eca9744 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -5625,7 +5625,7 @@ class Tcgen05MMADisableOutputLaneSDNode<bit Sp, string ASpace,
                 # "_DISABLE_OUTPUT_LANE_CG" # CtaGroup
                 # !if(!eq(AShift, 1), "_ASHIFT", ""),
         Tcgen05MMADisableOutputLaneTypeProfile<Sp, ASpace, CtaGroup, ScaleInput>,
-                    [SDNPHasChain, SDNPSideEffect]>;
+                    [SDNPHasChain, SDNPSideEffect, SDNPMemOperand]>;
 
 class Tcgen05MMADisableOutputLaneInst<bit Sp, string ASpace,
                      string Kind, int CtaGroup, string CollectorUsageStr,
diff --git a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp
index d2035c6f8166f..e8ea1ad6c404d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.cpp
@@ -7,13 +7,61 @@
 //===----------------------------------------------------------------------===//
 
 #include "NVPTXSelectionDAGInfo.h"
-#include "NVPTXISelLowering.h"
+
+#define GET_SDNODE_DESC
+#include "NVPTXGenSDNodeInfo.inc"
 
 using namespace llvm;
 
+NVPTXSelectionDAGInfo::NVPTXSelectionDAGInfo()
+    : SelectionDAGGenTargetInfo(NVPTXGenSDNodeInfo) {}
+
 NVPTXSelectionDAGInfo::~NVPTXSelectionDAGInfo() = default;
 
+const char *NVPTXSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
+#define MAKE_CASE(V)                                                           \
+  case V:                                                                      \
+    return #V;
+
+  // These nodes don't have corresponding entries in *.td files yet.
+  switch (static_cast<NVPTXISD::NodeType>(Opcode)) {
+    MAKE_CASE(NVPTXISD::ATOMIC_CMP_SWAP_B128)
+    MAKE_CASE(NVPTXISD::ATOMIC_SWAP_B128)
+    MAKE_CASE(NVPTXISD::LoadV2)
+    MAKE_CASE(NVPTXISD::LoadV4)
+    MAKE_CASE(NVPTXISD::LoadV8)
+    MAKE_CASE(NVPTXISD::LDUV2)
+    MAKE_CASE(NVPTXISD::LDUV4)
+    MAKE_CASE(NVPTXISD::StoreV2)
+    MAKE_CASE(NVPTXISD::StoreV4)
+    MAKE_CASE(NVPTXISD::StoreV8)
+    MAKE_CASE(NVPTXISD::SETP_F16X2)
+    MAKE_CASE(NVPTXISD::SETP_BF16X2)
+    MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
+  }
+#undef MAKE_CASE
+
+  return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
+}
+
 bool NVPTXSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
-  return Opcode >= NVPTXISD::FIRST_MEMORY_OPCODE &&
-         Opcode <= NVPTXISD::LAST_MEMORY_OPCODE;
+  // These nodes don't have corresponding entries in *.td files.
+  if (Opcode >= NVPTXISD::FIRST_MEMORY_OPCODE &&
+      Opcode <= NVPTXISD::LAST_MEMORY_OPCODE)
+    return true;
+
+  return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode);
+}
+
+void NVPTXSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
+                                             const SDNode *N) const {
+  switch (N->getOpcode()) {
+  default:
+    break;
+  case NVPTXISD::ProxyReg:
+    // invalid number of results; expected 2, got 1
+    return;
+  }
+
+  return SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h
index 9d69f48026c79..07c130baeaa4f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSelectionDAGInfo.h
@@ -11,13 +11,53 @@
 
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 
+#define GET_SDNODE_ENUM
+#include "NVPTXGenSDNodeInfo.inc"
+
 namespace llvm {
+namespace NVPTXISD {
+
+enum NodeType : unsigned {
+  SETP_F16X2 = GENERATED_OPCODE_END,
+  SETP_BF16X2,
+  UNPACK_VECTOR,
+
+  FIRST_MEMORY_OPCODE,
+
+  /// These nodes are used to lower atomic instructions with i128 type. They are
+  /// similar to the generic nodes, but the input and output values are split
+  /// into two 64-bit values.
+  /// ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP_B128(INCHAIN, ptr, cmpLo, cmpHi,
+  ///                                               swapLo, swapHi)
+  /// ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP_B128(INCHAIN, ptr, amtLo, amtHi)
+  ATOMIC_CMP_SWAP_B128 = FIRST_MEMORY_OPCODE,
+  ATOMIC_SWAP_B128,
+
+  LoadV2,
+  LoadV4,
+  LoadV8,
+  LDUV2, // LDU.v2
+  LDUV4, // LDU.v4
+  StoreV2,
+  StoreV4,
+  StoreV8,
+  LAST_MEMORY_OPCODE = StoreV8,
+};
+
+} // namespace NVPTXISD
 
-class NVPTXSelectionDAGInfo : public SelectionDAGTargetInfo {
+class NVPTXSelectionDAGInfo : public SelectionDAGGenTargetInfo {
 public:
+  NVPTXSelectionDAGInfo();
+
   ~NVPTXSelectionDAGInfo() override;
 
+  const char *getTargetNodeName(unsigned Opcode) const override;
+
   bool isTargetMemoryOpcode(unsigned Opcode) const override;
+
+  void verifyTargetNode(const SelectionDAG &DAG,
+                        const SDNode *N) const override...
[truncated]

@schwarzschild-radius
Copy link
Contributor

Thanks for the fix! LGTM on the tcgen05.mma related changes!

@s-barannikov s-barannikov enabled auto-merge (squash) November 18, 2025 20:39
@s-barannikov s-barannikov merged commit e47e9f3 into llvm:main Nov 18, 2025
8 of 15 checks passed
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Nov 18, 2025
This allows SDNodes to be validated against their expected type profiles
and reduces the number of changes required to add a new node.

The verification functionality detected a few issues, two of them were
fixed (missing `SDNPMemOperand` property on `TCGEN05_MMA` nodes and
extra glue operand/result on `CallPrototype`), the one remaining is with
`ProxyReg` node, see `NVPTXSelectionDAGInfo::verifyTargetNode()`.

Part of #119709.

Pull Request: llvm/llvm-project#168367
pranavk added a commit that referenced this pull request Nov 19, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants