Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions llvm/lib/Target/NVPTX/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ tablegen(LLVM NVPTXGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM NVPTXGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM NVPTXGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM NVPTXGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM NVPTXGenSDNodeInfo.inc -gen-sd-node-info)
tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)

add_public_tablegen_target(NVPTXCommonTableGen)
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "NVPTX.h"
#include "NVPTXISelLowering.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXSelectionDAGInfo.h"
#include "NVPTXTargetMachine.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
Expand Down
106 changes: 8 additions & 98 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
#include "NVPTXISelDAGToDAG.h"
#include "NVPTXSelectionDAGInfo.h"
#include "NVPTXSubtarget.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
Expand Down Expand Up @@ -1107,97 +1108,6 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
{MVT::i32, MVT::i128, MVT::v4f32, MVT::Other}, Custom);
}

// Return the human-readable name of a target-specific (NVPTXISD) opcode for
// SelectionDAG debug dumps. Returns nullptr for any opcode without an entry
// here (including generic ISD opcodes), per the
// TargetLowering::getTargetNodeName contract.
const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {

// Expands to: case NVPTXISD::X: return "NVPTXISD::X";
// Note the stringified argument keeps the full NVPTXISD:: qualifier.
#define MAKE_CASE(V) \
case V: \
return #V;

// Casting to NVPTXISD::NodeType lets the compiler warn if an enumerator is
// missing from this switch.
switch ((NVPTXISD::NodeType)Opcode) {
// FIRST_NUMBER is a numbering sentinel, not a real node; fall through to
// the nullptr return below.
case NVPTXISD::FIRST_NUMBER:
break;

MAKE_CASE(NVPTXISD::ATOMIC_CMP_SWAP_B128)
MAKE_CASE(NVPTXISD::ATOMIC_SWAP_B128)
MAKE_CASE(NVPTXISD::RET_GLUE)
MAKE_CASE(NVPTXISD::DeclareArrayParam)
MAKE_CASE(NVPTXISD::DeclareScalarParam)
MAKE_CASE(NVPTXISD::CALL)
MAKE_CASE(NVPTXISD::MoveParam)
MAKE_CASE(NVPTXISD::UNPACK_VECTOR)
MAKE_CASE(NVPTXISD::BUILD_VECTOR)
MAKE_CASE(NVPTXISD::CallPrototype)
MAKE_CASE(NVPTXISD::ProxyReg)
MAKE_CASE(NVPTXISD::LoadV2)
MAKE_CASE(NVPTXISD::LoadV4)
MAKE_CASE(NVPTXISD::LoadV8)
MAKE_CASE(NVPTXISD::LDUV2)
MAKE_CASE(NVPTXISD::LDUV4)
MAKE_CASE(NVPTXISD::StoreV2)
MAKE_CASE(NVPTXISD::StoreV4)
MAKE_CASE(NVPTXISD::StoreV8)
MAKE_CASE(NVPTXISD::FSHL_CLAMP)
MAKE_CASE(NVPTXISD::FSHR_CLAMP)
MAKE_CASE(NVPTXISD::BFI)
MAKE_CASE(NVPTXISD::PRMT)
MAKE_CASE(NVPTXISD::FCOPYSIGN)
MAKE_CASE(NVPTXISD::FMAXNUM3)
MAKE_CASE(NVPTXISD::FMINNUM3)
MAKE_CASE(NVPTXISD::FMAXIMUM3)
MAKE_CASE(NVPTXISD::FMINIMUM3)
MAKE_CASE(NVPTXISD::DYNAMIC_STACKALLOC)
MAKE_CASE(NVPTXISD::STACKRESTORE)
MAKE_CASE(NVPTXISD::STACKSAVE)
MAKE_CASE(NVPTXISD::SETP_F16X2)
MAKE_CASE(NVPTXISD::SETP_BF16X2)
MAKE_CASE(NVPTXISD::MUL_WIDE_SIGNED)
MAKE_CASE(NVPTXISD::MUL_WIDE_UNSIGNED)
MAKE_CASE(NVPTXISD::BrxEnd)
MAKE_CASE(NVPTXISD::BrxItem)
MAKE_CASE(NVPTXISD::BrxStart)
MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED)
MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X)
MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y)
MAKE_CASE(NVPTXISD::CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
MAKE_CASE(
NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
MAKE_CASE(
NVPTXISD::TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1)
MAKE_CASE(NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2)
MAKE_CASE(
NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT)
MAKE_CASE(
NVPTXISD::TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT)
MAKE_CASE(NVPTXISD::CVT_E4M3X4_F32X4_RS_SF)
MAKE_CASE(NVPTXISD::CVT_E5M2X4_F32X4_RS_SF)
MAKE_CASE(NVPTXISD::CVT_E2M3X4_F32X4_RS_SF)
MAKE_CASE(NVPTXISD::CVT_E3M2X4_F32X4_RS_SF)
MAKE_CASE(NVPTXISD::CVT_E2M1X4_F32X4_RS_SF)
}
// Unknown or generic opcode: no target-specific name to report.
return nullptr;

#undef MAKE_CASE
}

TargetLoweringBase::LegalizeTypeAction
NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
Expand Down Expand Up @@ -2032,7 +1942,7 @@ static ISD::NodeType getScalarOpcodeForReduction(unsigned ReductionOpcode) {
}

/// Get 3-input scalar reduction opcode
static std::optional<NVPTXISD::NodeType>
static std::optional<unsigned>
getScalar3OpcodeForReduction(unsigned ReductionOpcode) {
switch (ReductionOpcode) {
case ISD::VECREDUCE_FMAX:
Expand Down Expand Up @@ -2931,7 +2841,7 @@ static SDValue lowerCvtRSIntrinsics(SDValue Op, SelectionDAG &DAG) {
using NVPTX::PTXCvtMode::CvtMode;

auto [OpCode, RetTy, CvtModeFlag] =
[&]() -> std::tuple<NVPTXISD::NodeType, MVT::SimpleValueType, uint32_t> {
[&]() -> std::tuple<unsigned, MVT::SimpleValueType, uint32_t> {
switch (IntrinsicID) {
case Intrinsic::nvvm_f32x4_to_e4m3x4_rs_relu_satfinite:
return {NVPTXISD::CVT_E4M3X4_F32X4_RS_SF, MVT::v4i8,
Expand Down Expand Up @@ -3314,7 +3224,7 @@ SDValue NVPTXTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
// Generate BrxEnd nodes
SDValue EndOps[] = {Chain.getValue(0), DAG.getBasicBlock(MBBs.back()), Index,
IdV, Chain.getValue(1)};
SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, VTs, EndOps);
SDValue BrxEnd = DAG.getNode(NVPTXISD::BrxEnd, DL, MVT::Other, EndOps);

return BrxEnd;
}
Expand Down Expand Up @@ -5457,7 +5367,7 @@ combineUnpackingMovIntoLoad(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
SDLoc DL(LD);

// the new opcode after we double the number of operands
NVPTXISD::NodeType Opcode;
unsigned Opcode;
SmallVector<SDValue> Operands(LD->ops());
unsigned OldNumOutputs; // non-glue, non-chain outputs
switch (LD->getOpcode()) {
Expand Down Expand Up @@ -5540,7 +5450,7 @@ static SDValue combinePackingMovIntoStore(SDNode *N,
auto *ST = cast<MemSDNode>(N);

// The new opcode after we double the number of operands.
NVPTXISD::NodeType Opcode;
unsigned Opcode;
switch (N->getOpcode()) {
case ISD::STORE:
// Any packed type is legal, so the legalizer will not have lowered
Expand Down Expand Up @@ -5675,7 +5585,7 @@ static SDValue PerformFADDCombine(SDNode *N,
}

/// Get 3-input version of a 2-input min/max opcode
static NVPTXISD::NodeType getMinMax3Opcode(unsigned MinMax2Opcode) {
static unsigned getMinMax3Opcode(unsigned MinMax2Opcode) {
switch (MinMax2Opcode) {
case ISD::FMAXNUM:
case ISD::FMAXIMUMNUM:
Expand Down Expand Up @@ -5706,7 +5616,7 @@ static SDValue PerformFMinMaxCombine(SDNode *N,
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
unsigned MinMaxOp2 = N->getOpcode();
NVPTXISD::NodeType MinMaxOp3 = getMinMax3Opcode(MinMaxOp2);
unsigned MinMaxOp3 = getMinMax3Opcode(MinMaxOp2);

if (Op0.getOpcode() == MinMaxOp2 && Op0.hasOneUse()) {
// (maxnum (maxnum a, b), c) -> (maxnum3 a, b, c)
Expand Down
114 changes: 0 additions & 114 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,118 +20,6 @@
#include "llvm/Support/AtomicOrdering.h"

namespace llvm {
// Target-specific SelectionDAG node opcodes for the NVPTX backend.
namespace NVPTXISD {
enum NodeType : unsigned {
// Start the numbering from where ISD NodeType finishes.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
RET_GLUE,

/// These nodes represent a parameter declaration. In PTX this will look like:
/// .param .align 16 .b8 param0[1024];
/// .param .b32 retval0;
///
/// DeclareArrayParam(Chain, Externalsym, Align, Size, Glue)
/// DeclareScalarParam(Chain, Externalsym, Size, Glue)
DeclareScalarParam,
DeclareArrayParam,

/// This node represents a PTX call instruction. It's operands are as follows:
///
/// CALL(Chain, IsConvergent, IsIndirectCall/IsUniform, NumReturns,
/// NumParams, Callee, Proto)
CALL,

MoveParam,
CallPrototype,
ProxyReg,
FSHL_CLAMP,
FSHR_CLAMP,
MUL_WIDE_SIGNED,
MUL_WIDE_UNSIGNED,
SETP_F16X2,
SETP_BF16X2,
BFI,
PRMT,

/// This node is similar to ISD::BUILD_VECTOR except that the output may be
/// implicitly bitcast to a scalar. This allows for the representation of
/// packing move instructions for vector types which are not legal i.e. v2i32
BUILD_VECTOR,

/// This node is the inverse of NVPTX::BUILD_VECTOR. It takes a single value
/// which may be a scalar and unpacks it into multiple values by implicitly
/// converting it to a vector.
UNPACK_VECTOR,

FCOPYSIGN,
FMAXNUM3,
FMINNUM3,
FMAXIMUM3,
FMINIMUM3,

DYNAMIC_STACKALLOC,
STACKRESTORE,
STACKSAVE,
BrxStart,
BrxItem,
BrxEnd,
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_IS_CANCELED,
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_X,
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Y,
CLUSTERLAUNCHCONTROL_QUERY_CANCEL_GET_FIRST_CTAID_Z,
CVT_E4M3X4_F32X4_RS_SF,
CVT_E5M2X4_F32X4_RS_SF,
CVT_E2M3X4_F32X4_RS_SF,
CVT_E3M2X4_F32X4_RS_SF,
CVT_E2M1X4_F32X4_RS_SF,

// Marker: every opcode from FIRST_MEMORY_OPCODE through LAST_MEMORY_OPCODE
// is a memory-accessing node. NOTE(review): presumably consumed by an
// isTargetMemoryOpcode-style range check elsewhere — confirm before
// reordering anything in this range.
FIRST_MEMORY_OPCODE,

/// These nodes are used to lower atomic instructions with i128 type. They are
/// similar to the generic nodes, but the input and output values are split
/// into two 64-bit values.
/// ValLo, ValHi, OUTCHAIN = ATOMIC_CMP_SWAP_B128(INCHAIN, ptr, cmpLo, cmpHi,
/// swapLo, swapHi)
/// ValLo, ValHi, OUTCHAIN = ATOMIC_SWAP_B128(INCHAIN, ptr, amtLo, amtHi)
ATOMIC_CMP_SWAP_B128 = FIRST_MEMORY_OPCODE,
ATOMIC_SWAP_B128,

LoadV2,
LoadV4,
LoadV8,
LDUV2, // LDU.v2
LDUV4, // LDU.v4
StoreV2,
StoreV4,
StoreV8,
TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1,
TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2,
TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
TCGEN05_MMA_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1,
TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2,
TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
TCGEN05_MMA_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1,
TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2,
TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
TCGEN05_MMA_SP_SHARED_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1,
TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2,
TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
// Keep this equal to the final memory opcode above; the FIRST/LAST pair
// delimits the memory-node range.
LAST_MEMORY_OPCODE =
TCGEN05_MMA_SP_TENSOR_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
};
}

class NVPTXSubtarget;

Expand All @@ -144,8 +32,6 @@ class NVPTXTargetLowering : public TargetLowering {
const NVPTXSubtarget &STI);
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

const char *getTargetNodeName(unsigned Opcode) const override;

bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const override;
Expand Down
13 changes: 11 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1686,13 +1686,19 @@ def SDTMoveParamProfile : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<0, 1>]

def SDTProxyReg : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>]>;


// These nodes represent a parameter declaration. In PTX this will look like:
// .param .align 16 .b8 param0[1024];
// .param .b32 retval0;
//
// DeclareArrayParam(Chain, Externalsym, Align, Size, Glue)
// DeclareScalarParam(Chain, Externalsym, Size, Glue)
// Both declaration nodes carry a chain plus in/out glue and are marked
// side-effecting, so they stay ordered within the call lowering sequence.
def declare_array_param :
SDNode<"NVPTXISD::DeclareArrayParam", SDTDeclareArrayParam,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def declare_scalar_param :
SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParam,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;

def MoveParam :
SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
def proxy_reg :
Expand Down Expand Up @@ -1754,7 +1760,7 @@ def : Pat<(declare_scalar_param externalsym:$a, imm:$size),
def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def CallPrototype :
SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
[SDNPHasChain, SDNPSideEffect]>;
def ProtoIdent : Operand<i32> { let PrintMethod = "printProtoIdent"; }
def CALL_PROTOTYPE :
NVPTXInst<(outs), (ins ProtoIdent:$ident),
Expand Down Expand Up @@ -2181,6 +2187,9 @@ foreach vt = [v2f32, v2i32] in {
def: Pat<(v2i16 (scalar_to_vector i16:$a)),
(CVT_u32_u16 $a, CvtNONE)>;

// This node is similar to ISD::BUILD_VECTOR except that the output may be
// implicitly bitcast to a scalar. This allows for the representation of
// packing move instructions for vector types which are not legal i.e. v2i32
def nvptx_build_vector : SDNode<"NVPTXISD::BUILD_VECTOR", SDTypeProfile<1, 2, []>, []>;

def : Pat<(i64 (nvptx_build_vector i32:$a, i32:$b)),
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -5625,7 +5625,7 @@ class Tcgen05MMADisableOutputLaneSDNode<bit Sp, string ASpace,
# "_DISABLE_OUTPUT_LANE_CG" # CtaGroup
# !if(!eq(AShift, 1), "_ASHIFT", ""),
Tcgen05MMADisableOutputLaneTypeProfile<Sp, ASpace, CtaGroup, ScaleInput>,
[SDNPHasChain, SDNPSideEffect]>;
[SDNPHasChain, SDNPSideEffect, SDNPMemOperand]>;

class Tcgen05MMADisableOutputLaneInst<bit Sp, string ASpace,
string Kind, int CtaGroup, string CollectorUsageStr,
Expand Down
Loading
Loading