-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RFC] implement convergence control in MIR using SelectionDAG #71785
Conversation
@llvm/pr-subscribers-backend-powerpc @llvm/pr-subscribers-llvm-support Author: Sameer Sahasrabuddhe (ssahasra) ChangesLLVM function calls carry convergence control tokens as operand bundles, where the tokens themselves are produced by convergence control intrinsics. This patch implements convergence control tokens in MIR as follows:
The actual lowering of controlled convergent operations (including function calls) and convergence control intrinsics is target-specific. On AMDGPU, the convergence control operand bundle at a non-intrinsic call is translated to an explicit argument to the SI_CALL_ISEL instruction. Post-selection adjustment converts this explicit argument to an implicit argument on the SI_CALL instruction. For intrinsics, AMDGPU introduces an AMDGPUISD opcode CONVERGENCECTRL_GLUE and a corresponding machine opcode with the same spelling. This is used as a glued argument to SDNodes that is later translated to an implicit argument in the MIR. All convergent intrinsics on AMDGPU need to set hasPostISelHook in their description. Patch is 98.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/71785.diff 50 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 349d1286c8dc4f4..ecda88d9abd6779 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1378,6 +1378,10 @@ enum NodeType {
#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) VPSDID,
#include "llvm/IR/VPIntrinsics.def"
+ CONVERGENCECTRL_ANCHOR,
+ CONVERGENCECTRL_ENTRY,
+ CONVERGENCECTRL_LOOP,
+
/// BUILTIN_OP_END - This must be the last enum value in this list.
/// The target-specific pre-isel opcode values start here.
BUILTIN_OP_END
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index 884fdfadfcbef74..8f7f19fd3aa9423 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -324,6 +324,10 @@ class SelectionDAGISel : public MachineFunctionPass {
void Select_ARITH_FENCE(SDNode *N);
void Select_MEMBARRIER(SDNode *N);
+ void Select_CONVERGENCECTRL_ANCHOR(SDNode *N);
+ void Select_CONVERGENCECTRL_ENTRY(SDNode *N);
+ void Select_CONVERGENCECTRL_LOOP(SDNode *N);
+
void pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops, SDValue Operand,
SDLoc DL);
void Select_STACKMAP(SDNode *N);
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 58aad70c4bb36e6..4bb937b3cfeaecd 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4369,6 +4369,7 @@ class TargetLowering : public TargetLoweringBase {
SmallVector<ISD::InputArg, 32> Ins;
SmallVector<SDValue, 4> InVals;
const ConstantInt *CFIType = nullptr;
+ SDValue ConvergenceControlToken;
CallLoweringInfo(SelectionDAG &DAG)
: RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
@@ -4502,6 +4503,11 @@ class TargetLowering : public TargetLoweringBase {
return *this;
}
+ CallLoweringInfo &setConvergenceControlToken(SDValue Token) {
+ ConvergenceControlToken = Token;
+ return *this;
+ }
+
ArgListTy &getArgs() {
return Args;
}
@@ -4907,9 +4913,9 @@ class TargetLowering : public TargetLoweringBase {
// Targets may override this function to collect operands from the CallInst
// and for example, lower them into the SelectionDAG operands.
- virtual void CollectTargetIntrinsicOperands(const CallInst &I,
- SmallVectorImpl<SDValue> &Ops,
- SelectionDAG &DAG) const;
+ virtual void CollectTargetIntrinsicOperands(
+ const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG,
+ function_ref<SDValue(const Value *)> getValue) const;
//===--------------------------------------------------------------------===//
// Div utility functions
@@ -5337,8 +5343,9 @@ class TargetLowering : public TargetLoweringBase {
/// the 'hasPostISelHook' flag. These instructions must be adjusted after
/// instruction selection by target hooks. e.g. To fill in optional defs for
/// ARM 's' setting instructions.
- virtual void AdjustInstrPostInstrSelection(MachineInstr &MI,
- SDNode *Node) const;
+ virtual void
+ AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node,
+ function_ref<Register(SDValue)> getVR) const;
/// If this function returns true, SelectionDAGBuilder emits a
/// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector.
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 941c6d5f8cad8ce..0cda85a8944a8cf 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -230,6 +230,10 @@ HANDLE_TARGET_OPCODE(MEMBARRIER)
// using.
HANDLE_TARGET_OPCODE(JUMP_TABLE_DEBUG_INFO)
+HANDLE_TARGET_OPCODE(CONVERGENCECTRL_ENTRY)
+HANDLE_TARGET_OPCODE(CONVERGENCECTRL_ANCHOR)
+HANDLE_TARGET_OPCODE(CONVERGENCECTRL_LOOP)
+
/// The following generic opcodes are not supposed to appear after ISel.
/// This is something we might want to relax, but for now, this is convenient
/// to produce diagnostics.
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index 3b1d2f45267e9e0..621ac6048d0c763 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1437,6 +1437,34 @@ def JUMP_TABLE_DEBUG_INFO : StandardPseudoInstruction {
let isMeta = true;
}
+def CONVERGENCECTRL_ANCHOR : StandardPseudoInstruction {
+ let OutOperandList = (outs unknown:$dst);
+ let InOperandList = (ins);
+ let AsmString = "";
+ let hasSideEffects = false;
+ let Size = 0;
+ let isMeta = true;
+ let isConvergent = true;
+}
+def CONVERGENCECTRL_ENTRY : StandardPseudoInstruction {
+ let OutOperandList = (outs unknown:$dst);
+ let InOperandList = (ins);
+ let AsmString = "";
+ let hasSideEffects = false;
+ let Size = 0;
+ let isMeta = true;
+ let isConvergent = true;
+}
+def CONVERGENCECTRL_LOOP : StandardPseudoInstruction {
+ let OutOperandList = (outs unknown:$dst);
+ let InOperandList = (ins unknown:$src);
+ let AsmString = "";
+ let hasSideEffects = false;
+ let Size = 0;
+ let isMeta = true;
+ let isConvergent = true;
+}
+
// Generic opcodes used in GlobalISel.
include "llvm/Target/GenericOpcodes.td"
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index fa5761c3a199a56..e71cd165543fd1d 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -770,6 +770,14 @@ def assertsext : SDNode<"ISD::AssertSext", SDT_assert>;
def assertzext : SDNode<"ISD::AssertZext", SDT_assert>;
def assertalign : SDNode<"ISD::AssertAlign", SDT_assert>;
+def convergencectrl_anchor : SDNode<"ISD::CONVERGENCECTRL_ANCHOR",
+ SDTypeProfile<1, 0, [SDTCisVT<0,untyped>]>>;
+def convergencectrl_entry : SDNode<"ISD::CONVERGENCECTRL_ENTRY",
+ SDTypeProfile<1, 0, [SDTCisVT<0,untyped>]>>;
+def convergencectrl_loop : SDNode<"ISD::CONVERGENCECTRL_LOOP",
+ SDTypeProfile<1, 1,
+ [SDTCisVT<0,untyped>, SDTCisVT<1,untyped>]>>;
+
//===----------------------------------------------------------------------===//
// Selection DAG Condition Codes
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index a27febe15db832d..10e01d2086cb9e4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1193,7 +1193,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Run post-isel target hook to adjust this instruction if needed.
if (II.hasPostISelHook())
- TLI->AdjustInstrPostInstrSelection(*MIB, Node);
+ TLI->AdjustInstrPostInstrSelection(
+ *MIB, Node,
+ [this, &VRBaseMap](SDValue Op) { return getVR(Op, VRBaseMap); });
}
/// EmitSpecialNode - Generate machine code for a target-independent node and
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index aab0d5c5a348bfe..32251d240d457b8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4969,7 +4969,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Create the node.
SDValue Result;
// In some cases, custom collection of operands from CallInst I may be needed.
- TLI.CollectTargetIntrinsicOperands(I, Ops, DAG);
+ TLI.CollectTargetIntrinsicOperands(
+ I, Ops, DAG, [this](const Value *V) -> SDValue { return getValue(V); });
if (IsTgtIntrinsic) {
// This is target intrinsic that touches memory
//
@@ -5969,6 +5970,24 @@ bool SelectionDAGBuilder::visitEntryValueDbgValue(const DbgValueInst &DI) {
return true;
}
+/// Lower the call to the specified intrinsic function.
+void SelectionDAGBuilder::visitConvergenceControl(const CallInst &I,
+ unsigned Intrinsic) {
+ SDLoc sdl = getCurSDLoc();
+ switch (Intrinsic) {
+ case Intrinsic::experimental_convergence_anchor:
+ setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_ANCHOR, sdl, MVT::Untyped));
+ break;
+ case Intrinsic::experimental_convergence_entry:
+ setValue(&I, DAG.getNode(ISD::CONVERGENCECTRL_ENTRY, sdl, MVT::Untyped));
+ break;
+ case Intrinsic::experimental_convergence_loop: {
+ assert(false && "coming soon");
+ break;
+ }
+ }
+}
+
/// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned Intrinsic) {
@@ -7669,6 +7688,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::experimental_vector_deinterleave2:
visitVectorDeinterleave(I);
return;
+ case Intrinsic::experimental_convergence_anchor:
+ case Intrinsic::experimental_convergence_entry:
+ case Intrinsic::experimental_convergence_loop:
+ visitConvergenceControl(I, Intrinsic);
}
}
@@ -8343,6 +8366,14 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
}
}
+ SDValue ConvControlToken;
+ if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+ auto *Token = Bundle->Inputs[0].get();
+ ConvControlToken = getValue(Token);
+ } else {
+ ConvControlToken = DAG.getUNDEF(MVT::Untyped);
+ }
+
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
@@ -8351,7 +8382,8 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
.setConvergent(CB.isConvergent())
.setIsPreallocated(
CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0)
- .setCFIType(CFIType);
+ .setCFIType(CFIType)
+ .setConvergenceControlToken(ConvControlToken);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
if (Result.first.getNode()) {
@@ -8903,7 +8935,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet,
LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated,
- LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi}) &&
+ LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi,
+ LLVMContext::OB_convergencectrl}) &&
"Cannot lower calls with arbitrary operand bundles!");
SDValue Callee = getValue(I.getCalledOperand());
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index a97884f0efb9a9b..0640236f5ac693e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -631,6 +631,7 @@ class SelectionDAGBuilder {
void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
+ void visitConvergenceControl(const CallInst &I, unsigned Intrinsic);
void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
const SmallVectorImpl<SDValue> &OpValues);
void visitVPStore(const VPIntrinsic &VPIntrin,
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 78cc60084068a5f..ea28a2432c70e01 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -446,6 +446,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SET_FPMODE: return "set_fpmode";
case ISD::RESET_FPMODE: return "reset_fpmode";
+ case ISD::CONVERGENCECTRL_ANCHOR: return "convergencectrl_anchor";
+ case ISD::CONVERGENCECTRL_ENTRY: return "convergencectrl_entry";
+ case ISD::CONVERGENCECTRL_LOOP: return "convergencectrl_loop";
+
// Bit manipulation
case ISD::ABS: return "abs";
case ISD::BITREVERSE: return "bitreverse";
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 0cc426254806ddf..0324022d9792969 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -303,8 +303,9 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
llvm_unreachable(nullptr);
}
-void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
- SDNode *Node) const {
+void TargetLowering::AdjustInstrPostInstrSelection(
+ MachineInstr &MI, SDNode *Node,
+ function_ref<Register(SDValue)> getVR) const {
assert(!MI.hasPostISelHook() &&
"If a target marks an instruction with 'hasPostISelHook', "
"it must implement TargetLowering::AdjustInstrPostInstrSelection!");
@@ -2324,6 +2325,21 @@ void SelectionDAGISel::Select_MEMBARRIER(SDNode *N) {
N->getOperand(0));
}
+void SelectionDAGISel::Select_CONVERGENCECTRL_ANCHOR(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_ANCHOR,
+ N->getValueType(0));
+}
+
+void SelectionDAGISel::Select_CONVERGENCECTRL_ENTRY(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_ENTRY,
+ N->getValueType(0));
+}
+
+void SelectionDAGISel::Select_CONVERGENCECTRL_LOOP(SDNode *N) {
+ CurDAG->SelectNodeTo(N, TargetOpcode::CONVERGENCECTRL_LOOP,
+ N->getValueType(0), N->getOperand(0));
+}
+
void SelectionDAGISel::pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops,
SDValue OpVal, SDLoc DL) {
SDNode *OpNode = OpVal.getNode();
@@ -3003,6 +3019,15 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::JUMP_TABLE_DEBUG_INFO:
Select_JUMP_TABLE_DEBUG_INFO(NodeToMatch);
return;
+ case ISD::CONVERGENCECTRL_ANCHOR:
+ Select_CONVERGENCECTRL_ANCHOR(NodeToMatch);
+ return;
+ case ISD::CONVERGENCECTRL_ENTRY:
+ Select_CONVERGENCECTRL_ENTRY(NodeToMatch);
+ return;
+ case ISD::CONVERGENCECTRL_LOOP:
+ Select_CONVERGENCECTRL_LOOP(NodeToMatch);
+ return;
}
assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 80f595ac4d4d9c0..8454e9a3009db0d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -5517,8 +5517,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
void TargetLowering::CollectTargetIntrinsicOperands(
- const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
-}
+ const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG,
+ function_ref<SDValue(const Value *)> getValue) const {}
std::pair<unsigned, const TargetRegisterClass *>
TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index cd810f0b43e50db..cfbbb6683023e35 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2610,7 +2610,17 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- unsigned Opcode;
+ unsigned Opcode = 0;
+ SDNode *ConvGlueNode = N->getGluedNode();
+ if (ConvGlueNode) {
+ // FIXME: Possibly iterate over multiple glue nodes?
+ assert(ConvGlueNode->getOpcode() == AMDGPUISD::CONVERGENCECTRL_GLUE);
+ ConvGlueNode = ConvGlueNode->getOperand(0).getNode();
+ ConvGlueNode = CurDAG->getMachineNode(AMDGPU::CONVERGENCECTRL_GLUE, {},
+ MVT::Glue, SDValue(ConvGlueNode, 0));
+ } else {
+ ConvGlueNode = nullptr;
+ }
switch (IntrID) {
case Intrinsic::amdgcn_wqm:
Opcode = AMDGPU::WQM;
@@ -2627,7 +2637,7 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
break;
case Intrinsic::amdgcn_interp_p1_f16:
SelectInterpP1F16(N);
- return;
+ break;
case Intrinsic::amdgcn_inverse_ballot:
switch (N->getOperand(1).getValueSizeInBits()) {
case 32:
@@ -2642,11 +2652,19 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
break;
default:
SelectCode(N);
- return;
+ break;
+ }
+
+ if (Opcode) {
+ SDValue Src = N->getOperand(1);
+ CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
}
- SDValue Src = N->getOperand(1);
- CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src});
+ if (ConvGlueNode) {
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ NewOps.push_back(SDValue(ConvGlueNode, 0));
+ CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), NewOps);
+ }
}
void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index d4df9b6f49eb11c..d882df72081de5a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -901,6 +901,20 @@ bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const
return true;
}
+void AMDGPUTargetLowering::CollectTargetIntrinsicOperands(
+ const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG,
+ function_ref<SDValue(const Value *)> getValue) const {
+ if (auto Bundle = I.getOperandBundle(LLVMContext::OB_convergencectrl)) {
+ auto *Token = Bundle->Inputs[0].get();
+ SDValue ConvControlToken = getValue(Token);
+ // FIXME: Possibly handle the case where the last node in Ops is already a
+ // glue node?
+ ConvControlToken = DAG.getNode(AMDGPUISD::CONVERGENCECTRL_GLUE, {}, MVT::Glue,
+ ConvControlToken);
+ Ops.push_back(ConvControlToken);
+ }
+}
+
bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
// Truncate is just accessing a subregister.
@@ -5209,6 +5223,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(IF)
NODE_NAME_CASE(ELSE)
NODE_NAME_CASE(LOOP)
+ NODE_NAME_CASE(CONVERGENCECTRL_GLUE)
NODE_NAME_CASE(CALL)
NODE_NAME_CASE(TC_RETURN)
NODE_NAME_CASE(TC_RETURN_GFX)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 51f95ef97a3308a..98a02ebce9d25ea 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -192,6 +192,10 @@ class AMDGPUTargetLowering : public TargetLowering {
bool isZExtFree(Type *Src, Type *Dest) const override;
bool isZExtFree(EVT Src, EVT Dest) const override;
+ void CollectTargetIntrinsicOperands(
+ const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG,
+ function_ref<SDValue(const Value *)> getValue) const override;
+
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
bool LegalOperations, bool ForCodeSize,
NegatibleCost &Cost,
@@ -397,6 +401,8 @@ enum NodeType : unsigned {
ELSE,
LOOP,
+ CONVERGENCECTRL_GLUE,
+
// A uniform kernel return that terminates the wavefront.
ENDPGM,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 324285e580bbaff..0ac91c9d235a2d8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -66,6 +66,9 @@ def AMDGPUif : SDNode<"AMDGPUISD::IF", AMDGPUIfOp, [SDNPHasChain]>;
def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChai...
[truncated]
|
#67006 is a slightly outdated attempt at doing the same thing with GlobalISel. That other PR will be redesigned after completing this current PR. |
✅ With the latest revision this PR passed the C/C++ code formatter. |
Also, forgot to mention that this is a very early RFC, hoping to attract attention to the general scheme of things. The code contains occasional debug code, and the implementation is incomplete (for example, SelectINTRINSIC_W_CHAIN is not implemented, and the LOOP intrinsic is not implemented). |
@@ -2627,7 +2637,7 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { | |||
break; | |||
case Intrinsic::amdgcn_interp_p1_f16: | |||
SelectInterpP1F16(N); | |||
return; | |||
break; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This isn't convergent so I don't see why this would need to change
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@arsenm do you see any potential pitfalls in how the glue node is being passed around? The glue is needed only on intrinsics, and there are a couple of FIXME comments about the possibility of some intrinsic having multiple glue nodes. Also, at one point, SelectNodeTo() loses the glue node, so this change first stashes the node, then calls SelectNodeTo(), and then adds the glue node back with MorphNodeTo().
This seems like just a bug in SelectNodeTo. Glue shouldn't just get lost, that kind of defeats the purpose |
Our of curiosity, what's happening with this? |
It keeps getting pushed back behind other tasks! |
I gave the wrong impression earlier. The problem is Or there is a very real possibility that I misunderstood how glue nodes are used? |
llvm/test/CodeGen/AMDGPU/verify-convergencectrl/region-nesting.mir
Outdated
Show resolved
Hide resolved
06010d4
to
ad027c0
Compare
Squashed and rebased. Fixed the lit tests to use "-run-pass=none". The current state seems good enough to drop the "RFC" tag. |
bump! |
Bump! The only TODO is to mark every convergent function with HasPostISelHook = 1 in the TD files for AMDGPU. I am working on cleaner ways of doing this, but it need not block this review. Having the actual implementation of MIR tokens in upstream will be very useful. |
Pushed a commit that elevates CONVERGENCECTRL_GLUE to be a target-independent opcode. This is much simpler because handling tokens no longer needs target callbacks and also does not need post-isel adjustment hook on convergent operations. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is much simpler, a couple of nits
@@ -32,11 +32,12 @@ template <typename ContextT> class GenericConvergenceVerifier { | |||
|
|||
void initialize(raw_ostream *OS, | |||
function_ref<void(const Twine &Message)> FailureCB, | |||
const FunctionT &F) { | |||
const FunctionT &F, bool TokensAllowed_) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't really see why this needs to be configurable?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the IsSSA would be a more sensible name
@@ -61,12 +62,16 @@ void GenericConvergenceVerifier<ContextT>::visit(const BlockT &BB) { | |||
|
|||
template <class ContextT> | |||
void GenericConvergenceVerifier<ContextT>::visit(const InstructionT &I) { | |||
auto ID = ContextT::getIntrinsicID(I); | |||
ConvOpKind ConvOp = getConvOp(I); | |||
if (!TokensAllowed) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TokensAllowed
is needed here because the generic template cannot make checks specific to MIR. The caller has to check for conditions like "has SSA form" and pass it to the generic implementation instead. We could just call it "isSSA", but in #67006 for GMIR, we also check for Property::Selected
although I can't remember why.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The selected property approximately means GlobalISel succeeded, and you aren't running the fallback. I don't think you should need to check it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Line 342 is where the token operand gets killed on the GLUE node. The GLUE node is the only visible use of the token, because the actual use in READFIRSTLANE is this glue node.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At line 342, I could check for "getGluedUser()" in general, or check for a use that has opcode CONVERGENCECTRL_GLUE. That seems clearer than resetting the killed flag later. Would it be wrong to do the general check for getGluedUser() instead of a specific opcode?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In theory we should be able to just drop all the attempts at setting kill flags in InstrEmitter. They're fully recomputed later and deprecated
@@ -285,6 +285,27 @@ Register InstrEmitter::getVR(SDValue Op, | |||
return I->second; | |||
} | |||
|
|||
static bool isConvergenceCtrlIntrinsic(SDValue Op) { | |||
if (Op->isMachineOpcode()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The not-machine opcode case should never reach InstrEmitter
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The emitter encounters ISD::CopyFromReg here. I am not so sure about making a special case for it. At least this way, the body of the function matches its name!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@arsenm the review already has your approval, but I don't want to submit without closing all comments, especially this discussion about handling non-machine opcodes in instr-emitter.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CopyFromReg is fine, but you shouldn't need to handle the pre-selection convergence opcodes
LLVM function calls carry convergence control tokens as operand bundles, where the tokens themselves are produced by convergence control intrinsics. This patch implements convergence control tokens in MIR as follows: 1. Introduce target-independent ISD opcodes and MIR opcodes for convergence control intrinsics. 2. Model token values as untyped virtual registers in MIR. The change also introduces an additional ISD opcode CONVERGENCECTRL_GLUE and a corresponding machine opcode with the same spelling. This glues the convergence control token to SDNodes that represent calls to intrinsics. The glued token is later translated to an implicit argument in the MIR. The lowering of calls to user-defined functions is target-specific. On AMDGPU, the convergence control operand bundle at a non-intrinsic call is translated to an explicit argument to the SI_CALL_ISEL instruction. Post-selection adjustment converts this explicit argument to an implicit argument on the SI_CALL instruction.
ba3fc4c
to
abcdfb4
Compare
|
…)" This reverts commit 7988973. Encountered multiple buildbot failures.
)" Original commit 7988973. Perviously reverted in commit a2afcd5. LLVM function calls carry convergence control tokens as operand bundles, where the tokens themselves are produced by convergence control intrinsics. This patch implements convergence control tokens in MIR as follows: 1. Introduce target-independent ISD opcodes and MIR opcodes for convergence control intrinsics. 2. Model token values as untyped virtual registers in MIR. The change also introduces an additional ISD opcode CONVERGENCECTRL_GLUE and a corresponding machine opcode with the same spelling. This glues the convergence control token to SDNodes that represent calls to intrinsics. The glued token is later translated to an implicit argument in the MIR. The lowering of calls to user-defined functions is target-specific. On AMDGPU, the convergence control operand bundle at a non-intrinsic call is translated to an explicit argument to the SI_CALL_ISEL instruction. Post-selection adjustment converts this explicit argument to an implicit argument on the SI_CALL instruction.
Hi Sameer, Looks like this broke the sanitizer buildbots: https://lab.llvm.org/buildbot/#/builders/5/builds/41499
Looks like there's a memory corruption bug that can be revealed by removing the You can reproduce the bots in full by using the instructions at https://github.com/google/sanitizers/wiki/SanitizerBotReproduceBuild, but that's not fully necessary in this case. A quick repro is:
|
[AMD Official Use Only - General]
Thanks for the headsup, Mitch!
It's late pm in India, and I don't expect to have this fixed during the day tomorrow. If this is a blocker, can someone please revert the patch?
Thanks!
Sameer.
…________________________________
From: Mitch Phillips ***@***.***>
Sent: 04 March 2024 21:38
To: llvm/llvm-project ***@***.***>
Cc: Sahasrabuddhe, Sameer ***@***.***>; State change ***@***.***>
Subject: Re: [llvm/llvm-project] [RFC] implement convergence control in MIR using SelectionDAG (PR #71785)
Caution: This message originated from an External Source. Use proper caution when opening attachments, clicking links, or responding.
Hi Sameer,
Looks like this broke the sanitizer buildbots: https://lab.llvm.org/buildbot/#/builders/5/builds/41499
FAIL: LLVM :: MachineVerifier/verify-implicit-def.mir (2 of 81037)
******************** TEST 'LLVM :: MachineVerifier/verify-implicit-def.mir' FAILED ********************
Exit Code: 1
Command Output (stderr):
--
RUN: at line 2: not --crash /b/sanitizer-x86_64-linux-fast/build/llvm_build_asan_ubsan/bin/llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=none -o /dev/null /b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/test/MachineVerifier/verify-implicit-def.mir 2>&1 | /b/sanitizer-x86_64-linux-fast/build/llvm_build_asan_ubsan/bin/FileCheck /b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/test/MachineVerifier/verify-implicit-def.mir
+ not --crash /b/sanitizer-x86_64-linux-fast/build/llvm_build_asan_ubsan/bin/llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=none -o /dev/null /b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/test/MachineVerifier/verify-implicit-def.mir
+ /b/sanitizer-x86_64-linux-fast/build/llvm_build_asan_ubsan/bin/FileCheck /b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/test/MachineVerifier/verify-implicit-def.mir
--
********************
Testing:
FAIL: LLVM :: MachineVerifier/test_g_dyn_stackalloc.mir (3 of 81037)
******************** TEST 'LLVM :: MachineVerifier/test_g_dyn_stackalloc.mir' FAILED ********************
Exit Code: 1
Command Output (stderr):
--
RUN: at line 1: not --crash /b/sanitizer-x86_64-linux-fast/build/llvm_build_asan_ubsan/bin/llc -mtriple=aarch64 -o /dev/null -run-pass=none -verify-machineinstrs /b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/test/MachineVerifier/test_g_dyn_stackalloc.mir 2>&1 | /b/sanitizer-x86_64-linux-fast/build/llvm_build_asan_ubsan/bin/FileCheck /b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/test/MachineVerifier/test_g_dyn_stackalloc.mir
+ not --crash /b/sanitizer-x86_64-linux-fast/build/llvm_build_asan_ubsan/bin/llc -mtriple=aarch64 -o /dev/null -run-pass=none -verify-machineinstrs /b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/test/MachineVerifier/test_g_dyn_stackalloc.mir
+ /b/sanitizer-x86_64-linux-fast/build/llvm_build_asan_ubsan/bin/FileCheck /b/sanitizer-x86_64-linux-fast/build/llvm-project/llvm/test/MachineVerifier/test_g_dyn_stackalloc.mir
Looks like there's a memory corruption bug that can be revealed by removing the -o /dev/null from the tests.
You can reproduce the bots in full by using the instructions at https://github.com/google/sanitizers/wiki/SanitizerBotReproduceBuild, but that's not fully necessary in this case. A quick repro is:
$ cmake \
-DCMAKE_C_COMPILER=clang \
-DCMAKE_CXX_COMPILER=clang++ \
-DLLVM_USE_LINKER=lld \
-GNinja \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_C_FLAGS="-fsanitize=address" \
-DCMAKE_CXX_FLAGS="-fsanitize=address" \
-DLLVM_USE_SANITIZER=Address \
/path/to/llvm
$ LIT_OPTS='--filter MachineVerifier' ninja check-llvm
< ... snip ... >
********************
********************
Failed Tests (2):
LLVM :: MachineVerifier/test_g_dyn_stackalloc.mir
LLVM :: MachineVerifier/verify-implicit-def.mir
—
Reply to this email directly, view it on GitHub<#71785 (comment)>, or unsubscribe<https://github.com/notifications/unsubscribe-auth/AJ53IZM7ZLEVV7K5BE4KXMLYWSMAPAVCNFSM6AAAAAA7EDJEUOVHI2DSMVQWIX3LMV43OSLTON2WKQ3PNVWWK3TUHMYTSNZWHEZTENRQHA>.
You are receiving this because you modified the open/close state.
|
)" This restores commit c7fdd8c. Previously reverted in f010b1b. LLVM function calls carry convergence control tokens as operand bundles, where the tokens themselves are produced by convergence control intrinsics. This patch implements convergence control tokens in MIR as follows: 1. Introduce target-independent ISD opcodes and MIR opcodes for convergence control intrinsics. 2. Model token values as untyped virtual registers in MIR. The change also introduces an additional ISD opcode CONVERGENCECTRL_GLUE and a corresponding machine opcode with the same spelling. This glues the convergence control token to SDNodes that represent calls to intrinsics. The glued token is later translated to an implicit argument in the MIR. The lowering of calls to user-defined functions is target-specific. On AMDGPU, the convergence control operand bundle at a non-intrinsic call is translated to an explicit argument to the SI_CALL_ISEL instruction. Post-selection adjustment converts this explicit argument to an implicit argument on the SI_CALL instruction.
LLVM function calls carry convergence control tokens as operand bundles, where the tokens themselves are produced by convergence control intrinsics. This patch implements convergence control tokens in MIR as follows:
The actual lowering of controlled convergent operations (including function calls) and convergence control intrinsics is target-specific.
On AMDGPU, the convergence control operand bundle at a non-intrinsic call is translated to an explicit argument to the SI_CALL_ISEL instruction. Post-selection adjustment converts this explicit argument to an implicit argument on the SI_CALL instruction. For intrinsics, AMDGPU introduces an AMDGPUISD opcode CONVERGENCECTRL_GLUE and a corresponding machine opcode with the same spelling. This is used as a glued argument to SDNodes that is later translated to an implicit argument in the MIR.
All convergent intrinsics on AMDGPU need to set hasPostISelHook in their description.