-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[LLVM-Tablegen] Pretty Printing Arguments in LLVM Intrinsics #162629
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[LLVM-Tablegen] Pretty Printing Arguments in LLVM Intrinsics #162629
Conversation
@llvm/pr-subscribers-backend-nvptx @llvm/pr-subscribers-llvm-ir — Author: Dharuni R Acharya (DharuniRAcharya). Changes: This patch adds LLVM infrastructure to support pretty-printing of the immediate arguments of intrinsics. The motivation is to increase the readability of LLVM intrinsics and to facilitate easy modification and debugging of LLVM IR. This adds a new intrinsic property, `PrettyPrintImmArg` (see the Intrinsics.td hunk below). Patch is 40.68 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/162629.diff. 13 files affected:
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index d3497716ca844..ee303618515fb 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -930,7 +930,8 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node<Function> {
/// AssemblyAnnotationWriter.
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW = nullptr,
bool ShouldPreserveUseListOrder = false,
- bool IsForDebug = false) const;
+ bool IsForDebug = false,
+ bool PrettyPrintIntrinsicArgs = false) const;
/// viewCFG - This function is meant for use from the debugger. You can just
/// say 'call F->viewCFG()' and a ghostview window should pop up from the
diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index 9577d0141f168..b197bf5340b15 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -30,6 +30,8 @@ class LLVMContext;
class Module;
class AttributeList;
class AttributeSet;
+class raw_ostream;
+class Constant;
/// This namespace contains an enum with a value for every intrinsic/builtin
/// function known by LLVM. The enum values are returned by
@@ -81,6 +83,9 @@ namespace Intrinsic {
/// Returns true if the intrinsic can be overloaded.
LLVM_ABI bool isOverloaded(ID id);
+ /// Returns true if the intrinsic has pretty printed immediate arguments.
+ LLVM_ABI bool hasPrettyPrintedArgs(ID id);
+
/// isTargetIntrinsic - Returns true if IID is an intrinsic specific to a
/// certain target. If it is a generic intrinsic false is returned.
LLVM_ABI bool isTargetIntrinsic(ID IID);
@@ -284,6 +289,8 @@ namespace Intrinsic {
/// N.
LLVM_ABI Intrinsic::ID getDeinterleaveIntrinsicID(unsigned Factor);
+ LLVM_ABI void printImmArg(ID IID, unsigned ArgIdx, raw_ostream &OS, const Constant *ImmArgVal);
+
} // namespace Intrinsic
} // namespace llvm
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 96da698538314..f56a7603f4963 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -87,6 +87,13 @@ class NoUndef<AttrIndex idx> : IntrinsicProperty {
int ArgNo = idx.Value;
}
+// PrettyPrintImmArg - The specified immediate argument has a custom pretty-print
+// function for diagnostic output.
+class PrettyPrintImmArg<AttrIndex idx, string funcname> : IntrinsicProperty {
+ int ArgNo = idx.Value;
+ string FunctionName = funcname;
+}
+
// NonNull - The return value or specified argument is not null.
class NonNull<AttrIndex idx> : IntrinsicProperty {
int ArgNo = idx.Value;
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 3af1750ffcf3f..6a132de3413bb 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -2871,7 +2871,14 @@ foreach sp = [0, 1] in {
defvar nargs = !size(args);
defvar scale_d_imm = ArgIndex<!sub(nargs, 1)>;
defvar scale_d_imm_range = [ImmArg<scale_d_imm>, Range<scale_d_imm, 0, 16>];
- defvar intrinsic_properties = !listconcat(
+
+ // Check if this is the specific llvm.nvvm.tcgen05.mma.tensor intrinsic
+ defvar is_target_intrinsic = !and(!eq(sp, 0),
+ !eq(space, "tensor"),
+ !eq(scale_d, 0),
+ !eq(ashift, 0));
+
+ defvar base_properties = !listconcat(
mma.common_intr_props,
!if(!eq(scale_d, 1), scale_d_imm_range, []),
[Range<ArgIndex<nargs>, 0, !if(!eq(scale_d, 1), 2, 4)>, // kind
@@ -2881,6 +2888,13 @@ foreach sp = [0, 1] in {
]
);
+ defvar intrinsic_properties = !if(is_target_intrinsic,
+ !listconcat(base_properties,
+ [PrettyPrintImmArg<ArgIndex<nargs>, "printTcgen05MMAKind">,
+ PrettyPrintImmArg<ArgIndex<!add(nargs, 1)>, "printCTAGroupKind">,
+ PrettyPrintImmArg<ArgIndex<!add(nargs, 2)>, "printTcgen05CollectorUsageOp">]),
+ base_properties);
+
def mma.record:
DefaultAttrsIntrinsicFlags<[], args, flags, intrinsic_properties,
mma.intr>;
diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h
index a99937a90cbb7..fe5c1308a190a 100644
--- a/llvm/include/llvm/IR/Module.h
+++ b/llvm/include/llvm/IR/Module.h
@@ -866,7 +866,8 @@ class LLVM_ABI Module {
/// the assembly.
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW,
bool ShouldPreserveUseListOrder = false,
- bool IsForDebug = false) const;
+ bool IsForDebug = false,
+ bool PrettyPrintIntrinsicArgs = false) const;
/// Dump the module to stderr (for debugging).
void dump() const;
diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
index d55100e5e709d..2ce1461f7f3c2 100644
--- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
+++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
@@ -18,6 +18,9 @@
#include <stdint.h>
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
@@ -659,6 +662,66 @@ inline APFloat::roundingMode GetFMARoundingMode(Intrinsic::ID IntrinsicID) {
llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM fma");
}
+inline void printTcgen05MMAKind(raw_ostream &OS, const Constant *ImmArgVal) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(ImmArgVal)) {
+ uint64_t Val = CI->getZExtValue();
+ switch (static_cast<Tcgen05MMAKind>(Val)) {
+ case Tcgen05MMAKind::F16:
+ OS << "kind::f16";
+ return;
+ case Tcgen05MMAKind::TF32:
+ OS << "kind::tf32";
+ return;
+ case Tcgen05MMAKind::F8F6F4:
+ OS << "kind::f8f6f4";
+ return;
+ case Tcgen05MMAKind::I8:
+ OS << "kind::i8";
+ return;
+ }
+ }
+ llvm_unreachable("printTcgen05MMAKind called with invalid value for immediate argument");
+}
+
+inline void printTcgen05CollectorUsageOp(raw_ostream &OS, const Constant *ImmArgVal) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(ImmArgVal)) {
+ uint64_t Val = CI->getZExtValue();
+ switch (static_cast<Tcgen05CollectorUsageOp>(Val)) {
+ case Tcgen05CollectorUsageOp::DISCARD:
+ OS << "collector::a::discard";
+ return;
+ case Tcgen05CollectorUsageOp::LASTUSE:
+ OS << "collector::a::lastuse";
+ return;
+ case Tcgen05CollectorUsageOp::FILL:
+ OS << "collector::a::fill";
+ return;
+ case Tcgen05CollectorUsageOp::USE:
+ OS << "collector::a::use";
+ return;
+ }
+ }
+ llvm_unreachable("printTcgen05CollectorUsageOp called with invalid value for immediate argument");
+}
+
+inline void printCTAGroupKind(raw_ostream &OS, const Constant *ImmArgVal) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(ImmArgVal)) {
+ uint64_t Val = CI->getZExtValue();
+ switch (static_cast<CTAGroupKind>(Val)) {
+ case CTAGroupKind::CG_NONE:
+ OS << "cta_group::0";
+ return;
+ case CTAGroupKind::CG_1:
+ OS << "cta_group::1";
+ return;
+ case CTAGroupKind::CG_2:
+ OS << "cta_group::2";
+ return;
+ }
+ }
+ llvm_unreachable("printCTAGroupKind called with invalid value for immediate argument");
+}
+
} // namespace nvvm
} // namespace llvm
#endif // LLVM_IR_NVVMINTRINSICUTILS_H
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index ae086bcd3902d..c859cf980ef66 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -53,6 +53,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -2831,6 +2832,7 @@ class AssemblyWriter {
SetVector<const Comdat *> Comdats;
bool IsForDebug;
bool ShouldPreserveUseListOrder;
+ bool PrettyPrintIntrinsicArgs;
UseListOrderMap UseListOrders;
SmallVector<StringRef, 8> MDNames;
/// Synchronization scope names registered with LLVMContext.
@@ -2841,7 +2843,8 @@ class AssemblyWriter {
/// Construct an AssemblyWriter with an external SlotTracker
AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, const Module *M,
AssemblyAnnotationWriter *AAW, bool IsForDebug,
- bool ShouldPreserveUseListOrder = false);
+ bool ShouldPreserveUseListOrder = false,
+ bool PrettyPrintIntrinsicArgs = false);
AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
const ModuleSummaryIndex *Index, bool IsForDebug);
@@ -2930,10 +2933,12 @@ class AssemblyWriter {
AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
const Module *M, AssemblyAnnotationWriter *AAW,
- bool IsForDebug, bool ShouldPreserveUseListOrder)
+ bool IsForDebug, bool ShouldPreserveUseListOrder,
+ bool PrettyPrintIntrinsicArgs)
: Out(o), TheModule(M), Machine(Mac), TypePrinter(M), AnnotationWriter(AAW),
IsForDebug(IsForDebug),
- ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
+ ShouldPreserveUseListOrder(ShouldPreserveUseListOrder),
+ PrettyPrintIntrinsicArgs(PrettyPrintIntrinsicArgs) {
if (!TheModule)
return;
for (const GlobalObject &GO : TheModule->global_objects())
@@ -2944,7 +2949,8 @@ AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
const ModuleSummaryIndex *Index, bool IsForDebug)
: Out(o), TheIndex(Index), Machine(Mac), TypePrinter(/*Module=*/nullptr),
- IsForDebug(IsForDebug), ShouldPreserveUseListOrder(false) {}
+ IsForDebug(IsForDebug), ShouldPreserveUseListOrder(false),
+ PrettyPrintIntrinsicArgs(false) {}
void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
if (!Operand) {
@@ -4561,12 +4567,38 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ' ';
writeOperand(Operand, false);
Out << '(';
+ bool HasPrettyPrintedArgs = PrettyPrintIntrinsicArgs && isa<IntrinsicInst>(CI) &&
+ Intrinsic::hasPrettyPrintedArgs(CI->getIntrinsicID());
+
ListSeparator LS;
- for (unsigned op = 0, Eop = CI->arg_size(); op < Eop; ++op) {
- Out << LS;
- writeParamOperand(CI->getArgOperand(op), PAL.getParamAttrs(op));
- }
+ if (HasPrettyPrintedArgs) {
+ Function *CalledFunc = CI->getCalledFunction();
+ auto PrintArgComment = [&](unsigned ArgNo) {
+ if (!CalledFunc->hasParamAttribute(ArgNo, Attribute::ImmArg))
+ return;
+ const Constant *ConstArg = dyn_cast<Constant>(CI->getArgOperand(ArgNo));
+ if (!ConstArg)
+ return;
+ std::string ArgComment;
+ raw_string_ostream ArgCommentStream(ArgComment);
+ Intrinsic::ID IID = CalledFunc->getIntrinsicID();
+ Intrinsic::printImmArg(IID, ArgNo, ArgCommentStream, ConstArg);
+ if (ArgComment.empty())
+ return;
+ Out << "/* " << ArgComment << " */ ";
+ };
+ for (unsigned ArgNo = 0, NumArgs = CI->arg_size(); ArgNo < NumArgs; ++ArgNo) {
+ Out << LS;
+ PrintArgComment(ArgNo);
+ writeParamOperand(CI->getArgOperand(ArgNo), PAL.getParamAttrs(ArgNo));
+ }
+ } else {
+ for (unsigned op = 0, Eop = CI->arg_size(); op < Eop; ++op) {
+ Out << LS;
+ writeParamOperand(CI->getArgOperand(op), PAL.getParamAttrs(op));
+ }
+ }
// Emit an ellipsis if this is a musttail call in a vararg function. This
// is only to aid readability, musttail calls forward varargs by default.
if (CI->isMustTailCall() && CI->getParent() &&
@@ -4991,12 +5023,14 @@ void AssemblyWriter::printUseLists(const Function *F) {
void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW,
bool ShouldPreserveUseListOrder,
- bool IsForDebug) const {
+ bool IsForDebug,
+ bool PrettyPrintIntrinsicArgs) const {
SlotTracker SlotTable(this->getParent());
formatted_raw_ostream OS(ROS);
AssemblyWriter W(OS, SlotTable, this->getParent(), AAW,
IsForDebug,
- ShouldPreserveUseListOrder);
+ ShouldPreserveUseListOrder,
+ PrettyPrintIntrinsicArgs);
W.printFunction(this);
}
@@ -5012,11 +5046,11 @@ void BasicBlock::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW,
}
void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW,
- bool ShouldPreserveUseListOrder, bool IsForDebug) const {
+ bool ShouldPreserveUseListOrder, bool IsForDebug, bool PrettyPrintIntrinsicArgs) const {
SlotTracker SlotTable(this);
formatted_raw_ostream OS(ROS);
AssemblyWriter W(OS, SlotTable, this, AAW, IsForDebug,
- ShouldPreserveUseListOrder);
+ ShouldPreserveUseListOrder, PrettyPrintIntrinsicArgs);
W.printModule(this);
}
diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp
index 6797a100ff732..4159de3e630c8 100644
--- a/llvm/lib/IR/Intrinsics.cpp
+++ b/llvm/lib/IR/Intrinsics.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/IR/IntrinsicsMips.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/IR/NVVMIntrinsicUtils.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/IntrinsicsRISCV.h"
@@ -601,6 +602,12 @@ bool Intrinsic::isOverloaded(ID id) {
#undef GET_INTRINSIC_OVERLOAD_TABLE
}
+bool Intrinsic::hasPrettyPrintedArgs(ID id) {
+#define GET_INTRINSIC_PRETTY_PRINT_TABLE
+#include "llvm/IR/IntrinsicImpl.inc"
+#undef GET_INTRINSIC_PRETTY_PRINT_TABLE
+}
+
/// Table of per-target intrinsic name tables.
#define GET_INTRINSIC_TARGET_DATA
#include "llvm/IR/IntrinsicImpl.inc"
@@ -1129,3 +1136,7 @@ Intrinsic::ID Intrinsic::getDeinterleaveIntrinsicID(unsigned Factor) {
assert(Factor >= 2 && Factor <= 8 && "Unexpected factor");
return InterleaveIntrinsics[Factor - 2].Deinterleave;
}
+
+#define GET_INTRINSIC_PRETTY_PRINT_ARGUMENTS
+#include "llvm/IR/IntrinsicImpl.inc"
+#undef GET_INTRINSIC_PRETTY_PRINT_ARGUMENTS
\ No newline at end of file
diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-mma.ll b/llvm/test/CodeGen/NVPTX/tcgen05-mma.ll
index 711e566df5034..ea3a5db82684f 100644
--- a/llvm/test/CodeGen/NVPTX/tcgen05-mma.ll
+++ b/llvm/test/CodeGen/NVPTX/tcgen05-mma.ll
@@ -34,13 +34,17 @@ define void @tcgen05_mma_fp16_cta1(ptr addrspace(6) %dtmem, ptr addrspace(6) %at
; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f16.collector::a::use [%r1], [%r3], %rd2, %r2, %p1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 0)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::f16 */ i32 0, /* cta_group::1 */ i32 1, /* collector::a::discard */ i32 0)
call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 0)
call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 0)
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 1)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::f16 */ i32 0, /* cta_group::1 */ i32 1, /* collector::a::lastuse */ i32 1)
call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 1)
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 2)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::f16 */ i32 0, /* cta_group::1 */ i32 1, /* collector::a::fill */ i32 2)
call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 2)
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 3)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::f16 */ i32 0, /* cta_group::1 */ i32 1, /* collector::a::use */ i32 3)
call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 3)
ret void
}
@@ -113,13 +117,17 @@ define void @tcgen05_mma_tf32_cta1(ptr addrspace(6) %dtmem, ptr addrspace(6) %at
; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::tf32.collector::a::use [%r1], [%r3], %rd2, %r2, %p1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 0)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::tf32 */ i32 1, /* cta_group::1 */ i32 1, /* collector::a::discard */ i32 0)
call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 0)
call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 0)
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 1)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::tf32 */ i32 1, /* cta_group::1 */ i32 1, /* collector::a::lastuse */ i32 1)
call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 1)
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 2)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::tf32 */ i32 1, /* cta_group::1 */ i32 1, /* collector::a::fill */ i32 2)
call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 2)
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 3)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::tf32 */ i32 1, /* cta_group::1 */ i32 1, /* collector::a::use */ i32 3)
call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 3)
ret void
}
@@ -192,13 +200,17 @@ define void @tcgen05_mma_f8f6f4_cta1(ptr addrspace(6) %dtmem, ptr addrspace(6) %
; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f8f6f4.collector::a::use [%r1], [%r3], %rd2, %r2, %p1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 2, i32 1, i32 0)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::f8f6f4 */ i32 2, /* cta_group::1 */ i32 1, /* collector::a::discard */ i32 0)
call vo...
[truncated]
|
@llvm/pr-subscribers-tablegen — Author: Dharuni R Acharya (DharuniRAcharya). Changes: This patch adds LLVM infrastructure to support pretty-printing of the immediate arguments of intrinsics. The motivation is to increase the readability of LLVM intrinsics and to facilitate easy modification and debugging of LLVM IR. This adds a new intrinsic property, `PrettyPrintImmArg` (see the Intrinsics.td hunk below). Patch is 40.68 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/162629.diff. 13 files affected:
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index d3497716ca844..ee303618515fb 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -930,7 +930,8 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node<Function> {
/// AssemblyAnnotationWriter.
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW = nullptr,
bool ShouldPreserveUseListOrder = false,
- bool IsForDebug = false) const;
+ bool IsForDebug = false,
+ bool PrettyPrintIntrinsicArgs = false) const;
/// viewCFG - This function is meant for use from the debugger. You can just
/// say 'call F->viewCFG()' and a ghostview window should pop up from the
diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index 9577d0141f168..b197bf5340b15 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -30,6 +30,8 @@ class LLVMContext;
class Module;
class AttributeList;
class AttributeSet;
+class raw_ostream;
+class Constant;
/// This namespace contains an enum with a value for every intrinsic/builtin
/// function known by LLVM. The enum values are returned by
@@ -81,6 +83,9 @@ namespace Intrinsic {
/// Returns true if the intrinsic can be overloaded.
LLVM_ABI bool isOverloaded(ID id);
+ /// Returns true if the intrinsic has pretty printed immediate arguments.
+ LLVM_ABI bool hasPrettyPrintedArgs(ID id);
+
/// isTargetIntrinsic - Returns true if IID is an intrinsic specific to a
/// certain target. If it is a generic intrinsic false is returned.
LLVM_ABI bool isTargetIntrinsic(ID IID);
@@ -284,6 +289,8 @@ namespace Intrinsic {
/// N.
LLVM_ABI Intrinsic::ID getDeinterleaveIntrinsicID(unsigned Factor);
+ LLVM_ABI void printImmArg(ID IID, unsigned ArgIdx, raw_ostream &OS, const Constant *ImmArgVal);
+
} // namespace Intrinsic
} // namespace llvm
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 96da698538314..f56a7603f4963 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -87,6 +87,13 @@ class NoUndef<AttrIndex idx> : IntrinsicProperty {
int ArgNo = idx.Value;
}
+// PrettyPrintImmArg - The specified immediate argument has a custom pretty-print
+// function for diagnostic output.
+class PrettyPrintImmArg<AttrIndex idx, string funcname> : IntrinsicProperty {
+ int ArgNo = idx.Value;
+ string FunctionName = funcname;
+}
+
// NonNull - The return value or specified argument is not null.
class NonNull<AttrIndex idx> : IntrinsicProperty {
int ArgNo = idx.Value;
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 3af1750ffcf3f..6a132de3413bb 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -2871,7 +2871,14 @@ foreach sp = [0, 1] in {
defvar nargs = !size(args);
defvar scale_d_imm = ArgIndex<!sub(nargs, 1)>;
defvar scale_d_imm_range = [ImmArg<scale_d_imm>, Range<scale_d_imm, 0, 16>];
- defvar intrinsic_properties = !listconcat(
+
+ // Check if this is the specific llvm.nvvm.tcgen05.mma.tensor intrinsic
+ defvar is_target_intrinsic = !and(!eq(sp, 0),
+ !eq(space, "tensor"),
+ !eq(scale_d, 0),
+ !eq(ashift, 0));
+
+ defvar base_properties = !listconcat(
mma.common_intr_props,
!if(!eq(scale_d, 1), scale_d_imm_range, []),
[Range<ArgIndex<nargs>, 0, !if(!eq(scale_d, 1), 2, 4)>, // kind
@@ -2881,6 +2888,13 @@ foreach sp = [0, 1] in {
]
);
+ defvar intrinsic_properties = !if(is_target_intrinsic,
+ !listconcat(base_properties,
+ [PrettyPrintImmArg<ArgIndex<nargs>, "printTcgen05MMAKind">,
+ PrettyPrintImmArg<ArgIndex<!add(nargs, 1)>, "printCTAGroupKind">,
+ PrettyPrintImmArg<ArgIndex<!add(nargs, 2)>, "printTcgen05CollectorUsageOp">]),
+ base_properties);
+
def mma.record:
DefaultAttrsIntrinsicFlags<[], args, flags, intrinsic_properties,
mma.intr>;
diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h
index a99937a90cbb7..fe5c1308a190a 100644
--- a/llvm/include/llvm/IR/Module.h
+++ b/llvm/include/llvm/IR/Module.h
@@ -866,7 +866,8 @@ class LLVM_ABI Module {
/// the assembly.
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW,
bool ShouldPreserveUseListOrder = false,
- bool IsForDebug = false) const;
+ bool IsForDebug = false,
+ bool PrettyPrintIntrinsicArgs = false) const;
/// Dump the module to stderr (for debugging).
void dump() const;
diff --git a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
index d55100e5e709d..2ce1461f7f3c2 100644
--- a/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
+++ b/llvm/include/llvm/IR/NVVMIntrinsicUtils.h
@@ -18,6 +18,9 @@
#include <stdint.h>
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
@@ -659,6 +662,66 @@ inline APFloat::roundingMode GetFMARoundingMode(Intrinsic::ID IntrinsicID) {
llvm_unreachable("Invalid FP instrinsic rounding mode for NVVM fma");
}
+inline void printTcgen05MMAKind(raw_ostream &OS, const Constant *ImmArgVal) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(ImmArgVal)) {
+ uint64_t Val = CI->getZExtValue();
+ switch (static_cast<Tcgen05MMAKind>(Val)) {
+ case Tcgen05MMAKind::F16:
+ OS << "kind::f16";
+ return;
+ case Tcgen05MMAKind::TF32:
+ OS << "kind::tf32";
+ return;
+ case Tcgen05MMAKind::F8F6F4:
+ OS << "kind::f8f6f4";
+ return;
+ case Tcgen05MMAKind::I8:
+ OS << "kind::i8";
+ return;
+ }
+ }
+ llvm_unreachable("printTcgen05MMAKind called with invalid value for immediate argument");
+}
+
+inline void printTcgen05CollectorUsageOp(raw_ostream &OS, const Constant *ImmArgVal) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(ImmArgVal)) {
+ uint64_t Val = CI->getZExtValue();
+ switch (static_cast<Tcgen05CollectorUsageOp>(Val)) {
+ case Tcgen05CollectorUsageOp::DISCARD:
+ OS << "collector::a::discard";
+ return;
+ case Tcgen05CollectorUsageOp::LASTUSE:
+ OS << "collector::a::lastuse";
+ return;
+ case Tcgen05CollectorUsageOp::FILL:
+ OS << "collector::a::fill";
+ return;
+ case Tcgen05CollectorUsageOp::USE:
+ OS << "collector::a::use";
+ return;
+ }
+ }
+ llvm_unreachable("printTcgen05CollectorUsageOp called with invalid value for immediate argument");
+}
+
+inline void printCTAGroupKind(raw_ostream &OS, const Constant *ImmArgVal) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(ImmArgVal)) {
+ uint64_t Val = CI->getZExtValue();
+ switch (static_cast<CTAGroupKind>(Val)) {
+ case CTAGroupKind::CG_NONE:
+ OS << "cta_group::0";
+ return;
+ case CTAGroupKind::CG_1:
+ OS << "cta_group::1";
+ return;
+ case CTAGroupKind::CG_2:
+ OS << "cta_group::2";
+ return;
+ }
+ }
+ llvm_unreachable("printCTAGroupKind called with invalid value for immediate argument");
+}
+
} // namespace nvvm
} // namespace llvm
#endif // LLVM_IR_NVVMINTRINSICUTILS_H
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index ae086bcd3902d..c859cf980ef66 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -53,6 +53,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -2831,6 +2832,7 @@ class AssemblyWriter {
SetVector<const Comdat *> Comdats;
bool IsForDebug;
bool ShouldPreserveUseListOrder;
+ bool PrettyPrintIntrinsicArgs;
UseListOrderMap UseListOrders;
SmallVector<StringRef, 8> MDNames;
/// Synchronization scope names registered with LLVMContext.
@@ -2841,7 +2843,8 @@ class AssemblyWriter {
/// Construct an AssemblyWriter with an external SlotTracker
AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, const Module *M,
AssemblyAnnotationWriter *AAW, bool IsForDebug,
- bool ShouldPreserveUseListOrder = false);
+ bool ShouldPreserveUseListOrder = false,
+ bool PrettyPrintIntrinsicArgs = false);
AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
const ModuleSummaryIndex *Index, bool IsForDebug);
@@ -2930,10 +2933,12 @@ class AssemblyWriter {
AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
const Module *M, AssemblyAnnotationWriter *AAW,
- bool IsForDebug, bool ShouldPreserveUseListOrder)
+ bool IsForDebug, bool ShouldPreserveUseListOrder,
+ bool PrettyPrintIntrinsicArgs)
: Out(o), TheModule(M), Machine(Mac), TypePrinter(M), AnnotationWriter(AAW),
IsForDebug(IsForDebug),
- ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
+ ShouldPreserveUseListOrder(ShouldPreserveUseListOrder),
+ PrettyPrintIntrinsicArgs(PrettyPrintIntrinsicArgs) {
if (!TheModule)
return;
for (const GlobalObject &GO : TheModule->global_objects())
@@ -2944,7 +2949,8 @@ AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
const ModuleSummaryIndex *Index, bool IsForDebug)
: Out(o), TheIndex(Index), Machine(Mac), TypePrinter(/*Module=*/nullptr),
- IsForDebug(IsForDebug), ShouldPreserveUseListOrder(false) {}
+ IsForDebug(IsForDebug), ShouldPreserveUseListOrder(false),
+ PrettyPrintIntrinsicArgs(false) {}
void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
if (!Operand) {
@@ -4561,12 +4567,38 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ' ';
writeOperand(Operand, false);
Out << '(';
+ bool HasPrettyPrintedArgs = PrettyPrintIntrinsicArgs && isa<IntrinsicInst>(CI) &&
+ Intrinsic::hasPrettyPrintedArgs(CI->getIntrinsicID());
+
ListSeparator LS;
- for (unsigned op = 0, Eop = CI->arg_size(); op < Eop; ++op) {
- Out << LS;
- writeParamOperand(CI->getArgOperand(op), PAL.getParamAttrs(op));
- }
+ if (HasPrettyPrintedArgs) {
+ Function *CalledFunc = CI->getCalledFunction();
+ auto PrintArgComment = [&](unsigned ArgNo) {
+ if (!CalledFunc->hasParamAttribute(ArgNo, Attribute::ImmArg))
+ return;
+ const Constant *ConstArg = dyn_cast<Constant>(CI->getArgOperand(ArgNo));
+ if (!ConstArg)
+ return;
+ std::string ArgComment;
+ raw_string_ostream ArgCommentStream(ArgComment);
+ Intrinsic::ID IID = CalledFunc->getIntrinsicID();
+ Intrinsic::printImmArg(IID, ArgNo, ArgCommentStream, ConstArg);
+ if (ArgComment.empty())
+ return;
+ Out << "/* " << ArgComment << " */ ";
+ };
+ for (unsigned ArgNo = 0, NumArgs = CI->arg_size(); ArgNo < NumArgs; ++ArgNo) {
+ Out << LS;
+ PrintArgComment(ArgNo);
+ writeParamOperand(CI->getArgOperand(ArgNo), PAL.getParamAttrs(ArgNo));
+ }
+ } else {
+ for (unsigned op = 0, Eop = CI->arg_size(); op < Eop; ++op) {
+ Out << LS;
+ writeParamOperand(CI->getArgOperand(op), PAL.getParamAttrs(op));
+ }
+ }
// Emit an ellipsis if this is a musttail call in a vararg function. This
// is only to aid readability, musttail calls forward varargs by default.
if (CI->isMustTailCall() && CI->getParent() &&
@@ -4991,12 +5023,14 @@ void AssemblyWriter::printUseLists(const Function *F) {
void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW,
bool ShouldPreserveUseListOrder,
- bool IsForDebug) const {
+ bool IsForDebug,
+ bool PrettyPrintIntrinsicArgs) const {
SlotTracker SlotTable(this->getParent());
formatted_raw_ostream OS(ROS);
AssemblyWriter W(OS, SlotTable, this->getParent(), AAW,
IsForDebug,
- ShouldPreserveUseListOrder);
+ ShouldPreserveUseListOrder,
+ PrettyPrintIntrinsicArgs);
W.printFunction(this);
}
@@ -5012,11 +5046,11 @@ void BasicBlock::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW,
}
void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW,
- bool ShouldPreserveUseListOrder, bool IsForDebug) const {
+ bool ShouldPreserveUseListOrder, bool IsForDebug, bool PrettyPrintIntrinsicArgs) const {
SlotTracker SlotTable(this);
formatted_raw_ostream OS(ROS);
AssemblyWriter W(OS, SlotTable, this, AAW, IsForDebug,
- ShouldPreserveUseListOrder);
+ ShouldPreserveUseListOrder, PrettyPrintIntrinsicArgs);
W.printModule(this);
}
diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp
index 6797a100ff732..4159de3e630c8 100644
--- a/llvm/lib/IR/Intrinsics.cpp
+++ b/llvm/lib/IR/Intrinsics.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/IR/IntrinsicsMips.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/IR/NVVMIntrinsicUtils.h"
#include "llvm/IR/IntrinsicsPowerPC.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/IntrinsicsRISCV.h"
@@ -601,6 +602,12 @@ bool Intrinsic::isOverloaded(ID id) {
#undef GET_INTRINSIC_OVERLOAD_TABLE
}
+bool Intrinsic::hasPrettyPrintedArgs(ID id) {
+#define GET_INTRINSIC_PRETTY_PRINT_TABLE
+#include "llvm/IR/IntrinsicImpl.inc"
+#undef GET_INTRINSIC_PRETTY_PRINT_TABLE
+}
+
/// Table of per-target intrinsic name tables.
#define GET_INTRINSIC_TARGET_DATA
#include "llvm/IR/IntrinsicImpl.inc"
@@ -1129,3 +1136,7 @@ Intrinsic::ID Intrinsic::getDeinterleaveIntrinsicID(unsigned Factor) {
assert(Factor >= 2 && Factor <= 8 && "Unexpected factor");
return InterleaveIntrinsics[Factor - 2].Deinterleave;
}
+
+#define GET_INTRINSIC_PRETTY_PRINT_ARGUMENTS
+#include "llvm/IR/IntrinsicImpl.inc"
+#undef GET_INTRINSIC_PRETTY_PRINT_ARGUMENTS
\ No newline at end of file
diff --git a/llvm/test/CodeGen/NVPTX/tcgen05-mma.ll b/llvm/test/CodeGen/NVPTX/tcgen05-mma.ll
index 711e566df5034..ea3a5db82684f 100644
--- a/llvm/test/CodeGen/NVPTX/tcgen05-mma.ll
+++ b/llvm/test/CodeGen/NVPTX/tcgen05-mma.ll
@@ -34,13 +34,17 @@ define void @tcgen05_mma_fp16_cta1(ptr addrspace(6) %dtmem, ptr addrspace(6) %at
; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f16.collector::a::use [%r1], [%r3], %rd2, %r2, %p1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 0)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::f16 */ i32 0, /* cta_group::1 */ i32 1, /* collector::a::discard */ i32 0)
call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 0)
call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 0)
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 1)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::f16 */ i32 0, /* cta_group::1 */ i32 1, /* collector::a::lastuse */ i32 1)
call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 1)
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 2)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::f16 */ i32 0, /* cta_group::1 */ i32 1, /* collector::a::fill */ i32 2)
call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 2)
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 3)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::f16 */ i32 0, /* cta_group::1 */ i32 1, /* collector::a::use */ i32 3)
call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 0, i32 1, i32 3)
ret void
}
@@ -113,13 +117,17 @@ define void @tcgen05_mma_tf32_cta1(ptr addrspace(6) %dtmem, ptr addrspace(6) %at
; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::tf32.collector::a::use [%r1], [%r3], %rd2, %r2, %p1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 0)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::tf32 */ i32 1, /* cta_group::1 */ i32 1, /* collector::a::discard */ i32 0)
call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 0)
call void @llvm.nvvm.tcgen05.mma.tensor.ashift(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 0)
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 1)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::tf32 */ i32 1, /* cta_group::1 */ i32 1, /* collector::a::lastuse */ i32 1)
call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 1)
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 2)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::tf32 */ i32 1, /* cta_group::1 */ i32 1, /* collector::a::fill */ i32 2)
call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 2)
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 3)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::tf32 */ i32 1, /* cta_group::1 */ i32 1, /* collector::a::use */ i32 3)
call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 1, i32 1, i32 3)
ret void
}
@@ -192,13 +200,17 @@ define void @tcgen05_mma_f8f6f4_cta1(ptr addrspace(6) %dtmem, ptr addrspace(6) %
; CHECK-NEXT: tcgen05.mma.cta_group::1.kind::f8f6f4.collector::a::use [%r1], [%r3], %rd2, %r2, %p1;
; CHECK-NEXT: ret;
call void @llvm.nvvm.tcgen05.mma.shared(ptr addrspace(6) %dtmem, i64 %ashared, i64 %b, i32 %idesc, i1 %enable_inp_d, i32 2, i32 1, i32 0)
+ ; call void @llvm.nvvm.tcgen05.mma.tensor(ptr addrspace(6) %dtmem, ptr addrspace(6) %atensor, i64 %b, i32 %idesc, i1 %enable_inp_d, /* kind::f8f6f4 */ i32 2, /* cta_group::1 */ i32 1, /* collector::a::discard */ i32 0)
call vo...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
c505654
to
c4caeb3
Compare
053ee98
to
203c7e3
Compare
203c7e3
to
0ec7236
Compare
This patch adds LLVM infrastructure to support pretty printing arguments of the intrinsics. The motivation is to increase the readability of LLVM intrinsics and facilitate easy modifications and debugging of LLVM IR. This adds a property ArgInfo<ArgIndex, "argName", "functionName"> to the intrinsic arguments that enables printing a self-explanatory inline comment for each argument. The addition of pretty-print support provides a simple, low-overhead feature that enhances the usability of LLVM intrinsics without disrupting existing workflows. Link to the RFC: https://discourse.llvm.org/t/rfc-pretty-printing-immediate-arguments-in-llvm-intrinsics/88536 Signed-off-by: Dharuni R Acharya <dharunira@nvidia.com>
0ec7236
to
2df4ad3
Compare
This patch adds LLVM infrastructure to support pretty printing arguments of the intrinsics. The motivation is to increase the readability of LLVM intrinsics and facilitate easy modifications and debugging of LLVM IR.
This adds a property ArgInfo<ArgIndex, "argName", "functionName"> to the intrinsic arguments that enables printing a self-explanatory inline comment for each argument.
The addition of pretty-print support provides a simple, low-overhead feature that enhances the usability of LLVM intrinsics without disrupting existing workflows.
Link to the RFC:
https://discourse.llvm.org/t/rfc-pretty-printing-immediate-arguments-in-llvm-intrinsics/88536