Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LLVM][NVPTX]: Add intrinsic for setmaxnreg #77289

Merged
merged 1 commit into from Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 10 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsNVVM.td
Expand Up @@ -4710,4 +4710,14 @@ def int_nvvm_is_explicit_cluster
[IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.is_explicit_cluster">;

// Setmaxnreg inc/dec intrinsics
//
// Both variants return nothing and take a single i32 operand that must be an
// immediate (ImmArg on argument 0). They are marked convergent, access no
// memory, and have side effects. The only difference between the two records
// is the "inc"/"dec" component of the name, so they are stamped out from one
// template.
foreach action = ["inc", "dec"] in {
  def int_nvvm_setmaxnreg_ # action # _sync_aligned_u32
    : DefaultAttrsIntrinsic<[], [llvm_i32_ty],
        [IntrConvergent, IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>],
        "llvm.nvvm.setmaxnreg." # action # ".sync.aligned.u32">;
}

} // let TargetPrefix = "nvvm"
11 changes: 11 additions & 0 deletions llvm/lib/IR/Verifier.cpp
Expand Up @@ -96,6 +96,7 @@
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
Expand Down Expand Up @@ -6031,6 +6032,16 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"Value for inactive lanes must be a VGPR function argument", &Call);
break;
}
case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32:
case Intrinsic::nvvm_setmaxnreg_dec_sync_aligned_u32: {
  // Validate the register-count operand shared by both setmaxnreg variants:
  // it must be a multiple of 8 and lie in the inclusive range [24, 256].
  //
  // The operand is declared ImmArg in IntrinsicsNVVM.td; the cast<> below
  // relies on it already being a ConstantInt when this case runs.
  Value *V = Call.getArgOperand(0);
  unsigned RegCount = cast<ConstantInt>(V)->getZExtValue();
  // Pass &Call so the diagnostic identifies the offending instruction, as the
  // preceding case in this switch does. The message text itself is unchanged.
  Check(RegCount % 8 == 0,
        "reg_count argument to nvvm.setmaxnreg must be in multiples of 8",
        &Call);
  Check((RegCount >= 24 && RegCount <= 256),
        "reg_count argument to nvvm.setmaxnreg must be within [24, 256]",
        &Call);
  break;
}
case Intrinsic::experimental_convergence_entry:
LLVM_FALLTHROUGH;
case Intrinsic::experimental_convergence_anchor:
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Expand Up @@ -164,6 +164,9 @@ def True : Predicate<"true">;
// Parameterized minimum-version predicates. `version` is pasted into the C++
// condition string, so hasPTX<80> tests `Subtarget->getPTXVersion() >= 80`
// and hasSM<90> tests `Subtarget->getSmVersion() >= 90`.
class hasPTX<int version>: Predicate<"Subtarget->getPTXVersion() >= " # version>;
class hasSM<int version>: Predicate<"Subtarget->getSmVersion() >= " # version>;

// Explicit records for arch-accelerated SM versions
// Unlike hasSM<N>, this is an exact-match test on getFullSmVersion(), not a
// ">=" comparison, so it does not hold for any other full SM version.
// NOTE(review): 901 presumably encodes sm_90a (the codegen test for the
// instruction guarded by this predicate uses -mcpu=sm_90a) -- confirm against
// the subtarget's getFullSmVersion() definition.
def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">;

// non-sync shfl instructions are not available on sm_70+ in PTX6.4+
// The two adjacent string literals form one C++ condition: the predicate is
// the negation of (SM version >= 70 AND PTX version >= 64), i.e. it holds
// whenever at least one of those two lower bounds is not met.
def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70"
"&& Subtarget->getPTXVersion() >= 64)">;
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Expand Up @@ -6727,3 +6727,16 @@ def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins),
"mov.pred\t$d, %is_explicit_cluster;",
[(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
Requires<[hasSM<90>, hasPTX<78>]>;

// setmaxnreg inc/dec intrinsics
//
// SET_MAXNREG<Action, Intr> defines one instruction with no outputs and a
// single i32 immediate input ($reg_count). It prints as
//   setmaxnreg.<Action>.sync.aligned.u32 <reg_count>;
// and is selected for a call to `Intr` whose operand is a target immediate.
// Selection is gated on both hasSM90a and hasPTX<80>.
// Everything in the enclosing `let` is marked isConvergent.
let isConvergent = true in {
multiclass SET_MAXNREG<string Action, Intrinsic Intr> {
// The inner def has no name of its own (`def :`).
def : NVPTXInst<(outs), (ins i32imm:$reg_count),
"setmaxnreg." # Action # ".sync.aligned.u32 $reg_count;",
[(Intr timm:$reg_count)]>,
Requires<[hasSM90a, hasPTX<80>]>;
}

// One instantiation per direction, each bound to its matching intrinsic.
defm INT_SET_MAXNREG_INC : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>;
defm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_aligned_u32>;
} // isConvergent
16 changes: 16 additions & 0 deletions llvm/test/CodeGen/NVPTX/setmaxnreg.ll
@@ -0,0 +1,16 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90a -mattr=+ptx80| FileCheck --check-prefixes=CHECK %s
; RUN: %if ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_90a -mattr=+ptx80| %ptxas-verify -arch=sm_90a %}

; Codegen test for the setmaxnreg intrinsics: each call below, made with a
; constant register count, is expected to be emitted as the corresponding
; setmaxnreg.{inc,dec}.sync.aligned.u32 line with that count as its operand.
; The second run line additionally passes the output through ptxas when the
; ptxas-12.0 feature is available.

declare void @llvm.nvvm.setmaxnreg.inc.sync.aligned.u32(i32 %reg_count)
declare void @llvm.nvvm.setmaxnreg.dec.sync.aligned.u32(i32 %reg_count)

; CHECK-LABEL: test_set_maxn_reg
define void @test_set_maxn_reg() {
; Increase variant with a count of 96.
; CHECK: setmaxnreg.inc.sync.aligned.u32 96;
call void @llvm.nvvm.setmaxnreg.inc.sync.aligned.u32(i32 96)

; Decrease variant with a count of 64.
; CHECK: setmaxnreg.dec.sync.aligned.u32 64;
call void @llvm.nvvm.setmaxnreg.dec.sync.aligned.u32(i32 64)

ret void
}
2 changes: 2 additions & 0 deletions llvm/test/Verifier/NVPTX/lit.local.cfg
@@ -0,0 +1,2 @@
# Mark every test in this directory unsupported when the NVPTX target is not
# among the configured targets.
if "NVPTX" not in config.root.targets:
    config.unsupported = True
14 changes: 14 additions & 0 deletions llvm/test/Verifier/NVPTX/setmaxnreg.ll
@@ -0,0 +1,14 @@
; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s

; Negative verifier test: llvm-as is expected to fail on this file, and its
; diagnostics must contain the messages for both invalid register counts used
; below.

declare void @llvm.nvvm.setmaxnreg.inc.sync.aligned.u32(i32 %reg_count)
declare void @llvm.nvvm.setmaxnreg.dec.sync.aligned.u32(i32 %reg_count)

define void @test_set_maxn_reg() {
; 95 is not a multiple of 8.
; CHECK: reg_count argument to nvvm.setmaxnreg must be in multiples of 8
call void @llvm.nvvm.setmaxnreg.inc.sync.aligned.u32(i32 95)

; 16 is a multiple of 8 but lies below the lower bound of 24.
; CHECK: reg_count argument to nvvm.setmaxnreg must be within [24, 256]
call void @llvm.nvvm.setmaxnreg.dec.sync.aligned.u32(i32 16)

ret void
}