Skip to content

Commit

Permalink
[NVPTX] Add builtin support for 'nanosleep' PTX instrunction (#79888)
Browse files Browse the repository at this point in the history
Summary:
This patch adds a builtin for the `nanosleep` PTX function. It takes
either an immediate or a register and sleeps for [0, 2t] nanoseconds
given t. More information at the documentation:

https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#miscellaneous-instructions-nanosleep
  • Loading branch information
jhuber6 committed Jan 29, 2024
1 parent d492faa commit 5f12cc9
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 0 deletions.
1 change: 1 addition & 0 deletions clang/include/clang/Basic/BuiltinsNVPTX.def
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n")
// MISC

BUILTIN(__nvvm_prmt, "UiUiUiUi", "")
TARGET_BUILTIN(__nvvm_nanosleep, "vi", "n", AND(SM_70, PTX63))

// Min Max

Expand Down
11 changes: 11 additions & 0 deletions clang/test/CodeGen/builtins-nvptx.c
Original file line number Diff line number Diff line change
Expand Up @@ -818,6 +818,17 @@ __device__ void nvvm_vote(int pred) {
// CHECK: ret void
}

// CHECK-LABEL: nvvm_nanosleep
__device__ void nvvm_nanosleep(int d) {
#if __CUDA_ARCH__ >= 700
// CHECK_PTX70_SM80: call void @llvm.nvvm.nanosleep
__nvvm_nanosleep(d);

// CHECK_PTX70_SM80: call void @llvm.nvvm.nanosleep
__nvvm_nanosleep(1);
#endif
}

// CHECK-LABEL: nvvm_mbarrier
__device__ void nvvm_mbarrier(long long* addr, __attribute__((address_space(3))) long long* sharedAddr, int count, long long state) {
#if __CUDA_ARCH__ >= 800
Expand Down
4 changes: 4 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsNVVM.td
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,10 @@ let TargetPrefix = "nvvm" in {
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable]>;

def int_nvvm_nanosleep : ClangBuiltin<"__nvvm_nanosleep">,
DefaultAttrsIntrinsic<[], [llvm_i32_ty],
[IntrConvergent, IntrNoMem, IntrHasSideEffects]>;

//
// Min Max
//
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,12 @@ class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;", Int32Regs,
Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;

def INT_NVVM_NANOSLEEP_I : NVPTXInst<(outs), (ins i32imm:$i), "nanosleep.u32 \t$i;",
[(int_nvvm_nanosleep imm:$i)]>,
Requires<[hasPTX<63>, hasSM<70>]>;
def INT_NVVM_NANOSLEEP_R : NVPTXInst<(outs), (ins Int32Regs:$i), "nanosleep.u32 \t$i;",
[(int_nvvm_nanosleep Int32Regs:$i)]>,
Requires<[hasPTX<63>, hasSM<70>]>;
//
// Min Max
//
Expand Down
20 changes: 20 additions & 0 deletions llvm/test/CodeGen/NVPTX/nanosleep.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
; RUN: llc < %s -march=nvptx64 -O2 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | %ptxas-verify %}

This comment has been minimized.

Copy link
@Artem-B

Artem-B Feb 9, 2024

Member

This needs -arch=sm_70 passed to ptxas. Otherwise lit runs it with sm_60 and it fails.


declare void @llvm.nvvm.nanosleep(i32)

; CHECK-LABEL: test_nanosleep_r
define void @test_nanosleep_r(i32 noundef %d) {
entry:
; CHECK: nanosleep.u32 %[[REG:.+]];
call void @llvm.nvvm.nanosleep(i32 %d)
ret void
}

; CHECK-LABEL: test_nanosleep_i
define void @test_nanosleep_i() {
entry:
; CHECK: nanosleep.u32 42;
call void @llvm.nvvm.nanosleep(i32 42)
ret void
}

0 comments on commit 5f12cc9

Please sign in to comment.