Skip to content

Commit

Permalink
[NVPTX] Add builtin support for 'globaltimer' (#79765)
Browse files Browse the repository at this point in the history
Summary:
This patch adds support for `globaltimer` to match `clock` and
`clock64`. See the PTX ISA reference for details. This patch does not
implement the `hi` or `lo` variants for brevity as they can be obtained
from this with the cost of an additional register.

https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#special-registers-globaltimer-globaltimer-lo-globaltimer-hi
  • Loading branch information
jhuber6 committed Jan 29, 2024
1 parent ea80140 commit e633807
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 1 deletion.
1 change: 1 addition & 0 deletions clang/include/clang/Basic/BuiltinsNVPTX.def
Expand Up @@ -148,6 +148,7 @@ BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc")

BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n")
BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n")
BUILTIN(__nvvm_read_ptx_sreg_globaltimer, "LLi", "n")

BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n")
BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n")
Expand Down
4 changes: 3 additions & 1 deletion clang/test/CodeGen/builtins-nvptx.c
Expand Up @@ -134,11 +134,13 @@ __device__ long long read_clocks() {

// CHECK: call i32 @llvm.nvvm.read.ptx.sreg.clock()
// CHECK: call i64 @llvm.nvvm.read.ptx.sreg.clock64()
// CHECK: call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()

int a = __nvvm_read_ptx_sreg_clock();
long long b = __nvvm_read_ptx_sreg_clock64();
long long c = __nvvm_read_ptx_sreg_globaltimer();

return a + b;
return a + b + c;
}

__device__ int read_pms() {
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsNVVM.td
Expand Up @@ -4510,6 +4510,8 @@ def int_nvvm_read_ptx_sreg_lanemask_gt :
def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32<"clock">;
def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">;

def int_nvvm_read_ptx_sreg_globaltimer : PTXReadNCSRegIntrinsic_r64<"globaltimer">;

def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32<"pm0">;
def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32<"pm1">;
def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32<"pm2">;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Expand Up @@ -6376,6 +6376,8 @@ def INT_PTX_SREG_CLOCK :
PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
def INT_PTX_SREG_CLOCK64 :
PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
def INT_PTX_SREG_GLOBALTIMER :
PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;

def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
Expand Down
12 changes: 12 additions & 0 deletions llvm/test/CodeGen/NVPTX/intrinsics.ll
Expand Up @@ -140,6 +140,17 @@ define void @test_exit() {
ret void
}

; CHECK-LABEL: test_globaltimer
define i64 @test_globaltimer() {
; CHECK: mov.u64 %r{{.*}}, %globaltimer;
%a = tail call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
; CHECK: mov.u64 %r{{.*}}, %globaltimer;
%b = tail call i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
%ret = add i64 %a, %b
; CHECK: ret
ret i64 %ret
}

declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare float @llvm.nvvm.sqrt.f(float)
Expand All @@ -154,3 +165,4 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
declare i32 @llvm.nvvm.read.ptx.sreg.clock()
declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
declare void @llvm.nvvm.exit()
declare i64 @llvm.nvvm.read.ptx.sreg.globaltimer()

0 comments on commit e633807

Please sign in to comment.