NVPTX/NVVM: add a 32-bit read of the PTX special register %globaltimer_lo.

  * IntrinsicsNVVM.td: declare int_nvvm_read_ptx_sreg_globaltimer_lo.
  * NVPTXIntrinsics.td: add SREG_GLOBALTIMER_LO (PTX name "globaltimer_lo")
    and an i32 readsteadycounter pattern that selects it.
  * NVVMOps.td: add the NVVM_GlobalTimerLoOp dialect op.
  * Tests: cover llc codegen (intrinsics.ll) and MLIR-to-LLVM-IR translation
    (nvvmir.mlir); SSA names after the new op in nvvmir.mlir shift by one.

diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 77ef79debac1a..856492b494914 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -1761,6 +1761,7 @@ def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
 def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic<llvm_i64_ty>;
 
 def int_nvvm_read_ptx_sreg_globaltimer : PTXReadNCSRegIntrinsic<llvm_i64_ty>;
+def int_nvvm_read_ptx_sreg_globaltimer_lo : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
 
 def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
 def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic<llvm_i32_ty>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 721afae4db51c..c280b68d2298d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -4358,10 +4358,12 @@ let hasSideEffects = 1 in {
   def SREG_CLOCK : PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
   def SREG_CLOCK64 : PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
   def SREG_GLOBALTIMER : PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;
+  def SREG_GLOBALTIMER_LO : PTX_READ_SREG_R32<"globaltimer_lo", int_nvvm_read_ptx_sreg_globaltimer_lo>;
 }
 
 def: Pat <(i64 (readcyclecounter)), (SREG_CLOCK64)>;
 def: Pat <(i64 (readsteadycounter)), (SREG_GLOBALTIMER)>;
+def: Pat <(i32 (readsteadycounter)), (SREG_GLOBALTIMER_LO)>;
 
 def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
 def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
diff --git a/llvm/test/CodeGen/NVPTX/intrinsics.ll b/llvm/test/CodeGen/NVPTX/intrinsics.ll
index 4ed50632251cb..6bdb8ead7a64a 100644
--- a/llvm/test/CodeGen/NVPTX/intrinsics.ll
+++ b/llvm/test/CodeGen/NVPTX/intrinsics.ll
@@ -267,6 +267,23 @@ define i64 @test_globaltimer() {
   ret i64 %ret
 }
 
+define i32 @test_globaltimer_lo(){
+; CHECK-LABEL: test_globaltimer_lo(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    mov.u32 %r1, %globaltimer_lo;
+; CHECK-NEXT:    mov.u32 %r2, %globaltimer_lo;
+; CHECK-NEXT:    add.s32 %r3, %r1, %r2;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT:    ret;
+  %a = tail call i32 @llvm.nvvm.read.ptx.sreg.globaltimer.lo()
+  %b = tail call i32 @llvm.nvvm.read.ptx.sreg.globaltimer.lo()
+  %ret = add i32 %a, %b
+  ret i32 %ret
+}
+
 define i64 @test_cyclecounter() {
 ; CHECK-LABEL: test_cyclecounter(
 ; CHECK:       {
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 09547e8ac6790..7b34783523e51 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -258,6 +258,7 @@ def NVVM_ClusterDim : NVVM_PureSpecialRangeableRegisterOp<"read.ptx.sreg.cluster
 def NVVM_ClockOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.clock">;
 def NVVM_Clock64Op : NVVM_SpecialRegisterOp<"read.ptx.sreg.clock64">;
 def NVVM_GlobalTimerOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.globaltimer">;
+def NVVM_GlobalTimerLoOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.globaltimer.lo">;
 
 //===----------------------------------------------------------------------===//
 // envreg registers
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
index 16191d925959b..34d3177d37699 100644
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -64,92 +64,94 @@ llvm.func @nvvm_special_regs() -> i32 {
   %30 = nvvm.read.ptx.sreg.clock64 : i64
   // CHECK: call i64 @llvm.nvvm.read.ptx.sreg.globaltimer
   %31 = nvvm.read.ptx.sreg.globaltimer : i64
-  // CHECK: %32 = call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
-  %32 = nvvm.read.ptx.sreg.tid.x range <i32, 0, 64> : i32
+  // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.globaltimer.lo()
+  %32 = nvvm.read.ptx.sreg.globaltimer.lo : i32
+  // CHECK: %33 = call range(i32 0, 64) i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %33 = nvvm.read.ptx.sreg.tid.x range <i32, 0, 64> : i32
   // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.warpid
-  %33 = nvvm.read.ptx.sreg.warpid : i32
+  %34 = nvvm.read.ptx.sreg.warpid : i32
   // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nwarpid
-  %34 = nvvm.read.ptx.sreg.nwarpid : i32
+  %35 = nvvm.read.ptx.sreg.nwarpid : i32
   // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.smid
-  %35 = nvvm.read.ptx.sreg.smid : i32
+  %36 = nvvm.read.ptx.sreg.smid : i32
   // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.nsmid
-  %36 = nvvm.read.ptx.sreg.nsmid : i32
+  %37 = nvvm.read.ptx.sreg.nsmid : i32
   // CHECK: call i32 @llvm.nvvm.read.ptx.sreg.gridid
-  %37 = nvvm.read.ptx.sreg.gridid : i32
+  %38 = nvvm.read.ptx.sreg.gridid : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg0
-  %38 = nvvm.read.ptx.sreg.envreg0 : i32
+  %39 = nvvm.read.ptx.sreg.envreg0 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg1
-  %39 = nvvm.read.ptx.sreg.envreg1 : i32
+  %40 = nvvm.read.ptx.sreg.envreg1 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg2
-  %40 = nvvm.read.ptx.sreg.envreg2 : i32
+  %41 = nvvm.read.ptx.sreg.envreg2 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg3
-  %41 = nvvm.read.ptx.sreg.envreg3 : i32
+  %42 = nvvm.read.ptx.sreg.envreg3 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg4
-  %42 = nvvm.read.ptx.sreg.envreg4 : i32
+  %43 = nvvm.read.ptx.sreg.envreg4 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg5
-  %43 = nvvm.read.ptx.sreg.envreg5 : i32
+  %44 = nvvm.read.ptx.sreg.envreg5 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg6
-  %44 = nvvm.read.ptx.sreg.envreg6 : i32
+  %45 = nvvm.read.ptx.sreg.envreg6 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg7
-  %45 = nvvm.read.ptx.sreg.envreg7 : i32
+  %46 = nvvm.read.ptx.sreg.envreg7 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg8
-  %46 = nvvm.read.ptx.sreg.envreg8 : i32
+  %47 = nvvm.read.ptx.sreg.envreg8 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg9
-  %47 = nvvm.read.ptx.sreg.envreg9 : i32
+  %48 = nvvm.read.ptx.sreg.envreg9 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg10
-  %48 = nvvm.read.ptx.sreg.envreg10 : i32
+  %49 = nvvm.read.ptx.sreg.envreg10 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg11
-  %49 = nvvm.read.ptx.sreg.envreg11 : i32
+  %50 = nvvm.read.ptx.sreg.envreg11 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg12
-  %50 = nvvm.read.ptx.sreg.envreg12 : i32
+  %51 = nvvm.read.ptx.sreg.envreg12 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg13
-  %51 = nvvm.read.ptx.sreg.envreg13 : i32
+  %52 = nvvm.read.ptx.sreg.envreg13 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg14
-  %52 = nvvm.read.ptx.sreg.envreg14 : i32
+  %53 = nvvm.read.ptx.sreg.envreg14 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg15
-  %53 = nvvm.read.ptx.sreg.envreg15 : i32
+  %54 = nvvm.read.ptx.sreg.envreg15 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg16
-  %54 = nvvm.read.ptx.sreg.envreg16 : i32
+  %55 = nvvm.read.ptx.sreg.envreg16 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg17
-  %55 = nvvm.read.ptx.sreg.envreg17 : i32
+  %56 = nvvm.read.ptx.sreg.envreg17 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg18
-  %56 = nvvm.read.ptx.sreg.envreg18 : i32
+  %57 = nvvm.read.ptx.sreg.envreg18 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg19
-  %57 = nvvm.read.ptx.sreg.envreg19 : i32
+  %58 = nvvm.read.ptx.sreg.envreg19 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg20
-  %58 = nvvm.read.ptx.sreg.envreg20 : i32
+  %59 = nvvm.read.ptx.sreg.envreg20 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg21
-  %59 = nvvm.read.ptx.sreg.envreg21 : i32
+  %60 = nvvm.read.ptx.sreg.envreg21 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg22
-  %60 = nvvm.read.ptx.sreg.envreg22 : i32
+  %61 = nvvm.read.ptx.sreg.envreg22 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg23
-  %61 = nvvm.read.ptx.sreg.envreg23 : i32
+  %62 = nvvm.read.ptx.sreg.envreg23 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg24
-  %62 = nvvm.read.ptx.sreg.envreg24 : i32
+  %63 = nvvm.read.ptx.sreg.envreg24 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg25
-  %63 = nvvm.read.ptx.sreg.envreg25 : i32
+  %64 = nvvm.read.ptx.sreg.envreg25 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg26
-  %64 = nvvm.read.ptx.sreg.envreg26 : i32
+  %65 = nvvm.read.ptx.sreg.envreg26 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg27
-  %65 = nvvm.read.ptx.sreg.envreg27 : i32
+  %66 = nvvm.read.ptx.sreg.envreg27 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg28
-  %66 = nvvm.read.ptx.sreg.envreg28 : i32
+  %67 = nvvm.read.ptx.sreg.envreg28 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg29
-  %67 = nvvm.read.ptx.sreg.envreg29 : i32
+  %68 = nvvm.read.ptx.sreg.envreg29 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg30
-  %68 = nvvm.read.ptx.sreg.envreg30 : i32
+  %69 = nvvm.read.ptx.sreg.envreg30 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.envreg31
-  %69 = nvvm.read.ptx.sreg.envreg31 : i32
+  %70 = nvvm.read.ptx.sreg.envreg31 : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.eq
-  %70 = nvvm.read.ptx.sreg.lanemask.eq : i32
+  %71 = nvvm.read.ptx.sreg.lanemask.eq : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.le
-  %71 = nvvm.read.ptx.sreg.lanemask.le : i32
+  %72 = nvvm.read.ptx.sreg.lanemask.le : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.lt
-  %72 = nvvm.read.ptx.sreg.lanemask.lt : i32
+  %73 = nvvm.read.ptx.sreg.lanemask.lt : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.ge
-  %73 = nvvm.read.ptx.sreg.lanemask.ge : i32
+  %74 = nvvm.read.ptx.sreg.lanemask.ge : i32
   //CHECK: call i32 @llvm.nvvm.read.ptx.sreg.lanemask.gt
-  %74 = nvvm.read.ptx.sreg.lanemask.gt : i32
+  %75 = nvvm.read.ptx.sreg.lanemask.gt : i32
 
   llvm.return %1 : i32
 }