Skip to content

Commit

Permalink
[LLVM] Add __builtin_readsteadycounter intrinsic and builtin for re…
Browse files Browse the repository at this point in the history
…altime clocks (#81331)

Summary:
This patch adds a new intrinsic and builtin function mirroring the
existing `__builtin_readcyclecounter`. The difference is that this
implementation targets a separate counter, available on some targets,
that returns a fixed frequency clock which can be used to determine
elapsed time. This differs from the cycle counter, which often has a
variable frequency.

This patch only adds support for the NVPTX and AMDGPU targets.

This is done as a new and separate builtin rather than an argument to
`readcyclecounter` to avoid needing to change existing code and to make
the separation more explicit.
  • Loading branch information
jhuber6 committed Feb 13, 2024
1 parent 381a00d commit 11fcae6
Show file tree
Hide file tree
Showing 35 changed files with 229 additions and 72 deletions.
33 changes: 33 additions & 0 deletions clang/docs/LanguageExtensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2764,6 +2764,39 @@ Query for this feature with ``__has_builtin(__builtin_readcyclecounter)``. Note
that even if present, its use may depend on run-time privilege or other OS
controlled state.
``__builtin_readsteadycounter``
------------------------------
``__builtin_readsteadycounter`` is used to access the fixed frequency counter
register (or a similar steady-rate clock) on those targets that support it.
The function is similar to ``__builtin_readcyclecounter`` above except that the
frequency is fixed, making it suitable for measuring elapsed time.
**Syntax**:
.. code-block:: c++
__builtin_readsteadycounter()
**Example of Use**:
.. code-block:: c++
unsigned long long t0 = __builtin_readsteadycounter();
do_something();
unsigned long long t1 = __builtin_readsteadycounter();
unsigned long long secs_to_do_something = (t1 - t0) / tick_rate;
**Description**:
The ``__builtin_readsteadycounter()`` builtin returns the fixed frequency counter value.
When not supported by the target, the return value is always zero. This builtin
takes no arguments and produces an unsigned long long result. The builtin does
not guarantee any particular frequency, only that it is stable. Knowledge of the
counter's true frequency will need to be provided by the user.
Query for this feature with ``__has_builtin(__builtin_readsteadycounter)``.
``__builtin_dump_struct``
-------------------------
Expand Down
3 changes: 3 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ C23 Feature Support
Non-comprehensive list of changes in this release
-------------------------------------------------

- Added ``__builtin_readsteadycounter`` for reading fixed frequency hardware
counters.

New Compiler Flags
------------------

Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Builtins.td
Original file line number Diff line number Diff line change
Expand Up @@ -1110,6 +1110,12 @@ def ReadCycleCounter : Builtin {
let Prototype = "unsigned long long int()";
}

// Builtin `__builtin_readsteadycounter`: takes no arguments, returns
// `unsigned long long`, and is declared NoThrow.
def ReadSteadyCounter : Builtin {
let Spellings = ["__builtin_readsteadycounter"];
let Attributes = [NoThrow];
let Prototype = "unsigned long long int()";
}

def Trap : Builtin {
let Spellings = ["__builtin_trap"];
let Attributes = [NoThrow, NoReturn];
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3443,6 +3443,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
return RValue::get(Builder.CreateCall(F));
}
case Builtin::BI__builtin_readsteadycounter: {
Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
return RValue::get(Builder.CreateCall(F));
}
case Builtin::BI__builtin___clear_cache: {
Value *Begin = EmitScalarExpr(E->getArg(0));
Value *End = EmitScalarExpr(E->getArg(1));
Expand Down
6 changes: 6 additions & 0 deletions clang/test/CodeGen/builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,12 @@ long long test_builtin_readcyclecounter(void) {
return __builtin_readcyclecounter();
}

/// __builtin_readsteadycounter should be emitted as a call to the
/// llvm.readsteadycounter intrinsic returning i64.
// CHECK-LABEL: define{{.*}} i64 @test_builtin_readsteadycounter
long long test_builtin_readsteadycounter(void) {
// CHECK: call i64 @llvm.readsteadycounter()
return __builtin_readsteadycounter();
}

/// __builtin_launder should be a NOP in C since there are no vtables.
// CHECK-LABEL: define{{.*}} void @test_builtin_launder
void test_builtin_launder(int *p) {
Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/CodeGen/ISDOpcodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -1179,6 +1179,12 @@ enum NodeType {
/// counter-like register (or other high accuracy low latency clock source).
READCYCLECOUNTER,

/// READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
/// It has the same semantics as the READCYCLECOUNTER implementation except
/// that the result is the content of the architecture-specific fixed
/// frequency counter suitable for measuring elapsed time.
READSTEADYCOUNTER,

/// HANDLENODE node - Used as a handle for various purposes.
HANDLENODE,

Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/IR/Intrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,8 @@ def int_pcmarker : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;

def int_readcyclecounter : DefaultAttrsIntrinsic<[llvm_i64_ty]>;

// llvm.readsteadycounter: takes no arguments and returns an i64.
def int_readsteadycounter : DefaultAttrsIntrinsic<[llvm_i64_ty]>;

// The assume intrinsic is marked InaccessibleMemOnly so that proper control
// dependencies will be maintained.
def int_assume : DefaultAttrsIntrinsic<
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/Support/TargetOpcodes.def
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,9 @@ HANDLE_TARGET_OPCODE(G_INTRINSIC_ROUNDEVEN)
/// INTRINSIC readcyclecounter
HANDLE_TARGET_OPCODE(G_READCYCLECOUNTER)

/// INTRINSIC readsteadycounter
HANDLE_TARGET_OPCODE(G_READSTEADYCOUNTER)

/// Generic load (including anyext load)
HANDLE_TARGET_OPCODE(G_LOAD)

Expand Down
6 changes: 6 additions & 0 deletions llvm/include/llvm/Target/GenericOpcodes.td
Original file line number Diff line number Diff line change
Expand Up @@ -1101,6 +1101,12 @@ def G_READCYCLECOUNTER : GenericInstruction {
let hasSideEffects = true;
}

// Generic opcode for the readsteadycounter intrinsic: no inputs, a single
// result, and marked as having side effects.
def G_READSTEADYCOUNTER : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins);
let hasSideEffects = true;
}

//------------------------------------------------------------------------------
// Memory ops
//------------------------------------------------------------------------------
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ def : GINodeEquiv<G_FMAXNUM_IEEE, fmaxnum_ieee>;
def : GINodeEquiv<G_FMAXIMUM, fmaximum>;
def : GINodeEquiv<G_FMINIMUM, fminimum>;
def : GINodeEquiv<G_READCYCLECOUNTER, readcyclecounter>;
def : GINodeEquiv<G_READSTEADYCOUNTER, readsteadycounter>;
def : GINodeEquiv<G_ROTR, rotr>;
def : GINodeEquiv<G_ROTL, rotl>;
def : GINodeEquiv<G_LROUND, lround>;
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/Target/TargetSelectionDAG.td
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,9 @@ def prefetch : SDNode<"ISD::PREFETCH" , SDTPrefetch,
def readcyclecounter : SDNode<"ISD::READCYCLECOUNTER", SDTIntLeaf,
[SDNPHasChain, SDNPSideEffect]>;

// SelectionDAG node for ISD::READSTEADYCOUNTER. Declared the same way as
// readcyclecounter: SDTIntLeaf type profile, with a chain and side effects.
def readsteadycounter : SDNode<"ISD::READSTEADYCOUNTER", SDTIntLeaf,
[SDNPHasChain, SDNPSideEffect]>;

def membarrier : SDNode<"ISD::MEMBARRIER", SDTNone,
[SDNPHasChain, SDNPSideEffect]>;

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1885,6 +1885,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_INTRINSIC_TRUNC;
case Intrinsic::readcyclecounter:
return TargetOpcode::G_READCYCLECOUNTER;
case Intrinsic::readsteadycounter:
return TargetOpcode::G_READSTEADYCOUNTER;
case Intrinsic::ptrmask:
return TargetOpcode::G_PTRMASK;
case Intrinsic::lrint:
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/IntrinsicLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
break;
}
case Intrinsic::readsteadycounter: {
errs() << "WARNING: this target does not support the llvm.readsteadycounter"
<< " intrinsic. It is being lowered to a constant 0\n";
CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
break;
}

case Intrinsic::dbg_declare:
case Intrinsic::dbg_label:
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1127,8 +1127,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TargetLowering::Custom;
break;
case ISD::READCYCLECOUNTER:
// READCYCLECOUNTER returns an i64, even if type legalization might have
// expanded that to several smaller types.
case ISD::READSTEADYCOUNTER:
// READCYCLECOUNTER and READSTEADYCOUNTER return an i64, even if type
// legalization might have expanded that to several smaller types.
Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64);
break;
case ISD::READ_REGISTER:
Expand Down Expand Up @@ -3080,6 +3081,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Node->getOperand(0));
break;
case ISD::READCYCLECOUNTER:
case ISD::READSTEADYCOUNTER:
// If the target didn't expand this, just return 'zero' and preserve the
// chain.
Results.append(Node->getNumValues() - 1,
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2648,7 +2648,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::LLRINT: ExpandIntRes_XROUND_XRINT(N, Lo, Hi); break;
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
case ISD::READCYCLECOUNTER:
case ISD::READSTEADYCOUNTER: ExpandIntRes_READCOUNTER(N, Lo, Hi); break;
case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
Expand Down Expand Up @@ -4031,8 +4032,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
Lo, Hi);
}

void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
SDValue &Hi) {
void DAGTypeLegalizer::ExpandIntRes_READCOUNTER(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc DL(N);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_READCOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6781,6 +6781,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::readsteadycounter: {
SDValue Op = getRoot();
Res = DAG.getNode(ISD::READSTEADYCOUNTER, sdl,
DAG.getVTList(MVT::i64, MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
return;
}
case Intrinsic::bitreverse:
setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
getValue(I.getArgOperand(0)).getValueType(),
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ATOMIC_STORE: return "AtomicStore";
case ISD::PCMARKER: return "PCMarker";
case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
case ISD::READSTEADYCOUNTER: return "ReadSteadyCounter";
case ISD::SRCVALUE: return "SrcValue";
case ISD::MDNODE_SDNODE: return "MDNode";
case ISD::EntryToken: return "EntryToken";
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/TargetLoweringBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,9 @@ void TargetLoweringBase::initActions() {
// Most targets also ignore the @llvm.readcyclecounter intrinsic.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand);

// Most targets also ignore the @llvm.readsteadycounter intrinsic.
setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Expand);

// ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1988,6 +1988,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder(G_READCYCLECOUNTER)
.legalFor({S64});

getActionDefinitionsBuilder(G_READSTEADYCOUNTER).legalFor({S64});

getActionDefinitionsBuilder(G_FENCE)
.alwaysLegal();

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4051,6 +4051,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_CONSTANT:
case AMDGPU::G_GLOBAL_VALUE:
case AMDGPU::G_BLOCK_ADDR:
case AMDGPU::G_READSTEADYCOUNTER:
case AMDGPU::G_READCYCLECOUNTER: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,

// On SI this is s_memtime and s_memrealtime on VI.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);

if (Subtarget->hasSMemRealTime() ||
Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11)
setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Legal);
setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Custom);

if (Subtarget->has16BitInsts()) {
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Target/AMDGPU/SMInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1068,6 +1068,20 @@ def : GCNPat <
}
} // let OtherPredicates = [HasShaderCyclesRegister]

// Subtargets satisfying HasSMemRealTime select readsteadycounter directly to
// S_MEMREALTIME.
let OtherPredicates = [HasSMemRealTime] in {
def : GCNPat <
(i64 (readsteadycounter)),
(S_MEMREALTIME)
>;
} // let OtherPredicates = [HasSMemRealTime]

// On GFX11+ readsteadycounter is selected to S_SENDMSG_RTN_B64 with the
// MSG_RTN_GET_REALTIME message id as its immediate operand.
let SubtargetPredicate = isGFX11Plus in {
def : GCNPat <
(i64 (readsteadycounter)),
(S_SENDMSG_RTN_B64 (i32 /*MSG_RTN_GET_REALTIME=*/0x83))
>;
} // let SubtargetPredicate = isGFX11Plus

def i32imm_zero : TImmLeaf <i32, [{
return Imm == 0;
}]>;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f16, Expand);

setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
if (STI.getSmVersion() >= 30 && STI.getPTXVersion() > 31)
setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Legal);

setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote);
setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand);
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -3805,7 +3805,6 @@ def CALL_PROTOTYPE :

include "NVPTXIntrinsics.td"


//-----------------------------------
// Notes
//-----------------------------------
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -6382,6 +6382,7 @@ def INT_PTX_SREG_GLOBALTIMER :
}

def: Pat <(i64 (readcyclecounter)), (INT_PTX_SREG_CLOCK64)>;
// Select readsteadycounter to a read of the globaltimer special register.
def: Pat <(i64 (readsteadycounter)), (INT_PTX_SREG_GLOBALTIMER)>;

def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,9 @@
# DEBUG-NEXT: G_READCYCLECOUNTER (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_READSTEADYCOUNTER (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined

# DEBUG-NEXT: G_LOAD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
Expand Down
24 changes: 24 additions & 0 deletions llvm/test/CodeGen/AMDGPU/readsteadycounter.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX700
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX1100
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX1100

declare i64 @llvm.readsteadycounter() #0

; GCN-LABEL: {{^}}test_readsteadycounter:
; GFX700: s_mov_b32 s[[REG:[0-9]+]], 0
; GFX900: s_memrealtime s[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GFX900: s_memrealtime s[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
; GFX1100: s_sendmsg_rtn_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], sendmsg(MSG_RTN_GET_REALTIME)
; GFX1100: s_sendmsg_rtn_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], sendmsg(MSG_RTN_GET_REALTIME)
; Reads the steady counter twice and writes each result to %out with a
; volatile store. The check lines above expect one counter read per call on
; gfx900 and gfx1100, and a zero constant on gfx700.
define amdgpu_kernel void @test_readsteadycounter(ptr addrspace(1) %out) #0 {
%cycle0 = call i64 @llvm.readsteadycounter()
store volatile i64 %cycle0, ptr addrspace(1) %out

%cycle1 = call i64 @llvm.readsteadycounter()
store volatile i64 %cycle1, ptr addrspace(1) %out
ret void
}

attributes #0 = { nounwind }
12 changes: 12 additions & 0 deletions llvm/test/CodeGen/NVPTX/intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,17 @@ define i64 @test_cyclecounter() {
ret i64 %ret
}

; CHECK-LABEL: test_steadycounter
; Two back-to-back steady counter reads whose results are summed; each call is
; expected to produce its own mov.u64 from %globaltimer.
define i64 @test_steadycounter() {
; CHECK: mov.u64 %r{{.*}}, %globaltimer;
%a = tail call i64 @llvm.readsteadycounter()
; CHECK: mov.u64 %r{{.*}}, %globaltimer;
%b = tail call i64 @llvm.readsteadycounter()
%ret = add i64 %a, %b
; CHECK: ret
ret i64 %ret
}

declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare float @llvm.nvvm.sqrt.f(float)
Expand All @@ -178,3 +189,4 @@ declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
declare void @llvm.nvvm.exit()
declare i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
declare i64 @llvm.readcyclecounter()
declare i64 @llvm.readsteadycounter()

0 comments on commit 11fcae6

Please sign in to comment.