Skip to content

Commit 11fcae6

Browse files
authored
[LLVM] Add __builtin_readsteadycounter intrinsic and builtin for realtime clocks (#81331)
Summary: This patch adds a new intrinsic and builtin function mirroring the existing `__builtin_readcyclecounter`. The difference is that this implementation targets a separate counter, available on some targets, that returns a fixed frequency clock which can be used to determine elapsed time. This differs from the cycle counter, which often has a variable frequency. This patch only adds support for the NVPTX and AMDGPU targets. This is done as a new and separate builtin rather than an argument to `readcyclecounter` to avoid needing to change existing code and to make the separation more explicit.
1 parent 381a00d commit 11fcae6

35 files changed

+229
-72
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2764,6 +2764,39 @@ Query for this feature with ``__has_builtin(__builtin_readcyclecounter)``. Note
27642764
that even if present, its use may depend on run-time privilege or other OS
27652765
controlled state.
27662766
2767+
``__builtin_readsteadycounter``
2768+
-------------------------------
2769+
2770+
``__builtin_readsteadycounter`` is used to access the fixed frequency counter
2771+
register (or a similar steady-rate clock) on those targets that support it.
2772+
The function is similar to ``__builtin_readcyclecounter`` above except that the
2773+
frequency is fixed, making it suitable for measuring elapsed time.
2774+
2775+
**Syntax**:
2776+
2777+
.. code-block:: c++
2778+
2779+
__builtin_readsteadycounter()
2780+
2781+
**Example of Use**:
2782+
2783+
.. code-block:: c++
2784+
2785+
unsigned long long t0 = __builtin_readsteadycounter();
2786+
do_something();
2787+
unsigned long long t1 = __builtin_readsteadycounter();
2788+
unsigned long long secs_to_do_something = (t1 - t0) / tick_rate;
2789+
2790+
**Description**:
2791+
2792+
The ``__builtin_readsteadycounter()`` builtin returns the fixed frequency counter value.
2793+
When not supported by the target, the return value is always zero. This builtin
2794+
takes no arguments and produces an unsigned long long result. The builtin does
2795+
not guarantee any particular frequency, only that it is stable. Knowledge of the
2796+
counter's true frequency will need to be provided by the user.
2797+
2798+
Query for this feature with ``__has_builtin(__builtin_readsteadycounter)``.
2799+
27672800
``__builtin_dump_struct``
27682801
-------------------------
27692802

clang/docs/ReleaseNotes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@ C23 Feature Support
117117
Non-comprehensive list of changes in this release
118118
-------------------------------------------------
119119

120+
- Added ``__builtin_readsteadycounter`` for reading fixed frequency hardware
121+
counters.
122+
120123
New Compiler Flags
121124
------------------
122125

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,6 +1110,12 @@ def ReadCycleCounter : Builtin {
11101110
let Prototype = "unsigned long long int()";
11111111
}
11121112

1113+
def ReadSteadyCounter : Builtin {
1114+
let Spellings = ["__builtin_readsteadycounter"];
1115+
let Attributes = [NoThrow];
1116+
let Prototype = "unsigned long long int()";
1117+
}
1118+
11131119
def Trap : Builtin {
11141120
let Spellings = ["__builtin_trap"];
11151121
let Attributes = [NoThrow, NoReturn];

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3443,6 +3443,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
34433443
Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
34443444
return RValue::get(Builder.CreateCall(F));
34453445
}
3446+
case Builtin::BI__builtin_readsteadycounter: {
3447+
Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3448+
return RValue::get(Builder.CreateCall(F));
3449+
}
34463450
case Builtin::BI__builtin___clear_cache: {
34473451
Value *Begin = EmitScalarExpr(E->getArg(0));
34483452
Value *End = EmitScalarExpr(E->getArg(1));

clang/test/CodeGen/builtins.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,12 @@ long long test_builtin_readcyclecounter(void) {
496496
return __builtin_readcyclecounter();
497497
}
498498

499+
// CHECK-LABEL: define{{.*}} i64 @test_builtin_readsteadycounter
500+
long long test_builtin_readsteadycounter(void) {
501+
// CHECK: call i64 @llvm.readsteadycounter()
502+
return __builtin_readsteadycounter();
503+
}
504+
499505
/// __builtin_launder should be a NOP in C since there are no vtables.
500506
// CHECK-LABEL: define{{.*}} void @test_builtin_launder
501507
void test_builtin_launder(int *p) {

llvm/include/llvm/CodeGen/ISDOpcodes.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1179,6 +1179,12 @@ enum NodeType {
11791179
/// counter-like register (or other high accuracy low latency clock source).
11801180
READCYCLECOUNTER,
11811181

1182+
/// READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
1183+
/// It has the same semantics as the READCYCLECOUNTER implementation except
1184+
/// that the result is the content of the architecture-specific fixed
1185+
/// frequency counter suitable for measuring elapsed time.
1186+
READSTEADYCOUNTER,
1187+
11821188
/// HANDLENODE node - Used as a handle for various purposes.
11831189
HANDLENODE,
11841190

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -870,6 +870,8 @@ def int_pcmarker : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
870870

871871
def int_readcyclecounter : DefaultAttrsIntrinsic<[llvm_i64_ty]>;
872872

873+
def int_readsteadycounter : DefaultAttrsIntrinsic<[llvm_i64_ty]>;
874+
873875
// The assume intrinsic is marked InaccessibleMemOnly so that proper control
874876
// dependencies will be maintained.
875877
def int_assume : DefaultAttrsIntrinsic<

llvm/include/llvm/Support/TargetOpcodes.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,9 @@ HANDLE_TARGET_OPCODE(G_INTRINSIC_ROUNDEVEN)
352352
/// INTRINSIC readcyclecounter
353353
HANDLE_TARGET_OPCODE(G_READCYCLECOUNTER)
354354

355+
/// INTRINSIC readsteadycounter
356+
HANDLE_TARGET_OPCODE(G_READSTEADYCOUNTER)
357+
355358
/// Generic load (including anyext load)
356359
HANDLE_TARGET_OPCODE(G_LOAD)
357360

llvm/include/llvm/Target/GenericOpcodes.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1101,6 +1101,12 @@ def G_READCYCLECOUNTER : GenericInstruction {
11011101
let hasSideEffects = true;
11021102
}
11031103

1104+
def G_READSTEADYCOUNTER : GenericInstruction {
1105+
let OutOperandList = (outs type0:$dst);
1106+
let InOperandList = (ins);
1107+
let hasSideEffects = true;
1108+
}
1109+
11041110
//------------------------------------------------------------------------------
11051111
// Memory ops
11061112
//------------------------------------------------------------------------------

llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ def : GINodeEquiv<G_FMAXNUM_IEEE, fmaxnum_ieee>;
168168
def : GINodeEquiv<G_FMAXIMUM, fmaximum>;
169169
def : GINodeEquiv<G_FMINIMUM, fminimum>;
170170
def : GINodeEquiv<G_READCYCLECOUNTER, readcyclecounter>;
171+
def : GINodeEquiv<G_READSTEADYCOUNTER, readsteadycounter>;
171172
def : GINodeEquiv<G_ROTR, rotr>;
172173
def : GINodeEquiv<G_ROTL, rotl>;
173174
def : GINodeEquiv<G_LROUND, lround>;

0 commit comments

Comments
 (0)