// rdtsc.h
// Helpers for reading the x86 time-stamp counter (TSC) around a timed region.
#pragma once
#include <stdint.h>
// Compiler-level fence: an empty asm statement carrying a "memory" clobber.
// The asm template is empty, so the statement itself contributes no
// instructions; the clobber tells the compiler that memory may have changed,
// so it must not cache memory values in registers across this point or move
// memory accesses over it.
#define barrier() __asm__ __volatile__("" ::: "memory")
// Plain read of the time-stamp counter with no extra ordering around it.
//
// RDTSC delivers the low 32 bits in EAX and the high 32 bits in EDX; the
// two halves are stitched together into one 64-bit value.
static inline uint64_t __attribute__((__always_inline__))
rdtsc(void)
{
    uint32_t lo;
    uint32_t hi;

    __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));

    uint64_t tsc = (uint64_t) hi;
    tsc <<= 32;
    tsc |= (uint64_t) lo;
    return tsc;
}
// Read the time-stamp counter at the START of a timed region.
//
// Returns the 64-bit TSC value (EDX:EAX as produced by RDTSCP).
//
// Ordering:
//  - barrier() on both sides keeps the compiler from moving memory
//    accesses across the read.
//  - RDTSCP waits for all earlier instructions to execute before it samples
//    the counter, but does not by itself stop later instructions from
//    starting early.  The trailing LFENCE closes that gap so the code being
//    timed cannot begin before the sample is taken.
//
// See the "Improved Benchmarking Method" in Intel's "How to
// Benchmark Code Execution Times on Intel® IA-32 and IA-64
// Instruction Set Architectures"
static inline uint64_t __attribute__((__always_inline__))
rdtsc_beg(void)
{
    uint32_t lo, hi, aux;

    // Don't let anything float into or out of the TSC region.
    // (The memory clobber on this is actually okay as long as GCC
    // knows that no one ever took the address of things it has in
    // registers.)
    barrier();
#if defined(__x86_64__)
    // Use rdtscp rather than cpuid + rdtsc.  RDTSCP also writes ECX
    // (IA32_TSC_AUX); it is captured as a discarded output instead of a
    // named clobber.  RBX is not touched, so it is not reserved.
    __asm __volatile("rdtscp; lfence"
                     : "=a" (lo), "=d" (hi), "=c" (aux));
#elif defined(__i386__)
    // CPUID serializes ahead of the read.  It overwrites EAX, EBX, ECX and
    // EDX, so all four are declared as outputs, and the leaf is pinned to 0
    // rather than whatever happened to be in EAX.
    uint32_t ebx_scratch;
    __asm __volatile("cpuid; rdtscp; lfence"
                     : "=a" (lo), "=d" (hi), "=c" (aux), "=b" (ebx_scratch)
                     : "0" (0u));
    (void) ebx_scratch;
#else
#error "rdtsc_beg() is only implemented for x86 (i386 / x86_64) targets"
#endif
    (void) aux;
    barrier();
    return ((uint64_t) lo) | (((uint64_t) hi) << 32);
}
// Read the time-stamp counter at the END of a timed region.
//
// Returns the 64-bit TSC value (EDX:EAX as produced by RDTSCP).
//
// RDTSCP waits for all earlier instructions (the code being timed) to
// execute before it samples the counter.  barrier() on both sides keeps the
// compiler from moving memory accesses across the read.
static inline uint64_t __attribute__((__always_inline__))
rdtsc_end(void)
{
    uint32_t lo, hi, aux;

    barrier();
    // use rdtscp rather than cpuid + rdtsc
    //
    // The result is taken straight from EAX/EDX through the "=a"/"=d"
    // constraints; ECX (IA32_TSC_AUX) is captured as a discarded output.
    // No register is named explicitly, so the same statement is valid for
    // both 32-bit and 64-bit builds, and RBX, which RDTSCP never writes,
    // is no longer reserved.
    __asm __volatile("rdtscp"
                     : "=a" (lo), "=d" (hi), "=c" (aux));
    (void) aux;
    barrier();
    return ((uint64_t) lo) | (((uint64_t) hi) << 32);
}
// Declarations only; the definitions are not part of this header.

// NOTE(review): presumably reports the CPU/TSC frequency; the unit (Hz?) and
// the source of the value are not visible here -- confirm against the
// implementation.
uint64_t cpu_freq(void);
// NOTE(review): presumably reports the measured cost of taking a TSC
// reading, with stddev_out receiving the standard deviation of the samples;
// whether stddev_out may be NULL is not visible here -- confirm against the
// implementation.
uint64_t rdtsc_overhead(double *stddev_out);
// NOTE(review): presumably a frequency obtained by measurement rather than
// lookup; unit and method are not visible here -- confirm against the
// implementation.
uint64_t cpu_freq_measured(void);