// timer.cpp — 71 lines (66 loc), 1.79 KB
#include "common.h"
#include <algorithm>
namespace QPC_Timer
{
typedef LONGLONG Tick;
Tick frequency;
Tick overhead;
double ticksToNanosecs;
void Initialize()
{
LARGE_INTEGER f;
QueryPerformanceFrequency(&f);
frequency = f.QuadPart;
overhead = 0;
ticksToNanosecs = 1000000000.0 / frequency;
}
}
#if INTEGER_MAP_TIMING_METHOD(RDTSC)
// Time-stamp-counter based timer: calibrates the CPU's TSC against the OS
// QPC clock, and measures the fixed overhead of a CPUID-serialized sample.
// NOTE(review): the __asm blocks are MSVC x86 (32-bit) inline assembly and
// will not build for x64 — presumably this path is only selected on x86;
// confirm against the build configuration.
namespace RDTSC_Timer
{
typedef __int64 Tick;
Tick frequency;         // estimated TSC ticks per second
Tick overhead;          // median cost of one serialized CPUID, in TSC ticks
double ticksToNanosecs; // multiply a tick delta by this to get nanoseconds
void Initialize()
{
QPC_Timer::Initialize();
// Busy-wait for 1/10 of a second as measured by QPC, bracketed by RDTSC
// reads; scaling the elapsed TSC delta by 10 yields ticks-per-second.
// (QPC_Timer::Sample() is defined elsewhere — presumably returns the
// current QueryPerformanceCounter value.)
QPC_Timer::Tick limit = QPC_Timer::frequency / 10;
QPC_Timer::Tick start = QPC_Timer::Sample();
Tick startTsc = __rdtsc();
while (QPC_Timer::Sample() - start < limit)
{
}
Tick endTsc = __rdtsc();
frequency = (endTsc - startTsc) * 10;
ticksToNanosecs = 1000000000.0 / frequency;
// Warm-up: execute CPUID (leaf 0) twice before timing it, so the first
// measured iterations below don't pay first-use costs.
__asm
{
mov eax, 0
cpuid
mov eax, 0
cpuid
}
// Take the median average of a bunch of CPUID timings and consider that the overhead.
// Pretty sure I've seen a few magic fast CPUIDs, and a few slow ones.
// Median average seems to produce the most consistent overhead measurement between runs.
Tick timings[128];
for (int i = 0; i < 128; i++)
{
// CPUID is a serializing instruction, so each timing captures the full
// cost of a serialized sample point.
startTsc = __rdtsc();
__asm
{
mov eax, 0
cpuid
}
endTsc = __rdtsc();
timings[i] = endTsc - startTsc;
}
// Average the middle half (indices 32..95 of 128 sorted samples) to
// discard both the outlier-fast and outlier-slow runs.
std::sort(timings, timings + 128);
Tick total = 0;
for (int i = 32; i < 96; i++)
total += timings[i];
overhead = total / 64;
}
}
#endif // INTEGER_MAP_TIMING_METHOD(RDTSC)