Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Implement TSC calibration #7966

Merged
merged 2 commits into from Apr 6, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
77 changes: 65 additions & 12 deletions Utilities/sysinfo.cpp
Expand Up @@ -131,11 +131,7 @@ std::string utils::get_system_info()

fmt::append(result, "%s | %d Threads | %.2f GiB RAM", brand, num_proc, mem_total / (1024.0f * 1024 * 1024));

if (!has_invariant_tsc())
{
fmt::append(result, " | TSC: Bad");
}
else if (const ullong tsc_freq = get_tsc_freq())
if (const ullong tsc_freq = get_tsc_freq())
{
fmt::append(result, " | TSC: %.02fGHz", tsc_freq / 1000000000.);
}
Expand Down Expand Up @@ -167,7 +163,7 @@ std::string utils::get_system_info()
if (has_fma3() || has_fma4())
{
result += " | FMA";

if (has_fma3() && has_fma4())
{
result += "3+4";
Expand Down Expand Up @@ -266,17 +262,74 @@ std::string utils::get_OS_version()
return output;
}

// Snaps `val` to a multiple of 20'000'000 (i.e. 20 MHz steps when `val` is a
// frequency in Hz). The division is delegated to ::rounded_div, which is
// defined elsewhere (presumably a round-to-nearest integer division -- confirm).
static constexpr ullong round_tsc(ullong val)
{
    constexpr int granularity = 20'000'000;
    return ::rounded_div(val, granularity) * granularity;
}

// Estimates the TSC frequency in Hz, rounded by round_tsc().
//
// Returns 0 when no estimate is available:
//  - has_invariant_tsc() reports false,
//  - (Windows) QueryPerformanceFrequency fails,
//  - none of the measured intervals had a non-zero timer delta.
//
// Method: take `samples` paired readings of the TSC and of a reference timer
// (QueryPerformanceCounter on Windows, CLOCK_MONOTONIC elsewhere), convert each
// consecutive pair of readings into a frequency, and average the results.
//
// NOTE(review): the result is not cached here, so every call repeats the
// measurement (including its sleeps) -- confirm whether callers rely on that.
ullong utils::get_tsc_freq()
{
    const ullong cal_tsc = []() -> ullong
    {
        if (!has_invariant_tsc())
            return 0;

#ifdef _WIN32
        LARGE_INTEGER freq;
        if (!QueryPerformanceFrequency(&freq))
            return 0;

        // Low-frequency performance counter: derive the result directly as
        // freq * 1024 instead of measuring.
        // NOTE(review): presumably the counter is TSC / 1024 in this case -- confirm.
        if (freq.QuadPart <= 9'999'999)
            return round_tsc(freq.QuadPart * 1024);

        // Reference timer ticks per second.
        const ullong timer_freq = freq.QuadPart;
        Sleep(1);
#else
        // CLOCK_MONOTONIC readings are converted to nanoseconds below.
        const ullong timer_freq = 1'000'000'000;
        // Seconds value of the first sample; later samples are stored relative to it.
        ullong sec_base = 0;
        usleep(200);
#endif

        // Calibrate TSC
        constexpr int samples = 40;
        ullong rdtsc_data[samples];
        ullong timer_data[samples];
        ullong rdtsc_diff[samples - 1];
        ullong timer_diff[samples - 1];

        for (int i = 0; i < samples; i++)
        {
            // lfence is issued before rdtsc so the read is ordered after prior instructions.
            rdtsc_data[i] = (_mm_lfence(), __rdtsc());
            if (i > 0)
                rdtsc_diff[i - 1] = rdtsc_data[i] - rdtsc_data[i - 1];

#ifdef _WIN32
            LARGE_INTEGER ctr;
            QueryPerformanceCounter(&ctr);
            timer_data[i] = ctr.QuadPart;
            Sleep(1);
#else
            struct timespec ts;
            clock_gettime(CLOCK_MONOTONIC, &ts);
            if (i == 0)
                sec_base = ts.tv_sec;
            timer_data[i] = ts.tv_nsec + (ts.tv_sec - sec_base) * 1'000'000'000;
            usleep(200);
#endif

            if (i > 0)
                timer_diff[i - 1] = timer_data[i] - timer_data[i - 1];
        }

        // Compute average TSC frequency over all usable intervals.
        ullong acc = 0;
        int valid = 0;
        for (int i = 0; i < samples - 1; i++)
        {
            // A zero timer delta would divide by zero; skip that interval.
            if (timer_diff[i] == 0)
                continue;

            acc += rdtsc_diff[i] * timer_freq / timer_diff[i];
            valid++;
        }

        // No usable interval: report "unknown".
        if (valid == 0)
            return 0;

        // Rounding
        return round_tsc(acc / valid);
    }();

    return cal_tsc;
}

u64 utils::get_total_memory()
Expand Down