Skip to content

Commit

Permalink
[XRay] [compiler-rt] Move machine-dependent code into machine-depende…
Browse files Browse the repository at this point in the history
…nt files.

Summary: Include the necessary headers while there.

Reviewers: dberris

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D25360

llvm-svn: 290077
  • Loading branch information
deanberris committed Dec 19, 2016
1 parent 8f687f7 commit 094173b
Show file tree
Hide file tree
Showing 7 changed files with 176 additions and 139 deletions.
14 changes: 14 additions & 0 deletions compiler-rt/lib/xray/xray_AArch64.cc
Expand Up @@ -14,12 +14,26 @@
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_common.h"
#include "xray_defs.h"
#include "xray_emulate_tsc.h"
#include "xray_interface_internal.h"
#include <atomic>
#include <cassert>

namespace __xray {

// Reports the tick rate that consumers should assume for timestamps recorded
// on this architecture.
//
// ARM has no user-mode instruction comparable to RDTSCP. The CP15 counter does
// not tick at a constant rate the way the x86[_64] TSC does (it speeds up or
// slows down with the CPU's turbo and power-saving modes), and reading it
// requires a kernel modification or a driver, which we can not demand from
// users of compiler-rt. Timestamps therefore come from clock_gettime(2), which
// reports nanoseconds, so we pretend that the "TSC" runs at 1GHz: one tick is
// exactly one nanosecond.
uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
  constexpr uint64_t EmulatedTicksPerSecond = NanosecondsPerSecond;
  return EmulatedTicksPerSecond;
}

// The machine codes for some instructions used in runtime patching.
enum class PatchOpcodes : uint32_t {
PO_StpX0X30SP_m16e = 0xA9BF7BE0, // STP X0, X30, [SP, #-16]!
Expand Down
14 changes: 14 additions & 0 deletions compiler-rt/lib/xray/xray_arm.cc
Expand Up @@ -14,12 +14,26 @@
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_common.h"
#include "xray_defs.h"
#include "xray_emulate_tsc.h"
#include "xray_interface_internal.h"
#include <atomic>
#include <cassert>

namespace __xray {

// Reports the tick rate that consumers should assume for timestamps recorded
// on this architecture.
//
// ARM has no user-mode instruction comparable to RDTSCP. The CP15 counter does
// not tick at a constant rate the way the x86[_64] TSC does (it speeds up or
// slows down with the CPU's turbo and power-saving modes), and reading it
// requires a kernel modification or a driver, which we can not demand from
// users of compiler-rt. Timestamps therefore come from clock_gettime(2), which
// reports nanoseconds, so we pretend that the "TSC" runs at 1GHz: one tick is
// exactly one nanosecond.
uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
  constexpr uint64_t EmulatedTicksPerSecond = NanosecondsPerSecond;
  return EmulatedTicksPerSecond;
}

// The machine codes for some instructions used in runtime patching.
enum class PatchOpcodes : uint32_t {
PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr}
Expand Down
23 changes: 23 additions & 0 deletions compiler-rt/lib/xray/xray_emulate_tsc.h
@@ -0,0 +1,23 @@
#pragma once
#include <time.h>

#include "sanitizer_common/sanitizer_internal_defs.h"
#include "xray_defs.h"

namespace __xray {

static constexpr uint64_t NanosecondsPerSecond = 1000ULL * 1000 * 1000;

// Emulates a TSC read on targets without a usable user-mode cycle counter.
//
// The timestamp is taken from clock_gettime(2) and returned as a nanosecond
// count, i.e. seconds * NanosecondsPerSecond + nanoseconds.
//
// \param CPU  Output; always set to 0 because this path does not identify the
//             CPU the calling thread is running on.
// \return     The current CLOCK_REALTIME reading in nanoseconds, or 0 if
//             clock_gettime(2) fails (the failure is reported).
ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
  timespec TS;
  int result = clock_gettime(CLOCK_REALTIME, &TS);
  if (result != 0) {
    // The message is newline-terminated like the other Report() calls, so that
    // consecutive diagnostics do not run together on one line.
    Report("clock_gettime(2) returned %d, errno=%d.\n", result, int(errno));
    // Fall back to a zero timestamp rather than using an unset timespec.
    TS.tv_sec = 0;
    TS.tv_nsec = 0;
  }
  CPU = 0;
  return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
}

}
185 changes: 46 additions & 139 deletions compiler-rt/lib/xray/xray_inmemory_log.cc
Expand Up @@ -26,12 +26,12 @@
#include <unistd.h>

#if defined(__x86_64__)
#include <x86intrin.h>
#include "xray_x86_64.h"
#elif defined(__arm__) || defined(__aarch64__)
static const int64_t NanosecondsPerSecond = 1000LL * 1000 * 1000;
#include "xray_emulate_tsc.h"
#else
#error "Unsupported CPU Architecture"
#endif /* CPU architecture */
#endif /* Architecture-specific inline intrinsics */

#include "sanitizer_common/sanitizer_libc.h"
#include "xray/xray_records.h"
Expand Down Expand Up @@ -71,52 +71,6 @@ static void retryingWriteAll(int Fd, char *Begin,
}
}

#if defined(__x86_64__)
// Reads from Fd into [Begin, End) until the range is full or read(2) signals
// end-of-file, transparently retrying reads interrupted by a signal (EINTR).
//
// Returns {bytes stored, true} on success. Any other read error is reported
// and yields {bytes stored before the failure, false}.
static std::pair<ssize_t, bool>
retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
  ssize_t Consumed = 0;
  for (char *Cursor = Begin; Cursor != End;) {
    const ssize_t Chunk = read(Fd, Cursor, End - Cursor);
    if (Chunk == 0)
      break; // End of file: nothing more to read.
    if (Chunk == -1) {
      if (errno == EINTR)
        continue; // Interrupted before any data arrived; try again.
      Report("Read error; errno = %d\n", errno);
      return std::make_pair(Consumed, false);
    }
    Consumed += Chunk;
    Cursor += Chunk;
  }
  return std::make_pair(Consumed, true);
}

// Parses a single base-10 integer from the start of the file named Filename.
//
// \param Filename  Path of the file to read.
// \param Value     Output; written only when parsing succeeds.
// \return          true if the file could be opened and read, is non-empty,
//                  and the parsed number is followed by a newline or by the
//                  end of the data; false otherwise.
static bool readValueFromFile(const char *Filename,
                              long long *Value) XRAY_NEVER_INSTRUMENT {
  int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
  if (Fd == -1)
    return false;
  static constexpr size_t BufSize = 256;
  char Line[BufSize] = {};
  ssize_t BytesRead;
  bool Success;
  // Read at most BufSize - 1 bytes so the zero-initialised final byte always
  // NUL-terminates the buffer handed to the parser below.
  std::tie(BytesRead, Success) =
      retryingReadSome(Fd, Line, Line + BufSize - 1);
  // Close before looking at the outcome so the descriptor is released on the
  // read-failure path as well.
  close(Fd);
  if (!Success)
    return false;
  char *End = nullptr;
  long long Tmp = internal_simple_strtoll(Line, &End, 10);
  // Reject empty input and numbers followed by trailing garbage.
  if (Line[0] == '\0' || (*End != '\n' && *End != '\0'))
    return false;
  *Value = Tmp;
  return true;
}

#endif /* CPU architecture */

class ThreadExitFlusher {
int Fd;
XRayRecord *Start;
Expand Down Expand Up @@ -151,82 +105,55 @@ void PrintToStdErr(const char *Buffer) XRAY_NEVER_INSTRUMENT {
fprintf(stderr, "%s", Buffer);
}

// Creates the temporary log file and writes the XRay file header to it.
//
// The file name is flags()->xray_logfile_base followed by a mkstemp(3)
// "XXXXXX" suffix. Returns the open file descriptor, or -1 if the name does
// not fit or the file can not be created (both cases are reported).
static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT {
  // FIXME: Figure out how to make this less stderr-dependent.
  SetPrintfAndReportCallback(PrintToStdErr);

  // Assemble the mkstemp(3) template in a static buffer.
  static char LogPath[256] = {};
  static char RandomSuffix[] = "XXXXXX";
  auto Tail = internal_strncat(LogPath, flags()->xray_logfile_base,
                               sizeof(LogPath) - 10);
  const size_t RequiredLength = static_cast<size_t>((Tail + 6) - LogPath);
  if (RequiredLength > (sizeof(LogPath) - 1)) {
    Report("XRay log file base too long: %s\n", flags()->xray_logfile_base);
    return -1;
  }
  internal_strncat(LogPath, RandomSuffix, sizeof(RandomSuffix) - 1);

  const int LogFd = mkstemp(LogPath);
  if (LogFd == -1) {
    Report("XRay: Failed opening temporary file '%s'; not logging events.\n",
           LogPath);
    return -1;
  }
  if (Verbosity())
    fprintf(stderr, "XRay: Log file in '%s'\n", LogPath);

  // The header is emitted exactly once, right here at creation time; threads
  // that log afterwards only ever append records.
  XRayFileHeader Header;
  Header.Version = 1;
  Header.Type = FileTypes::NAIVE_LOG;
  Header.CycleFrequency = __xray::cycleFrequency();

  // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
  // before setting the values in the header.
  Header.ConstantTSC = 1;
  Header.NonstopTSC = 1;

  char *HeaderBytes = reinterpret_cast<char *>(&Header);
  retryingWriteAll(LogFd, HeaderBytes, HeaderBytes + sizeof(Header));
  return LogFd;
}

void __xray_InMemoryRawLog(int32_t FuncId,
XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
using Buffer =
std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
static constexpr size_t BuffLen = 1024;
thread_local static Buffer InMemoryBuffer[BuffLen] = {};
thread_local static size_t Offset = 0;
static int Fd = [] {
// FIXME: Figure out how to make this less stderr-dependent.
SetPrintfAndReportCallback(PrintToStdErr);
// Open a temporary file once for the log.
static char TmpFilename[256] = {};
static char TmpWildcardPattern[] = "XXXXXX";
auto E = internal_strncat(TmpFilename, flags()->xray_logfile_base,
sizeof(TmpFilename) - 10);
if (static_cast<size_t>((E + 6) - TmpFilename) >
(sizeof(TmpFilename) - 1)) {
Report("XRay log file base too long: %s\n", flags()->xray_logfile_base);
return -1;
}
internal_strncat(TmpFilename, TmpWildcardPattern,
sizeof(TmpWildcardPattern) - 1);
int Fd = mkstemp(TmpFilename);
if (Fd == -1) {
Report("XRay: Failed opening temporary file '%s'; not logging events.\n",
TmpFilename);
return -1;
}
if (Verbosity())
fprintf(stderr, "XRay: Log file in '%s'\n", TmpFilename);

// Get the cycle frequency from SysFS on Linux.
long long CPUFrequency = -1;
#if defined(__x86_64__)
if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
&CPUFrequency)) {
CPUFrequency *= 1000;
} else if (readValueFromFile(
"/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
&CPUFrequency)) {
CPUFrequency *= 1000;
} else {
Report("Unable to determine CPU frequency for TSC accounting.\n");
}
#elif defined(__arm__) || defined(__aarch64__)
// There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
// not have a constant frequency like TSC on x86(_64), it may go faster
// or slower depending on CPU turbo or power saving mode. Furthermore,
// to read from CP15 on ARM a kernel modification or a driver is needed.
// We can not require this from users of compiler-rt.
// So on ARM we use clock_gettime() which gives the result in nanoseconds.
// To get the measurements per second, we scale this by the number of
// nanoseconds per second, pretending that the TSC frequency is 1GHz and
// one TSC tick is 1 nanosecond.
CPUFrequency = NanosecondsPerSecond;
#else
#error "Unsupported CPU Architecture"
#endif /* CPU architecture */

// Since we're here, we get to write the header. We set it up so that the
// header will only be written once, at the start, and let the threads
// logging do writes which just append.
XRayFileHeader Header;
Header.Version = 1;
Header.Type = FileTypes::NAIVE_LOG;
Header.CycleFrequency =
CPUFrequency == -1 ? 0 : static_cast<uint64_t>(CPUFrequency);

// FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
// before setting the values in the header.
Header.ConstantTSC = 1;
Header.NonstopTSC = 1;
retryingWriteAll(Fd, reinterpret_cast<char *>(&Header),
reinterpret_cast<char *>(&Header) + sizeof(Header));
return Fd;
}();
static int Fd = __xray_OpenLogFile();
if (Fd == -1)
return;
thread_local __xray::ThreadExitFlusher Flusher(
Expand All @@ -237,27 +164,7 @@ void __xray_InMemoryRawLog(int32_t FuncId,
// through a pointer offset.
auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset];
R.RecordType = RecordTypes::NORMAL;
#if defined(__x86_64__)
{
unsigned CPU;
R.TSC = __rdtscp(&CPU);
R.CPU = CPU;
}
#elif defined(__arm__) || defined(__aarch64__)
{
timespec TS;
int result = clock_gettime(CLOCK_REALTIME, &TS);
if (result != 0) {
Report("clock_gettime() returned %d, errno=%d.\n", result, int(errno));
TS.tv_sec = 0;
TS.tv_nsec = 0;
}
R.TSC = TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
R.CPU = 0;
}
#else
#error "Unsupported CPU Architecture"
#endif /* CPU architecture */
R.TSC = __xray::readTSC(R.CPU);
R.TId = TId;
R.Type = Type;
R.FuncId = FuncId;
Expand Down
2 changes: 2 additions & 0 deletions compiler-rt/lib/xray/xray_interface_internal.h
Expand Up @@ -48,6 +48,8 @@ struct XRaySledMap {
size_t Entries;
};

uint64_t cycleFrequency();

bool patchFunctionEntry(bool Enable, uint32_t FuncId,
const XRaySledEntry &Sled);
bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
Expand Down
61 changes: 61 additions & 0 deletions compiler-rt/lib/xray/xray_x86_64.cc
Expand Up @@ -3,10 +3,71 @@
#include "xray_interface_internal.h"
#include <atomic>
#include <cstdint>
#include <fcntl.h>
#include <limits>
#include <tuple>
#include <unistd.h>

namespace __xray {

// Reads from Fd into [Begin, End) until the range is full or read(2) signals
// end-of-file, transparently retrying reads interrupted by a signal (EINTR).
//
// Returns {bytes stored, true} on success. Any other read error is reported
// and yields {bytes stored before the failure, false}.
static std::pair<ssize_t, bool>
retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT {
  ssize_t Consumed = 0;
  for (char *Cursor = Begin; Cursor != End;) {
    const ssize_t Chunk = read(Fd, Cursor, End - Cursor);
    if (Chunk == 0)
      break; // End of file: nothing more to read.
    if (Chunk == -1) {
      if (errno == EINTR)
        continue; // Interrupted before any data arrived; try again.
      Report("Read error; errno = %d\n", errno);
      return std::make_pair(Consumed, false);
    }
    Consumed += Chunk;
    Cursor += Chunk;
  }
  return std::make_pair(Consumed, true);
}

// Parses a single base-10 integer from the start of the file named Filename.
//
// \param Filename  Path of the file to read.
// \param Value     Output; written only when parsing succeeds.
// \return          true if the file could be opened and read, is non-empty,
//                  and the parsed number is followed by a newline or by the
//                  end of the data; false otherwise.
static bool readValueFromFile(const char *Filename,
                              long long *Value) XRAY_NEVER_INSTRUMENT {
  int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
  if (Fd == -1)
    return false;
  static constexpr size_t BufSize = 256;
  char Line[BufSize] = {};
  ssize_t BytesRead;
  bool Success;
  // Read at most BufSize - 1 bytes so the zero-initialised final byte always
  // NUL-terminates the buffer handed to the parser below.
  std::tie(BytesRead, Success) =
      retryingReadSome(Fd, Line, Line + BufSize - 1);
  // Close before looking at the outcome so the descriptor is released on the
  // read-failure path as well.
  close(Fd);
  if (!Success)
    return false;
  char *End = nullptr;
  long long Tmp = internal_simple_strtoll(Line, &End, 10);
  // Reject empty input and numbers followed by trailing garbage.
  if (Line[0] == '\0' || (*End != '\n' && *End != '\0'))
    return false;
  *Value = Tmp;
  return true;
}

// Determines the frequency to record for TSC accounting by consulting sysfs.
//
// The tsc_freq_khz entry for cpu0 is tried first, then cpufreq's
// cpuinfo_max_freq. Whichever value is read first is multiplied by 1000
// before being returned. If neither file yields a value the failure is
// reported and 0 is returned.
uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT {
  long long RawValue = -1;
  const bool Found =
      readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
                        &RawValue) ||
      readValueFromFile(
          "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", &RawValue);
  if (!Found) {
    Report("Unable to determine CPU frequency for TSC accounting.\n");
    return 0;
  }
  return static_cast<uint64_t>(RawValue * 1000);
}

static constexpr uint8_t CallOpCode = 0xe8;
static constexpr uint16_t MovR10Seq = 0xba41;
static constexpr uint16_t Jmp9Seq = 0x09eb;
Expand Down
16 changes: 16 additions & 0 deletions compiler-rt/lib/xray/xray_x86_64.h
@@ -0,0 +1,16 @@
#pragma once
#include <x86intrin.h>

#include "sanitizer_common/sanitizer_internal_defs.h"
#include "xray_defs.h"

namespace __xray {

// Reads the time-stamp counter with RDTSCP.
//
// \param CPU  Output; receives the auxiliary value that RDTSCP reports
//             alongside the counter, narrowed to 8 bits.
// \return     The 64-bit counter value.
ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
  // __rdtscp writes a full 'unsigned', so go through a temporary rather than
  // handing it the narrower output parameter directly.
  unsigned Aux = 0;
  const uint64_t Ticks = __rdtscp(&Aux);
  CPU = Aux;
  return Ticks;
}

}

0 comments on commit 094173b

Please sign in to comment.