Skip to content

Commit

Permalink
[XRay] [compiler-rt] Unwriting FDR mode buffers when functions are sh…
Browse files Browse the repository at this point in the history
…ort.

Summary:
"short" is defined as an xray flag, and buffer rewinding happens for both exits
 and tail exits.

 I've made the choice to seek backwards finding pairs of FunctionEntry, TailExit
 record pairs and erasing them if the FunctionEntry occurred before exit from the
 currently exiting function. This is a compromise so that we don't skip logging
 tail calls if the function that they call into takes longer our duration.

 This works by counting the consecutive function and function entry, tail exit
 pairs that proceed the current point in the buffer. The buffer is rewound to
 check whether these entry points happened recently enough to be erased.

 It is still possible we will omit them if they call into a child function that
 is not instrumented which calls a fast grandchild that is instrumented before
 doing other processing.

Reviewers: pelikan, dberris

Reviewed By: dberris

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D31345

llvm-svn: 299629
  • Loading branch information
deanberris committed Apr 6, 2017
1 parent fbe67da commit 895171e
Show file tree
Hide file tree
Showing 3 changed files with 150 additions and 7 deletions.
139 changes: 136 additions & 3 deletions compiler-rt/lib/xray/xray_fdr_logging_impl.h
Expand Up @@ -32,6 +32,8 @@
#include "xray_buffer_queue.h"
#include "xray_defs.h"
#include "xray_fdr_log_records.h"
#include "xray_flags.h"
#include "xray_tsc.h"

namespace __xray {

Expand Down Expand Up @@ -108,9 +110,17 @@ static void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
//-----------------------------------------------------------------------------|

namespace {

thread_local BufferQueue::Buffer Buffer;
thread_local char *RecordPtr = nullptr;

// The number of FunctionEntry records immediately preceding RecordPtr.
thread_local uint8_t NumConsecutiveFnEnters = 0;

// The number of adjacent, consecutive pairs of FunctionEntry, Tail Exit
// records preceding RecordPtr.
thread_local uint8_t NumTailCalls = 0;

constexpr auto MetadataRecSize = sizeof(MetadataRecord);
constexpr auto FunctionRecSize = sizeof(FunctionRecord);

Expand Down Expand Up @@ -209,6 +219,8 @@ static inline void writeNewBufferPreamble(pid_t Tid, timespec TS,
}
std::memcpy(MemPtr, Records, sizeof(MetadataRecord) * InitRecordsCount);
MemPtr += sizeof(MetadataRecord) * InitRecordsCount;
NumConsecutiveFnEnters = 0;
NumTailCalls = 0;
}

static inline void setupNewBuffer(const BufferQueue::Buffer &Buffer,
Expand All @@ -221,6 +233,8 @@ static inline void setupNewBuffer(const BufferQueue::Buffer &Buffer,
// This is typically clock_gettime, but callers have injection ability.
wall_clock_reader(CLOCK_MONOTONIC, &TS);
writeNewBufferPreamble(Tid, TS, RecordPtr);
NumConsecutiveFnEnters = 0;
NumTailCalls = 0;
}

static inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
Expand All @@ -237,6 +251,8 @@ static inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
std::memcpy(&NewCPUId.Data[sizeof(CPU)], &TSC, sizeof(TSC));
std::memcpy(MemPtr, &NewCPUId, sizeof(MetadataRecord));
MemPtr += sizeof(MetadataRecord);
NumConsecutiveFnEnters = 0;
NumTailCalls = 0;
}

static inline void writeNewCPUIdMetadata(uint16_t CPU,
Expand All @@ -251,6 +267,8 @@ static inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT {
// For now we don't write any bytes into the Data field.
std::memcpy(MemPtr, &EOBMeta, sizeof(MetadataRecord));
MemPtr += sizeof(MetadataRecord);
NumConsecutiveFnEnters = 0;
NumTailCalls = 0;
}

static inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
Expand All @@ -269,6 +287,8 @@ static inline void writeTSCWrapMetadata(uint64_t TSC,
std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC));
std::memcpy(MemPtr, &TSCWrap, sizeof(MetadataRecord));
MemPtr += sizeof(MetadataRecord);
NumConsecutiveFnEnters = 0;
NumTailCalls = 0;
}

static inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
Expand All @@ -289,13 +309,35 @@ static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta,

switch (EntryType) {
case XRayEntryType::ENTRY:
++NumConsecutiveFnEnters;
FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
break;
case XRayEntryType::LOG_ARGS_ENTRY:
// We should not rewind functions with logged args.
NumConsecutiveFnEnters = 0;
NumTailCalls = 0;
FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
break;
case XRayEntryType::EXIT:
// If we've decided to log the function exit, we will never erase the log
// before it.
NumConsecutiveFnEnters = 0;
NumTailCalls = 0;
FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionExit);
break;
case XRayEntryType::TAIL:
// If we just entered the function we're tail exiting from or erased every
// invocation since then, this function entry tail pair is a candidate to
// be erased when the child function exits.
if (NumConsecutiveFnEnters > 0) {
++NumTailCalls;
NumConsecutiveFnEnters = 0;
} else {
// We will never be able to erase this tail call since we have logged
// something in between the function entry and tail exit.
NumTailCalls = 0;
NumConsecutiveFnEnters = 0;
}
FuncRecord.RecordKind =
uint8_t(FunctionRecord::RecordKinds::FunctionTailExit);
break;
Expand Down Expand Up @@ -337,6 +379,8 @@ static inline void processFunctionHook(
// We assume that we'll support only 65536 CPUs for x86_64.
thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
thread_local uint64_t LastTSC = 0;
thread_local uint64_t LastFunctionEntryTSC = 0;
thread_local uint64_t NumberOfTicksThreshold = 0;

// Make sure a thread that's ever called handleArg0 has a thread-local
// live reference to the buffer queue for this particular instance of
Expand Down Expand Up @@ -379,6 +423,10 @@ static inline void processFunctionHook(
return;
}

uint64_t CycleFrequency = getTSCFrequency();
NumberOfTicksThreshold = CycleFrequency *
flags()->xray_fdr_log_func_duration_threshold_us /
1000000;
setupNewBuffer(Buffer, wall_clock_reader);
}

Expand Down Expand Up @@ -441,6 +489,10 @@ static inline void processFunctionHook(
BufferQueue::getErrorString(EC));
return;
}
uint64_t CycleFrequency = getTSCFrequency();
NumberOfTicksThreshold = CycleFrequency *
flags()->xray_fdr_log_func_duration_threshold_us /
1000000;
setupNewBuffer(Buffer, wall_clock_reader);
}

Expand Down Expand Up @@ -486,10 +538,92 @@ static inline void processFunctionHook(
RecordTSCDelta = Delta;
}

// We then update our "LastTSC" and "CurrentCPU" thread-local variables to aid
// us in future computations of this TSC delta value.
LastTSC = TSC;
CurrentCPU = CPU;
using AlignedFuncStorage =
std::aligned_storage<sizeof(FunctionRecord),
alignof(FunctionRecord)>::type;
switch (Entry) {
case XRayEntryType::ENTRY:
case XRayEntryType::LOG_ARGS_ENTRY:
// Update the thread local state for the next invocation.
LastFunctionEntryTSC = TSC;
break;
case XRayEntryType::TAIL:
break;
case XRayEntryType::EXIT:
// Break out and write the exit record if we can't erase any functions.
if (NumConsecutiveFnEnters == 0 ||
(TSC - LastFunctionEntryTSC) >= NumberOfTicksThreshold)
break;
// Otherwise we unwind functions that are too short to log by decrementing
// our view ptr into the buffer. We can erase a Function Entry record and
// neglect to log our EXIT. We have to make sure to update some state like
// the LastTSC and count of consecutive FunctionEntry records.
RecordPtr -= FunctionRecSize;
AlignedFuncStorage AlignedFuncRecordBuffer;
const auto &FuncRecord = *reinterpret_cast<FunctionRecord *>(
std::memcpy(&AlignedFuncRecordBuffer, RecordPtr, FunctionRecSize));
assert(FuncRecord.RecordKind ==
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
"Expected to find function entry recording when rewinding.");
assert(FuncRecord.FuncId == (FuncId & ~(0x0F << 28)) &&
"Expected matching function id when rewinding Exit");
--NumConsecutiveFnEnters;
LastTSC -= FuncRecord.TSCDelta;

// We unwound one call. Update the state and return without writing a log.
if (NumConsecutiveFnEnters != 0) {
LastFunctionEntryTSC = LastFunctionEntryTSC - FuncRecord.TSCDelta;
return;
}

// Otherwise we've rewound the stack of all function entries, we might be
// able to rewind further by erasing tail call functions that are being
// exited from via this exit.
LastFunctionEntryTSC = 0;
auto RewindingTSC = LastTSC;
auto RewindingRecordPtr = RecordPtr - FunctionRecSize;
while (NumTailCalls > 0) {
AlignedFuncStorage TailExitRecordBuffer;
// Rewind the TSC back over the TAIL EXIT record.
const auto &ExpectedTailExit =
*reinterpret_cast<FunctionRecord *>(std::memcpy(
&TailExitRecordBuffer, RewindingRecordPtr, FunctionRecSize));

assert(ExpectedTailExit.RecordKind ==
uint8_t(FunctionRecord::RecordKinds::FunctionTailExit) &&
"Expected to find tail exit when rewinding.");
auto TailExitFuncId = ExpectedTailExit.FuncId;
RewindingRecordPtr -= FunctionRecSize;
RewindingTSC -= ExpectedTailExit.TSCDelta;
AlignedFuncStorage FunctionEntryBuffer;
const auto &ExpectedFunctionEntry =
*reinterpret_cast<FunctionRecord *>(std::memcpy(
&FunctionEntryBuffer, RewindingRecordPtr, FunctionRecSize));
assert(ExpectedFunctionEntry.RecordKind ==
uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
"Expected to find function entry when rewinding tail call.");
assert(ExpectedFunctionEntry.FuncId == TailExitFuncId &&
"Expected funcids to match when rewinding tail call.");

if ((TSC - RewindingTSC) < NumberOfTicksThreshold) {
// We can erase a tail exit pair that we're exiting through since
// its duration is under threshold.
--NumTailCalls;
RewindingRecordPtr -= FunctionRecSize;
RewindingTSC -= ExpectedFunctionEntry.TSCDelta;
RecordPtr -= 2 * FunctionRecSize;
LastTSC = RewindingTSC;
} else {
// This tail call exceeded the threshold duration. It will not
// be erased.
NumTailCalls = 0;
return;
}
}
return; // without writing log.
}

writeFunctionRecord(FuncId, RecordTSCDelta, Entry, RecordPtr);

Expand All @@ -510,5 +644,4 @@ static inline void processFunctionHook(
} // namespace __xray_fdr_internal

} // namespace __xray

#endif // XRAY_XRAY_FDR_LOGGING_IMPL_H
3 changes: 3 additions & 0 deletions compiler-rt/lib/xray/xray_flags.inc
Expand Up @@ -22,3 +22,6 @@ XRAY_FLAG(const char *, xray_logfile_base, "xray-log.",
"Filename base for the xray logfile.")
XRAY_FLAG(bool, xray_fdr_log, false,
"Whether to install the flight data recorder logging implementation.")
XRAY_FLAG(int, xray_fdr_log_func_duration_threshold_us, 5,
"FDR logging will try to skip functions that execute for fewer "
"microseconds than this threshold.")
15 changes: 11 additions & 4 deletions compiler-rt/test/xray/TestCases/Linux/fdr-mode.cc
@@ -1,7 +1,10 @@
// RUN: %clangxx_xray -g -std=c++11 %s -o %t
// RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false xray_logfile_base=fdr-logging-test- xray_fdr_log=true verbosity=1" %run %t 2>&1 | FileCheck %s
// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t "`ls fdr-logging-test-* | head -1`" | FileCheck %s --check-prefix TRACE
// RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false xray_logfile_base=fdr-logging-test- xray_fdr_log=true verbosity=1 xray_fdr_log_func_duration_threshold_us=0" %run %t 2>&1 | FileCheck %s
// RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false xray_logfile_base=fdr-unwrite-test- xray_fdr_log=true verbosity=1 xray_fdr_log_func_duration_threshold_us=5000" %run %t 2>&1 | FileCheck %s
// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t "`ls fdr-logging-test-* | head -1`" | FileCheck %s --check-prefix=TRACE
// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t "`ls fdr-unwrite-test-* | head -1`" | FileCheck %s --check-prefix=UNWRITE
// RUN: rm fdr-logging-test-*
// RUN: rm fdr-unwrite-test-*
// FIXME: Make llvm-xray work on non-x86_64 as well.
// REQUIRES: x86_64-linux

Expand Down Expand Up @@ -61,8 +64,6 @@ int main(int argc, char *argv[]) {
return 0;
}



// Check that we're able to see two threads, each entering and exiting fA().
// TRACE-DAG: - { type: 0, func-id: [[FIDA:[0-9]+]], function: {{.*fA.*}}, cpu: {{.*}}, thread: [[THREAD1:[0-9]+]], kind: function-enter, tsc: {{[0-9]+}} }
// TRACE: - { type: 0, func-id: [[FIDA]], function: {{.*fA.*}}, cpu: {{.*}}, thread: [[THREAD1]], kind: function-exit, tsc: {{[0-9]+}} }
Expand All @@ -80,3 +81,9 @@ int main(int argc, char *argv[]) {
// TRACE: - { type: 0, func-id: [[FIDB]], function: {{.*fB.*}}, cpu: {{.*}}, thread: [[THREAD1]], kind: function-exit, tsc: {{[0-9]+}} }
// TRACE-DAG: - { type: 0, func-id: [[FIDB]], function: {{.*fB.*}}, cpu: {{.*}}, thread: [[THREAD2:[0-9]+]], kind: function-enter, tsc: {{[0-9]+}} }
// TRACE: - { type: 0, func-id: [[FIDB]], function: {{.*fB.*}}, cpu: {{.*}}, thread: [[THREAD2]], kind: function-exit, tsc: {{[0-9]+}} }

// Assert that when unwriting is enabled with a high threshold time, all the function records are erased. A CPU switch could erroneously fail this test, but
// is unlikely given the test program.
// UNWRITE: header
// UNWRITE-NOT: function-enter
// UNWRITE-NOT: function-exit

0 comments on commit 895171e

Please sign in to comment.