[XRay] XRAY_NEVER_INSTRUMENT more functions, consolidate allocators
Summary:
In this change we apply `XRAY_NEVER_INSTRUMENT` to more functions in the
profiling implementation, to ensure that these never get instrumented when
the library is built with a compiler that is capable of doing XRay
instrumentation.

We also consolidate all the allocators into a single header
(xray_allocator.h), which sidesteps the use of the internal allocator
implementation in sanitizer_common.
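
The new helpers pair a raw mmap-backed allocation with placement new and
manual destruction. A minimal usage sketch, with a hypothetical Widget type
standing in for the real allocator types:

  #include "xray_allocator.h"
  #include <new> // placement new

  struct Widget { int State; };

  void example() {
    Widget *W = __xray::allocate<Widget>(); // mmap-backed, never instrumented
    if (W == nullptr)
      return;                               // failure reported when verbose
    new (W) Widget{42};                     // construct in place
    // ... use W ...
    W->~Widget();                           // destroy before unmapping
    __xray::deallocate(W);                  // munmap the backing memory
  }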

This addresses more cases mentioned in llvm.org/PR38577.

Reviewers: mboerger, eizan

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D51776

llvm-svn: 341647
deanberris committed Sep 7, 2018
1 parent 6301574 commit edf0f6a
Showing 5 changed files with 144 additions and 101 deletions.
50 changes: 45 additions & 5 deletions compiler-rt/lib/xray/xray_allocator.h
@@ -20,13 +20,53 @@
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_mutex.h"
 #include "sanitizer_common/sanitizer_posix.h"
+#include "xray_defs.h"
 #include "xray_utils.h"
 #include <cstddef>
 #include <cstdint>
 #include <sys/mman.h>

 namespace __xray {

+// We implement our own memory allocation routine which will bypass the
+// internal allocator. This allows us to manage the memory directly, using
+// mmap'ed memory to back the allocators.
+template <class T> T *allocate() XRAY_NEVER_INSTRUMENT {
+  auto B = reinterpret_cast<void *>(
+      internal_mmap(NULL, sizeof(T), PROT_READ | PROT_WRITE,
+                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+  if (B == MAP_FAILED) {
+    if (Verbosity())
+      Report("XRay Profiling: Failed to allocate memory of size %d.\n",
+             sizeof(T));
+    return nullptr;
+  }
+  return reinterpret_cast<T *>(B);
+}
+
+template <class T> void deallocate(T *B) XRAY_NEVER_INSTRUMENT {
+  if (B == nullptr)
+    return;
+  internal_munmap(B, sizeof(T));
+}
+
+inline void *allocateBuffer(size_t S) XRAY_NEVER_INSTRUMENT {
+  auto B = reinterpret_cast<void *>(internal_mmap(
+      NULL, S, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
+  if (B == MAP_FAILED) {
+    if (Verbosity())
+      Report("XRay Profiling: Failed to allocate memory of size %d.\n", S);
+    return nullptr;
+  }
+  return B;
+}
+
+inline void deallocateBuffer(void *B, size_t S) XRAY_NEVER_INSTRUMENT {
+  if (B == nullptr)
+    return;
+  internal_munmap(B, S);
+}
+
 /// The Allocator type hands out fixed-sized chunks of memory that are
 /// cache-line aligned and sized. This is useful for placement of
 /// performance-sensitive data in memory that's frequently accessed. The
@@ -59,12 +99,12 @@ template <size_t N> struct Allocator {
   size_t AllocatedBlocks = 0;
   SpinMutex Mutex{};

-  void *Alloc() {
+  void *Alloc() XRAY_NEVER_INSTRUMENT {
     SpinMutexLock Lock(&Mutex);
     if (UNLIKELY(BackingStore == nullptr)) {
       BackingStore = reinterpret_cast<void *>(
           internal_mmap(NULL, MaxMemory, PROT_READ | PROT_WRITE,
-                        MAP_PRIVATE | MAP_ANONYMOUS, 0, 0));
+                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
       if (BackingStore == MAP_FAILED) {
         BackingStore = nullptr;
         if (Verbosity())
@@ -107,12 +147,12 @@ template <size_t N> struct Allocator {
   }

 public:
-  explicit Allocator(size_t M)
+  explicit Allocator(size_t M) XRAY_NEVER_INSTRUMENT
       : MaxMemory(nearest_boundary(M, kCacheLineSize)) {}

-  Block Allocate() { return {Alloc()}; }
+  Block Allocate() XRAY_NEVER_INSTRUMENT { return {Alloc()}; }

-  ~Allocator() NOEXCEPT {
+  ~Allocator() NOEXCEPT XRAY_NEVER_INSTRUMENT {
     if (BackingStore != nullptr) {
       internal_munmap(BackingStore, MaxMemory);
     }
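
For context, a sketch of how the fixed-size Allocator above can be used; the
Record type and the 1 MiB cap are made up for this illustration, and Block is
assumed to expose its raw pointer as a Data member:

  #include "xray_allocator.h"
  #include <cstdint>
  #include <new>

  struct Record { uint64_t TSC; int32_t FId; };

  void sketch() {
    __xray::Allocator<sizeof(Record)> A(1 << 20); // cap backing store at 1 MiB
    auto B = A.Allocate();                        // one cache-line-aligned block
    if (B.Data == nullptr)
      return;                                     // backing store exhausted
    auto *R = new (B.Data) Record{0, 0};          // placement-construct
    (void)R;
  } // ~Allocator munmaps the whole backing store
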
61 changes: 31 additions & 30 deletions compiler-rt/lib/xray/xray_function_call_trie.h
@@ -15,7 +15,7 @@
 #ifndef XRAY_FUNCTION_CALL_TRIE_H
 #define XRAY_FUNCTION_CALL_TRIE_H

-#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "xray_defs.h"
 #include "xray_profiling_flags.h"
 #include "xray_segmented_array.h"
 #include <memory> // For placement new.
@@ -120,9 +120,11 @@ class FunctionCallTrie {
     // We add a constructor here to allow us to inplace-construct through
     // Array<...>'s AppendEmplace.
     Node(Node *P, NodeIdPairAllocatorType &A, int64_t CC, int64_t CLT,
-         int32_t F)
-        : Parent(P), Callees(A), CallCount(CC), CumulativeLocalTime(CLT),
-          FId(F) {}
+         int32_t F) XRAY_NEVER_INSTRUMENT : Parent(P),
+                                            Callees(A),
+                                            CallCount(CC),
+                                            CumulativeLocalTime(CLT),
+                                            FId(F) {}

     // TODO: Include the compact histogram.
   };
@@ -134,7 +136,8 @@ class FunctionCallTrie {

     // We add a constructor here to allow us to inplace-construct through
     // Array<...>'s AppendEmplace.
-    ShadowStackEntry(uint64_t T, Node *N) : EntryTSC{T}, NodePtr{N} {}
+    ShadowStackEntry(uint64_t T, Node *N) XRAY_NEVER_INSTRUMENT : EntryTSC{T},
+                                                                  NodePtr{N} {}
   };

   using NodeArray = Array<Node>;
@@ -158,8 +161,9 @@ class FunctionCallTrie {
     Allocators(const Allocators &) = delete;
     Allocators &operator=(const Allocators &) = delete;

-    Allocators(Allocators &&O)
-        : NodeAllocator(O.NodeAllocator), RootAllocator(O.RootAllocator),
+    Allocators(Allocators &&O) XRAY_NEVER_INSTRUMENT
+        : NodeAllocator(O.NodeAllocator),
+          RootAllocator(O.RootAllocator),
           ShadowStackAllocator(O.ShadowStackAllocator),
           NodeIdPairAllocator(O.NodeIdPairAllocator) {
       O.NodeAllocator = nullptr;
@@ -168,7 +172,7 @@ class FunctionCallTrie {
       O.NodeIdPairAllocator = nullptr;
     }

-    Allocators &operator=(Allocators &&O) {
+    Allocators &operator=(Allocators &&O) XRAY_NEVER_INSTRUMENT {
       {
         auto Tmp = O.NodeAllocator;
         O.NodeAllocator = this->NodeAllocator;
@@ -192,58 +196,54 @@ class FunctionCallTrie {
       return *this;
     }

-    ~Allocators() {
+    ~Allocators() XRAY_NEVER_INSTRUMENT {
       // Note that we cannot use delete on these pointers, as they need to be
       // returned to the sanitizer_common library's internal memory tracking
       // system.
       if (NodeAllocator != nullptr) {
         NodeAllocator->~NodeAllocatorType();
-        InternalFree(NodeAllocator);
+        deallocate(NodeAllocator);
         NodeAllocator = nullptr;
       }
       if (RootAllocator != nullptr) {
         RootAllocator->~RootAllocatorType();
-        InternalFree(RootAllocator);
+        deallocate(RootAllocator);
         RootAllocator = nullptr;
       }
       if (ShadowStackAllocator != nullptr) {
         ShadowStackAllocator->~ShadowStackAllocatorType();
-        InternalFree(ShadowStackAllocator);
+        deallocate(ShadowStackAllocator);
         ShadowStackAllocator = nullptr;
       }
       if (NodeIdPairAllocator != nullptr) {
         NodeIdPairAllocator->~NodeIdPairAllocatorType();
-        InternalFree(NodeIdPairAllocator);
+        deallocate(NodeIdPairAllocator);
         NodeIdPairAllocator = nullptr;
       }
     }
   };

   // TODO: Support configuration of options through the arguments.
-  static Allocators InitAllocators() {
+  static Allocators InitAllocators() XRAY_NEVER_INSTRUMENT {
     return InitAllocatorsCustom(profilingFlags()->per_thread_allocator_max);
   }

-  static Allocators InitAllocatorsCustom(uptr Max) {
+  static Allocators InitAllocatorsCustom(uptr Max) XRAY_NEVER_INSTRUMENT {
     Allocators A;
-    auto NodeAllocator = reinterpret_cast<Allocators::NodeAllocatorType *>(
-        InternalAlloc(sizeof(Allocators::NodeAllocatorType)));
+    auto NodeAllocator = allocate<Allocators::NodeAllocatorType>();
     new (NodeAllocator) Allocators::NodeAllocatorType(Max);
     A.NodeAllocator = NodeAllocator;

-    auto RootAllocator = reinterpret_cast<Allocators::RootAllocatorType *>(
-        InternalAlloc(sizeof(Allocators::RootAllocatorType)));
+    auto RootAllocator = allocate<Allocators::RootAllocatorType>();
     new (RootAllocator) Allocators::RootAllocatorType(Max);
     A.RootAllocator = RootAllocator;

     auto ShadowStackAllocator =
-        reinterpret_cast<Allocators::ShadowStackAllocatorType *>(
-            InternalAlloc(sizeof(Allocators::ShadowStackAllocatorType)));
+        allocate<Allocators::ShadowStackAllocatorType>();
     new (ShadowStackAllocator) Allocators::ShadowStackAllocatorType(Max);
     A.ShadowStackAllocator = ShadowStackAllocator;

-    auto NodeIdPairAllocator = reinterpret_cast<NodeIdPairAllocatorType *>(
-        InternalAlloc(sizeof(NodeIdPairAllocatorType)));
+    auto NodeIdPairAllocator = allocate<NodeIdPairAllocatorType>();
     new (NodeIdPairAllocator) NodeIdPairAllocatorType(Max);
     A.NodeIdPairAllocator = NodeIdPairAllocator;
     return A;
@@ -256,12 +256,13 @@ class FunctionCallTrie {
   NodeIdPairAllocatorType *NodeIdPairAllocator = nullptr;

 public:
-  explicit FunctionCallTrie(const Allocators &A)
-      : Nodes(*A.NodeAllocator), Roots(*A.RootAllocator),
+  explicit FunctionCallTrie(const Allocators &A) XRAY_NEVER_INSTRUMENT
+      : Nodes(*A.NodeAllocator),
+        Roots(*A.RootAllocator),
         ShadowStack(*A.ShadowStackAllocator),
         NodeIdPairAllocator(A.NodeIdPairAllocator) {}

-  void enterFunction(const int32_t FId, uint64_t TSC) {
+  void enterFunction(const int32_t FId, uint64_t TSC) XRAY_NEVER_INSTRUMENT {
     DCHECK_NE(FId, 0);
     // This function primarily deals with ensuring that the ShadowStack is
     // consistent and ready for when an exit event is encountered.
@@ -301,7 +302,7 @@ class FunctionCallTrie {
     return;
   }

-  void exitFunction(int32_t FId, uint64_t TSC) {
+  void exitFunction(int32_t FId, uint64_t TSC) XRAY_NEVER_INSTRUMENT {
     // When we exit a function, we look up the ShadowStack to see whether we've
     // entered this function before. We do as little processing here as we can,
     // since most of the hard work would have already been done at function
@@ -323,7 +324,7 @@ class FunctionCallTrie {
     }
   }

-  const RootArray &getRoots() const { return Roots; }
+  const RootArray &getRoots() const XRAY_NEVER_INSTRUMENT { return Roots; }

   // The deepCopyInto operation will update the provided FunctionCallTrie by
   // re-creating the contents of this particular FunctionCallTrie in the other
@@ -338,7 +339,7 @@ class FunctionCallTrie {
   // synchronisation of both "this" and |O|.
   //
   // This function must *not* be called with a non-empty FunctionCallTrie |O|.
-  void deepCopyInto(FunctionCallTrie &O) const {
+  void deepCopyInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
     DCHECK(O.getRoots().empty());

     // We then push the root into a stack, to use as the parent marker for new
@@ -394,7 +395,7 @@ class FunctionCallTrie {
   //
   // This function is *not* thread-safe, and may require external
   // synchronisation of both "this" and |O|.
-  void mergeInto(FunctionCallTrie &O) const {
+  void mergeInto(FunctionCallTrie &O) const XRAY_NEVER_INSTRUMENT {
     struct NodeAndTarget {
       FunctionCallTrie::Node *OrigNode;
       FunctionCallTrie::Node *TargetNode;
41 changes: 14 additions & 27 deletions compiler-rt/lib/xray/xray_profile_collector.cc
@@ -15,6 +15,7 @@
 #include "xray_profile_collector.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "xray_allocator.h"
+#include "xray_defs.h"
 #include "xray_profiling_flags.h"
 #include "xray_segmented_array.h"
 #include <memory>
@@ -81,26 +82,9 @@ static ProfileBufferArray *ProfileBuffers = nullptr;
 static ProfileBufferArrayAllocator *ProfileBuffersAllocator = nullptr;
 static FunctionCallTrie::Allocators *GlobalAllocators = nullptr;

-static void *allocateBuffer(size_t S) {
-  auto B = reinterpret_cast<void *>(internal_mmap(
-      NULL, S, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
-  if (B == MAP_FAILED) {
-    if (Verbosity())
-      Report("XRay Profiling: Failed to allocate memory of size %d.\n", S);
-    return nullptr;
-  }
-  return B;
-}
-
-static void deallocateBuffer(void *B, size_t S) {
-  if (B == nullptr)
-    return;
-  internal_munmap(B, S);
-}
-
 } // namespace

-void post(const FunctionCallTrie &T, tid_t TId) {
+void post(const FunctionCallTrie &T, tid_t TId) XRAY_NEVER_INSTRUMENT {
   static pthread_once_t Once = PTHREAD_ONCE_INIT;
   pthread_once(&Once, +[] { reset(); });
@@ -134,8 +118,10 @@ struct ProfileRecord {
   const FunctionCallTrie::Node *Node = nullptr;

   // Constructor for in-place construction.
-  ProfileRecord(PathAllocator &A, const FunctionCallTrie::Node *N)
-      : Path(A), Node(N) {}
+  ProfileRecord(PathAllocator &A,
+                const FunctionCallTrie::Node *N) XRAY_NEVER_INSTRUMENT
+      : Path(A),
+        Node(N) {}
 };

 namespace {
@@ -144,9 +130,9 @@ using ProfileRecordArray = Array<ProfileRecord>;

 // Walk a depth-first traversal of each root of the FunctionCallTrie to generate
 // the path(s) and the data associated with the path.
-static void populateRecords(ProfileRecordArray &PRs,
-                            ProfileRecord::PathAllocator &PA,
-                            const FunctionCallTrie &Trie) {
+static void
+populateRecords(ProfileRecordArray &PRs, ProfileRecord::PathAllocator &PA,
+                const FunctionCallTrie &Trie) XRAY_NEVER_INSTRUMENT {
   using StackArray = Array<const FunctionCallTrie::Node *>;
   using StackAllocator = typename StackArray::AllocatorType;
   StackAllocator StackAlloc(profilingFlags()->stack_allocator_max);
@@ -174,7 +160,8 @@ static void populateRecords(ProfileRecordArray &PRs,
 }

 static void serializeRecords(ProfileBuffer *Buffer, const BlockHeader &Header,
-                             const ProfileRecordArray &ProfileRecords) {
+                             const ProfileRecordArray &ProfileRecords)
+    XRAY_NEVER_INSTRUMENT {
   auto NextPtr = static_cast<char *>(
                      internal_memcpy(Buffer->Data, &Header, sizeof(Header))) +
                  sizeof(Header);
@@ -207,7 +194,7 @@ static void serializeRecords(ProfileBuffer *Buffer, const BlockHeader &Header,

 } // namespace

-void serialize() {
+void serialize() XRAY_NEVER_INSTRUMENT {
   SpinMutexLock Lock(&GlobalMutex);

   if (GlobalAllocators == nullptr || ThreadTries == nullptr ||
@@ -266,7 +253,7 @@ void serialize() {
   }
 }

-void reset() {
+void reset() XRAY_NEVER_INSTRUMENT {
   SpinMutexLock Lock(&GlobalMutex);

   if (ProfileBuffers != nullptr) {
@@ -316,7 +303,7 @@ void reset() {
   new (ProfileBuffers) ProfileBufferArray(*ProfileBuffersAllocator);
 }

-XRayBuffer nextBuffer(XRayBuffer B) {
+XRayBuffer nextBuffer(XRayBuffer B) XRAY_NEVER_INSTRUMENT {
   SpinMutexLock Lock(&GlobalMutex);

   if (ProfileBuffers == nullptr || ProfileBuffers->size() == 0)
9 changes: 5 additions & 4 deletions compiler-rt/lib/xray/xray_profiling.cc
@@ -19,7 +19,6 @@
 #include "sanitizer_common/sanitizer_flags.h"
 #include "xray/xray_interface.h"
 #include "xray/xray_log_interface.h"
-
 #include "xray_flags.h"
 #include "xray_profile_collector.h"
 #include "xray_profiling_flags.h"
@@ -69,7 +68,7 @@ static ProfilingData &getThreadLocalData() XRAY_NEVER_INSTRUMENT {
   }();
   (void)ThreadOnce;

-  auto &TLD = *reinterpret_cast<ProfilingData*>(&ThreadStorage);
+  auto &TLD = *reinterpret_cast<ProfilingData *>(&ThreadStorage);

   if (UNLIKELY(TLD.Allocators == nullptr || TLD.FCT == nullptr)) {
     auto *Allocators =
@@ -167,11 +166,13 @@ namespace {

 thread_local atomic_uint8_t ReentranceGuard{0};

-static void postCurrentThreadFCT(ProfilingData &TLD) {
+static void postCurrentThreadFCT(ProfilingData &TLD) XRAY_NEVER_INSTRUMENT {
   if (TLD.Allocators == nullptr || TLD.FCT == nullptr)
     return;

-  profileCollectorService::post(*TLD.FCT, GetTid());
+  if (!TLD.FCT->getRoots().empty())
+    profileCollectorService::post(*TLD.FCT, GetTid());
+
   cleanupTLD();
 }

Expand Down

0 comments on commit edf0f6a

Please sign in to comment.