Skip to content

Commit

Permalink
[XRay][profiler] Part 1: XRay Allocator and Array Implementations
Browse files Browse the repository at this point in the history
Summary:
This change is part of the larger XRay Profiling Mode effort.

Here we implement an arena allocator, for fixed sized buffers used in a
segmented array implementation. This change adds the segmented array
data structure, which relies on the allocator to provide and maintain
the storage for the segmented array.

Key features of the `Allocator` type:

*  It uses cache-aligned blocks, intended to host the actual data. These
   blocks are cache-line-size multiples of contiguous bytes.

*  The `Allocator` has a maximum memory budget, set at construction
   time. This allows us to cap the amount of data each specific
   `Allocator` instance is responsible for.

*  Upon destruction, the `Allocator` will clean up the storage it has
   used, handing it back to the internal allocator used in
   sanitizer_common.

Key features of the `Array` type:

*  Each segmented array is always backed by an `Allocator`, which is
   either user-provided or uses a global allocator.

*  When an `Array` grows, it grows by appending a segment that's
   fixed-sized. The size of each segment is computed by the number of
   elements of type `T` that can fit into cache line multiples.

*  An `Array` does not return memory to the `Allocator`, but it can keep
   track of the current number of "live" objects it stores.

*  When an `Array` is destroyed, it will not return memory to the
   `Allocator`. Users should clean up the `Allocator` independently of
   the `Array`.

*  The `Array` type keeps a freelist of the chunks it has used before, so
   that trimming and growing will re-use previously allocated chunks.

These basic data structures are used by the XRay Profiling Mode
implementation to provide efficient, cache-aware storage for data that is
typically read-and-write heavy when tracking latency information. We rely
on the cache-line characteristics of the architecture to give us good
data isolation and cache friendliness when performing operations such as
searching for elements and updating the data hosted in these cache lines.

Reviewers: echristo, pelikan, kpw

Subscribers: mgorny, llvm-commits

Differential Revision: https://reviews.llvm.org/D45756

llvm-svn: 331141
  • Loading branch information
deanberris committed Apr 29, 2018
1 parent 2d56936 commit 26e8120
Show file tree
Hide file tree
Showing 6 changed files with 711 additions and 16 deletions.
56 changes: 40 additions & 16 deletions compiler-rt/lib/xray/CMakeLists.txt
Expand Up @@ -2,33 +2,57 @@

# XRay runtime library implementation files.
set(XRAY_SOURCES
xray_init.cc
xray_flags.cc
xray_interface.cc
xray_log_interface.cc
xray_utils.cc)
xray_init.cc
xray_flags.cc
xray_interface.cc
xray_log_interface.cc
xray_utils.cc)

# Implementation files for all XRay modes.
set(XRAY_FDR_MODE_SOURCES
xray_buffer_queue.cc
xray_fdr_logging.cc)
xray_buffer_queue.cc
xray_fdr_logging.cc)

set(XRAY_BASIC_MODE_SOURCES
xray_inmemory_log.cc)
xray_inmemory_log.cc)


# Implementation files for all XRay architectures.
set(aarch64_SOURCES xray_AArch64.cc xray_trampoline_AArch64.S)
set(arm_SOURCES xray_arm.cc xray_trampoline_arm.S)
set(armhf_SOURCES ${arm_SOURCES})
set(mips_SOURCES xray_mips.cc xray_trampoline_mips.S)
set(mipsel_SOURCES xray_mips.cc xray_trampoline_mips.S)
set(mips64_SOURCES xray_mips64.cc xray_trampoline_mips64.S)
set(mips64el_SOURCES xray_mips64.cc xray_trampoline_mips64.S)
set(x86_64_SOURCES
xray_x86_64.cc
xray_trampoline_x86_64.S)

set(arm_SOURCES
xray_arm.cc
xray_trampoline_arm.S)

set(armhf_SOURCES
${arm_SOURCES})

set(aarch64_SOURCES
xray_AArch64.cc
xray_trampoline_AArch64.S)

set(mips_SOURCES
xray_mips.cc
xray_trampoline_mips.S)

set(mipsel_SOURCES
xray_mips.cc
xray_trampoline_mips.S)

set(mips64_SOURCES
xray_mips64.cc
xray_trampoline_mips64.S)

set(mips64el_SOURCES
xray_mips64.cc
xray_trampoline_mips64.S)

set(powerpc64le_SOURCES
xray_powerpc64.cc
xray_trampoline_powerpc64.cc
xray_trampoline_powerpc64_asm.S)
set(x86_64_SOURCES xray_x86_64.cc xray_trampoline_x86_64.S)

# Now put it all together...
include_directories(..)
Expand Down
4 changes: 4 additions & 0 deletions compiler-rt/lib/xray/tests/unit/CMakeLists.txt
Expand Up @@ -2,3 +2,7 @@ add_xray_unittest(XRayBufferQueueTest SOURCES
buffer_queue_test.cc xray_unit_test_main.cc)
add_xray_unittest(XRayFDRLoggingTest SOURCES
fdr_logging_test.cc xray_unit_test_main.cc)
add_xray_unittest(XRayAllocatorTest SOURCES
allocator_test.cc xray_unit_test_main.cc)
add_xray_unittest(XRaySegmentedArrayTest SOURCES
segmented_array_test.cc xray_unit_test_main.cc)
42 changes: 42 additions & 0 deletions compiler-rt/lib/xray/tests/unit/allocator_test.cc
@@ -0,0 +1,42 @@
//===-- allocator_test.cc -------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a function call tracing system.
//
//===----------------------------------------------------------------------===//

#include "xray_allocator.h"
#include "gtest/gtest.h"

namespace __xray {
namespace {

// 16-byte plain-old-data payload used by the tests below; the Allocator
// under test is instantiated as Allocator<sizeof(TestData)>, so its block
// size matches this struct exactly.
struct TestData {
  s64 First;
  s64 Second;
};

TEST(AllocatorTest, Construction) { Allocator<sizeof(TestData)> A(2 << 11, 0); }

// With a budget much larger than one block, the very first Allocate() call
// must hand back a block with valid (non-null) storage.
TEST(AllocatorTest, Allocate) {
  Allocator<sizeof(TestData)> Alloc(2 << 11, 0);
  auto Block = Alloc.Allocate();
  ASSERT_NE(Block.Data, nullptr);
}

// When the budget covers exactly one block, a second Allocate() must fail
// by returning a block whose Data pointer is null.
TEST(AllocatorTest, OverAllocate) {
  Allocator<sizeof(TestData)> A(sizeof(TestData), 0);
  // The first allocation fits within the budget, so it must succeed.
  // (Previously this result was discarded unchecked, which would let the
  // test pass vacuously if the first allocation failed too.)
  auto B1 = A.Allocate();
  ASSERT_NE(B1.Data, nullptr);
  // The budget is now exhausted; the next allocation must be refused.
  auto B2 = A.Allocate();
  ASSERT_EQ(B2.Data, nullptr);
}

} // namespace
} // namespace __xray
139 changes: 139 additions & 0 deletions compiler-rt/lib/xray/tests/unit/segmented_array_test.cc
@@ -0,0 +1,139 @@
#include "xray_segmented_array.h"
#include "gtest/gtest.h"

namespace __xray {
namespace {

// Element type stored in the Array<TestData> instances under test.
struct TestData {
  s64 First;
  s64 Second;

  // Need a constructor for emplace operations.
  TestData(s64 F, s64 S) : First(F), Second(S) {}
};

// Default construction (backed by the global allocator) must not crash.
TEST(SegmentedArrayTest, Construction) {
  Array<TestData> Elements;
  (void)Elements;
}

// An Array can also be backed by a caller-provided Allocator instance.
TEST(SegmentedArrayTest, ConstructWithAllocator) {
  using AllocatorType = typename Array<TestData>::AllocatorType;
  AllocatorType Alloc(1 << 4, 0);
  Array<TestData> Elements(Alloc);
  (void)Elements;
}

// Append must return a non-null pointer to the stored element, and size()
// must track the number of appended elements.
TEST(SegmentedArrayTest, ConstructAndPopulate) {
  Array<TestData> Data;
  ASSERT_NE(Data.Append(TestData{0, 0}), nullptr);
  ASSERT_NE(Data.Append(TestData{1, 1}), nullptr);
  ASSERT_EQ(Data.size(), 2u);
}

// An appended element must be retrievable through operator[] with the
// values it was constructed from.
TEST(SegmentedArrayTest, ConstructPopulateAndLookup) {
  Array<TestData> Data;
  ASSERT_NE(Data.Append(TestData{0, 1}), nullptr);
  ASSERT_EQ(Data.size(), 1u);
  ASSERT_EQ(Data[0].First, 0);
  ASSERT_EQ(Data[0].Second, 1);
}

// Append 100 elements and verify every one of them round-trips through
// operator[] with the values it was appended with.
TEST(SegmentedArrayTest, PopulateWithMoreElements) {
  Array<TestData> Data;
  static const auto kMaxElements = 100u;
  for (auto I = 0u; I < kMaxElements; ++I)
    ASSERT_NE(Data.Append(TestData{I, I + 1}), nullptr);
  ASSERT_EQ(Data.size(), kMaxElements);
  for (auto I = 0u; I < kMaxElements; ++I) {
    ASSERT_EQ(Data[I].First, I);
    ASSERT_EQ(Data[I].Second, I + 1);
  }
}

// AppendEmplace constructs the element in place from constructor
// arguments; the stored values must match.
TEST(SegmentedArrayTest, AppendEmplace) {
  Array<TestData> Data;
  ASSERT_NE(Data.AppendEmplace(1, 1), nullptr);
  ASSERT_EQ(Data[0].First, 1);
  ASSERT_EQ(Data[0].Second, 1);
}

// trim(N) removes N elements from the end; trimming the only element must
// leave the array empty.
TEST(SegmentedArrayTest, AppendAndTrim) {
  Array<TestData> Data;
  ASSERT_NE(Data.AppendEmplace(1, 1), nullptr);
  ASSERT_EQ(Data.size(), 1u);
  Data.trim(1);
  ASSERT_EQ(Data.size(), 0u);
  ASSERT_TRUE(Data.empty());
}

// Iterator forward movement: begin() == end() on an empty array, post-
// increment returns the pre-increment value, range-for over an empty
// array visits nothing, and begin() dereferences to the first element
// once one exists.
TEST(SegmentedArrayTest, IteratorAdvance) {
  Array<TestData> Data;
  ASSERT_TRUE(Data.empty());
  ASSERT_EQ(Data.begin(), Data.end());

  // Post-increment must yield the old position, then point past it.
  auto Iter = Data.begin();
  ASSERT_EQ(Iter++, Data.begin());
  ASSERT_NE(Iter, Data.begin());

  // Iterating an empty array must not execute the loop body.
  for (const auto &Element : Data) {
    (void)Element;
    FAIL();
  }

  ASSERT_NE(Data.AppendEmplace(1, 1), nullptr);
  ASSERT_EQ(Data.size(), 1u);
  ASSERT_NE(Data.begin(), Data.end());
  auto &Front = *Data.begin();
  ASSERT_EQ(Front.First, 1);
  ASSERT_EQ(Front.Second, 1);
}

// Iterator backward movement: post-decrement from end() of a one-element
// array must land on begin() and dereference to that element.
TEST(SegmentedArrayTest, IteratorRetreat) {
  Array<TestData> Data;
  ASSERT_TRUE(Data.empty());
  ASSERT_EQ(Data.begin(), Data.end());

  ASSERT_NE(Data.AppendEmplace(1, 1), nullptr);
  ASSERT_EQ(Data.size(), 1u);
  ASSERT_NE(Data.begin(), Data.end());
  auto &Front = *Data.begin();
  ASSERT_EQ(Front.First, 1);
  ASSERT_EQ(Front.Second, 1);

  // Post-decrement must yield the old position (end), then point at the
  // sole element, i.e. begin().
  auto Iter = Data.end();
  ASSERT_EQ(Iter--, Data.end());
  ASSERT_NE(Iter, Data.end());
  ASSERT_EQ(Iter, Data.begin());
  ASSERT_EQ(Iter->First, 1);
  ASSERT_EQ(Iter->Second, 1);
}

// Growing an array past one chunk, trimming it back to empty, and growing
// again must work; after trimming to empty the iterators must compare
// equal to those of the originally-empty array (chunks are kept on a
// freelist rather than returned to the allocator).
TEST(SegmentedArrayTest, IteratorTrimBehaviour) {
  Array<TestData> Data;
  ASSERT_TRUE(Data.empty());
  auto BeginBefore = Data.begin(), EndBefore = Data.end();

  // Add enough elements in Data to have more than one chunk.
  constexpr auto ChunkX2 = Array<TestData>::ChunkSize * 2;
  for (auto I = ChunkX2; I > 0u; --I)
    Data.Append({static_cast<s64>(I), static_cast<s64>(I)});
  ASSERT_EQ(Data.size(), ChunkX2);

  // Trim one chunk's elements worth.
  Data.trim(ChunkX2 / 2);
  ASSERT_EQ(Data.size(), ChunkX2 / 2);

  // Then trim until it's empty.
  Data.trim(ChunkX2 / 2);
  ASSERT_TRUE(Data.empty());

  // Here our iterators should be the same as before any growth.
  auto BeginAfter = Data.begin(), EndAfter = Data.end();
  EXPECT_EQ(BeginBefore, BeginAfter);
  EXPECT_EQ(EndBefore, EndAfter);

  // Then we ensure that adding elements back works just fine.
  for (auto I = ChunkX2; I > 0u; --I)
    Data.Append({static_cast<s64>(I), static_cast<s64>(I)});
  EXPECT_EQ(Data.size(), ChunkX2);
}

} // namespace
} // namespace __xray

0 comments on commit 26e8120

Please sign in to comment.