Skip to content

Commit

Permalink
[XRay][profiler] Part 2: XRay Function Call Trie
Browse files Browse the repository at this point in the history
Summary:
This is part of the larger XRay Profiling Mode effort.

This patch implements a central data structure for capturing statistics
about XRay instrumented function call stacks. The `FunctionCallTrie`
type does the following things:

*  It keeps track of a shadow function call stack of XRay instrumented
   functions as they are entered (function enter event) and as they are
   exited (function exit event).

*  When a function is entered, we record the entry TSC on the shadow
   stack and update the trie (or prefix tree) representing the current
   function call stack. If we haven't encountered this function call
   before, this creates a unique node for the function in this position
   on the stack. We update the list of callees of the parent function as
   well to reflect this newly found path.

*  When a function is exited, we compute statistics (TSC deltas,
   function call count frequency) for the associated function(s) up the
   stack as we unwind to find the matching entry event.

This builds upon the XRay `Allocator` and `Array` types in Part 1 of
this series of patches.

Depends on D45756.

Reviewers: echristo, pelikan, kpw

Reviewed By: kpw

Subscribers: llvm-commits, mgorny

Differential Revision: https://reviews.llvm.org/D45757

llvm-svn: 332313
  • Loading branch information
deanberris committed May 15, 2018
1 parent 595ba1d commit 980d93d
Show file tree
Hide file tree
Showing 8 changed files with 837 additions and 0 deletions.
29 changes: 29 additions & 0 deletions compiler-rt/lib/xray/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ set(XRAY_BASIC_MODE_SOURCES
xray_basic_flags.cc
xray_basic_logging.cc)

# Source files for the XRay profiler mode; consumed below as the SOURCES of
# the RTXrayPROFILER object library.
set(XRAY_PROFILER_MODE_SOURCES
xray_profiler_flags.cc)

# Implementation files for all XRay architectures.
set(x86_64_SOURCES
Expand Down Expand Up @@ -98,6 +100,12 @@ if (APPLE)
SOURCES ${XRAY_BASIC_MODE_SOURCES}
CFLAGS ${XRAY_CFLAGS}
DEFS ${XRAY_COMMON_DEFINITIONS})
# Profiler-mode object library for the Apple build, compiled from
# XRAY_PROFILER_MODE_SOURCES for every supported OS and architecture with the
# same CFLAGS/DEFS as the other XRay object libraries.
add_compiler_rt_object_libraries(RTXrayPROFILER
OS ${XRAY_SUPPORTED_OS}
ARCHS ${XRAY_SUPPORTED_ARCH}
SOURCES ${XRAY_PROFILER_MODE_SOURCES}
CFLAGS ${XRAY_CFLAGS}
DEFS ${XRAY_COMMON_DEFINITIONS})

# We only support running on osx for now.
add_compiler_rt_runtime(clang_rt.xray
Expand Down Expand Up @@ -132,6 +140,16 @@ if (APPLE)
LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS} ${WEAK_SYMBOL_LINK_FLAGS}
LINK_LIBS ${XRAY_LINK_LIBS}
PARENT_TARGET xray)
# Static profiler-mode runtime for the Apple build, assembled from the
# RTXrayPROFILER object library and registered under the `xray` parent target.
add_compiler_rt_runtime(clang_rt.xray-profiler
STATIC
OS ${XRAY_SUPPORTED_OS}
ARCHS ${XRAY_SUPPORTED_ARCH}
OBJECT_LIBS RTXrayPROFILER
CFLAGS ${XRAY_CFLAGS}
DEFS ${XRAY_COMMON_DEFINITIONS}
LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS} ${WEAK_SYMBOL_LINK_FLAGS}
LINK_LIBS ${XRAY_LINK_LIBS}
PARENT_TARGET xray)
else() # not Apple
foreach(arch ${XRAY_SUPPORTED_ARCH})
if(NOT CAN_TARGET_${arch})
Expand All @@ -149,6 +167,10 @@ else() # not Apple
ARCHS ${arch}
SOURCES ${XRAY_BASIC_MODE_SOURCES} CFLAGS ${XRAY_CFLAGS}
DEFS ${XRAY_COMMON_DEFINITIONS})
# Profiler-mode object library for the current ${arch} of the enclosing
# per-architecture loop (non-Apple build).
add_compiler_rt_object_libraries(RTXrayPROFILER
ARCHS ${arch}
SOURCES ${XRAY_PROFILER_MODE_SOURCES} CFLAGS ${XRAY_CFLAGS}
DEFS ${XRAY_COMMON_DEFINITIONS})

# Common XRay archive for instrumented binaries.
add_compiler_rt_runtime(clang_rt.xray
Expand All @@ -174,6 +196,13 @@ else() # not Apple
DEFS ${XRAY_COMMON_DEFINITIONS}
OBJECT_LIBS RTXrayBASIC
PARENT_TARGET xray)
# Static profiler-mode runtime for the current ${arch}, assembled from the
# RTXrayPROFILER object library and registered under the `xray` parent target.
add_compiler_rt_runtime(clang_rt.xray-profiler
STATIC
ARCHS ${arch}
CFLAGS ${XRAY_CFLAGS}
DEFS ${XRAY_COMMON_DEFINITIONS}
OBJECT_LIBS RTXrayPROFILER
PARENT_TARGET xray)
endforeach()
endif() # not Apple

Expand Down
2 changes: 2 additions & 0 deletions compiler-rt/lib/xray/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,15 @@ if(COMPILER_RT_CAN_EXECUTE_TESTS)
# Test support library for OS X: bundles the objects of the XRay runtime
# (RTXray), the FDR and profiler modes, and the sanitizer common libraries.
add_xray_lib("RTXRay.test.osx"
$<TARGET_OBJECTS:RTXray.osx>
$<TARGET_OBJECTS:RTXrayFDR.osx>
$<TARGET_OBJECTS:RTXrayPROFILER.osx>
$<TARGET_OBJECTS:RTSanitizerCommon.osx>
$<TARGET_OBJECTS:RTSanitizerCommonLibc.osx>)
else()
# One test support library per supported architecture, bundling the objects of
# the XRay runtime (RTXray), the FDR and profiler modes, and the sanitizer
# common libraries for that architecture.
foreach(arch ${XRAY_SUPPORTED_ARCH})
add_xray_lib("RTXRay.test.${arch}"
$<TARGET_OBJECTS:RTXray.${arch}>
$<TARGET_OBJECTS:RTXrayFDR.${arch}>
$<TARGET_OBJECTS:RTXrayPROFILER.${arch}>
$<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
$<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>)
endforeach()
Expand Down
2 changes: 2 additions & 0 deletions compiler-rt/lib/xray/tests/unit/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ add_xray_unittest(XRayAllocatorTest SOURCES
allocator_test.cc xray_unit_test_main.cc)
# Unit test target built from segmented_array_test.cc plus the shared test
# main.
add_xray_unittest(XRaySegmentedArrayTest SOURCES
segmented_array_test.cc xray_unit_test_main.cc)
# Unit test target built from function_call_trie_test.cc plus the shared test
# main.
add_xray_unittest(XRayFunctionCallTrieTest SOURCES
function_call_trie_test.cc xray_unit_test_main.cc)
253 changes: 253 additions & 0 deletions compiler-rt/lib/xray/tests/unit/function_call_trie_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
//===-- function_call_trie_test.cc ----------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a function call tracing system.
//
//===----------------------------------------------------------------------===//
#include "gtest/gtest.h"

#include "xray_function_call_trie.h"

namespace __xray {

namespace {

TEST(FunctionCallTrieTest, Construction) {
  // Smoke test: a trie must be constructible without being handed an explicit
  // set of allocators. In that case it is expected to fall back to the global
  // allocators.
  FunctionCallTrie Trie;
  (void)Trie; // Only construction and destruction are exercised here.
}

TEST(FunctionCallTrieTest, ConstructWithTLSAllocators) {
  // FIXME: The allocator constructors should accept their configuration as an
  // argument.
  profilerFlags()->setDefaults();

  // Build a trie on top of an explicitly initialised set of allocators.
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);
}

TEST(FunctionCallTrieTest, EnterAndExitFunction) {
  profilerFlags()->setDefaults();
  auto Allocs = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(Allocs);

  // One matched enter/exit pair for FId 1, entered at TSC 1 and exited at
  // TSC 2.
  Trie.enterFunction(1, 1);
  Trie.exitFunction(1, 2);

  // There is no data collection service yet, so the trie exports its data as
  // a list of roots and the test walks the structure by hand.
  const auto &Roots = Trie.getRoots();
  ASSERT_EQ(Roots.size(), 1u);

  // The single root must describe exactly the call recorded above.
  const auto &Root = *Roots.front();
  ASSERT_EQ(Root.FId, 1);
  ASSERT_EQ(Root.CallCount, 1);
  ASSERT_EQ(Root.CumulativeLocalTime, 1u);
}

TEST(FunctionCallTrieTest, MissingFunctionEntry) {
  // Initialise the profiler flags before creating the allocators, matching the
  // setup used by the other tests that call InitAllocators(). This keeps the
  // test independent of the order in which tests run and of any flag state
  // left behind by an earlier test.
  profilerFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  // An exit event that has no matching entry must not produce any roots.
  Trie.exitFunction(1, 1);
  const auto &R = Trie.getRoots();

  ASSERT_TRUE(R.empty());
}

TEST(FunctionCallTrieTest, MissingFunctionExit) {
  // NOTE(review): unlike most tests in this file, this one does not call
  // profilerFlags()->setDefaults() before InitAllocators(). Confirm whether
  // the expectation below depends on the flag state at this point.
  auto Allocs = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(Allocs);

  // Record an entry that never sees its matching exit.
  Trie.enterFunction(1, 1);

  // The test expects no roots to be reported in this situation.
  ASSERT_TRUE(Trie.getRoots().empty());
}

TEST(FunctionCallTrieTest, MultipleRoots) {
  profilerFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(A);

  // Enter and exit FId = 1.
  Trie.enterFunction(1, 1);
  Trie.exitFunction(1, 2);

  // Enter and exit FId = 2.
  Trie.enterFunction(2, 3);
  Trie.exitFunction(2, 4);

  // Two independent top-level calls must yield two separate roots.
  const auto &R = Trie.getRoots();
  ASSERT_FALSE(R.empty());
  ASSERT_EQ(R.size(), 2u);

  // Validate both root pointers before dereferencing either of them, so that
  // a null root is reported as a test failure instead of crashing the test
  // binary.
  const auto R0 = R[0];
  const auto R1 = R[1];
  ASSERT_NE(R0, nullptr);
  ASSERT_NE(R1, nullptr);

  // Make sure the roots have different IDs.
  ASSERT_NE(R0->FId, R1->FId);

  // Each root saw exactly one call that lasted a single TSC tick.
  EXPECT_EQ(R0->CallCount, 1u);
  EXPECT_EQ(R0->CumulativeLocalTime, 1u);
  EXPECT_EQ(R1->CallCount, 1u);
  EXPECT_EQ(R1->CumulativeLocalTime, 1u);
}

// Missing an intermediary exit event may be rare in practice, but the trie must
// still cope when one goes missing in between call entries and exits. To
// illustrate, take the following shadow call stack:
//
//   f0@t0 -> f1@t1 -> f2@t2
//
// Suppose we then see an exit for `f2` @ t3 followed directly by an exit for
// `f0` @ t4, i.e. with no `f1` exit in between. The expectations in this test
// pin the following accounting:
//
//   - `f2` is charged local time d = (t3 - t2);
//   - `f1`, unwound as part of handling the `f0` exit, is charged
//     d' = (t4 - t1);
//   - `f0` is charged d'' = (t4 - t0) - d'.
TEST(FunctionCallTrieTest, MissingIntermediaryExit) {
  profilerFlags()->setDefaults();
  auto Allocs = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(Allocs);

  // Build the stack 1 -> 2 -> 3, exit 3, then exit 1 without ever exiting 2.
  Trie.enterFunction(1, 0);
  Trie.enterFunction(2, 100);
  Trie.enterFunction(3, 200);
  Trie.exitFunction(3, 300);
  Trie.exitFunction(1, 400);

  // Every function must still be present in the trie, chained in the order
  // 1 -> 2 -> 3, with the appropriate counts and local latencies.
  const auto &Roots = Trie.getRoots();
  ASSERT_FALSE(Roots.empty());
  ASSERT_EQ(Roots.size(), 1u);

  const auto &Outer = *Roots.front();
  ASSERT_EQ(Outer.FId, 1);
  ASSERT_FALSE(Outer.Callees.empty());

  const auto &Middle = *Outer.Callees[0].NodePtr;
  ASSERT_EQ(Middle.FId, 2);
  ASSERT_FALSE(Middle.Callees.empty());

  const auto &Inner = *Middle.Callees[0].NodePtr;
  ASSERT_EQ(Inner.FId, 3);
  ASSERT_TRUE(Inner.Callees.empty());

  // With the shape of the trie established, check the per-node statistics.
  EXPECT_EQ(Inner.CallCount, 1);
  EXPECT_EQ(Middle.CallCount, 1);
  EXPECT_EQ(Outer.CallCount, 1);
  EXPECT_EQ(Inner.CumulativeLocalTime, 100);
  EXPECT_EQ(Middle.CumulativeLocalTime, 300);
  EXPECT_EQ(Outer.CumulativeLocalTime, 100);
}

// TODO: Test that we can handle cross-CPU migrations, where TSCs are not
// guaranteed to be synchronised.
TEST(FunctionCallTrieTest, DeepCopy) {
  profilerFlags()->setDefaults();
  auto SrcAllocs = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Trie(SrcAllocs);

  // Record function 1 calling 2 and then 3, every call with a matched exit.
  Trie.enterFunction(1, 0);
  Trie.enterFunction(2, 1);
  Trie.exitFunction(2, 2);
  Trie.enterFunction(3, 3);
  Trie.exitFunction(3, 4);
  Trie.exitFunction(1, 5);

  // Deep-copy into a trie backed by its own set of allocators, then compare
  // the two structures.
  auto DstAllocs = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Copy(DstAllocs);
  Trie.deepCopyInto(Copy);

  const auto &OrigRoots = Trie.getRoots();
  const auto &CopyRoots = Copy.getRoots();
  ASSERT_NE(OrigRoots.size(), 0u);
  ASSERT_EQ(OrigRoots.size(), CopyRoots.size());

  const auto &R0Orig = *OrigRoots[0];
  const auto &R0Copy = *CopyRoots[0];
  EXPECT_EQ(R0Orig.FId, 1);
  EXPECT_EQ(R0Orig.FId, R0Copy.FId);

  ASSERT_EQ(R0Orig.Callees.size(), 2u);
  ASSERT_EQ(R0Copy.Callees.size(), 2u);

  // Locate the callee entry for function 2 underneath each root.
  const auto IsFId2 = [](const FunctionCallTrie::NodeIdPair &P) {
    return P.FId == 2;
  };
  const auto &F2Orig = *R0Orig.Callees.find_element(IsFId2)->NodePtr;
  const auto &F2Copy = *R0Copy.Callees.find_element(IsFId2)->NodePtr;

  // Each callee must point back at the root of its own trie; in particular the
  // copied callee must reference the copied root, not the original one.
  EXPECT_EQ(&R0Orig, F2Orig.Parent);
  EXPECT_EQ(&R0Copy, F2Copy.Parent);
}

TEST(FunctionCallTrieTest, MergeInto) {
  profilerFlags()->setDefaults();
  auto A = FunctionCallTrie::InitAllocators();
  FunctionCallTrie T0(A);
  FunctionCallTrie T1(A);

  // Both source tries record the same fully matched call chain: 1 -> 2 -> 3.
  const auto RecordChain = [](FunctionCallTrie &T) {
    T.enterFunction(1, 0);
    T.enterFunction(2, 1);
    T.enterFunction(3, 2);
    T.exitFunction(3, 3);
    T.exitFunction(2, 4);
    T.exitFunction(1, 5);
  };
  RecordChain(T0);
  RecordChain(T1);

  // The destination uses a different set of allocators on purpose, to make
  // sure data can be transferred into a FunctionCallTrie backed by other
  // allocators. This reflects the intended usage scenario of collecting
  // profiles that aggregate across threads.
  auto B = FunctionCallTrie::InitAllocators();
  FunctionCallTrie Merged(B);

  T0.mergeInto(Merged);
  T1.mergeInto(Merged);

  // The merged trie keeps the single 1 -> 2 -> 3 chain, with the call counts
  // and cumulative times of both sources added together.
  ASSERT_EQ(Merged.getRoots().size(), 1u);
  const auto &Top = *Merged.getRoots()[0];
  EXPECT_EQ(Top.FId, 1);
  EXPECT_EQ(Top.CallCount, 2);
  EXPECT_EQ(Top.CumulativeLocalTime, 10);
  EXPECT_EQ(Top.Callees.size(), 1u);

  const auto &Mid = *Top.Callees[0].NodePtr;
  EXPECT_EQ(Mid.FId, 2);
  EXPECT_EQ(Mid.CallCount, 2);
  EXPECT_EQ(Mid.CumulativeLocalTime, 6);
  EXPECT_EQ(Mid.Callees.size(), 1u);

  const auto &Leaf = *Mid.Callees[0].NodePtr;
  EXPECT_EQ(Leaf.FId, 3);
  EXPECT_EQ(Leaf.CallCount, 2);
  EXPECT_EQ(Leaf.CumulativeLocalTime, 2);
  EXPECT_EQ(Leaf.Callees.size(), 0u);
}

} // namespace

} // namespace __xray
Loading

0 comments on commit 980d93d

Please sign in to comment.