-
Notifications
You must be signed in to change notification settings - Fork 12.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[XRay][profiler] Part 2: XRay Function Call Trie
Summary: This is part of the larger XRay Profiling Mode effort. This patch implements a central data structure for capturing statistics about XRay instrumented function call stacks. The `FunctionCallTrie` type does the following things: * It keeps track of a shadow function call stack of XRay instrumented functions as they are entered (function enter event) and as they are exited (function exit event). * When a function is entered, the shadow stack contains information about the entry TSC, and updates the trie (or prefix tree) representing the current function call stack. If we haven't encountered this function call before, this creates a unique node for the function in this position on the stack. We update the list of callees of the parent function as well to reflect this newly found path. * When a function is exited, we compute statistics (TSC deltas, function call count frequency) for the associated function(s) up the stack as we unwind to find the matching entry event. This builds upon the XRay `Allocator` and `Array` types in Part 1 of this series of patches. Depends on D45756. Reviewers: echristo, pelikan, kpw Reviewed By: kpw Subscribers: llvm-commits, mgorny Differential Revision: https://reviews.llvm.org/D45757 llvm-svn: 332313
- Loading branch information
1 parent
595ba1d
commit 980d93d
Showing
8 changed files
with
837 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
253 changes: 253 additions & 0 deletions
253
compiler-rt/lib/xray/tests/unit/function_call_trie_test.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,253 @@ | ||
| //===-- function_call_trie_test.cc ----------------------------------------===// | ||
| // | ||
| // The LLVM Compiler Infrastructure | ||
| // | ||
| // This file is distributed under the University of Illinois Open Source | ||
| // License. See LICENSE.TXT for details. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // This file is a part of XRay, a function call tracing system. | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| #include "gtest/gtest.h" | ||
|
|
||
| #include "xray_function_call_trie.h" | ||
|
|
||
| namespace __xray { | ||
|
|
||
| namespace { | ||
|
|
||
| TEST(FunctionCallTrieTest, Construction) { | ||
| // We want to make sure that we can create one of these without the set of | ||
| // allocators we need. This will by default use the global allocators. | ||
| FunctionCallTrie Trie; | ||
| } | ||
|
|
||
| TEST(FunctionCallTrieTest, ConstructWithTLSAllocators) { | ||
| // FIXME: Support passing in configuration for allocators in the allocator | ||
| // constructors. | ||
| profilerFlags()->setDefaults(); | ||
| FunctionCallTrie::Allocators Allocators = FunctionCallTrie::InitAllocators(); | ||
| FunctionCallTrie Trie(Allocators); | ||
| } | ||
|
|
||
| TEST(FunctionCallTrieTest, EnterAndExitFunction) { | ||
| profilerFlags()->setDefaults(); | ||
| auto A = FunctionCallTrie::InitAllocators(); | ||
| FunctionCallTrie Trie(A); | ||
|
|
||
| Trie.enterFunction(1, 1); | ||
| Trie.exitFunction(1, 2); | ||
|
|
||
| // We need a way to pull the data out. At this point, until we get a data | ||
| // collection service implemented, we're going to export the data as a list of | ||
| // roots, and manually walk through the structure ourselves. | ||
|
|
||
| const auto &R = Trie.getRoots(); | ||
|
|
||
| ASSERT_EQ(R.size(), 1u); | ||
| ASSERT_EQ(R.front()->FId, 1); | ||
| ASSERT_EQ(R.front()->CallCount, 1); | ||
| ASSERT_EQ(R.front()->CumulativeLocalTime, 1u); | ||
| } | ||
|
|
||
| TEST(FunctionCallTrieTest, MissingFunctionEntry) { | ||
| auto A = FunctionCallTrie::InitAllocators(); | ||
| FunctionCallTrie Trie(A); | ||
| Trie.exitFunction(1, 1); | ||
| const auto &R = Trie.getRoots(); | ||
|
|
||
| ASSERT_TRUE(R.empty()); | ||
| } | ||
|
|
||
| TEST(FunctionCallTrieTest, MissingFunctionExit) { | ||
| auto A = FunctionCallTrie::InitAllocators(); | ||
| FunctionCallTrie Trie(A); | ||
| Trie.enterFunction(1, 1); | ||
| const auto &R = Trie.getRoots(); | ||
|
|
||
| ASSERT_TRUE(R.empty()); | ||
| } | ||
|
|
||
| TEST(FunctionCallTrieTest, MultipleRoots) { | ||
| profilerFlags()->setDefaults(); | ||
| auto A = FunctionCallTrie::InitAllocators(); | ||
| FunctionCallTrie Trie(A); | ||
|
|
||
| // Enter and exit FId = 1. | ||
| Trie.enterFunction(1, 1); | ||
| Trie.exitFunction(1, 2); | ||
|
|
||
| // Enter and exit FId = 2. | ||
| Trie.enterFunction(2, 3); | ||
| Trie.exitFunction(2, 4); | ||
|
|
||
| const auto &R = Trie.getRoots(); | ||
| ASSERT_FALSE(R.empty()); | ||
| ASSERT_EQ(R.size(), 2u); | ||
|
|
||
| // Make sure the roots have different IDs. | ||
| const auto R0 = R[0]; | ||
| const auto R1 = R[1]; | ||
| ASSERT_NE(R0->FId, R1->FId); | ||
|
|
||
| // Inspect the roots that they have the right data. | ||
| ASSERT_NE(R0, nullptr); | ||
| EXPECT_EQ(R0->CallCount, 1u); | ||
| EXPECT_EQ(R0->CumulativeLocalTime, 1u); | ||
|
|
||
| ASSERT_NE(R1, nullptr); | ||
| EXPECT_EQ(R1->CallCount, 1u); | ||
| EXPECT_EQ(R1->CumulativeLocalTime, 1u); | ||
| } | ||
|
|
||
| // While missing an intermediary entry may be rare in practice, we still enforce | ||
| // that we can handle the case where we've missed the entry event somehow, in | ||
| // between call entry/exits. To illustrate, imagine the following shadow call | ||
| // stack: | ||
| // | ||
| // f0@t0 -> f1@t1 -> f2@t2 | ||
| // | ||
| // If for whatever reason we see an exit for `f2` @ t3, followed by an exit for | ||
| // `f0` @ t4 (i.e. no `f1` exit in between) then we need to handle the case of | ||
| // accounting local time to `f2` from d = (t3 - t2), then local time to `f1` | ||
| // as d' = (t3 - t1) - d, and then local time to `f0` as d'' = (t3 - t0) - d'. | ||
| TEST(FunctionCallTrieTest, MissingIntermediaryExit) { | ||
| profilerFlags()->setDefaults(); | ||
| auto A = FunctionCallTrie::InitAllocators(); | ||
| FunctionCallTrie Trie(A); | ||
|
|
||
| Trie.enterFunction(1, 0); | ||
| Trie.enterFunction(2, 100); | ||
| Trie.enterFunction(3, 200); | ||
| Trie.exitFunction(3, 300); | ||
| Trie.exitFunction(1, 400); | ||
|
|
||
| // What we should see at this point is all the functions in the trie in a | ||
| // specific order (1 -> 2 -> 3) with the appropriate count(s) and local | ||
| // latencies. | ||
| const auto &R = Trie.getRoots(); | ||
| ASSERT_FALSE(R.empty()); | ||
| ASSERT_EQ(R.size(), 1u); | ||
|
|
||
| const auto &F1 = *R[0]; | ||
| ASSERT_EQ(F1.FId, 1); | ||
| ASSERT_FALSE(F1.Callees.empty()); | ||
|
|
||
| const auto &F2 = *F1.Callees[0].NodePtr; | ||
| ASSERT_EQ(F2.FId, 2); | ||
| ASSERT_FALSE(F2.Callees.empty()); | ||
|
|
||
| const auto &F3 = *F2.Callees[0].NodePtr; | ||
| ASSERT_EQ(F3.FId, 3); | ||
| ASSERT_TRUE(F3.Callees.empty()); | ||
|
|
||
| // Now that we've established the preconditions, we check for specific aspects | ||
| // of the nodes. | ||
| EXPECT_EQ(F3.CallCount, 1); | ||
| EXPECT_EQ(F2.CallCount, 1); | ||
| EXPECT_EQ(F1.CallCount, 1); | ||
| EXPECT_EQ(F3.CumulativeLocalTime, 100); | ||
| EXPECT_EQ(F2.CumulativeLocalTime, 300); | ||
| EXPECT_EQ(F1.CumulativeLocalTime, 100); | ||
| } | ||
|
|
||
| // TODO: Test that we can handle cross-CPU migrations, where TSCs are not | ||
| // guaranteed to be synchronised. | ||
| TEST(FunctionCallTrieTest, DeepCopy) { | ||
| profilerFlags()->setDefaults(); | ||
| auto A = FunctionCallTrie::InitAllocators(); | ||
| FunctionCallTrie Trie(A); | ||
|
|
||
| Trie.enterFunction(1, 0); | ||
| Trie.enterFunction(2, 1); | ||
| Trie.exitFunction(2, 2); | ||
| Trie.enterFunction(3, 3); | ||
| Trie.exitFunction(3, 4); | ||
| Trie.exitFunction(1, 5); | ||
|
|
||
| // We want to make a deep copy and compare notes. | ||
| auto B = FunctionCallTrie::InitAllocators(); | ||
| FunctionCallTrie Copy(B); | ||
| Trie.deepCopyInto(Copy); | ||
|
|
||
| ASSERT_NE(Trie.getRoots().size(), 0u); | ||
| ASSERT_EQ(Trie.getRoots().size(), Copy.getRoots().size()); | ||
| const auto &R0Orig = *Trie.getRoots()[0]; | ||
| const auto &R0Copy = *Copy.getRoots()[0]; | ||
| EXPECT_EQ(R0Orig.FId, 1); | ||
| EXPECT_EQ(R0Orig.FId, R0Copy.FId); | ||
|
|
||
| ASSERT_EQ(R0Orig.Callees.size(), 2u); | ||
| ASSERT_EQ(R0Copy.Callees.size(), 2u); | ||
|
|
||
| const auto &F1Orig = | ||
| *R0Orig.Callees | ||
| .find_element( | ||
| [](const FunctionCallTrie::NodeIdPair &R) { return R.FId == 2; }) | ||
| ->NodePtr; | ||
| const auto &F1Copy = | ||
| *R0Copy.Callees | ||
| .find_element( | ||
| [](const FunctionCallTrie::NodeIdPair &R) { return R.FId == 2; }) | ||
| ->NodePtr; | ||
| EXPECT_EQ(&R0Orig, F1Orig.Parent); | ||
| EXPECT_EQ(&R0Copy, F1Copy.Parent); | ||
| } | ||
|
|
||
| TEST(FunctionCallTrieTest, MergeInto) { | ||
| profilerFlags()->setDefaults(); | ||
| auto A = FunctionCallTrie::InitAllocators(); | ||
| FunctionCallTrie T0(A); | ||
| FunctionCallTrie T1(A); | ||
|
|
||
| // 1 -> 2 -> 3 | ||
| T0.enterFunction(1, 0); | ||
| T0.enterFunction(2, 1); | ||
| T0.enterFunction(3, 2); | ||
| T0.exitFunction(3, 3); | ||
| T0.exitFunction(2, 4); | ||
| T0.exitFunction(1, 5); | ||
|
|
||
| // 1 -> 2 -> 3 | ||
| T1.enterFunction(1, 0); | ||
| T1.enterFunction(2, 1); | ||
| T1.enterFunction(3, 2); | ||
| T1.exitFunction(3, 3); | ||
| T1.exitFunction(2, 4); | ||
| T1.exitFunction(1, 5); | ||
|
|
||
| // We use a different allocator here to make sure that we're able to transfer | ||
| // data into a FunctionCallTrie which uses a different allocator. This | ||
| // reflects the inteded usage scenario for when we're collecting profiles that | ||
| // aggregate across threads. | ||
| auto B = FunctionCallTrie::InitAllocators(); | ||
| FunctionCallTrie Merged(B); | ||
|
|
||
| T0.mergeInto(Merged); | ||
| T1.mergeInto(Merged); | ||
|
|
||
| ASSERT_EQ(Merged.getRoots().size(), 1u); | ||
| const auto &R0 = *Merged.getRoots()[0]; | ||
| EXPECT_EQ(R0.FId, 1); | ||
| EXPECT_EQ(R0.CallCount, 2); | ||
| EXPECT_EQ(R0.CumulativeLocalTime, 10); | ||
| EXPECT_EQ(R0.Callees.size(), 1u); | ||
|
|
||
| const auto &F1 = *R0.Callees[0].NodePtr; | ||
| EXPECT_EQ(F1.FId, 2); | ||
| EXPECT_EQ(F1.CallCount, 2); | ||
| EXPECT_EQ(F1.CumulativeLocalTime, 6); | ||
| EXPECT_EQ(F1.Callees.size(), 1u); | ||
|
|
||
| const auto &F2 = *F1.Callees[0].NodePtr; | ||
| EXPECT_EQ(F2.FId, 3); | ||
| EXPECT_EQ(F2.CallCount, 2); | ||
| EXPECT_EQ(F2.CumulativeLocalTime, 2); | ||
| EXPECT_EQ(F2.Callees.size(), 0u); | ||
| } | ||
|
|
||
| } // namespace | ||
|
|
||
| } // namespace __xray |
Oops, something went wrong.