379 changes: 379 additions & 0 deletions llvm/lib/XRay/Profile.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,379 @@
//===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Defines the XRay Profile class representing the latency profile generated by
// XRay's profiling mode.
//
//===----------------------------------------------------------------------===//
#include "llvm/XRay/Profile.h"

#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/XRay/Trace.h"
#include <deque>
#include <memory>

namespace llvm {
namespace xray {

namespace {

struct BlockHeader {
uint32_t Size;
uint32_t Number;
uint64_t Thread;
};

static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
uint32_t &Offset) {
BlockHeader H;
uint32_t CurrentOffset = Offset;
H.Size = Extractor.getU32(&Offset);
if (Offset == CurrentOffset)
return make_error<StringError>(
Twine("Error parsing block header size at offset '") +
Twine(CurrentOffset) + "'",
std::make_error_code(std::errc::invalid_argument));
CurrentOffset = Offset;
H.Number = Extractor.getU32(&Offset);
if (Offset == CurrentOffset)
return make_error<StringError>(
Twine("Error parsing block header number at offset '") +
Twine(CurrentOffset) + "'",
std::make_error_code(std::errc::invalid_argument));
CurrentOffset = Offset;
H.Thread = Extractor.getU64(&Offset);
if (Offset == CurrentOffset)
return make_error<StringError>(
Twine("Error parsing block header thread id at offset '") +
Twine(CurrentOffset) + "'",
std::make_error_code(std::errc::invalid_argument));
return H;
}

static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
uint32_t &Offset) {
// We're reading a sequence of int32_t's until we find a 0.
std::vector<Profile::FuncID> Path;
auto CurrentOffset = Offset;
int32_t FuncId;
do {
FuncId = Extractor.getSigned(&Offset, 4);
if (CurrentOffset == Offset)
return make_error<StringError>(
Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'",
std::make_error_code(std::errc::invalid_argument));
CurrentOffset = Offset;
Path.push_back(FuncId);
} while (FuncId != 0);
return std::move(Path);
}

static Expected<Profile::Data> readData(DataExtractor &Extractor,
uint32_t &Offset) {
// We expect a certain number of elements for Data:
// - A 64-bit CallCount
// - A 64-bit CumulativeLocalTime counter
Profile::Data D;
auto CurrentOffset = Offset;
D.CallCount = Extractor.getU64(&Offset);
if (CurrentOffset == Offset)
return make_error<StringError>(
Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) +
"'",
std::make_error_code(std::errc::invalid_argument));
CurrentOffset = Offset;
D.CumulativeLocalTime = Extractor.getU64(&Offset);
if (CurrentOffset == Offset)
return make_error<StringError>(
Twine("Error parsing cumulative local time at offset '") +
Twine(CurrentOffset) + "'",
std::make_error_code(std::errc::invalid_argument));
return D;
}

} // namespace

Error Profile::addBlock(Block &&B) {
if (B.PathData.empty())
return make_error<StringError>(
"Block may not have empty path data.",
std::make_error_code(std::errc::invalid_argument));

Blocks.emplace_back(std::move(B));
return Error::success();
}

Expected<std::vector<Profile::FuncID>> Profile::expandPath(PathID P) const {
auto It = PathIDMap.find(P);
if (It == PathIDMap.end())
return make_error<StringError>(
Twine("PathID not found: ") + Twine(P),
std::make_error_code(std::errc::invalid_argument));
std::vector<Profile::FuncID> Path;
for (auto Node = It->second; Node; Node = Node->Caller)
Path.push_back(Node->Func);
return std::move(Path);
}

Profile::PathID Profile::internPath(ArrayRef<FuncID> P) {
if (P.empty())
return 0;

auto RootToLeafPath = reverse(P);

// Find the root.
auto It = RootToLeafPath.begin();
auto PathRoot = *It++;
auto RootIt =
find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; });

// If we've not seen this root before, remember it.
TrieNode *Node = nullptr;
if (RootIt == Roots.end()) {
NodeStorage.emplace_back();
Node = &NodeStorage.back();
Node->Func = PathRoot;
Roots.push_back(Node);
} else {
Node = *RootIt;
}

// Now traverse the path, re-creating if necessary.
while (It != RootToLeafPath.end()) {
auto NodeFuncID = *It++;
auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) {
return N->Func == NodeFuncID;
});
if (CalleeIt == Node->Callees.end()) {
NodeStorage.emplace_back();
auto NewNode = &NodeStorage.back();
NewNode->Func = NodeFuncID;
NewNode->Caller = Node;
Node->Callees.push_back(NewNode);
Node = NewNode;
} else {
Node = *CalleeIt;
}
}

// At this point, Node *must* be pointing at the leaf.
assert(Node->Func == P.front());
if (Node->ID == 0) {
Node->ID = NextID++;
PathIDMap.insert({Node->ID, Node});
}
return Node->ID;
}

Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
Profile Merged;
using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
using PathDataMapPtr = std::unique_ptr<PathDataMap>;
using PathDataVector = decltype(Profile::Block::PathData);
using ThreadProfileIndexMap = DenseMap<Profile::ThreadID, PathDataMapPtr>;
ThreadProfileIndexMap ThreadProfileIndex;

for (const auto &P : {std::ref(L), std::ref(R)})
for (const auto &Block : P.get()) {
ThreadProfileIndexMap::iterator It;
std::tie(It, std::ignore) = ThreadProfileIndex.insert(
{Block.Thread, PathDataMapPtr{new PathDataMap()}});
for (const auto &PathAndData : Block.PathData) {
auto &PathID = PathAndData.first;
auto &Data = PathAndData.second;
auto NewPathID =
Merged.internPath(cantFail(P.get().expandPath(PathID)));
PathDataMap::iterator PathDataIt;
bool Inserted;
std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data});
if (!Inserted) {
auto &ExistingData = PathDataIt->second;
ExistingData.CallCount += Data.CallCount;
ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
}
}
}

for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
PathDataVector PathAndData;
PathAndData.reserve(IndexedThreadBlock.second->size());
copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData));
cantFail(
Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)}));
}
return Merged;
}

Profile mergeProfilesByStack(const Profile &L, const Profile &R) {
Profile Merged;
using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
PathDataMap PathData;
using PathDataVector = decltype(Profile::Block::PathData);
for (const auto &P : {std::ref(L), std::ref(R)})
for (const auto &Block : P.get())
for (const auto &PathAndData : Block.PathData) {
auto &PathId = PathAndData.first;
auto &Data = PathAndData.second;
auto NewPathID =
Merged.internPath(cantFail(P.get().expandPath(PathId)));
PathDataMap::iterator PathDataIt;
bool Inserted;
std::tie(PathDataIt, Inserted) = PathData.insert({NewPathID, Data});
if (!Inserted) {
auto &ExistingData = PathDataIt->second;
ExistingData.CallCount += Data.CallCount;
ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
}
}

// In the end there's a single Block, for thread 0.
PathDataVector Block;
Block.reserve(PathData.size());
copy(PathData, std::back_inserter(Block));
cantFail(Merged.addBlock({0, std::move(Block)}));
return Merged;
}

Expected<Profile> loadProfile(StringRef Filename) {
int Fd;
if (auto EC = sys::fs::openFileForRead(Filename, Fd))
return make_error<StringError>(
Twine("Cannot read profile from '") + Filename + "'", EC);

uint64_t FileSize;
if (auto EC = sys::fs::file_size(Filename, FileSize))
return make_error<StringError>(
Twine("Cannot get filesize of '") + Filename + "'", EC);

std::error_code EC;
sys::fs::mapped_file_region MappedFile(
Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
if (EC)
return make_error<StringError>(
Twine("Cannot mmap profile '") + Filename + "'", EC);
StringRef Data(MappedFile.data(), MappedFile.size());

Profile P;
uint32_t Offset = 0;
DataExtractor Extractor(Data, true, 8);

// For each block we get from the file:
while (Offset != MappedFile.size()) {
auto HeaderOrError = readBlockHeader(Extractor, Offset);
if (!HeaderOrError)
return HeaderOrError.takeError();

// TODO: Maybe store this header information for each block, even just for
// debugging?
const auto &Header = HeaderOrError.get();

// Read in the path data.
auto PathOrError = readPath(Extractor, Offset);
if (!PathOrError)
return PathOrError.takeError();
const auto &Path = PathOrError.get();

// For each path we encounter, we should intern it to get a PathID.
auto DataOrError = readData(Extractor, Offset);
if (!DataOrError)
return DataOrError.takeError();
auto &Data = DataOrError.get();

if (auto E =
P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread},
{{P.internPath(Path), std::move(Data)}}}))
return std::move(E);
}

return P;
}

namespace {

struct StackEntry {
uint64_t Timestamp;
Profile::FuncID FuncId;
};

} // namespace

Expected<Profile> profileFromTrace(const Trace &T) {
Profile P;

// The implementation of the algorithm re-creates the execution of
// the functions based on the trace data. To do this, we set up a number of
// data structures to track the execution context of every thread in the
// Trace.
DenseMap<Profile::ThreadID, std::vector<StackEntry>> ThreadStacks;
DenseMap<Profile::ThreadID, DenseMap<Profile::PathID, Profile::Data>>
ThreadPathData;

// We then do a pass through the Trace to account data on a per-thread-basis.
for (const auto &E : T) {
auto &TSD = ThreadStacks[E.TId];
switch (E.Type) {
case RecordTypes::ENTER:
case RecordTypes::ENTER_ARG:

// Push entries into the function call stack.
TSD.push_back({E.TSC, E.FuncId});
break;

case RecordTypes::EXIT:
case RecordTypes::TAIL_EXIT:

// Exits cause some accounting to happen, based on the state of the stack.
// For each function we pop off the stack, we take note of the path and
// record the cumulative state for this path. As we're doing this, we
// intern the path into the Profile.
while (!TSD.empty()) {
auto Top = TSD.back();
auto FunctionLocalTime = AbsoluteDifference(Top.Timestamp, E.TSC);
SmallVector<Profile::FuncID, 16> Path;
transform(reverse(TSD), std::back_inserter(Path),
std::mem_fn(&StackEntry::FuncId));
auto InternedPath = P.internPath(Path);
auto &TPD = ThreadPathData[E.TId][InternedPath];
++TPD.CallCount;
TPD.CumulativeLocalTime += FunctionLocalTime;
TSD.pop_back();

// If we've matched the corresponding entry event for this function,
// then we exit the loop.
if (Top.FuncId == E.FuncId)
break;

// FIXME: Consider the intermediate times and the cumulative tree time
// as well.
}

break;
}
}

// Once we've gone through the Trace, we now create one Block per thread in
// the Profile.
for (const auto &ThreadPaths : ThreadPathData) {
const auto &TID = ThreadPaths.first;
const auto &PathsData = ThreadPaths.second;
if (auto E = P.addBlock({
TID,
std::vector<std::pair<Profile::PathID, Profile::Data>>(
PathsData.begin(), PathsData.end()),
}))
return std::move(E);
}

return P;
}

} // namespace xray
} // namespace llvm
2 changes: 2 additions & 0 deletions llvm/unittests/XRay/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
set(LLVM_LINK_COMPONENTS
Support
XRay
)

add_llvm_unittest(XRayTests
GraphTest.cpp
ProfileTest.cpp
)

add_dependencies(XRayTests intrinsics_gen)
218 changes: 218 additions & 0 deletions llvm/unittests/XRay/ProfileTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
//===- ProfileTest.cpp - XRay Profile unit tests ----------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/XRay/Profile.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

#include <numeric>

namespace llvm {
namespace xray {
namespace {

using ::testing::AllOf;
using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::Field;
using ::testing::Not;
using ::testing::Pair;
using ::testing::UnorderedElementsAre;

TEST(ProfileTest, CreateProfile) { Profile P; }

TEST(ProfileTest, InternPath) {
Profile P;
auto Path0 = P.internPath({3, 2, 1});
auto Path1 = P.internPath({3, 2, 1});
auto Path2 = P.internPath({2, 1});
EXPECT_THAT(Path0, Eq(Path1));
EXPECT_THAT(Path0, Not(Eq(Path2)));
}

TEST(ProfileTest, ExpandPath) {
Profile P;
auto PathID = P.internPath({3, 2, 1});
auto PathOrError = P.expandPath(PathID);
if (!PathOrError)
FAIL() << "Error: " << PathOrError.takeError();
EXPECT_THAT(PathOrError.get(), ElementsAre(3, 2, 1));
}

TEST(ProfileTest, AddBlocks) {
Profile P;
// Expect an error on adding empty blocks.
EXPECT_TRUE(errorToBool(P.addBlock({})));

// Thread blocks may not be empty.
EXPECT_TRUE(errorToBool(P.addBlock({1, {}})));

// Thread blocks with data must succeed.
EXPECT_FALSE(errorToBool(P.addBlock(
Profile::Block{Profile::ThreadID{1},
{
{P.internPath({2, 1}), Profile::Data{1, 1000}},
{P.internPath({3, 2, 1}), Profile::Data{10, 100}},
}})));
}

TEST(ProfileTest, MergeProfilesByThread) {
Profile P0, P1;

// Set up the blocks for two different threads in P0.
EXPECT_FALSE(errorToBool(P0.addBlock(
Profile::Block{Profile::ThreadID{1},
{{P0.internPath({2, 1}), Profile::Data{1, 1000}},
{P0.internPath({4, 1}), Profile::Data{1, 1000}}}})));
EXPECT_FALSE(errorToBool(P0.addBlock(
Profile::Block{Profile::ThreadID{2},
{{P0.internPath({3, 1}), Profile::Data{1, 1000}}}})));

// Set up the blocks for two different threads in P1.
EXPECT_FALSE(errorToBool(P1.addBlock(
Profile::Block{Profile::ThreadID{1},
{{P1.internPath({2, 1}), Profile::Data{1, 1000}}}})));
EXPECT_FALSE(errorToBool(P1.addBlock(
Profile::Block{Profile::ThreadID{2},
{{P1.internPath({3, 1}), Profile::Data{1, 1000}},
{P1.internPath({4, 1}), Profile::Data{1, 1000}}}})));

Profile Merged = mergeProfilesByThread(P0, P1);
EXPECT_THAT(
Merged,
UnorderedElementsAre(
// We want to see two threads after the merge.
AllOf(Field(&Profile::Block::Thread, Eq(Profile::ThreadID{1})),
Field(&Profile::Block::PathData,
UnorderedElementsAre(
Pair(Merged.internPath({2, 1}),
AllOf(Field(&Profile::Data::CallCount, Eq(2u)),
Field(&Profile::Data::CumulativeLocalTime,
Eq(2000u)))),
Pair(Merged.internPath({4, 1}),
AllOf(Field(&Profile::Data::CallCount, Eq(1u)),
Field(&Profile::Data::CumulativeLocalTime,
Eq(1000u))))))),
AllOf(Field(&Profile::Block::Thread, Eq(Profile::ThreadID{2})),
Field(&Profile::Block::PathData,
UnorderedElementsAre(
Pair(Merged.internPath({3, 1}),
AllOf(Field(&Profile::Data::CallCount, Eq(2u)),
Field(&Profile::Data::CumulativeLocalTime,
Eq(2000u)))),
Pair(Merged.internPath({4, 1}),
AllOf(Field(&Profile::Data::CallCount, Eq(1u)),
Field(&Profile::Data::CumulativeLocalTime,
Eq(1000u)))))))));
}

TEST(ProfileTest, MergeProfilesByStack) {
Profile P0, P1;
EXPECT_FALSE(errorToBool(P0.addBlock(
Profile::Block{Profile::ThreadID{1},
{{P0.internPath({2, 1}), Profile::Data{1, 1000}}}})));
EXPECT_FALSE(errorToBool(P1.addBlock(
Profile::Block{Profile::ThreadID{2},
{{P1.internPath({2, 1}), Profile::Data{1, 1000}}}})));

Profile Merged = mergeProfilesByStack(P0, P1);
EXPECT_THAT(Merged,
ElementsAre(AllOf(
// We expect that we lose the ThreadID dimension in this
// algorithm.
Field(&Profile::Block::Thread, Eq(Profile::ThreadID{0})),
Field(&Profile::Block::PathData,
ElementsAre(Pair(
Merged.internPath({2, 1}),
AllOf(Field(&Profile::Data::CallCount, Eq(2u)),
Field(&Profile::Data::CumulativeLocalTime,
Eq(2000u)))))))));
}

TEST(ProfileTest, MergeProfilesByStackAccumulate) {
std::vector<Profile> Profiles(3);
EXPECT_FALSE(errorToBool(Profiles[0].addBlock(Profile::Block{
Profile::ThreadID{1},
{{Profiles[0].internPath({2, 1}), Profile::Data{1, 1000}}}})));
EXPECT_FALSE(errorToBool(Profiles[1].addBlock(Profile::Block{
Profile::ThreadID{2},
{{Profiles[1].internPath({2, 1}), Profile::Data{1, 1000}}}})));
EXPECT_FALSE(errorToBool(Profiles[2].addBlock(Profile::Block{
Profile::ThreadID{3},
{{Profiles[2].internPath({2, 1}), Profile::Data{1, 1000}}}})));
Profile Merged = std::accumulate(Profiles.begin(), Profiles.end(), Profile(),
mergeProfilesByStack);
EXPECT_THAT(Merged,
ElementsAre(AllOf(
// We expect that we lose the ThreadID dimension in this
// algorithm.
Field(&Profile::Block::Thread, Eq(Profile::ThreadID{0})),
Field(&Profile::Block::PathData,
ElementsAre(Pair(
Merged.internPath({2, 1}),
AllOf(Field(&Profile::Data::CallCount, Eq(3u)),
Field(&Profile::Data::CumulativeLocalTime,
Eq(3000u)))))))));
}

TEST(ProfileTest, MergeProfilesByThreadAccumulate) {
std::vector<Profile> Profiles(2);

// Set up the blocks for two different threads in Profiles[0].
EXPECT_FALSE(errorToBool(Profiles[0].addBlock(Profile::Block{
Profile::ThreadID{1},
{{Profiles[0].internPath({2, 1}), Profile::Data{1, 1000}},
{Profiles[0].internPath({4, 1}), Profile::Data{1, 1000}}}})));
EXPECT_FALSE(errorToBool(Profiles[0].addBlock(Profile::Block{
Profile::ThreadID{2},
{{Profiles[0].internPath({3, 1}), Profile::Data{1, 1000}}}})));

// Set up the blocks for two different threads in Profiles[1].
EXPECT_FALSE(errorToBool(Profiles[1].addBlock(Profile::Block{
Profile::ThreadID{1},
{{Profiles[1].internPath({2, 1}), Profile::Data{1, 1000}}}})));
EXPECT_FALSE(errorToBool(Profiles[1].addBlock(Profile::Block{
Profile::ThreadID{2},
{{Profiles[1].internPath({3, 1}), Profile::Data{1, 1000}},
{Profiles[1].internPath({4, 1}), Profile::Data{1, 1000}}}})));

Profile Merged = std::accumulate(Profiles.begin(), Profiles.end(), Profile(),
mergeProfilesByThread);
EXPECT_THAT(
Merged,
UnorderedElementsAre(
// We want to see two threads after the merge.
AllOf(Field(&Profile::Block::Thread, Eq(Profile::ThreadID{1})),
Field(&Profile::Block::PathData,
UnorderedElementsAre(
Pair(Merged.internPath({2, 1}),
AllOf(Field(&Profile::Data::CallCount, Eq(2u)),
Field(&Profile::Data::CumulativeLocalTime,
Eq(2000u)))),
Pair(Merged.internPath({4, 1}),
AllOf(Field(&Profile::Data::CallCount, Eq(1u)),
Field(&Profile::Data::CumulativeLocalTime,
Eq(1000u))))))),
AllOf(Field(&Profile::Block::Thread, Eq(Profile::ThreadID{2})),
Field(&Profile::Block::PathData,
UnorderedElementsAre(
Pair(Merged.internPath({3, 1}),
AllOf(Field(&Profile::Data::CallCount, Eq(2u)),
Field(&Profile::Data::CumulativeLocalTime,
Eq(2000u)))),
Pair(Merged.internPath({4, 1}),
AllOf(Field(&Profile::Data::CallCount, Eq(1u)),
Field(&Profile::Data::CumulativeLocalTime,
Eq(1000u)))))))));
}
// FIXME: Add a test creating a Trace and generating a Profile
// FIXME: Add tests for ranking/sorting profile blocks by dimension

} // namespace
} // namespace xray
} // namespace llvm