222 changes: 131 additions & 91 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,8 @@ Error DataAggregator::readProfile(BinaryContext &BC) {
// BAT YAML is handled by DataAggregator since normal YAML output requires
// CFG which is not available in BAT mode.
if (usesBAT()) {
// Postprocess split function profile for BAT
fixupBATProfile(BC);
if (opts::ProfileFormat == opts::ProfileFormatKind::PF_YAML)
if (std::error_code EC = writeBATYAML(BC, opts::OutputFilename))
report_error("cannot create output data file", EC);
Expand Down Expand Up @@ -662,19 +664,18 @@ DataAggregator::getBinaryFunctionContainingAddress(uint64_t Address) const {
/*UseMaxSize=*/true);
}

BinaryFunction *
DataAggregator::getParentFunction(const BinaryFunction &Func) const {
if (BAT)
if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress()))
return getBinaryFunctionContainingAddress(HotAddr);
return nullptr;
}

StringRef DataAggregator::getLocationName(const BinaryFunction &Func) const {
StringRef DataAggregator::getLocationName(BinaryFunction &Func,
uint64_t Count) {
if (!BAT)
return Func.getOneName();

const BinaryFunction *OrigFunc = &Func;
if (const uint64_t HotAddr = BAT->fetchParentAddress(Func.getAddress())) {
NumColdSamples += Count;
BinaryFunction *HotFunc = getBinaryFunctionContainingAddress(HotAddr);
if (HotFunc)
OrigFunc = HotFunc;
}
// If it is a local function, prefer the name containing the file name where
// the local function was declared
for (StringRef AlternativeName : OrigFunc->getNames()) {
Expand All @@ -689,17 +690,12 @@ StringRef DataAggregator::getLocationName(const BinaryFunction &Func) const {
return OrigFunc->getOneName();
}

bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
bool DataAggregator::doSample(BinaryFunction &Func, uint64_t Address,
uint64_t Count) {
BinaryFunction *ParentFunc = getParentFunction(OrigFunc);
BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
if (ParentFunc)
NumColdSamples += Count;

auto I = NamesToSamples.find(Func.getOneName());
if (I == NamesToSamples.end()) {
bool Success;
StringRef LocName = getLocationName(Func);
StringRef LocName = getLocationName(Func, Count);
std::tie(I, Success) = NamesToSamples.insert(
std::make_pair(Func.getOneName(),
FuncSampleData(LocName, FuncSampleData::ContainerTy())));
Expand All @@ -719,12 +715,22 @@ bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
FuncBranchData *AggrData = getBranchData(Func);
if (!AggrData) {
AggrData = &NamesToBranches[Func.getOneName()];
AggrData->Name = getLocationName(Func);
AggrData->Name = getLocationName(Func, Count);
setBranchData(Func, AggrData);
}

From -= Func.getAddress();
To -= Func.getAddress();
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: bumpBranchCount: "
<< formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
if (BAT) {
From = BAT->translate(Func.getAddress(), From, /*IsBranchSrc=*/true);
To = BAT->translate(Func.getAddress(), To, /*IsBranchSrc=*/false);
LLVM_DEBUG(
dbgs() << "BOLT-DEBUG: BAT translation on bumpBranchCount: "
<< formatv("{0} @ {1:x} -> {0} @ {2:x}\n", Func, From, To));
}

AggrData->bumpBranchCount(From, To, Count, Mispreds);
return true;
}
Expand All @@ -738,24 +744,30 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
StringRef SrcFunc;
StringRef DstFunc;
if (FromFunc) {
SrcFunc = getLocationName(*FromFunc);
SrcFunc = getLocationName(*FromFunc, Count);
FromAggrData = getBranchData(*FromFunc);
if (!FromAggrData) {
FromAggrData = &NamesToBranches[FromFunc->getOneName()];
FromAggrData->Name = SrcFunc;
setBranchData(*FromFunc, FromAggrData);
}
From -= FromFunc->getAddress();
if (BAT)
From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);

recordExit(*FromFunc, From, Mispreds, Count);
}
if (ToFunc) {
DstFunc = getLocationName(*ToFunc);
DstFunc = getLocationName(*ToFunc, 0);
ToAggrData = getBranchData(*ToFunc);
if (!ToAggrData) {
ToAggrData = &NamesToBranches[ToFunc->getOneName()];
ToAggrData->Name = DstFunc;
setBranchData(*ToFunc, ToAggrData);
}
To -= ToFunc->getAddress();
if (BAT)
To = BAT->translate(ToFunc->getAddress(), To, /*IsBranchSrc=*/false);

recordEntry(*ToFunc, To, Mispreds, Count);
}
Expand All @@ -771,32 +783,15 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,

bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
uint64_t Mispreds) {
auto handleAddress = [&](uint64_t &Addr, bool IsFrom) -> BinaryFunction * {
if (BinaryFunction *Func = getBinaryFunctionContainingAddress(Addr)) {
Addr -= Func->getAddress();

if (BAT)
Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);

if (BinaryFunction *ParentFunc = getParentFunction(*Func)) {
Func = ParentFunc;
if (IsFrom)
NumColdSamples += Count;
}

return Func;
}
return nullptr;
};

BinaryFunction *FromFunc = handleAddress(From, /*IsFrom=*/true);
BinaryFunction *ToFunc = handleAddress(To, /*IsFrom=*/false);
BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From);
BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To);
if (!FromFunc && !ToFunc)
return false;

// Treat recursive control transfers as inter-branches.
if (FromFunc == ToFunc && To != 0) {
recordBranch(*FromFunc, From, To, Count, Mispreds);
if (FromFunc == ToFunc && (To != ToFunc->getAddress())) {
recordBranch(*FromFunc, From - FromFunc->getAddress(),
To - FromFunc->getAddress(), Count, Mispreds);
return doIntraBranch(*FromFunc, From, To, Count, Mispreds);
}

Expand Down Expand Up @@ -847,14 +842,9 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
<< FromFunc->getPrintName() << ":"
<< Twine::utohexstr(First.To) << " to "
<< Twine::utohexstr(Second.From) << ".\n");
BinaryFunction *ParentFunc = getParentFunction(*FromFunc);
for (auto [From, To] : *FTs) {
if (BAT) {
From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false);
}
doIntraBranch(ParentFunc ? *ParentFunc : *FromFunc, From, To, Count, false);
}
for (const std::pair<uint64_t, uint64_t> &Pair : *FTs)
doIntraBranch(*FromFunc, Pair.first + FromFunc->getAddress(),
Pair.second + FromFunc->getAddress(), Count, false);

return true;
}
Expand Down Expand Up @@ -2283,6 +2273,29 @@ DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
return std::error_code();
}

void DataAggregator::fixupBATProfile(BinaryContext &BC) {
for (auto &[FuncName, Branches] : NamesToBranches) {
BinaryData *BD = BC.getBinaryDataByName(FuncName);
assert(BD);
uint64_t FuncAddress = BD->getAddress();
if (!BAT->isBATFunction(FuncAddress))
continue;
// Filter out cold fragments
if (!BD->getSectionName().equals(BC.getMainCodeSectionName()))
continue;
// Convert inter-branches between hot and cold fragments into
// intra-branches.
for (auto &[OffsetFrom, CallToMap] : Branches.InterIndex) {
for (auto &[CallToLoc, CallToIdx] : CallToMap) {
if (CallToLoc.Name != FuncName)
continue;
Branches.IntraIndex[OffsetFrom][CallToLoc.Offset] = CallToIdx;
Branches.InterIndex[OffsetFrom].erase(CallToLoc);
}
}
}
}

std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
StringRef OutputFilename) const {
std::error_code EC;
Expand Down Expand Up @@ -2332,6 +2345,9 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
uint64_t FuncAddress = BD->getAddress();
if (!BAT->isBATFunction(FuncAddress))
continue;
// Filter out cold fragments
if (!BD->getSectionName().equals(BC.getMainCodeSectionName()))
continue;
BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncAddress);
assert(BF);
YamlBF.Name = FuncName.str();
Expand All @@ -2341,62 +2357,86 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
YamlBF.NumBasicBlocks = BAT->getNumBasicBlocks(FuncAddress);
const BoltAddressTranslation::BBHashMapTy &BlockMap =
BAT->getBBHashMap(FuncAddress);
YamlBF.Blocks.resize(YamlBF.NumBasicBlocks);

for (auto &&[Idx, YamlBB] : llvm::enumerate(YamlBF.Blocks))
YamlBB.Index = Idx;

for (auto BI = BlockMap.begin(), BE = BlockMap.end(); BI != BE; ++BI)
YamlBF.Blocks[BI->second.getBBIndex()].Hash = BI->second.getBBHash();

auto getSuccessorInfo = [&](uint32_t SuccOffset, unsigned SuccDataIdx) {
auto addSuccProfile = [&](yaml::bolt::BinaryBasicBlockProfile &YamlBB,
uint64_t SuccOffset, unsigned SuccDataIdx) {
const llvm::bolt::BranchInfo &BI = Branches.Data.at(SuccDataIdx);
yaml::bolt::SuccessorInfo SI;
SI.Index = BlockMap.getBBIndex(SuccOffset);
SI.Count = BI.Branches;
SI.Mispreds = BI.Mispreds;
return SI;
YamlBB.Successors.emplace_back(SI);
};

auto getCallSiteInfo = [&](Location CallToLoc, unsigned CallToIdx,
uint32_t Offset) {
const llvm::bolt::BranchInfo &BI = Branches.Data.at(CallToIdx);
yaml::bolt::CallSiteInfo CSI;
CSI.DestId = 0; // designated for unknown functions
CSI.EntryDiscriminator = 0;
CSI.Count = BI.Branches;
CSI.Mispreds = BI.Mispreds;
CSI.Offset = Offset;
if (BinaryData *BD = BC.getBinaryDataByName(CallToLoc.Name))
YAMLProfileWriter::setCSIDestination(BC, CSI, BD->getSymbol(), BAT,
CallToLoc.Offset);
return CSI;
std::unordered_map<uint32_t, std::vector<uint32_t>> BFBranches =
BAT->getBFBranches(FuncAddress);

auto addCallsProfile = [&](yaml::bolt::BinaryBasicBlockProfile &YamlBB,
uint64_t Offset) {
// Iterate over BRANCHENTRY records in the current block
for (uint32_t BranchOffset : BFBranches[Offset]) {
if (!Branches.InterIndex.contains(BranchOffset))
continue;
for (const auto &[CallToLoc, CallToIdx] :
Branches.InterIndex.at(BranchOffset)) {
const llvm::bolt::BranchInfo &BI = Branches.Data.at(CallToIdx);
yaml::bolt::CallSiteInfo YamlCSI;
YamlCSI.DestId = 0; // designated for unknown functions
YamlCSI.EntryDiscriminator = 0;
YamlCSI.Count = BI.Branches;
YamlCSI.Mispreds = BI.Mispreds;
YamlCSI.Offset = BranchOffset - Offset;
BinaryData *CallTargetBD = BC.getBinaryDataByName(CallToLoc.Name);
if (!CallTargetBD) {
YamlBB.CallSites.emplace_back(YamlCSI);
continue;
}
uint64_t CallTargetAddress = CallTargetBD->getAddress();
BinaryFunction *CallTargetBF =
BC.getBinaryFunctionAtAddress(CallTargetAddress);
if (!CallTargetBF) {
YamlBB.CallSites.emplace_back(YamlCSI);
continue;
}
// Calls between hot and cold fragments must be handled in
// fixupBATProfile.
assert(CallTargetBF != BF && "invalid CallTargetBF");
YamlCSI.DestId = CallTargetBF->getFunctionNumber();
if (CallToLoc.Offset) {
if (BAT->isBATFunction(CallTargetAddress)) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Unsupported secondary "
"entry point in BAT function "
<< CallToLoc.Name << '\n');
} else if (const BinaryBasicBlock *CallTargetBB =
CallTargetBF->getBasicBlockAtOffset(
CallToLoc.Offset)) {
// Only record true call information, ignoring returns (normally
// won't have a target basic block) and jumps to the landing
// pads (not an entry point).
if (CallTargetBB->isEntryPoint()) {
YamlCSI.EntryDiscriminator =
CallTargetBF->getEntryIDForSymbol(
CallTargetBB->getLabel());
}
}
}
YamlBB.CallSites.emplace_back(YamlCSI);
}
}
};

for (const auto &[FromOffset, SuccKV] : Branches.IntraIndex) {
yaml::bolt::BinaryBasicBlockProfile YamlBB;
if (!BlockMap.isInputBlock(FromOffset))
continue;
unsigned Index = BlockMap.getBBIndex(FromOffset);
yaml::bolt::BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[Index];
YamlBB.Index = BlockMap.getBBIndex(FromOffset);
YamlBB.Hash = BlockMap.getBBHash(FromOffset);
for (const auto &[SuccOffset, SuccDataIdx] : SuccKV)
if (BlockMap.isInputBlock(SuccOffset))
YamlBB.Successors.emplace_back(
getSuccessorInfo(SuccOffset, SuccDataIdx));
}
for (const auto &[FromOffset, CallTo] : Branches.InterIndex) {
auto BlockIt = BlockMap.upper_bound(FromOffset);
--BlockIt;
unsigned BlockOffset = BlockIt->first;
unsigned BlockIndex = BlockIt->second.getBBIndex();
yaml::bolt::BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[BlockIndex];
uint32_t Offset = FromOffset - BlockOffset;
for (const auto &[CallToLoc, CallToIdx] : CallTo)
YamlBB.CallSites.emplace_back(
getCallSiteInfo(CallToLoc, CallToIdx, Offset));
llvm::sort(YamlBB.CallSites, [](yaml::bolt::CallSiteInfo &A,
yaml::bolt::CallSiteInfo &B) {
return A.Offset < B.Offset;
});
addSuccProfile(YamlBB, SuccOffset, SuccDataIdx);
addCallsProfile(YamlBB, FromOffset);
if (YamlBB.ExecCount || !YamlBB.Successors.empty() ||
!YamlBB.CallSites.empty())
YamlBF.Blocks.emplace_back(YamlBB);
}
BP.Functions.emplace_back(YamlBF);
}
Expand Down
59 changes: 0 additions & 59 deletions bolt/test/X86/yaml-secondary-entry-discriminator.s
Original file line number Diff line number Diff line change
Expand Up @@ -38,63 +38,10 @@
# RUN: llvm-bolt %t.exe -o %t.bat --data %t.fdata --funcs=func \
# RUN: --split-functions --split-strategy=all --split-all-cold --enable-bat

# Prepare pre-aggregated profile using %t.bat
# RUN: link_fdata %s %t.bat %t.preagg PREAGG
# Strip labels used for pre-aggregated profile
# RUN: llvm-strip -NLcall -NLindcall %t.bat

# Convert pre-aggregated profile using BAT
# RUN: perf2bolt %t.bat -p %t.preagg --pa -o %t.bat.fdata -w %t.bat.yaml

# Convert BAT fdata into YAML
# RUN: llvm-bolt %t.exe -data %t.bat.fdata -w %t.bat.fdata-yaml -o /dev/null

# Check fdata YAML - make sure that a direct call has discriminator field
# RUN: FileCheck %s --input-file %t.bat.fdata-yaml -check-prefix CHECK-BAT-YAML

# Check BAT YAML - make sure that a direct call has discriminator field
# RUN: FileCheck %s --input-file %t.bat.yaml --check-prefix CHECK-BAT-YAML

# YAML BAT test of calling BAT secondary entry from BAT function
# RUN: llvm-bolt %t.exe -o %t.bat2 --data %t.fdata --funcs=main,func \
# RUN: --split-functions --split-strategy=all --split-all-cold --enable-bat

# Prepare pre-aggregated profile using %t.bat
# RUN: link_fdata %s %t.bat2 %t.preagg2 PREAGG2

# Strip labels used for pre-aggregated profile
# RUN: llvm-strip -NLcall -NLindcall %t.bat2

# Convert pre-aggregated profile using BAT
# RUN: perf2bolt %t.bat2 -p %t.preagg2 --pa -o %t.bat2.fdata -w %t.bat2.yaml

# Convert BAT fdata into YAML
# RUN: llvm-bolt %t.exe -data %t.bat2.fdata -w %t.bat2.fdata-yaml -o /dev/null

# Check fdata YAML - make sure that a direct call has discriminator field
# RUN: FileCheck %s --input-file %t.bat2.fdata-yaml -check-prefix CHECK-BAT-YAML

# Check BAT YAML - make sure that a direct call has discriminator field
# RUN: FileCheck %s --input-file %t.bat2.yaml --check-prefix CHECK-BAT-YAML


# CHECK-BAT-YAML: - name: main
# CHECK-BAT-YAML-NEXT: fid: [[#]]
# CHECK-BAT-YAML-NEXT: hash: 0xADF270D550151185
# CHECK-BAT-YAML-NEXT: exec: 0
# CHECK-BAT-YAML-NEXT: nblocks: 4
# CHECK-BAT-YAML-NEXT: blocks:
# CHECK-BAT-YAML: - bid: 1
# CHECK-BAT-YAML-NEXT: insns: [[#]]
# CHECK-BAT-YAML-NEXT: hash: 0x36A303CBA4360018
# CHECK-BAT-YAML-NEXT: calls: [ { off: 0x0, fid: [[#]], disc: 1, cnt: 1

.globl func
.type func, @function
func:
# FDATA: 0 [unknown] 0 1 func 0 1 0
# PREAGG: B X:0 #func# 1 1
# PREAGG2: B X:0 #func# 1 1
.cfi_startproc
pushq %rbp
movq %rsp, %rbp
Expand Down Expand Up @@ -124,18 +71,12 @@ main:
movl $0, -4(%rbp)
testq %rax, %rax
jne Lindcall
.globl Lcall
Lcall:
call secondary_entry
# FDATA: 1 main #Lcall# 1 secondary_entry 0 1 1
# PREAGG: B #Lcall# #secondary_entry# 1 1
# PREAGG2: B #main.cold.0# #func.cold.0# 1 1
.globl Lindcall
Lindcall:
callq *%rax
# FDATA: 1 main #Lindcall# 1 secondary_entry 0 1 1
# PREAGG: B #Lindcall# #secondary_entry# 1 1
# PREAGG2: B #main.cold.1# #func.cold.0# 1 1
xorl %eax, %eax
addq $16, %rsp
popq %rbp
Expand Down