Skip to content

Commit

Permalink
Fix profiling for functions with multiple entry points
Browse files Browse the repository at this point in the history
Summary:
Fix issue in memcpy where one of its entry points was getting
no profiling data and was wrongly considered cold, being put in the cold
region.

(cherry picked from FBD5569156)
  • Loading branch information
rafaelauler authored and maksfb committed Aug 3, 2017
1 parent b81ff8a commit 21c48f7
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 39 deletions.
4 changes: 2 additions & 2 deletions bolt/BinaryContext.h
Expand Up @@ -141,7 +141,7 @@ class BinaryContext {

std::function<void(std::error_code)> ErrorCheck;

const DataReader &DR;
DataReader &DR;

/// Sum of execution count of all functions
uint64_t SumExecutionCount{0};
Expand All @@ -163,7 +163,7 @@ class BinaryContext {
std::unique_ptr<const MCInstrAnalysis> MIA,
std::unique_ptr<const MCRegisterInfo> MRI,
std::unique_ptr<MCDisassembler> DisAsm,
const DataReader &DR) :
DataReader &DR) :
Ctx(std::move(Ctx)),
DwCtx(std::move(DwCtx)),
TheTriple(std::move(TheTriple)),
Expand Down
72 changes: 60 additions & 12 deletions bolt/BinaryFunction.cpp
Expand Up @@ -1949,11 +1949,49 @@ void BinaryFunction::addEntryPoint(uint64_t Address) {
}
}

bool BinaryFunction::fetchProfileForOtherEntryPoints() {
if (!BranchData)
return false;

// Check if we are missing profiling data for secondary entry points
bool First{true};
bool Updated{false};
for (auto BB : BasicBlocks) {
if (First) {
First = false;
continue;
}
if (BB->isEntryPoint()) {
uint64_t EntryAddress = BB->getOffset() + getAddress();
// Look for branch data associated with this entry point
std::vector<std::string> Names;
std::multimap<uint64_t, std::string>::iterator I, E;
for (std::tie(I, E) = BC.GlobalAddresses.equal_range(EntryAddress);
I != E; ++I) {
Names.push_back(I->second);
}
if (!Names.empty()) {
if (FuncBranchData *Data = BC.DR.getFuncBranchData(Names)) {
BranchData->appendFrom(*Data, BB->getOffset());
Data->Used = true;
Updated = true;
}
}
}
}
return Updated;
}

void BinaryFunction::matchProfileData() {
if (BranchData) {
ProfileMatchRatio = evaluateProfileData(*BranchData);
if (ProfileMatchRatio == 1.0f)
if (ProfileMatchRatio == 1.0f) {
if (fetchProfileForOtherEntryPoints()) {
ProfileMatchRatio = evaluateProfileData(*BranchData);
ExecutionCount = BranchData->ExecutionCount;
}
return;
}
}

// Check if the function name can fluctuate between several compilations
Expand All @@ -1971,7 +2009,7 @@ void BinaryFunction::matchProfileData() {

// Check for a profile that matches with 100% confidence.
const auto AllBranchData = BC.DR.getFuncBranchDataRegex(getNames());
for (const auto *NewBranchData : AllBranchData) {
for (auto *NewBranchData : AllBranchData) {
// Prevent functions from sharing the same profile.
if (NewBranchData->Used)
continue;
Expand Down Expand Up @@ -2097,8 +2135,12 @@ float BinaryFunction::evaluateProfileData(const FuncBranchData &BranchData) {
if (II == Instructions.end())
return true;
const auto &Instr = II->second;
// Check for calls, tail calls, rets and indirect branches.
// When matching profiling info, we did not reach the stage
// when we identify tail calls, so they are still represented
// by regular branch instructions and we need isBranch() here.
if (BC.MIA->isCall(Instr) ||
BC.MIA->isIndirectBranch(Instr) ||
BC.MIA->isBranch(Instr) ||
BC.MIA->isReturn(Instr))
return false;
// Check for "rep ret"
Expand Down Expand Up @@ -2153,27 +2195,33 @@ void BinaryFunction::inferFallThroughCounts() {
for (auto CurBB : BasicBlocks) {
CurBB->ExecutionCount = 0;
}
BasicBlocks.front()->setExecutionCount(ExecutionCount);

for (auto CurBB : BasicBlocks) {
auto SuccCount = CurBB->branch_info_begin();
for (auto Succ : CurBB->successors()) {
// Do not update execution count of the entry block (when we have tail
// calls). We already accounted for those when computing the func count.
if (Succ == BasicBlocks.front()) {
++SuccCount;
continue;
}
if (SuccCount->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
Succ->setExecutionCount(Succ->getExecutionCount() + SuccCount->Count);
++SuccCount;
}
}

// Update execution counts of landing pad blocks.
// Set entry BBs to zero, we'll update their execution count next with entry
// data (we maintain a separate data structure for branches to function entry
// points)
for (auto BB : BasicBlocks) {
if (BB->isEntryPoint())
BB->ExecutionCount = 0;
}

// Update execution counts of landing pad blocks and entry BBs
// There is a slight skew introduced here as branches originated from RETs
// may be accounted for in the execution count of an entry block if the last
// instruction in a predecessor fall-through block is a call. This situation
// should rarely happen because there are few multiple-entry functions.
for (const auto &I : BranchData->EntryData) {
BinaryBasicBlock *BB = getBasicBlockAtOffset(I.To.Offset);
if (BB && LandingPads.find(BB->getLabel()) != LandingPads.end()) {
if (BB && (BB->isEntryPoint() ||
LandingPads.find(BB->getLabel()) != LandingPads.end())) {
BB->setExecutionCount(BB->getExecutionCount() + I.Branches);
}
}
Expand Down
7 changes: 6 additions & 1 deletion bolt/BinaryFunction.h
Expand Up @@ -313,7 +313,7 @@ class BinaryFunction {
uint64_t ExecutionCount{COUNT_NO_PROFILE};

/// Profile data for branches.
const FuncBranchData *BranchData{nullptr};
FuncBranchData *BranchData{nullptr};

/// Profile match ratio for BranchData.
float ProfileMatchRatio{0.0f};
Expand Down Expand Up @@ -1713,6 +1713,11 @@ class BinaryFunction {
/// cannot be statically evaluated for any given indirect branch.
bool postProcessIndirectBranches();

/// In functions with multiple entry points, the profile collection records
/// data for other entry points in a different function entry. This function
/// attempts to fetch extra profile data for each secondary entry point.
bool fetchProfileForOtherEntryPoints();

/// Find the best matching profile for a function after the creation of basic
/// blocks.
void matchProfileData();
Expand Down
52 changes: 38 additions & 14 deletions bolt/DataReader.cpp
Expand Up @@ -56,6 +56,28 @@ FuncBranchData::getBranchRange(uint64_t From) const {
return iterator_range<ContainerTy::const_iterator>(Range.first, Range.second);
}

void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) {
Data.insert(Data.end(), FBD.Data.begin(), FBD.Data.end());
for (auto I = Data.begin(), E = Data.end(); I != E; ++I) {
if (I->From.Name == FBD.Name) {
I->From.Name = this->Name;
I->From.Offset += Offset;
}
if (I->To.Name == FBD.Name) {
I->To.Name = this->Name;
I->To.Offset += Offset;
}
}
std::stable_sort(Data.begin(), Data.end());
ExecutionCount += FBD.ExecutionCount;
for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) {
assert(I->To.Name == FBD.Name);
auto NewElmt = EntryData.insert(EntryData.end(), *I);
NewElmt->To.Name = this->Name;
NewElmt->To.Offset += Offset;
}
}

void BranchInfo::mergeWith(const BranchInfo &BI) {

// Merge branch and misprediction counts.
Expand Down Expand Up @@ -160,7 +182,8 @@ void BranchInfo::print(raw_ostream &OS) const {
ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From,
uint64_t To) const {
for (const auto &I : Data) {
if (I.From.Offset == From && I.To.Offset == To)
if (I.From.Offset == From && I.To.Offset == To &&
I.From.Name == I.To.Name)
return I;
}
return make_error_code(llvm::errc::invalid_argument);
Expand Down Expand Up @@ -422,8 +445,10 @@ std::error_code DataReader::parse() {
auto I = GetOrCreateFuncEntry(BI.From.Name);
I->getValue().Data.emplace_back(std::move(BI));

// Add entry data for branches from another function.
if (BI.To.IsSymbol && !BI.From.Name.equals(BI.To.Name)) {
// Add entry data for branches to another function or branches
// to entry points (including recursive calls)
if (BI.To.IsSymbol &&
(!BI.From.Name.equals(BI.To.Name) || BI.To.Offset == 0)) {
I = GetOrCreateFuncEntry(BI.To.Name);
I->getValue().EntryData.emplace_back(std::move(BI));
}
Expand All @@ -446,44 +471,43 @@ std::error_code DataReader::parse() {
}

void DataReader::buildLTONameMap() {
for (const auto &FuncData : FuncsMap) {
for (auto &FuncData : FuncsMap) {
const auto FuncName = FuncData.getKey();
const auto CommonName = getLTOCommonName(FuncName);
if (CommonName)
LTOCommonNameMap[*CommonName].push_back(&FuncData.getValue());
}
}

const FuncBranchData *
DataReader::getFuncBranchData(const std::vector<std::string> &FuncNames) const {
FuncBranchData *
DataReader::getFuncBranchData(const std::vector<std::string> &FuncNames) {
// Do a reverse order iteration since the name in profile has a higher chance
// of matching a name at the end of the list.
for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
const auto I = FuncsMap.find(normalizeName(*FI));
auto I = FuncsMap.find(normalizeName(*FI));
if (I != FuncsMap.end())
return &I->getValue();
}
return nullptr;
}

std::vector<const FuncBranchData *>
DataReader::getFuncBranchDataRegex(const std::vector<std::string> &FuncNames)
const {
std::vector<const FuncBranchData *> AllData;
std::vector<FuncBranchData *>
DataReader::getFuncBranchDataRegex(const std::vector<std::string> &FuncNames) {
std::vector<FuncBranchData *> AllData;
// Do a reverse order iteration since the name in profile has a higher chance
// of matching a name at the end of the list.
for (auto FI = FuncNames.rbegin(), FE = FuncNames.rend(); FI != FE; ++FI) {
StringRef Name = *FI;
Name = normalizeName(Name);
const auto LTOCommonName = getLTOCommonName(Name);
if (LTOCommonName) {
const auto I = LTOCommonNameMap.find(*LTOCommonName);
auto I = LTOCommonNameMap.find(*LTOCommonName);
if (I != LTOCommonNameMap.end()) {
const auto &CommonData = I->getValue();
auto &CommonData = I->getValue();
AllData.insert(AllData.end(), CommonData.begin(), CommonData.end());
}
} else {
const auto I = FuncsMap.find(Name);
auto I = FuncsMap.find(Name);
if (I != FuncsMap.end()) {
return {&I->getValue()};
}
Expand Down
16 changes: 10 additions & 6 deletions bolt/DataReader.h
Expand Up @@ -138,7 +138,7 @@ struct FuncBranchData {
int64_t ExecutionCount{0};

/// Indicate if the data was used.
mutable bool Used{false};
bool Used{false};

FuncBranchData(StringRef Name, ContainerTy Data)
: Name(Name), Data(std::move(Data)) {}
Expand All @@ -157,6 +157,10 @@ struct FuncBranchData {
/// Find all the branches originating at From.
iterator_range<ContainerTy::const_iterator> getBranchRange(
uint64_t From) const;

/// Append the branch data of another function located \p Offset bytes away
/// from the entry of this function.
void appendFrom(const FuncBranchData &FBD, uint64_t Offset);
};

//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -219,14 +223,14 @@ class DataReader {
std::error_code parse();

/// Return branch data matching one of the names in \p FuncNames.
const FuncBranchData *
getFuncBranchData(const std::vector<std::string> &FuncNames) const;
FuncBranchData *
getFuncBranchData(const std::vector<std::string> &FuncNames);

/// Return a vector of all FuncBranchData matching the list of names.
/// Internally use fuzzy matching to match special names like LTO-generated
/// function names.
std::vector<const FuncBranchData *>
getFuncBranchDataRegex(const std::vector<std::string> &FuncNames) const;
std::vector<FuncBranchData *>
getFuncBranchDataRegex(const std::vector<std::string> &FuncNames);

using FuncsMapType = StringMap<FuncBranchData>;

Expand Down Expand Up @@ -266,7 +270,7 @@ class DataReader {
static const char FieldSeparator = ' ';

/// Map of common LTO names to possible matching profiles.
StringMap<std::vector<const FuncBranchData *>> LTOCommonNameMap;
StringMap<std::vector<FuncBranchData *>> LTOCommonNameMap;
};

}
Expand Down
6 changes: 3 additions & 3 deletions bolt/RewriteInstance.cpp
Expand Up @@ -512,7 +512,7 @@ namespace {
/// Create BinaryContext for a given architecture \p ArchName and
/// triple \p TripleName.
std::unique_ptr<BinaryContext>
createBinaryContext(ELFObjectFileBase *File, const DataReader &DR,
createBinaryContext(ELFObjectFileBase *File, DataReader &DR,
std::unique_ptr<DWARFContext> DwCtx) {
std::string ArchName;
std::string TripleName;
Expand Down Expand Up @@ -630,7 +630,7 @@ createBinaryContext(ELFObjectFileBase *File, const DataReader &DR,

} // namespace

RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const DataReader &DR,
RewriteInstance::RewriteInstance(ELFObjectFileBase *File, DataReader &DR,
const int Argc, const char *const *Argv)
: InputFile(File), Argc(Argc), Argv(Argv),
BC(createBinaryContext(
Expand Down Expand Up @@ -1677,7 +1677,7 @@ void RewriteInstance::readProfileData() {

for (auto &BFI : BinaryFunctions) {
auto &Function = BFI.second;
const auto *FuncData = BC->DR.getFuncBranchData(Function.getNames());
auto *FuncData = BC->DR.getFuncBranchData(Function.getNames());
if (!FuncData)
continue;
Function.BranchData = FuncData;
Expand Down
2 changes: 1 addition & 1 deletion bolt/RewriteInstance.h
Expand Up @@ -146,7 +146,7 @@ class ExecutableFileMemoryManager : public SectionMemoryManager {
/// events.
class RewriteInstance {
public:
RewriteInstance(llvm::object::ELFObjectFileBase *File, const DataReader &DR,
RewriteInstance(llvm::object::ELFObjectFileBase *File, DataReader &DR,
const int Argc, const char *const *Argv);
~RewriteInstance();

Expand Down

0 comments on commit 21c48f7

Please sign in to comment.