4 changes: 3 additions & 1 deletion .github/workflows/release-binaries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:

- name: Install Dependencies
run: |
pip install -r ./llvm/utils/git/requirements.txt
pip install --require-hashes -r ./llvm/utils/git/requirements.txt
- name: Check Permissions
env:
Expand Down Expand Up @@ -156,6 +156,8 @@ jobs:
rm build.tar.zst
- name: Build Stage 2
# Re-enable once PGO builds are supported.
if: false
run: |
ninja -C /mnt/build stage2-instrumented
Expand Down
134 changes: 134 additions & 0 deletions .github/workflows/restart-preempted-libcxx-jobs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
name: Restart Preempted Libc++ Workflow

# The libc++ builders run on preemptable VMs, which can be shut down at any time.
# This workflow identifies when a workflow run was canceled due to the VM being preempted,
# and restarts the workflow run.

# We identify a canceled workflow run by checking the annotations of the check runs in the check suite,
# which should contain the message "The runner has received a shutdown signal."

# Note: If a job is both preempted and also contains a non-preemption failure, we do not restart the workflow.

on:
workflow_run:
workflows: [Build and Test libc\+\+]
types:
- completed

permissions:
contents: read

jobs:
restart:
if: github.repository_owner == 'llvm' && (github.event.workflow_run.conclusion == 'failure' || github.event.workflow_run.conclusion == 'cancelled')
name: "Restart Job"
permissions:
statuses: read
checks: write
actions: write
runs-on: ubuntu-latest
steps:
- name: "Restart Job"
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea #v7.0.1
with:
script: |
// Pattern matched against failure-level annotation messages; a hit is treated
// as a non-preemption failure and prevents the workflow from being restarted.
// NOTE(review): the trailing "." is an unescaped regex wildcard, so this also
// matches e.g. "exit code 10" -- confirm that is intended.
const failure_regex = /Process completed with exit code 1./
// Pattern matched against failure-level annotation messages to detect that the
// runner was shut down while the job was running.
const preemption_regex = /The runner has received a shutdown signal/
// The workflow run carried by the triggering workflow_run event.
const wf_run = context.payload.workflow_run
core.notice(`Running on "${wf_run.display_title}" by @${wf_run.actor.login} (event: ${wf_run.event})\nWorkflow run URL: ${wf_run.html_url}`)
// Create a completed check run on the head commit of the triggering workflow
// run to record whether the workflow is being restarted or not.
//
//   conclusion: one of 'success', 'skipped' or 'neutral'.
//   message:    text stored as the summary of the check run output.
//
// Any other conclusion marks this step as failed and no check run is created.
async function create_check_run(conclusion, message) {
  if (conclusion != 'success' && conclusion != 'skipped' && conclusion != 'neutral') {
    core.setFailed('Invalid conclusion: ' + conclusion)
    // Do not send a request carrying a conclusion this script does not support.
    return
  }
  await github.rest.checks.create({
    owner: context.repo.owner,
    repo: context.repo.repo,
    name: 'Restart Preempted Job',
    head_sha: wf_run.head_sha,
    status: 'completed',
    conclusion: conclusion,
    output: {
      title: 'Restarted Preempted Job',
      summary: message
    }
  })
}
console.log('Listing check runs for suite')
const check_suites = await github.rest.checks.listForSuite({
owner: context.repo.owner,
repo: context.repo.repo,
check_suite_id: context.payload.workflow_run.check_suite_id,
per_page: 100 // FIXME: We don't have 100 check runs yet, but we should handle this better.
})
check_run_ids = [];
for (check_run of check_suites.data.check_runs) {
console.log('Checking check run: ' + check_run.id);
if (check_run.status != 'completed') {
console.log('Check run was not completed. Skipping.');
continue;
}
if (check_run.conclusion != 'failure' && check_run.conclusion != 'cancelled') {
console.log('Check run had conclusion: ' + check_run.conclusion + '. Skipping.');
continue;
}
check_run_ids.push(check_run.id);
}
has_preempted_job = false;
for (check_run_id of check_run_ids) {
console.log('Listing annotations for check run: ' + check_run_id);
annotations = await github.rest.checks.listAnnotations({
owner: context.repo.owner,
repo: context.repo.repo,
check_run_id: check_run_id
})
for (annotation of annotations.data) {
if (annotation.annotation_level != 'failure') {
continue;
}
const preemption_match = annotation.message.match(preemption_regex);
if (preemption_match != null) {
console.log('Found preemption message: ' + annotation.message);
has_preempted_job = true;
}
const failure_match = annotation.message.match(failure_regex);
if (failure_match != null) {
// We only want to restart the workflow if all of the failures were due to preemption.
// We don't want to restart the workflow if there were other failures.
core.notice('Choosing not to rerun workflow because we found a non-preemption failure' +
'Failure message: "' + annotation.message + '"');
await create_check_run('skipped', 'Choosing not to rerun workflow because we found a non-preemption failure\n'
+ 'Failure message: ' + annotation.message)
return;
}
}
}
if (!has_preempted_job) {
core.notice('No preempted jobs found. Not restarting workflow.');
await create_check_run('neutral', 'No preempted jobs found. Not restarting workflow.')
return;
}
core.notice("Restarted workflow: " + context.payload.workflow_run.id);
await github.rest.actions.reRunWorkflowFailedJobs({
owner: context.repo.owner,
repo: context.repo.repo,
run_id: context.payload.workflow_run.id
})
await create_check_run('success', 'Restarted workflow run due to preempted job')
2 changes: 1 addition & 1 deletion .github/workflows/version-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:

- name: Install dependencies
run: |
pip install -r ./llvm/utils/git/requirements.txt
pip install --require-hashes -r ./llvm/utils/git/requirements.txt
- name: Version Check
run: |
Expand Down
4 changes: 3 additions & 1 deletion bolt/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
set(LLVM_SUBPROJECT_TITLE "BOLT")

include(ExternalProject)

set(BOLT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
Expand Down Expand Up @@ -121,7 +123,7 @@ option(BOLT_BUILD_TOOLS
"Build the BOLT tools. If OFF, just generate build targets." ON)

add_custom_target(bolt)
set_target_properties(bolt PROPERTIES FOLDER "BOLT")
set_target_properties(bolt PROPERTIES FOLDER "BOLT/Metatargets")
add_llvm_install_targets(install-bolt DEPENDS bolt COMPONENT bolt)

include_directories(
Expand Down
1 change: 0 additions & 1 deletion bolt/cmake/modules/AddBOLT.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ include(LLVMDistributionSupport)

macro(add_bolt_executable name)
add_llvm_executable(${name} ${ARGN})
set_target_properties(${name} PROPERTIES FOLDER "BOLT")
endmacro()

macro(add_bolt_tool name)
Expand Down
5 changes: 5 additions & 0 deletions bolt/docs/BAT.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,14 @@ equals output offset.
`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
(branch or call instruction). If not set, it signifies a control flow target
(basic block offset).

`InputAddr` is omitted for equal offsets in input and output function. In this
case, `BRANCHENTRY` bits are encoded separately in a `BranchEntries` bitvector.

Deleted basic blocks are emitted as having `OutputOffset` equal to the size of
the function. They don't affect address translation and only participate in
input basic block mapping.

### Secondary Entry Points table
The table is emitted for hot fragments only. It contains `NumSecEntryPoints`
offsets denoting secondary entry points, delta encoded, implicitly starting at zero.
Expand Down
1 change: 1 addition & 0 deletions bolt/docs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ if (LLVM_ENABLE_DOXYGEN)
COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/doxygen.cfg
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMENT "Generating bolt doxygen documentation." VERBATIM)
set_target_properties(doxygen-bolt PROPERTIES FOLDER "BOLT/Docs")

if (LLVM_BUILD_DOCS)
add_dependencies(doxygen doxygen-bolt)
Expand Down
227 changes: 89 additions & 138 deletions bolt/docs/CommandLineArgumentReference.md

Large diffs are not rendered by default.

149 changes: 149 additions & 0 deletions bolt/docs/generate_doc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
#!/usr/bin/env python3
# A tool to parse the output of `llvm-bolt --help-hidden` and update the
# documentation in CommandLineArgumentReference.md automatically.
# Run from the directory in which this file is located to update the docs.

import subprocess
from textwrap import wrap

# Default width, in characters, used when wrapping description lines.
LINE_LIMIT = 80


def wrap_text(text, indent, limit=LINE_LIMIT):
    """Wrap ``text`` so every line plus ``indent`` fits within ``limit``.

    The first line of the result carries no indent; each following line is
    prefixed with ``indent``. Returns the wrapped text as a single string.
    """
    line_separator = "\n" + indent
    return line_separator.join(wrap(text, width=limit - len(indent)))


def add_info(sections, section, option, description):
    """Append ``option`` and its formatted description to ``sections[section]``.

    ``description`` is an iterable of lines. Lines longer than ``LINE_LIMIT``
    are wrapped with ``wrap_text``; shorter lines are kept as they are. The
    stored entry is the tuple ``(option, text)`` where ``text`` is the indent
    followed by the newline-joined lines.
    """
    indent = " "
    formatted_lines = []
    for line in description:
        if len(line) > LINE_LIMIT:
            line = wrap_text(line, indent)
        formatted_lines.append(line)
    sections[section].append((option, indent + "\n".join(formatted_lines)))


def parse_bolt_options(output):
    """Split ``llvm-bolt`` help text into per-section lists of options.

    ``output`` is the help text as one string. Returns a dict keyed by the
    section headers listed below; each value is the list of
    ``(option, description)`` tuples recorded through ``add_info``.

    Header lines are recognized case-insensitively. Lines starting with "-"
    open a new option, lines starting with "=" are sub-options of the current
    option, and any other non-empty line continues the current description.
    Options whose name starts with ``--print`` or ``--time`` are filed under
    "BOLT printing options:" regardless of the section they appear in.
    """
    section_headers = [
        "Generic options:",
        "Output options:",
        "BOLT generic options:",
        "BOLT optimization options:",
        "BOLT options in relocation mode:",
        "BOLT instrumentation options:",
        "BOLT printing options:",
    ]
    # Case-insensitive lookup from a header line to the key used in `sections`.
    # Built once instead of re-folding the header list for every input line.
    canonical_headers = {header.casefold(): header for header in section_headers}

    sections = {key: [] for key in section_headers}
    current_section, prev_section = None, None
    option, description = None, []

    for line in output.split("\n"):
        cleaned_line = line.strip()

        header = canonical_headers.get(cleaned_line.casefold())
        if header is not None:
            if prev_section is not None:
                # Save last option from prev section. Skipped when there is no
                # pending option so that an empty section does not record a
                # `None` entry.
                if option is not None:
                    add_info(sections, current_section, option, description)
                option, description = None, []

            current_section = header
            prev_section = current_section
            continue

        if cleaned_line.startswith("-"):
            if option and description:
                add_info(sections, current_section, option, description)
                option, description = None, []

            parts = cleaned_line.split(" ", 1)
            if len(parts) > 1:
                option = parts[0].strip()
                descr = parts[1].strip()
                # Skip the first two characters (presumably the "- " separator
                # of the help output) and capitalize the next one. Slicing
                # keeps this safe for descriptions shorter than three
                # characters.
                descr = descr[2:3].upper() + descr[3:]
                description = [descr]
                if option.startswith(("--print", "--time")):
                    current_section = "BOLT printing options:"
                elif prev_section is not None:
                    current_section = prev_section
            continue

        if cleaned_line.startswith("="):
            parts = cleaned_line.split(maxsplit=1)
            # Split into two parts: sub-option and description
            if len(parts) == 2:
                # Rejoin with a single space
                cleaned_line = parts[0] + " " + parts[1].rstrip()
            description.append(cleaned_line)
        elif cleaned_line:  # Multiline description continuation
            description.append(cleaned_line)

    # Save the option still pending at the end of the input, if there is one.
    if option is not None:
        add_info(sections, current_section, option, description)
    return sections


def generate_markdown(sections):
    """Render parsed option sections as the Markdown reference document.

    ``sections`` maps a section title to a list of ``(option, description)``
    tuples. Each section becomes a ``###`` heading and each option a bullet
    followed by its description lines. Description lines starting with "="
    are rendered as nested sub-option bullets. Returns the document as one
    string.
    """
    markdown_lines = [
        "# BOLT - a post-link optimizer developed to speed up large applications\n",
        "## SYNOPSIS\n",
        "`llvm-bolt <executable> [-o outputfile] <executable>.bolt "
        "[-data=perf.fdata] [options]`\n",
        "## OPTIONS",
    ]

    for section, options in sections.items():
        markdown_lines.append(f"\n### {section}")
        if section == "BOLT instrumentation options:":
            markdown_lines.append(
                "\n`llvm-bolt <executable> -instrument"
                " [-o outputfile] <instrumented-executable>`"
            )
        for option, desc in options:
            markdown_lines.append(f"\n- `{option}`\n")
            # Split description into lines to handle sub-options
            for line in desc.split("\n"):
                if line.startswith("="):
                    # Sub-option: correct formatting with bullet.
                    # partition() tolerates a sub-option that has no
                    # description, which would otherwise fail to unpack.
                    sub_option, _, sub_desc = line[1:].partition(" ")
                    markdown_lines.append(f" - `{sub_option}`: {sub_desc[4:]}")
                else:
                    # Regular line of description
                    if line[2:].startswith("<"):
                        line = line.replace("<", "").replace(">", "")
                    markdown_lines.append(line)

    return "\n".join(markdown_lines)


def main():
    """Regenerate CommandLineArgumentReference.md from ``llvm-bolt`` help.

    Runs ``llvm-bolt --help-hidden``, parses its standard output and writes
    the generated Markdown to CommandLineArgumentReference.md in the current
    working directory. If the command cannot be started or exits with a
    non-zero status, the error is printed and nothing is written.
    """
    command = ["llvm-bolt", "--help-hidden"]
    try:
        help_output = subprocess.run(
            command, capture_output=True, text=True, check=True
        ).stdout
    except (OSError, subprocess.CalledProcessError) as e:
        # OSError covers the case where llvm-bolt cannot be launched at all,
        # e.g. because it is not on PATH.
        print(f"Failed to execute {' '.join(command)}:")
        print(e)
        return

    sections = parse_bolt_options(help_output)
    markdown = generate_markdown(sections)

    with open("CommandLineArgumentReference.md", "w") as md_file:
        md_file.write(markdown)


if __name__ == "__main__":
main()
4 changes: 2 additions & 2 deletions bolt/include/bolt/Core/BinaryBasicBlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ class BinaryBasicBlock {
unsigned Index{InvalidIndex};

/// Index in the current layout.
mutable unsigned LayoutIndex{InvalidIndex};
unsigned LayoutIndex{InvalidIndex};

/// Number of pseudo instructions in this block.
uint32_t NumPseudos{0};
Expand Down Expand Up @@ -891,7 +891,7 @@ class BinaryBasicBlock {
}

/// Set layout index. To be used by BinaryFunction.
void setLayoutIndex(unsigned Index) const { LayoutIndex = Index; }
void setLayoutIndex(unsigned Index) { LayoutIndex = Index; }

/// Needed by graph traits.
BinaryFunction *getParent() const { return getFunction(); }
Expand Down
12 changes: 9 additions & 3 deletions bolt/include/bolt/Core/BinaryContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "bolt/Core/BinaryData.h"
#include "bolt/Core/BinarySection.h"
#include "bolt/Core/DebugData.h"
#include "bolt/Core/DynoStats.h"
#include "bolt/Core/JumpTable.h"
#include "bolt/Core/MCPlusBuilder.h"
#include "bolt/RuntimeLibs/RuntimeLibrary.h"
Expand Down Expand Up @@ -359,7 +360,7 @@ class BinaryContext {
void setFileBuildID(StringRef ID) { FileBuildID = std::string(ID); }

bool hasSymbolsWithFileName() const { return HasSymbolsWithFileName; }
void setHasSymbolsWithFileName(bool Value) { HasSymbolsWithFileName = true; }
void setHasSymbolsWithFileName(bool Value) { HasSymbolsWithFileName = Value; }

/// Return true if relocations against symbol with a given name
/// must be created.
Expand Down Expand Up @@ -677,6 +678,9 @@ class BinaryContext {
/// have an origin file name available.
bool HasSymbolsWithFileName{false};

/// Does the binary have BAT section.
bool HasBATSection{false};

/// Sum of execution count of all functions
uint64_t SumExecutionCount{0};

Expand Down Expand Up @@ -714,6 +718,9 @@ class BinaryContext {
uint64_t NumStaleBlocksWithEqualIcount{0};
} Stats;

// Original binary execution count stats.
DynoStats InitialDynoStats;

// Address of the first allocated segment.
uint64_t FirstAllocAddress{std::numeric_limits<uint64_t>::max()};

Expand Down Expand Up @@ -1217,8 +1224,7 @@ class BinaryContext {

/// Return a signed value of \p Size stored at \p Address. The address has
/// to be a valid statically allocated address for the binary.
ErrorOr<uint64_t> getSignedValueAtAddress(uint64_t Address,
size_t Size) const;
ErrorOr<int64_t> getSignedValueAtAddress(uint64_t Address, size_t Size) const;

/// Special case of getUnsignedValueAtAddress() that uses a pointer size.
ErrorOr<uint64_t> getPointerAtAddress(uint64_t Address) const {
Expand Down
1 change: 1 addition & 0 deletions bolt/include/bolt/Core/BinarySection.h
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ class BinarySection {
return true;
}
}
bool isNote() const { return isELF() && ELFType == ELF::SHT_NOTE; }
bool isReordered() const { return IsReordered; }
bool isAnonymous() const { return IsAnonymous; }
bool isRelro() const { return IsRelro; }
Expand Down
7 changes: 3 additions & 4 deletions bolt/include/bolt/Core/DIEBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,9 @@ class DIEBuilder {
/// Along with current CU, and DIE being processed and the new DIE offset to
/// be updated, it takes in Parents vector that can be empty if this DIE has
/// no parents.
uint32_t
finalizeDIEs(DWARFUnit &CU, DIE &Die,
std::vector<std::optional<BOLTDWARF5AccelTableData *>> &Parents,
uint32_t &CurOffset);
uint32_t finalizeDIEs(DWARFUnit &CU, DIE &Die,
std::optional<BOLTDWARF5AccelTableData *> Parent,
uint32_t NumberParentsInChain, uint32_t &CurOffset);

void registerUnit(DWARFUnit &DU, bool NeedSort);

Expand Down
7 changes: 5 additions & 2 deletions bolt/include/bolt/Core/DebugNames.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,25 @@ class BOLTDWARF5AccelTableData : public DWARF5AccelTableData {
BOLTDWARF5AccelTableData(const uint64_t DieOffset,
const std::optional<uint64_t> DefiningParentOffset,
const unsigned DieTag, const unsigned UnitID,
const bool IsTU,
const bool IsParentRoot, const bool IsTU,
const std::optional<unsigned> SecondUnitID)
: DWARF5AccelTableData(DieOffset, DefiningParentOffset, DieTag, UnitID,
IsTU),
SecondUnitID(SecondUnitID) {}
SecondUnitID(SecondUnitID), IsParentRoot(IsParentRoot) {}

uint64_t getDieOffset() const { return DWARF5AccelTableData::getDieOffset(); }
unsigned getDieTag() const { return DWARF5AccelTableData::getDieTag(); }
unsigned getUnitID() const { return DWARF5AccelTableData::getUnitID(); }
bool isTU() const { return DWARF5AccelTableData::isTU(); }
bool isParentRoot() const { return IsParentRoot; }
std::optional<unsigned> getSecondUnitID() const { return SecondUnitID; }

void setPatchOffset(uint64_t PatchOffset) { OffsetVal = PatchOffset; }
uint64_t getPatchOffset() const { return std::get<uint64_t>(OffsetVal); }

private:
std::optional<unsigned> SecondUnitID;
bool IsParentRoot;
};

class DWARF5AcceleratorTable {
Expand All @@ -57,6 +59,7 @@ class DWARF5AcceleratorTable {
std::optional<BOLTDWARF5AccelTableData *>
addAccelTableEntry(DWARFUnit &Unit, const DIE &Die,
const std::optional<uint64_t> &DWOID,
const uint32_t NumberParentsInChain,
std::optional<BOLTDWARF5AccelTableData *> &Parent);
/// Set current unit being processed.
void setCurrentUnit(DWARFUnit &Unit, const uint64_t UnitStartOffset);
Expand Down
3 changes: 2 additions & 1 deletion bolt/include/bolt/Core/FunctionLayout.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,8 @@ class FunctionLayout {
void eraseBasicBlocks(const DenseSet<const BinaryBasicBlock *> ToErase);

/// Make sure fragments' and basic blocks' indices match the current layout.
void updateLayoutIndices();
void updateLayoutIndices() const;
void updateLayoutIndices(ArrayRef<BinaryBasicBlock *> Order) const;

/// Replace the current layout with NewLayout. Uses the block's
/// self-identifying fragment number to assign blocks to infer function
Expand Down
4 changes: 2 additions & 2 deletions bolt/include/bolt/Core/MCPlusBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -438,8 +438,8 @@ class MCPlusBuilder {
return false;
}

/// Check whether we support inverting this branch
virtual bool isUnsupportedBranch(const MCInst &Inst) const { return false; }
/// Check whether this conditional branch can be reversed
virtual bool isReversibleBranch(const MCInst &Inst) const { return true; }

/// Return true if the instruction is of pseudo kind.
virtual bool isPseudo(const MCInst &Inst) const {
Expand Down
29 changes: 25 additions & 4 deletions bolt/include/bolt/Passes/BinaryPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/DynoStats.h"
#include "bolt/Profile/BoltAddressTranslation.h"
#include "llvm/Support/CommandLine.h"
#include <atomic>
#include <set>
Expand Down Expand Up @@ -52,15 +53,31 @@ class BinaryFunctionPass {
virtual Error runOnFunctions(BinaryContext &BC) = 0;
};

/// Pass that stores the program-wide dyno stats, as computed when the pass
/// runs, in BinaryContext::InitialDynoStats.
class DynoStatsSetPass : public BinaryFunctionPass {
public:
  DynoStatsSetPass() : BinaryFunctionPass(false) {}

  const char *getName() const override {
    return "set dyno-stats before optimizations";
  }

  /// This pass never requests printing of a function.
  bool shouldPrint(const BinaryFunction &BF) const override { return false; }

  /// Compute the dyno stats over all binary functions and record them in
  /// \p BC. Always succeeds.
  Error runOnFunctions(BinaryContext &BC) override {
    auto &Functions = BC.getBinaryFunctions();
    BC.InitialDynoStats = getDynoStats(Functions, BC.isAArch64());
    return Error::success();
  }
};

/// A pass to print program-wide dynostats.
class DynoStatsPrintPass : public BinaryFunctionPass {
protected:
DynoStats PrevDynoStats;
std::string Title;

public:
DynoStatsPrintPass(const DynoStats &PrevDynoStats, const char *Title)
: BinaryFunctionPass(false), PrevDynoStats(PrevDynoStats), Title(Title) {}
DynoStatsPrintPass(const char *Title)
: BinaryFunctionPass(false), Title(Title) {}

const char *getName() const override {
return "print dyno-stats after optimizations";
Expand All @@ -69,6 +86,7 @@ class DynoStatsPrintPass : public BinaryFunctionPass {
bool shouldPrint(const BinaryFunction &BF) const override { return false; }

Error runOnFunctions(BinaryContext &BC) override {
const DynoStats PrevDynoStats = BC.InitialDynoStats;
const DynoStats NewDynoStats =
getDynoStats(BC.getBinaryFunctions(), BC.isAArch64());
const bool Changed = (NewDynoStats != PrevDynoStats);
Expand Down Expand Up @@ -399,8 +417,11 @@ class PrintProfileStats : public BinaryFunctionPass {
/// Prints a list of the top 100 functions sorted by a set of
/// dyno stats categories.
class PrintProgramStats : public BinaryFunctionPass {
BoltAddressTranslation *BAT = nullptr;

public:
explicit PrintProgramStats() : BinaryFunctionPass(false) {}
explicit PrintProgramStats(BoltAddressTranslation *BAT = nullptr)
: BinaryFunctionPass(false), BAT(BAT) {}

const char *getName() const override { return "print-stats"; }
bool shouldPrint(const BinaryFunction &) const override { return false; }
Expand Down
41 changes: 15 additions & 26 deletions bolt/include/bolt/Passes/MCF.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,14 @@
#ifndef BOLT_PASSES_MCF_H
#define BOLT_PASSES_MCF_H

#include "bolt/Passes/BinaryPasses.h"
#include "llvm/Support/CommandLine.h"

namespace llvm {
namespace bolt {

class BinaryFunction;
class DataflowInfoManager;

enum MCFCostFunction : char {
MCF_DISABLE = 0,
MCF_LINEAR,
MCF_QUADRATIC,
MCF_LOG,
MCF_BLAMEFTS
};

/// Implement the idea in "SamplePGO - The Power of Profile Guided Optimizations
/// without the Usability Burden" by Diego Novillo to make basic block counts
/// equal if we show that A dominates B, B post-dominates A and they are in the
Expand All @@ -31,23 +25,18 @@ void equalizeBBCounts(DataflowInfoManager &Info, BinaryFunction &BF);

/// Fill edge counts based on the basic block count. Used in nonLBR mode when
/// we only have bb count.
void estimateEdgeCounts(BinaryFunction &BF);

/// Entry point for computing a min-cost flow for the CFG with the goal
/// of fixing the flow of the CFG edges, that is, making sure it obeys the
/// flow-conservation equation SumInEdges = SumOutEdges.
///
/// To do this, we create an instance of the min-cost flow problem in a
/// similar way as the one discussed in the work of Roy Levin "Completing
/// Incomplete Edge Profile by Applying Minimum Cost Circulation Algorithms".
/// We do a few things differently, though. We don't populate edge counts using
/// weights coming from a static branch prediction technique and we don't
/// use the same cost function.
///
/// If cost function BlameFTs is used, assign all remaining flow to
/// fall-throughs. This is used when the sampling is based on taken branches
/// that do not account for them.
void solveMCF(BinaryFunction &BF, MCFCostFunction CostFunction);
/// Binary function pass registered under the name "estimate-edge-counts".
/// Only the interface is declared here; runOnFunction and runOnFunctions are
/// defined out of line.
class EstimateEdgeCounts : public BinaryFunctionPass {
/// Per-function worker of the pass.
void runOnFunction(BinaryFunction &BF);

public:
/// \p PrintPass is forwarded unchanged to the BinaryFunctionPass base.
explicit EstimateEdgeCounts(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) {}

/// Name reported for this pass.
const char *getName() const override { return "estimate-edge-counts"; }

/// Pass entry point
Error runOnFunctions(BinaryContext &BC) override;
};

} // end namespace bolt
} // end namespace llvm
Expand Down
4 changes: 2 additions & 2 deletions bolt/include/bolt/Passes/StokeInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,10 @@ struct StokeFuncInfo {
<< "," << NumBlocks << "," << IsLoopFree << "," << NumLoops << ","
<< MaxLoopDepth << "," << HotSize << "," << TotalSize << ","
<< Score << "," << HasCall << ",\"{ ";
for (std::string S : DefIn)
for (const std::string &S : DefIn)
Outfile << "%" << S << " ";
Outfile << "}\",\"{ ";
for (std::string S : LiveOut)
for (const std::string &S : LiveOut)
Outfile << "%" << S << " ";
Outfile << "}\"," << HeapOut << "," << StackOut << "," << HasRipAddr
<< "," << Omitted << "\n";
Expand Down
49 changes: 24 additions & 25 deletions bolt/include/bolt/Profile/BoltAddressTranslation.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ class BinaryFunction;
class BoltAddressTranslation {
public:
// In-memory representation of the address translation table
using MapTy = std::map<uint32_t, uint32_t>;
using MapTy = std::multimap<uint32_t, uint32_t>;

// List of taken fall-throughs
using FallthroughListTy = SmallVector<std::pair<uint64_t, uint64_t>, 16>;
Expand All @@ -90,7 +90,7 @@ class BoltAddressTranslation {
std::error_code parse(raw_ostream &OS, StringRef Buf);

/// Dump the parsed address translation tables
void dump(raw_ostream &OS);
void dump(raw_ostream &OS) const;

/// If the maps are loaded in memory, perform the lookup to translate LBR
/// addresses in function located at \p FuncAddress.
Expand All @@ -107,7 +107,12 @@ class BoltAddressTranslation {

/// If available, fetch the address of the hot part linked to the cold part
/// at \p Address. Return 0 otherwise.
uint64_t fetchParentAddress(uint64_t Address) const;
uint64_t fetchParentAddress(uint64_t Address) const {
  // Look the address up in ColdPartSource; 0 signals that there is no entry
  // for it.
  const auto It = ColdPartSource.find(Address);
  return It != ColdPartSource.end() ? It->second : 0;
}

/// True if the input binary has a translation table we can use to convert
/// addresses when aggregating profile
Expand All @@ -132,7 +137,8 @@ class BoltAddressTranslation {
/// emitted for the start of the BB. More entries may be emitted to cover
/// the location of calls or any instruction that may change control flow.
void writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB,
uint64_t FuncInputAddress, uint64_t FuncOutputAddress);
uint64_t FuncInputAddress,
uint64_t FuncOutputAddress) const;

/// Write the serialized address translation table for a function.
template <bool Cold>
Expand All @@ -147,7 +153,7 @@ class BoltAddressTranslation {

/// Returns the bitmask with set bits corresponding to indices of BRANCHENTRY
/// entries in function address translation map.
APInt calculateBranchEntriesBitMask(MapTy &Map, size_t EqualElems);
APInt calculateBranchEntriesBitMask(MapTy &Map, size_t EqualElems) const;

/// Calculate the number of equal offsets (output = input - skew) in the
/// beginning of the function.
Expand Down Expand Up @@ -178,14 +184,9 @@ class BoltAddressTranslation {
public:
/// Map basic block input offset to a basic block index and hash pair.
class BBHashMapTy {
class EntryTy {
struct EntryTy {
unsigned Index;
size_t Hash;

public:
unsigned getBBIndex() const { return Index; }
size_t getBBHash() const { return Hash; }
EntryTy(unsigned Index, size_t Hash) : Index(Index), Hash(Hash) {}
};

std::map<uint32_t, EntryTy> Map;
Expand All @@ -201,34 +202,30 @@ class BoltAddressTranslation {
}

unsigned getBBIndex(uint32_t BBInputOffset) const {
return getEntry(BBInputOffset).getBBIndex();
return getEntry(BBInputOffset).Index;
}

size_t getBBHash(uint32_t BBInputOffset) const {
return getEntry(BBInputOffset).getBBHash();
return getEntry(BBInputOffset).Hash;
}

void addEntry(uint32_t BBInputOffset, unsigned BBIndex, size_t BBHash) {
Map.emplace(BBInputOffset, EntryTy(BBIndex, BBHash));
Map.emplace(BBInputOffset, EntryTy{BBIndex, BBHash});
}

size_t getNumBasicBlocks() const { return Map.size(); }

auto begin() const { return Map.begin(); }
auto end() const { return Map.end(); }
auto upper_bound(uint32_t Offset) const { return Map.upper_bound(Offset); }
auto size() const { return Map.size(); }
};

/// Map function output address to its hash and basic blocks hash map.
class FuncHashesTy {
class EntryTy {
struct EntryTy {
size_t Hash;
BBHashMapTy BBHashMap;

public:
size_t getBFHash() const { return Hash; }
const BBHashMapTy &getBBHashMap() const { return BBHashMap; }
EntryTy(size_t Hash) : Hash(Hash) {}
};

std::unordered_map<uint64_t, EntryTy> Map;
Expand All @@ -240,23 +237,23 @@ class BoltAddressTranslation {

public:
size_t getBFHash(uint64_t FuncOutputAddress) const {
return getEntry(FuncOutputAddress).getBFHash();
return getEntry(FuncOutputAddress).Hash;
}

const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const {
return getEntry(FuncOutputAddress).getBBHashMap();
return getEntry(FuncOutputAddress).BBHashMap;
}

void addEntry(uint64_t FuncOutputAddress, size_t BFHash) {
Map.emplace(FuncOutputAddress, EntryTy(BFHash));
Map.emplace(FuncOutputAddress, EntryTy{BFHash, BBHashMapTy()});
}

size_t getNumFunctions() const { return Map.size(); };

size_t getNumBasicBlocks() const {
size_t NumBasicBlocks{0};
for (auto &I : Map)
NumBasicBlocks += I.second.getBBHashMap().getNumBasicBlocks();
NumBasicBlocks += I.second.BBHashMap.getNumBasicBlocks();
return NumBasicBlocks;
}
};
Expand All @@ -278,7 +275,9 @@ class BoltAddressTranslation {

/// Returns the number of basic blocks in a function.
size_t getNumBasicBlocks(uint64_t OutputAddress) const {
return NumBasicBlocksMap.at(OutputAddress);
auto It = NumBasicBlocksMap.find(OutputAddress);
assert(It != NumBasicBlocksMap.end());
return It->second;
}

private:
Expand Down
9 changes: 6 additions & 3 deletions bolt/include/bolt/Profile/DataAggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#define BOLT_PROFILE_DATA_AGGREGATOR_H

#include "bolt/Profile/DataReader.h"
#include "bolt/Profile/YAMLProfileWriter.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Program.h"
Expand Down Expand Up @@ -122,14 +123,14 @@ class DataAggregator : public DataReader {
uint64_t ExternCount{0};
};

struct BranchInfo {
struct TakenBranchInfo {
uint64_t TakenCount{0};
uint64_t MispredCount{0};
};

/// Intermediate storage for profile data. We save the results of parsing
/// and use them later for processing and assigning profile.
std::unordered_map<Trace, BranchInfo, TraceHash> BranchLBRs;
std::unordered_map<Trace, TakenBranchInfo, TraceHash> BranchLBRs;
std::unordered_map<Trace, FTInfo, TraceHash> FallthroughLBRs;
std::vector<AggregatedLBREntry> AggregatedLBRs;
std::unordered_map<uint64_t, uint64_t> BasicSamples;
Expand Down Expand Up @@ -248,7 +249,7 @@ class DataAggregator : public DataReader {
BinaryFunction *getBATParentFunction(const BinaryFunction &Func) const;

/// Retrieve the location name to be used for samples recorded in \p Func.
StringRef getLocationName(const BinaryFunction &Func) const;
static StringRef getLocationName(const BinaryFunction &Func, bool BAT);

/// Semantic actions - parser hooks to interpret parsed perf samples
/// Register a sample (non-LBR mode), i.e. a new hit at \p Address
Expand Down Expand Up @@ -490,6 +491,8 @@ class DataAggregator : public DataReader {
/// Parse the output generated by "perf buildid-list" to extract build-ids
/// and return a file name matching a given \p FileBuildID.
std::optional<StringRef> getFileNameForBuildID(StringRef FileBuildID);

friend class YAMLProfileWriter;
};
} // namespace bolt
} // namespace llvm
Expand Down
3 changes: 3 additions & 0 deletions bolt/include/bolt/Rewrite/MetadataManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ class MetadataManager {
/// Register a new \p Rewriter.
void registerRewriter(std::unique_ptr<MetadataRewriter> Rewriter);

/// Run initializers after sections are discovered.
void runSectionInitializers();

/// Execute initialization of rewriters while functions are disassembled, but
/// CFG is not yet built.
void runInitializersPreCFG();
Expand Down
4 changes: 4 additions & 0 deletions bolt/include/bolt/Rewrite/MetadataRewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ class MetadataRewriter {
/// Return the name of this rewriter, as stored in the Name member.
StringRef getName() const { return Name; }

/// Run initialization after the binary is read and sections are identified,
/// but before functions are discovered. The default implementation does
/// nothing and returns Error::success().
virtual Error sectionInitializer() { return Error::success(); }

/// Interface for modifying/annotating functions in the binary based on the
/// contents of the section. Functions are in pre-cfg state. The default
/// implementation does nothing and returns Error::success().
virtual Error preCFGInitializer() { return Error::success(); }
Expand Down
2 changes: 2 additions & 0 deletions bolt/include/bolt/Rewrite/MetadataRewriters.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ class BinaryContext;

std::unique_ptr<MetadataRewriter> createLinuxKernelRewriter(BinaryContext &);

std::unique_ptr<MetadataRewriter> createBuildIDRewriter(BinaryContext &);

std::unique_ptr<MetadataRewriter> createPseudoProbeRewriter(BinaryContext &);

std::unique_ptr<MetadataRewriter> createSDTRewriter(BinaryContext &);
Expand Down
27 changes: 7 additions & 20 deletions bolt/include/bolt/Rewrite/RewriteInstance.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Regex.h"
#include <map>
#include <set>
#include <unordered_map>
Expand Down Expand Up @@ -78,15 +79,6 @@ class RewriteInstance {
return InputFile->getFileName();
}

/// Set the build-id string if we did not fail to parse the contents of the
/// ELF note section containing build-id information.
void parseBuildID();

/// The build-id is typically a stream of 20 bytes. Return these bytes in
/// printable hexadecimal form if they are available, or std::nullopt
/// otherwise.
std::optional<std::string> getPrintableBuildID() const;

/// If this instance uses a profile, return appropriate profile reader.
const ProfileReaderBase *getProfileReader() const {
return ProfileReader.get();
Expand Down Expand Up @@ -183,6 +175,9 @@ class RewriteInstance {
/// Link additional runtime code to support instrumentation.
void linkRuntime();

/// Process metadata in sections before functions are discovered.
void processSectionMetadata();

/// Process metadata in special sections before CFG is built for functions.
void processMetadataPreCFG();

Expand Down Expand Up @@ -367,11 +362,6 @@ class RewriteInstance {
/// Loop over now emitted functions to write translation maps
void encodeBATSection();

/// Update the ELF note section containing the binary build-id to reflect
/// a new build-id, so tools can differentiate between the old and the
/// rewritten binary.
void patchBuildID();

/// Return file offset corresponding to a virtual \p Address.
/// Return 0 if the address has no mapping in the file, including being
/// part of .bss section.
Expand Down Expand Up @@ -561,18 +551,12 @@ class RewriteInstance {
/// Exception handling and stack unwinding information in this binary.
ErrorOr<BinarySection &> EHFrameSection{std::errc::bad_address};

/// .note.gnu.build-id section.
ErrorOr<BinarySection &> BuildIDSection{std::errc::bad_address};

/// Look up a section by \p Name through the binary context. Returns a pointer
/// to the section, or nullptr when the lookup reports an error.
BinarySection *getSection(const Twine &Name) {
  if (ErrorOr<BinarySection &> Found = BC->getUniqueSectionByName(Name))
    return &Found.get();
  return nullptr;
}

/// A reference to the build-id bytes in the original binary
StringRef BuildID;

/// Keep track of functions we fail to write in the binary. We need to avoid
/// rewriting CFI info for these functions.
std::vector<uint64_t> FailedAddresses;
Expand All @@ -596,6 +580,9 @@ class RewriteInstance {

NameResolver NR;

// Regex object matching split function names.
const Regex FunctionFragmentTemplate{"(.*)\\.(cold|warm)(\\.[0-9]+)?"};

friend class RewriteInstanceDiff;
};

Expand Down
21 changes: 13 additions & 8 deletions bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ BinaryContext::BinaryContext(std::unique_ptr<MCContext> Ctx,
AsmInfo(std::move(AsmInfo)), MII(std::move(MII)), STI(std::move(STI)),
InstPrinter(std::move(InstPrinter)), MIA(std::move(MIA)),
MIB(std::move(MIB)), MRI(std::move(MRI)), DisAsm(std::move(DisAsm)),
Logger(Logger) {
Logger(Logger), InitialDynoStats(isAArch64()) {
Relocation::Arch = this->TheTriple->getArch();
RegularPageSize = isAArch64() ? RegularPageSizeAArch64 : RegularPageSizeX86;
PageAlign = opts::NoHugePages ? RegularPageSize : HugePageSize;
Expand Down Expand Up @@ -934,10 +934,13 @@ std::string BinaryContext::generateJumpTableName(const BinaryFunction &BF,
uint64_t Offset = 0;
if (const JumpTable *JT = BF.getJumpTableContainingAddress(Address)) {
Offset = Address - JT->getAddress();
auto Itr = JT->Labels.find(Offset);
if (Itr != JT->Labels.end())
return std::string(Itr->second->getName());
Id = JumpTableIds.at(JT->getAddress());
auto JTLabelsIt = JT->Labels.find(Offset);
if (JTLabelsIt != JT->Labels.end())
return std::string(JTLabelsIt->second->getName());

auto JTIdsIt = JumpTableIds.find(JT->getAddress());
assert(JTIdsIt != JumpTableIds.end());
Id = JTIdsIt->second;
} else {
Id = JumpTableIds[Address] = BF.JumpTables.size();
}
Expand Down Expand Up @@ -1322,7 +1325,9 @@ void BinaryContext::processInterproceduralReferences() {
InterproceduralReferences) {
BinaryFunction &Function = *It.first;
uint64_t Address = It.second;
if (!Address || Function.isIgnored())
// Process interprocedural references from ignored functions in BAT mode
// (non-simple in non-relocation mode) to properly register entry points
if (!Address || (Function.isIgnored() && !HasBATSection))
continue;

BinaryFunction *TargetFunction =
Expand Down Expand Up @@ -2212,8 +2217,8 @@ ErrorOr<uint64_t> BinaryContext::getUnsignedValueAtAddress(uint64_t Address,
return DE.getUnsigned(&ValueOffset, Size);
}

ErrorOr<uint64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
size_t Size) const {
ErrorOr<int64_t> BinaryContext::getSignedValueAtAddress(uint64_t Address,
size_t Size) const {
const ErrorOr<const BinarySection &> Section = getSectionForAddress(Address);
if (!Section)
return std::make_error_code(std::errc::bad_address);
Expand Down
4 changes: 3 additions & 1 deletion bolt/lib/Core/BinaryEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -813,7 +813,9 @@ void BinaryEmitter::emitJumpTable(const JumpTable &JT, MCSection *HotSection,
// determining its destination.
std::map<MCSymbol *, uint64_t> LabelCounts;
if (opts::JumpTables > JTS_SPLIT && !JT.Counts.empty()) {
MCSymbol *CurrentLabel = JT.Labels.at(0);
auto It = JT.Labels.find(0);
assert(It != JT.Labels.end());
MCSymbol *CurrentLabel = It->second;
uint64_t CurrentLabelCount = 0;
for (unsigned Index = 0; Index < JT.Entries.size(); ++Index) {
auto LI = JT.Labels.find(Index * JT.EntrySize);
Expand Down
40 changes: 24 additions & 16 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -851,15 +851,19 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction, unsigned Size,
return IndirectBranchType::UNKNOWN;
}

// RIP-relative addressing should be converted to symbol form by now
// in processed instructions (but not in jump).
if (DispExpr) {
auto getExprValue = [&](const MCExpr *Expr) {
const MCSymbol *TargetSym;
uint64_t TargetOffset;
std::tie(TargetSym, TargetOffset) = BC.MIB->getTargetSymbolInfo(DispExpr);
std::tie(TargetSym, TargetOffset) = BC.MIB->getTargetSymbolInfo(Expr);
ErrorOr<uint64_t> SymValueOrError = BC.getSymbolValue(*TargetSym);
assert(SymValueOrError && "global symbol needs a value");
ArrayStart = *SymValueOrError + TargetOffset;
assert(SymValueOrError && "Global symbol needs a value");
return *SymValueOrError + TargetOffset;
};

// RIP-relative addressing should be converted to symbol form by now
// in processed instructions (but not in jump).
if (DispExpr) {
ArrayStart = getExprValue(DispExpr);
BaseRegNum = BC.MIB->getNoRegister();
if (BC.isAArch64()) {
ArrayStart &= ~0xFFFULL;
Expand Down Expand Up @@ -1284,7 +1288,7 @@ Error BinaryFunction::disassemble() {
const bool IsCondBranch = MIB->isConditionalBranch(Instruction);
MCSymbol *TargetSymbol = nullptr;

if (BC.MIB->isUnsupportedBranch(Instruction)) {
if (!BC.MIB->isReversibleBranch(Instruction)) {
setIgnored();
if (BinaryFunction *TargetFunc =
BC.getBinaryFunctionContainingAddress(TargetAddress))
Expand Down Expand Up @@ -1666,7 +1670,8 @@ void BinaryFunction::postProcessEntryPoints() {
// In non-relocation mode there's potentially an external undetectable
// reference to the entry point and hence we cannot move this entry
// point. Optimizing without moving could be difficult.
if (!BC.HasRelocations)
// In BAT mode, register any known entry points for CFG construction.
if (!BC.HasRelocations && !BC.HasBATSection)
setSimple(false);

const uint32_t Offset = KV.first;
Expand Down Expand Up @@ -3381,7 +3386,7 @@ void BinaryFunction::fixBranches() {

// Reverse branch condition and swap successors.
auto swapSuccessors = [&]() {
if (MIB->isUnsupportedBranch(*CondBranch)) {
if (!MIB->isReversibleBranch(*CondBranch)) {
if (opts::Verbosity) {
BC.outs() << "BOLT-INFO: unable to swap successors in " << *this
<< '\n';
Expand Down Expand Up @@ -3636,8 +3641,8 @@ bool BinaryFunction::forEachEntryPoint(EntryPointCallbackTy Callback) const {

BinaryFunction::BasicBlockListType BinaryFunction::dfs() const {
BasicBlockListType DFS;
unsigned Index = 0;
std::stack<BinaryBasicBlock *> Stack;
std::set<BinaryBasicBlock *> Visited;

// Push entry points to the stack in reverse order.
//
Expand All @@ -3654,17 +3659,13 @@ BinaryFunction::BasicBlockListType BinaryFunction::dfs() const {
for (BinaryBasicBlock *const BB : reverse(EntryPoints))
Stack.push(BB);

for (BinaryBasicBlock &BB : blocks())
BB.setLayoutIndex(BinaryBasicBlock::InvalidIndex);

while (!Stack.empty()) {
BinaryBasicBlock *BB = Stack.top();
Stack.pop();

if (BB->getLayoutIndex() != BinaryBasicBlock::InvalidIndex)
if (Visited.find(BB) != Visited.end())
continue;

BB->setLayoutIndex(Index++);
Visited.insert(BB);
DFS.push_back(BB);

for (BinaryBasicBlock *SuccBB : BB->landing_pads()) {
Expand Down Expand Up @@ -3697,6 +3698,13 @@ BinaryFunction::BasicBlockListType BinaryFunction::dfs() const {

size_t BinaryFunction::computeHash(bool UseDFS, HashFunction HashFunction,
OperandHashFuncTy OperandHashFunc) const {
LLVM_DEBUG({
dbgs() << "BOLT-DEBUG: computeHash " << getPrintName() << ' '
<< (UseDFS ? "dfs" : "bin") << " order "
<< (HashFunction == HashFunction::StdHash ? "std::hash" : "xxh3")
<< '\n';
});

if (size() == 0)
return 0;

Expand Down
29 changes: 18 additions & 11 deletions bolt/lib/Core/DIEBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,32 +461,42 @@ getUnitForOffset(DIEBuilder &Builder, DWARFContext &DWCtx,
return nullptr;
}

uint32_t DIEBuilder::finalizeDIEs(
DWARFUnit &CU, DIE &Die,
std::vector<std::optional<BOLTDWARF5AccelTableData *>> &Parents,
uint32_t &CurOffset) {
uint32_t
DIEBuilder::finalizeDIEs(DWARFUnit &CU, DIE &Die,
std::optional<BOLTDWARF5AccelTableData *> Parent,
uint32_t NumberParentsInChain, uint32_t &CurOffset) {
getState().DWARFDieAddressesParsed.erase(Die.getOffset());
uint32_t CurSize = 0;
Die.setOffset(CurOffset);
std::optional<BOLTDWARF5AccelTableData *> NameEntry =
DebugNamesTable.addAccelTableEntry(
CU, Die, SkeletonCU ? SkeletonCU->getDWOId() : std::nullopt,
Parents.back());
NumberParentsInChain, Parent);
// It is possible that an indexed debugging information entry has a parent
// that is not indexed (for example, if its parent does not have a name
// attribute). In such a case, a parent attribute may point to a nameless
// index entry (that is, one that cannot be reached from any entry in the name
// table), or it may point to the nearest ancestor that does have an index
// entry.
// Skipping entry is not very useful for LLDB. This follows clang where
// children of forward declaration won't have DW_IDX_parent.
// https://github.com/llvm/llvm-project/pull/91808

// If Parent is nullopt and NumberParentsInChain is not zero, then forward
// declaration was encountered in this DF traversal. Propagating nullopt for
// Parent to children.
if (!Parent && NumberParentsInChain)
NameEntry = std::nullopt;
if (NameEntry)
Parents.push_back(std::move(NameEntry));
++NumberParentsInChain;
for (DIEValue &Val : Die.values())
CurSize += Val.sizeOf(CU.getFormParams());
CurSize += getULEB128Size(Die.getAbbrevNumber());
CurOffset += CurSize;

for (DIE &Child : Die.children()) {
uint32_t ChildSize = finalizeDIEs(CU, Child, Parents, CurOffset);
uint32_t ChildSize =
finalizeDIEs(CU, Child, NameEntry, NumberParentsInChain, CurOffset);
CurSize += ChildSize;
}
// for children end mark.
Expand All @@ -496,9 +506,6 @@ uint32_t DIEBuilder::finalizeDIEs(
}

Die.setSize(CurSize);
if (NameEntry)
Parents.pop_back();

return CurSize;
}

Expand All @@ -510,7 +517,7 @@ void DIEBuilder::finish() {
DebugNamesTable.setCurrentUnit(CU, UnitStartOffset);
std::vector<std::optional<BOLTDWARF5AccelTableData *>> Parents;
Parents.push_back(std::nullopt);
finalizeDIEs(CU, *UnitDIE, Parents, CurOffset);
finalizeDIEs(CU, *UnitDIE, std::nullopt, 0, CurOffset);

DWARFUnitInfo &CurUnitInfo = getUnitInfoByDwarfUnit(CU);
CurUnitInfo.UnitOffset = UnitStartOffset;
Expand Down
15 changes: 11 additions & 4 deletions bolt/lib/Core/DebugNames.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,6 @@ void DWARF5AcceleratorTable::addUnit(DWARFUnit &Unit,
// Returns true if DW_TAG_variable should be included in .debug_names based on
// section 6.1.1.1 of the DWARF5 spec.
static bool shouldIncludeVariable(const DWARFUnit &Unit, const DIE &Die) {
if (Die.findAttribute(dwarf::Attribute::DW_AT_declaration))
return false;
const DIEValue LocAttrInfo =
Die.findAttribute(dwarf::Attribute::DW_AT_location);
if (!LocAttrInfo)
Expand Down Expand Up @@ -148,6 +146,8 @@ static bool shouldIncludeVariable(const DWARFUnit &Unit, const DIE &Die) {

bool static canProcess(const DWARFUnit &Unit, const DIE &Die,
std::string &NameToUse, const bool TagsOnly) {
if (Die.findAttribute(dwarf::Attribute::DW_AT_declaration))
return false;
switch (Die.getTag()) {
case dwarf::DW_TAG_base_type:
case dwarf::DW_TAG_class_type:
Expand Down Expand Up @@ -220,6 +220,7 @@ static uint64_t getEntryID(const BOLTDWARF5AccelTableData &Entry) {
std::optional<BOLTDWARF5AccelTableData *>
DWARF5AcceleratorTable::addAccelTableEntry(
DWARFUnit &Unit, const DIE &Die, const std::optional<uint64_t> &DWOID,
const uint32_t NumberParentsInChain,
std::optional<BOLTDWARF5AccelTableData *> &Parent) {
if (Unit.getVersion() < 5 || !NeedToCreate)
return std::nullopt;
Expand Down Expand Up @@ -312,8 +313,14 @@ DWARF5AcceleratorTable::addAccelTableEntry(
// Keeping memory footprint down.
if (ParentOffset)
EntryRelativeOffsets.insert({*ParentOffset, 0});
bool IsParentRoot = false;
// If there is no parent and no valid Entries in parent chain this is a root
// to be marked with a flag.
if (!Parent && !NumberParentsInChain)
IsParentRoot = true;
It.Values.push_back(new (Allocator) BOLTDWARF5AccelTableData(
Die.getOffset(), ParentOffset, DieTag, UnitID, IsTU, SecondIndex));
Die.getOffset(), ParentOffset, DieTag, UnitID, IsParentRoot, IsTU,
SecondIndex));
return It.Values.back();
};

Expand Down Expand Up @@ -462,7 +469,7 @@ void DWARF5AcceleratorTable::populateAbbrevsMap() {
Abbrev.addAttribute({dwarf::DW_IDX_die_offset, dwarf::DW_FORM_ref4});
if (std::optional<uint64_t> Offset = Value->getParentDieOffset())
Abbrev.addAttribute({dwarf::DW_IDX_parent, dwarf::DW_FORM_ref4});
else
else if (Value->isParentRoot())
Abbrev.addAttribute(
{dwarf::DW_IDX_parent, dwarf::DW_FORM_flag_present});
FoldingSetNodeID ID;
Expand Down
5 changes: 3 additions & 2 deletions bolt/lib/Core/DynoStats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,9 @@ void DynoStats::print(raw_ostream &OS, const DynoStats *Other,
for (auto &Stat : llvm::reverse(SortedHistogram)) {
OS << format("%20s,%'18lld", Printer->getOpcodeName(Stat.second).data(),
Stat.first * opts::DynoStatsScale);

MaxOpcodeHistogramTy MaxMultiMap = OpcodeHistogram.at(Stat.second).second;
auto It = OpcodeHistogram.find(Stat.second);
assert(It != OpcodeHistogram.end());
MaxOpcodeHistogramTy MaxMultiMap = It->second.second;
// Start with function name:BB offset with highest execution count.
for (auto &Max : llvm::reverse(MaxMultiMap)) {
OS << format(", %'18lld, ", Max.first * opts::DynoStatsScale)
Expand Down
9 changes: 7 additions & 2 deletions bolt/lib/Core/FunctionLayout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,15 +164,20 @@ void FunctionLayout::eraseBasicBlocks(
updateLayoutIndices();
}

void FunctionLayout::updateLayoutIndices() {
void FunctionLayout::updateLayoutIndices() const {
unsigned BlockIndex = 0;
for (FunctionFragment &FF : fragments()) {
for (const FunctionFragment &FF : fragments()) {
for (BinaryBasicBlock *const BB : FF) {
BB->setLayoutIndex(BlockIndex++);
BB->setFragmentNum(FF.getFragmentNum());
}
}
}
/// Assign layout indices to the blocks of \p Order according to their position
/// in that sequence: the first block gets index 0, the next one 1, and so on.
void FunctionLayout::updateLayoutIndices(
    ArrayRef<BinaryBasicBlock *> Order) const {
  for (size_t Position = 0, NumBlocks = Order.size(); Position != NumBlocks;
       ++Position)
    Order[Position]->setLayoutIndex(Position);
}

bool FunctionLayout::update(const ArrayRef<BinaryBasicBlock *> NewLayout) {
const bool EqualBlockOrder = llvm::equal(Blocks, NewLayout);
Expand Down
4 changes: 3 additions & 1 deletion bolt/lib/Passes/BinaryFunctionCallGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {
std::stack<NodeId> Worklist;

for (BinaryFunction *Func : Funcs) {
const NodeId Id = FuncToNodeId.at(Func);
auto It = FuncToNodeId.find(Func);
assert(It != FuncToNodeId.end());
const NodeId Id = It->second;
Worklist.push(Id);
NodeStatus[Id] = NEW;
}
Expand Down
65 changes: 42 additions & 23 deletions bolt/lib/Passes/BinaryPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,8 @@ static uint64_t fixDoubleJumps(BinaryFunction &Function, bool MarkInvalid) {
MCPlusBuilder *MIB = Function.getBinaryContext().MIB.get();
for (BinaryBasicBlock &BB : Function) {
auto checkAndPatch = [&](BinaryBasicBlock *Pred, BinaryBasicBlock *Succ,
const MCSymbol *SuccSym) {
const MCSymbol *SuccSym,
std::optional<uint32_t> Offset) {
// Ignore infinite loop jumps or fallthrough tail jumps.
if (Pred == Succ || Succ == &BB)
return false;
Expand Down Expand Up @@ -715,9 +716,11 @@ static uint64_t fixDoubleJumps(BinaryFunction &Function, bool MarkInvalid) {
Pred->removeSuccessor(&BB);
Pred->eraseInstruction(Pred->findInstruction(Branch));
Pred->addTailCallInstruction(SuccSym);
MCInst *TailCall = Pred->getLastNonPseudoInstr();
assert(TailCall);
MIB->setOffset(*TailCall, BB.getOffset());
if (Offset) {
MCInst *TailCall = Pred->getLastNonPseudoInstr();
assert(TailCall);
MIB->setOffset(*TailCall, *Offset);
}
} else {
return false;
}
Expand Down Expand Up @@ -760,7 +763,8 @@ static uint64_t fixDoubleJumps(BinaryFunction &Function, bool MarkInvalid) {
if (Pred->getSuccessor() == &BB ||
(Pred->getConditionalSuccessor(true) == &BB && !IsTailCall) ||
Pred->getConditionalSuccessor(false) == &BB)
if (checkAndPatch(Pred, Succ, SuccSym) && MarkInvalid)
if (checkAndPatch(Pred, Succ, SuccSym, MIB->getOffset(*Inst)) &&
MarkInvalid)
BB.markValid(BB.pred_size() != 0 || BB.isLandingPad() ||
BB.isEntryPoint());
}
Expand Down Expand Up @@ -1386,9 +1390,19 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
if (Function.isPLTFunction())
continue;

// Adjustment for BAT mode: the profile for BOLT split fragments is combined
// so only count the hot fragment.
const uint64_t Address = Function.getAddress();
bool IsHotParentOfBOLTSplitFunction = !Function.getFragments().empty() &&
BAT && BAT->isBATFunction(Address) &&
!BAT->fetchParentAddress(Address);

++NumRegularFunctions;

if (!Function.isSimple()) {
// In BOLTed binaries split functions are non-simple (due to non-relocation
// mode), but the original function is known to be simple and we have a
// valid profile for it.
if (!Function.isSimple() && !IsHotParentOfBOLTSplitFunction) {
if (Function.hasProfile())
++NumNonSimpleProfiledFunctions;
continue;
Expand Down Expand Up @@ -1549,23 +1563,28 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
const bool Ascending =
opts::DynoStatsSortOrderOpt == opts::DynoStatsSortOrder::Ascending;

if (SortAll) {
llvm::stable_sort(Functions,
[Ascending, &Stats](const BinaryFunction *A,
const BinaryFunction *B) {
return Ascending ? Stats.at(A) < Stats.at(B)
: Stats.at(B) < Stats.at(A);
});
} else {
llvm::stable_sort(
Functions, [Ascending, &Stats](const BinaryFunction *A,
const BinaryFunction *B) {
const DynoStats &StatsA = Stats.at(A);
const DynoStats &StatsB = Stats.at(B);
return Ascending ? StatsA.lessThan(StatsB, opts::PrintSortedBy)
: StatsB.lessThan(StatsA, opts::PrintSortedBy);
});
}
std::function<bool(const DynoStats &, const DynoStats &)>
DynoStatsComparator =
SortAll ? [](const DynoStats &StatsA,
const DynoStats &StatsB) { return StatsA < StatsB; }
: [](const DynoStats &StatsA, const DynoStats &StatsB) {
return StatsA.lessThan(StatsB, opts::PrintSortedBy);
};

llvm::stable_sort(Functions,
[Ascending, &Stats, DynoStatsComparator](
const BinaryFunction *A, const BinaryFunction *B) {
auto StatsItr = Stats.find(A);
assert(StatsItr != Stats.end());
const DynoStats &StatsA = StatsItr->second;

StatsItr = Stats.find(B);
assert(StatsItr != Stats.end());
const DynoStats &StatsB = StatsItr->second;

return Ascending ? DynoStatsComparator(StatsA, StatsB)
: DynoStatsComparator(StatsB, StatsA);
});

BC.outs() << "BOLT-INFO: top functions sorted by ";
if (SortAll) {
Expand Down
43 changes: 33 additions & 10 deletions bolt/lib/Passes/CacheMetrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,20 @@ calcTSPScore(const std::vector<BinaryFunction *> &BinaryFunctions,
for (BinaryBasicBlock *DstBB : SrcBB->successors()) {
if (SrcBB != DstBB && BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE) {
JumpCount += BI->Count;
if (BBAddr.at(SrcBB) + BBSize.at(SrcBB) == BBAddr.at(DstBB))

auto BBAddrIt = BBAddr.find(SrcBB);
assert(BBAddrIt != BBAddr.end());
uint64_t SrcBBAddr = BBAddrIt->second;

auto BBSizeIt = BBSize.find(SrcBB);
assert(BBSizeIt != BBSize.end());
uint64_t SrcBBSize = BBSizeIt->second;

BBAddrIt = BBAddr.find(DstBB);
assert(BBAddrIt != BBAddr.end());
uint64_t DstBBAddr = BBAddrIt->second;

if (SrcBBAddr + SrcBBSize == DstBBAddr)
Score += BI->Count;
}
++BI;
Expand Down Expand Up @@ -149,29 +162,39 @@ double expectedCacheHitRatio(
for (BinaryFunction *BF : BinaryFunctions) {
if (BF->getLayout().block_empty())
continue;
const uint64_t Page =
BBAddr.at(BF->getLayout().block_front()) / ITLBPageSize;
PageSamples[Page] += FunctionSamples.at(BF);
auto BBAddrIt = BBAddr.find(BF->getLayout().block_front());
assert(BBAddrIt != BBAddr.end());
const uint64_t Page = BBAddrIt->second / ITLBPageSize;

auto FunctionSamplesIt = FunctionSamples.find(BF);
assert(FunctionSamplesIt != FunctionSamples.end());
PageSamples[Page] += FunctionSamplesIt->second;
}

// Computing the expected number of misses for every function
double Misses = 0;
for (BinaryFunction *BF : BinaryFunctions) {
// Skip the function if it has no samples
if (BF->getLayout().block_empty() || FunctionSamples.at(BF) == 0.0)
auto FunctionSamplesIt = FunctionSamples.find(BF);
assert(FunctionSamplesIt != FunctionSamples.end());
double Samples = FunctionSamplesIt->second;
if (BF->getLayout().block_empty() || Samples == 0.0)
continue;
double Samples = FunctionSamples.at(BF);
const uint64_t Page =
BBAddr.at(BF->getLayout().block_front()) / ITLBPageSize;

auto BBAddrIt = BBAddr.find(BF->getLayout().block_front());
assert(BBAddrIt != BBAddr.end());
const uint64_t Page = BBAddrIt->second / ITLBPageSize;
// The probability that the page is not present in the cache
const double MissProb =
pow(1.0 - PageSamples[Page] / TotalSamples, ITLBEntries);

// Processing all callers of the function
for (std::pair<BinaryFunction *, uint64_t> Pair : Calls[BF]) {
BinaryFunction *SrcFunction = Pair.first;
const uint64_t SrcPage =
BBAddr.at(SrcFunction->getLayout().block_front()) / ITLBPageSize;

BBAddrIt = BBAddr.find(SrcFunction->getLayout().block_front());
assert(BBAddrIt != BBAddr.end());
const uint64_t SrcPage = BBAddrIt->second / ITLBPageSize;
// Is this a 'long' or a 'short' call?
if (Page != SrcPage) {
// This is a miss
Expand Down
5 changes: 4 additions & 1 deletion bolt/lib/Passes/IdenticalCodeFolding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,10 @@ Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
"ICF breakdown", opts::TimeICF);
ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
// Make sure indices are in-order.
BF.getLayout().updateLayoutIndices();
if (opts::ICFUseDFS)
BF.getLayout().updateLayoutIndices(BF.dfs());
else
BF.getLayout().updateLayoutIndices();

// Pre-compute hash before pushing into hashtable.
// Hash instruction operands to minimize hash collisions.
Expand Down
4 changes: 3 additions & 1 deletion bolt/lib/Passes/Inliner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,9 @@ Inliner::inlineCall(BinaryBasicBlock &CallerBB,
std::vector<BinaryBasicBlock *> Successors(BB.succ_size());
llvm::transform(BB.successors(), Successors.begin(),
[&InlinedBBMap](const BinaryBasicBlock *BB) {
return InlinedBBMap.at(BB);
auto It = InlinedBBMap.find(BB);
assert(It != InlinedBBMap.end());
return It->second;
});

if (CallerFunction.hasValidProfile() && Callee.hasValidProfile())
Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Passes/Instrumentation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ void Instrumentation::instrumentFunction(BinaryFunction &Function,
else if (BC.MIB->isUnconditionalBranch(Inst))
HasUnconditionalBranch = true;
else if ((!BC.MIB->isCall(Inst) && !BC.MIB->isConditionalBranch(Inst)) ||
BC.MIB->isUnsupportedBranch(Inst))
!BC.MIB->isReversibleBranch(Inst))
continue;

const uint32_t FromOffset = *BC.MIB->getOffset(Inst);
Expand Down
33 changes: 21 additions & 12 deletions bolt/lib/Passes/MCF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@

#include "bolt/Passes/MCF.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Passes/DataflowInfoManager.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CommandLine.h"
#include <algorithm>
#include <vector>
Expand All @@ -29,19 +31,10 @@ namespace opts {

extern cl::OptionCategory BoltOptCategory;

extern cl::opt<bool> TimeOpts;

static cl::opt<bool> IterativeGuess(
"iterative-guess",
cl::desc("in non-LBR mode, guess edge counts using iterative technique"),
cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool> UseRArcs(
"mcf-use-rarcs",
cl::desc("in MCF, consider the possibility of cancelling flow to balance "
"edges"),
cl::Hidden, cl::cat(BoltOptCategory));

} // namespace opts

namespace llvm {
Expand Down Expand Up @@ -441,7 +434,7 @@ void equalizeBBCounts(DataflowInfoManager &Info, BinaryFunction &BF) {
}
}

void estimateEdgeCounts(BinaryFunction &BF) {
void EstimateEdgeCounts::runOnFunction(BinaryFunction &BF) {
EdgeWeightMap PredEdgeWeights;
EdgeWeightMap SuccEdgeWeights;
if (!opts::IterativeGuess) {
Expand All @@ -462,8 +455,24 @@ void estimateEdgeCounts(BinaryFunction &BF) {
recalculateBBCounts(BF, /*AllEdges=*/false);
}

void solveMCF(BinaryFunction &BF, MCFCostFunction CostFunction) {
llvm_unreachable("not implemented");
/// Run runOnFunction() on every function whose profile flags equal
/// BinaryFunction::PF_SAMPLE, dispatching the work through
/// ParallelUtilities::runOnEachFunction. Returns success immediately when no
/// function in \p BC has such a profile.
Error EstimateEdgeCounts::runOnFunctions(BinaryContext &BC) {
  auto HasSampleProfile = [](const BinaryFunction &BF) {
    return BF.getProfileFlags() == BinaryFunction::PF_SAMPLE;
  };

  // Nothing to do unless at least one function carries a PF_SAMPLE profile.
  if (!llvm::any_of(llvm::make_second_range(BC.getBinaryFunctions()),
                    HasSampleProfile))
    return Error::success();

  ParallelUtilities::WorkFuncTy EstimateCounts = [&](BinaryFunction &BF) {
    runOnFunction(BF);
  };
  // Every function without a PF_SAMPLE profile is skipped.
  ParallelUtilities::PredicateTy ShouldSkip =
      [HasSampleProfile](const BinaryFunction &BF) {
        return !HasSampleProfile(BF);
      };

  ParallelUtilities::runOnEachFunction(
      BC, ParallelUtilities::SchedulingPolicy::SP_BB_QUADRATIC, EstimateCounts,
      ShouldSkip, "EstimateEdgeCounts");
  return Error::success();
}

} // namespace bolt
Expand Down
8 changes: 4 additions & 4 deletions bolt/lib/Passes/ValidateMemRefs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ bool ValidateMemRefs::checkAndFixJTReference(BinaryFunction &BF, MCInst &Inst,
if (!BD)
return false;

const uint64_t TargetAddress = BD->getAddress() + Offset;
JumpTable *JT = BC.getJumpTableContainingAddress(TargetAddress);
JumpTable *JT = BC.getJumpTableContainingAddress(BD->getAddress());
if (!JT)
return false;

Expand All @@ -43,8 +42,9 @@ bool ValidateMemRefs::checkAndFixJTReference(BinaryFunction &BF, MCInst &Inst,
// the jump table label with a regular rodata reference. Get a
// non-JT reference by fetching the symbol 1 byte before the JT
// label.
MCSymbol *NewSym = BC.getOrCreateGlobalSymbol(TargetAddress - 1, "DATAat");
BC.MIB->setOperandToSymbolRef(Inst, OperandNum, NewSym, 1, &*BC.Ctx, 0);
MCSymbol *NewSym = BC.getOrCreateGlobalSymbol(BD->getAddress() - 1, "DATAat");
BC.MIB->setOperandToSymbolRef(Inst, OperandNum, NewSym, Offset + 1, &*BC.Ctx,
0);
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: replaced reference @" << BF.getPrintName()
<< " from " << BD->getName() << " to " << NewSym->getName()
<< " + 1\n");
Expand Down
50 changes: 29 additions & 21 deletions bolt/lib/Profile/BoltAddressTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,9 @@ namespace bolt {

const char *BoltAddressTranslation::SECTION_NAME = ".note.bolt_bat";

void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
const BinaryBasicBlock &BB,
uint64_t FuncInputAddress,
uint64_t FuncOutputAddress) {
void BoltAddressTranslation::writeEntriesForBB(
MapTy &Map, const BinaryBasicBlock &BB, uint64_t FuncInputAddress,
uint64_t FuncOutputAddress) const {
const uint64_t BBOutputOffset =
BB.getOutputAddressRange().first - FuncOutputAddress;
const uint32_t BBInputOffset = BB.getInputOffset();
Expand Down Expand Up @@ -55,7 +54,7 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
// and this deleted block will both share the same output address (the same
// key), and we need to map back. We choose here to privilege the successor by
// allowing it to overwrite the previously inserted key in the map.
Map[BBOutputOffset] = BBInputOffset << 1;
Map.emplace(BBOutputOffset, BBInputOffset << 1);

const auto &IOAddressMap =
BB.getFunction()->getBinaryContext().getIOAddressMap();
Expand All @@ -72,8 +71,7 @@ void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,

LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(OutputOffset) << " Val: "
<< Twine::utohexstr(InputOffset) << " (branch)\n");
Map.insert(std::pair<uint32_t, uint32_t>(OutputOffset,
(InputOffset << 1) | BRANCHENTRY));
Map.emplace(OutputOffset, (InputOffset << 1) | BRANCHENTRY);
}
}

Expand Down Expand Up @@ -108,6 +106,19 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
for (const BinaryBasicBlock *const BB :
Function.getLayout().getMainFragment())
writeEntriesForBB(Map, *BB, InputAddress, OutputAddress);
// Add entries for deleted blocks. They are still required for correct BB
// mapping of branches modified by SCTC. By convention, they would have the
// end of the function as output address.
const BBHashMapTy &BBHashMap = getBBHashMap(InputAddress);
if (BBHashMap.size() != Function.size()) {
const uint64_t EndOffset = Function.getOutputSize();
std::unordered_set<uint32_t> MappedInputOffsets;
for (const BinaryBasicBlock &BB : Function)
MappedInputOffsets.emplace(BB.getInputOffset());
for (const auto &[InputOffset, _] : BBHashMap)
if (!llvm::is_contained(MappedInputOffsets, InputOffset))
Map.emplace(EndOffset, InputOffset << 1);
}
Maps.emplace(Function.getOutputAddress(), std::move(Map));
ReverseMap.emplace(OutputAddress, InputAddress);

Expand Down Expand Up @@ -138,8 +149,8 @@ void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
<< " basic block hashes\n";
}

APInt BoltAddressTranslation::calculateBranchEntriesBitMask(MapTy &Map,
size_t EqualElems) {
APInt BoltAddressTranslation::calculateBranchEntriesBitMask(
MapTy &Map, size_t EqualElems) const {
APInt BitMask(alignTo(EqualElems, 8), 0);
size_t Index = 0;
for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
Expand Down Expand Up @@ -422,7 +433,7 @@ void BoltAddressTranslation::parseMaps(std::vector<uint64_t> &HotFuncs,
}
}

void BoltAddressTranslation::dump(raw_ostream &OS) {
void BoltAddressTranslation::dump(raw_ostream &OS) const {
const size_t NumTables = Maps.size();
OS << "BAT tables for " << NumTables << " functions:\n";
for (const auto &MapEntry : Maps) {
Expand All @@ -447,11 +458,15 @@ void BoltAddressTranslation::dump(raw_ostream &OS) {
OS << formatv(" hash: {0:x}", BBHashMap.getBBHash(Val));
OS << "\n";
}
if (IsHotFunction)
OS << "NumBlocks: " << NumBasicBlocksMap[Address] << '\n';
if (SecondaryEntryPointsMap.count(Address)) {
if (IsHotFunction) {
auto NumBasicBlocksIt = NumBasicBlocksMap.find(Address);
assert(NumBasicBlocksIt != NumBasicBlocksMap.end());
OS << "NumBlocks: " << NumBasicBlocksIt->second << '\n';
}
auto SecondaryEntryPointsIt = SecondaryEntryPointsMap.find(Address);
if (SecondaryEntryPointsIt != SecondaryEntryPointsMap.end()) {
const std::vector<uint32_t> &SecondaryEntryPoints =
SecondaryEntryPointsMap[Address];
SecondaryEntryPointsIt->second;
OS << SecondaryEntryPoints.size() << " secondary entry points:\n";
for (uint32_t EntryPointOffset : SecondaryEntryPoints)
OS << formatv("{0:x}\n", EntryPointOffset);
Expand Down Expand Up @@ -547,13 +562,6 @@ BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress,
return Res;
}

uint64_t BoltAddressTranslation::fetchParentAddress(uint64_t Address) const {
auto Iter = ColdPartSource.find(Address);
if (Iter == ColdPartSource.end())
return 0;
return Iter->second;
}

bool BoltAddressTranslation::enabledFor(
llvm::object::ELFObjectFileBase *InputFile) const {
for (const SectionRef &Section : InputFile->sections()) {
Expand Down
1 change: 0 additions & 1 deletion bolt/lib/Profile/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,5 @@ add_llvm_library(LLVMBOLTProfile
target_link_libraries(LLVMBOLTProfile
PRIVATE
LLVMBOLTCore
LLVMBOLTPasses
LLVMBOLTUtils
)
40 changes: 20 additions & 20 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,6 @@ Error DataAggregator::readProfile(BinaryContext &BC) {
if (std::error_code EC = writeBATYAML(BC, opts::SaveProfile))
report_error("cannot create output data file", EC);
}
BC.logBOLTErrorsAndQuitOnFatal(PrintProgramStats().runOnFunctions(BC));
}

return Error::success();
Expand Down Expand Up @@ -673,7 +672,8 @@ DataAggregator::getBATParentFunction(const BinaryFunction &Func) const {
return nullptr;
}

StringRef DataAggregator::getLocationName(const BinaryFunction &Func) const {
StringRef DataAggregator::getLocationName(const BinaryFunction &Func,
bool BAT) {
if (!BAT)
return Func.getOneName();

Expand Down Expand Up @@ -702,7 +702,7 @@ bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
auto I = NamesToSamples.find(Func.getOneName());
if (I == NamesToSamples.end()) {
bool Success;
StringRef LocName = getLocationName(Func);
StringRef LocName = getLocationName(Func, BAT);
std::tie(I, Success) = NamesToSamples.insert(
std::make_pair(Func.getOneName(),
FuncSampleData(LocName, FuncSampleData::ContainerTy())));
Expand All @@ -722,7 +722,7 @@ bool DataAggregator::doIntraBranch(BinaryFunction &Func, uint64_t From,
FuncBranchData *AggrData = getBranchData(Func);
if (!AggrData) {
AggrData = &NamesToBranches[Func.getOneName()];
AggrData->Name = getLocationName(Func);
AggrData->Name = getLocationName(Func, BAT);
setBranchData(Func, AggrData);
}

Expand All @@ -741,7 +741,7 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
StringRef SrcFunc;
StringRef DstFunc;
if (FromFunc) {
SrcFunc = getLocationName(*FromFunc);
SrcFunc = getLocationName(*FromFunc, BAT);
FromAggrData = getBranchData(*FromFunc);
if (!FromAggrData) {
FromAggrData = &NamesToBranches[FromFunc->getOneName()];
Expand All @@ -752,7 +752,7 @@ bool DataAggregator::doInterBranch(BinaryFunction *FromFunc,
recordExit(*FromFunc, From, Mispreds, Count);
}
if (ToFunc) {
DstFunc = getLocationName(*ToFunc);
DstFunc = getLocationName(*ToFunc, BAT);
ToAggrData = getBranchData(*ToFunc);
if (!ToAggrData) {
ToAggrData = &NamesToBranches[ToFunc->getOneName()];
Expand Down Expand Up @@ -1227,7 +1227,7 @@ ErrorOr<Location> DataAggregator::parseLocationOrOffset() {
if (Sep == StringRef::npos)
return parseOffset();
StringRef LookAhead = ParsingBuf.substr(0, Sep);
if (LookAhead.find_first_of(":") == StringRef::npos)
if (!LookAhead.contains(':'))
return parseOffset();

ErrorOr<StringRef> BuildID = parseString(':');
Expand Down Expand Up @@ -1464,7 +1464,7 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
uint64_t To = getBinaryFunctionContainingAddress(LBR.To) ? LBR.To : 0;
if (!From && !To)
continue;
BranchInfo &Info = BranchLBRs[Trace(From, To)];
TakenBranchInfo &Info = BranchLBRs[Trace(From, To)];
++Info.TakenCount;
Info.MispredCount += LBR.Mispred;
}
Expand Down Expand Up @@ -1609,7 +1609,7 @@ void DataAggregator::processBranchEvents() {

for (const auto &AggrLBR : BranchLBRs) {
const Trace &Loc = AggrLBR.first;
const BranchInfo &Info = AggrLBR.second;
const TakenBranchInfo &Info = AggrLBR.second;
doBranch(Loc.From, Loc.To, Info.TakenCount, Info.MispredCount);
}
}
Expand Down Expand Up @@ -2253,13 +2253,13 @@ DataAggregator::writeAggregatedFile(StringRef OutputFilename) const {
} else {
for (const auto &KV : NamesToBranches) {
const FuncBranchData &FBD = KV.second;
for (const llvm::bolt::BranchInfo &BI : FBD.Data) {
for (const BranchInfo &BI : FBD.Data) {
writeLocation(BI.From);
writeLocation(BI.To);
OutFile << BI.Mispreds << " " << BI.Branches << "\n";
++BranchValues;
}
for (const llvm::bolt::BranchInfo &BI : FBD.EntryData) {
for (const BranchInfo &BI : FBD.EntryData) {
// Do not output if source is a known symbol, since this was already
// accounted for in the source function
if (BI.From.IsSymbol)
Expand Down Expand Up @@ -2340,7 +2340,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
continue;
BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncAddress);
assert(BF);
YamlBF.Name = getLocationName(*BF);
YamlBF.Name = getLocationName(*BF, BAT);
YamlBF.Id = BF->getFunctionNumber();
YamlBF.Hash = BAT->getBFHash(FuncAddress);
YamlBF.ExecCount = BF->getKnownExecutionCount();
Expand All @@ -2349,11 +2349,11 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
BAT->getBBHashMap(FuncAddress);
YamlBF.Blocks.resize(YamlBF.NumBasicBlocks);

for (auto &&[Idx, YamlBB] : llvm::enumerate(YamlBF.Blocks))
YamlBB.Index = Idx;

for (auto BI = BlockMap.begin(), BE = BlockMap.end(); BI != BE; ++BI)
YamlBF.Blocks[BI->second.getBBIndex()].Hash = BI->second.getBBHash();
for (auto &&[Entry, YamlBB] : llvm::zip(BlockMap, YamlBF.Blocks)) {
const auto &Block = Entry.second;
YamlBB.Hash = Block.Hash;
YamlBB.Index = Block.Index;
}

// Lookup containing basic block offset and index
auto getBlock = [&BlockMap](uint32_t Offset) {
Expand All @@ -2363,10 +2363,10 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
exit(1);
}
--BlockIt;
return std::pair(BlockIt->first, BlockIt->second.getBBIndex());
return std::pair(BlockIt->first, BlockIt->second.Index);
};

for (const llvm::bolt::BranchInfo &BI : Branches.Data) {
for (const BranchInfo &BI : Branches.Data) {
using namespace yaml::bolt;
const auto &[BlockOffset, BlockIndex] = getBlock(BI.From.Offset);
BinaryBasicBlockProfile &YamlBB = YamlBF.Blocks[BlockIndex];
Expand All @@ -2388,7 +2388,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
}
}
// Set entry counts, similar to DataReader::readProfile.
for (const llvm::bolt::BranchInfo &BI : Branches.EntryData) {
for (const BranchInfo &BI : Branches.EntryData) {
if (!BlockMap.isInputBlock(BI.To.Offset)) {
if (opts::Verbosity >= 1)
errs() << "BOLT-WARNING: Unexpected EntryData in " << FuncName
Expand Down
2 changes: 0 additions & 2 deletions bolt/lib/Profile/DataReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -598,8 +598,6 @@ void DataReader::readSampleData(BinaryFunction &BF) {
}

BF.ExecutionCount = TotalEntryCount;

estimateEdgeCounts(BF);
}

void DataReader::convertBranchData(BinaryFunction &BF) const {
Expand Down
12 changes: 10 additions & 2 deletions bolt/lib/Profile/StaleProfileMatching.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "llvm/ADT/Bitfields.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/xxhash.h"
#include "llvm/Transforms/Utils/SampleProfileInference.h"

Expand All @@ -42,6 +43,7 @@ using namespace llvm;

namespace opts {

extern cl::opt<bool> TimeRewrite;
extern cl::OptionCategory BoltOptCategory;

cl::opt<bool>
Expand Down Expand Up @@ -372,8 +374,10 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {

// Create necessary metadata for the flow function
for (FlowJump &Jump : Func.Jumps) {
Func.Blocks.at(Jump.Source).SuccJumps.push_back(&Jump);
Func.Blocks.at(Jump.Target).PredJumps.push_back(&Jump);
assert(Jump.Source < Func.Blocks.size());
Func.Blocks[Jump.Source].SuccJumps.push_back(&Jump);
assert(Jump.Target < Func.Blocks.size());
Func.Blocks[Jump.Target].PredJumps.push_back(&Jump);
}
return Func;
}
Expand Down Expand Up @@ -705,6 +709,10 @@ void assignProfile(BinaryFunction &BF,

bool YAMLProfileReader::inferStaleProfile(
BinaryFunction &BF, const yaml::bolt::BinaryFunctionProfile &YamlBF) {

NamedRegionTimer T("inferStaleProfile", "stale profile inference", "rewrite",
"Rewrite passes", opts::TimeRewrite);

if (!BF.hasCFG())
return false;

Expand Down
20 changes: 12 additions & 8 deletions bolt/lib/Profile/YAMLProfileReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,17 @@ bool YAMLProfileReader::parseFunctionProfile(
FuncRawBranchCount += YamlSI.Count;
BF.setRawBranchCount(FuncRawBranchCount);

if (!opts::IgnoreHash &&
YamlBF.Hash != BF.computeHash(IsDFSOrder, HashFunction)) {
if (opts::Verbosity >= 1)
errs() << "BOLT-WARNING: function hash mismatch\n";
ProfileMatched = false;
if (BF.empty())
return true;

if (!opts::IgnoreHash) {
if (!BF.getHash())
BF.computeHash(IsDFSOrder, HashFunction);
if (YamlBF.Hash != BF.getHash()) {
if (opts::Verbosity >= 1)
errs() << "BOLT-WARNING: function hash mismatch\n";
ProfileMatched = false;
}
}

if (YamlBF.NumBasicBlocks != BF.size()) {
Expand Down Expand Up @@ -250,10 +256,8 @@ bool YAMLProfileReader::parseFunctionProfile(
if (BB.getExecutionCount() == BinaryBasicBlock::COUNT_NO_PROFILE)
BB.setExecutionCount(0);

if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE) {
if (YamlBP.Header.Flags & BinaryFunction::PF_SAMPLE)
BF.setExecutionCount(FunctionExecutionCount);
estimateEdgeCounts(BF);
}

ProfileMatched &= !MismatchedBlocks && !MismatchedCalls && !MismatchedEdges;

Expand Down
10 changes: 9 additions & 1 deletion bolt/lib/Profile/YAMLProfileWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Profile/BoltAddressTranslation.h"
#include "bolt/Profile/DataAggregator.h"
#include "bolt/Profile/ProfileReaderBase.h"
#include "bolt/Rewrite/RewriteInstance.h"
#include "llvm/Support/CommandLine.h"
Expand Down Expand Up @@ -39,6 +40,10 @@ const BinaryFunction *YAMLProfileWriter::setCSIDestination(
BC.getFunctionForSymbol(Symbol, &EntryID)) {
if (BAT && BAT->isBATFunction(Callee->getAddress()))
std::tie(Callee, EntryID) = BAT->translateSymbol(BC, *Symbol, Offset);
else if (const BinaryBasicBlock *BB =
Callee->getBasicBlockContainingOffset(Offset))
BC.getFunctionForSymbol(Callee->getSecondaryEntryPointSymbol(*BB),
&EntryID);
CSI.DestId = Callee->getFunctionNumber();
CSI.EntryDiscriminator = EntryID;
return Callee;
Expand All @@ -59,7 +64,7 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
BF.computeHash(UseDFS);
BF.computeBlockHashes();

YamlBF.Name = BF.getPrintName();
YamlBF.Name = DataAggregator::getLocationName(BF, BAT);
YamlBF.Id = BF.getFunctionNumber();
YamlBF.Hash = BF.getHash();
YamlBF.NumBasicBlocks = BF.size();
Expand All @@ -69,6 +74,9 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
llvm::copy(UseDFS ? BF.dfs() : BF.getLayout().blocks(),
std::back_inserter(Order));

const FunctionLayout Layout = BF.getLayout();
Layout.updateLayoutIndices(Order);

for (const BinaryBasicBlock *BB : Order) {
yaml::bolt::BinaryBasicBlockProfile YamlBB;
YamlBB.Index = BB->getLayoutIndex();
Expand Down
19 changes: 13 additions & 6 deletions bolt/lib/Rewrite/BinaryPassManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include "bolt/Passes/JTFootprintReduction.h"
#include "bolt/Passes/LongJmp.h"
#include "bolt/Passes/LoopInversionPass.h"
#include "bolt/Passes/MCF.h"
#include "bolt/Passes/PLTCall.h"
#include "bolt/Passes/PatchEntries.h"
#include "bolt/Passes/RegReAssign.h"
Expand Down Expand Up @@ -90,6 +91,11 @@ PrintAfterLowering("print-after-lowering",
cl::desc("print function after instruction lowering"),
cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool> PrintEstimateEdgeCounts(
"print-estimate-edge-counts",
cl::desc("print function after edge counts are set for no-LBR profile"),
cl::Hidden, cl::cat(BoltOptCategory));

cl::opt<bool>
PrintFinalized("print-finalized",
cl::desc("print function after CFG is finalized"),
Expand Down Expand Up @@ -334,8 +340,10 @@ Error BinaryFunctionPassManager::runPasses() {
Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
BinaryFunctionPassManager Manager(BC);

const DynoStats InitialDynoStats =
getDynoStats(BC.getBinaryFunctions(), BC.isAArch64());
Manager.registerPass(
std::make_unique<EstimateEdgeCounts>(PrintEstimateEdgeCounts));

Manager.registerPass(std::make_unique<DynoStatsSetPass>());

Manager.registerPass(std::make_unique<AsmDumpPass>(),
opts::AsmDump.getNumOccurrences());
Expand Down Expand Up @@ -447,10 +455,9 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));

// Print final dyno stats right while CFG and instruction analysis are intact.
Manager.registerPass(
std::make_unique<DynoStatsPrintPass>(
InitialDynoStats, "after all optimizations before SCTC and FOP"),
opts::PrintDynoStats || opts::DynoStatsAll);
Manager.registerPass(std::make_unique<DynoStatsPrintPass>(
"after all optimizations before SCTC and FOP"),
opts::PrintDynoStats || opts::DynoStatsAll);

// Add the StokeInfo pass, which extract functions for stoke optimization and
// get the liveness information for them
Expand Down
113 changes: 113 additions & 0 deletions bolt/lib/Rewrite/BuildIDRewriter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
//===- bolt/Rewrite/BuildIDRewriter.cpp -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Read and update build ID stored in ELF note section.
//
//===----------------------------------------------------------------------===//

#include "bolt/Rewrite/MetadataRewriter.h"
#include "bolt/Rewrite/MetadataRewriters.h"
#include "llvm/Support/Errc.h"

using namespace llvm;
using namespace bolt;

namespace {

/// Render the raw build-id bytes (typically 20 of them) as a hexadecimal
/// string, emitting exactly two lowercase hex digits per input byte.
std::string getPrintableBuildID(StringRef BuildID) {
  std::string HexStr;
  raw_string_ostream Stream(HexStr);
  for (size_t Idx = 0, End = BuildID.size(); Idx != End; ++Idx) {
    // Convert through unsigned char so that bytes >= 0x80 are not
    // sign-extended when promoted for formatting.
    const auto Byte = static_cast<unsigned char>(BuildID[Idx]);
    Stream << format("%.2x", Byte);
  }

  return Stream.str();
}

/// Metadata rewriter that records where the build ID lives while sections are
/// being read (sectionInitializer) and patches it after emission
/// (postEmitFinalizer).
class BuildIDRewriter final : public MetadataRewriter {

/// Information about binary build ID.
// Section containing the build ID note. Starts out holding an error code and
// is only assigned a section once sectionInitializer() finds a build ID.
ErrorOr<BinarySection &> BuildIDSection{std::errc::bad_address};
// Descriptor bytes of the build ID note, i.e. the raw build ID. This is a
// view into the note section contents, not a copy.
StringRef BuildID;
// Offset of the descriptor from the start of the note section contents.
std::optional<uint64_t> BuildIDOffset;
// Descriptor size as read from the note header.
std::optional<uint64_t> BuildIDSize;

public:
BuildIDRewriter(StringRef Name, BinaryContext &BC)
: MetadataRewriter(Name, BC) {}

// Scans note sections for the GNU build ID and records its location.
Error sectionInitializer() override;

// Flips the last bit of the recorded build ID in the output.
Error postEmitFinalizer() override;
};

/// Locate the GNU build ID by scanning every note section of the binary.
///
/// On the first NT_GNU_BUILD_ID note whose name starts with "GNU" and whose
/// descriptor is non-empty, remember the containing section, the descriptor
/// bytes and their offset/size within the section contents, and publish the
/// printable build ID through BinaryContext::setFileBuildID(). Not finding a
/// build ID is not an error.
///
/// \returns an error if a note header cannot be read, or if the build ID
/// descriptor extends past the end of its section.
Error BuildIDRewriter::sectionInitializer() {
  // Typically, build ID will reside in .note.gnu.build-id section. However,
  // a linker script can change the section name and such is the case with
  // the Linux kernel. Hence, we iterate over all note sections.
  for (BinarySection &NoteSection : BC.sections()) {
    if (!NoteSection.isNote())
      continue;

    StringRef Buf = NoteSection.getContents();
    DataExtractor DE = DataExtractor(Buf, BC.AsmInfo->isLittleEndian(),
                                     BC.AsmInfo->getCodePointerSize());
    DataExtractor::Cursor Cursor(0);
    while (Cursor && !DE.eof(Cursor)) {
      // Note header: name size, descriptor size, and note type.
      const uint32_t NameSz = DE.getU32(Cursor);
      const uint32_t DescSz = DE.getU32(Cursor);
      const uint32_t Type = DE.getU32(Cursor);

      // The name and the descriptor are each padded to a 4-byte boundary.
      StringRef Name =
          NameSz ? Buf.slice(Cursor.tell(), Cursor.tell() + NameSz) : "<empty>";
      Cursor.seek(alignTo(Cursor.tell() + NameSz, 4));

      const uint64_t DescOffset = Cursor.tell();
      StringRef Desc =
          DescSz ? Buf.slice(DescOffset, DescOffset + DescSz) : "<empty>";
      Cursor.seek(alignTo(DescOffset + DescSz, 4));

      if (!Cursor)
        return createStringError(errc::executable_format_error,
                                 "out of bounds while reading note section: %s",
                                 toString(Cursor.takeError()).c_str());

      if (Type == ELF::NT_GNU_BUILD_ID && Name.substr(0, 3) == "GNU" &&
          DescSz) {
        // StringRef::slice() clamps to the buffer and Cursor::seek() does not
        // report overruns, so a descriptor that runs past the end of the
        // section shows up here as a short (possibly empty) Desc. Reject it
        // rather than recording a truncated build ID whose size disagrees
        // with DescSz.
        if (Desc.size() != DescSz)
          return createStringError(
              errc::executable_format_error,
              "build ID descriptor extends past the end of the note section");

        BuildIDSection = NoteSection;
        BuildID = Desc;
        BC.setFileBuildID(getPrintableBuildID(Desc));
        BuildIDOffset = DescOffset;
        BuildIDSize = DescSz;

        return Error::success();
      }
    }
  }

  return Error::success();
}

/// Make the output's build ID differ from the input's by flipping the lowest
/// bit of its last byte.
///
/// Does nothing when no build ID section/offset was recorded or when the
/// recorded build ID is empty. Otherwise registers a one-byte patch on the
/// build ID section via addPatch() and logs an informational message.
///
/// \returns Error::success() in all cases.
Error BuildIDRewriter::postEmitFinalizer() {
  // The emptiness check keeps the last-byte access below in bounds; without
  // it, BuildID.size() - 1 would wrap around for an empty build ID.
  if (!BuildIDSection || !BuildIDOffset || BuildID.empty())
    return Error::success();

  const uint8_t LastByte = BuildID.back();
  SmallVector<char, 1> Patch = {static_cast<char>(LastByte ^ 1)};
  // BuildIDOffset is relative to the section contents, so the last byte of the
  // build ID sits at BuildIDOffset + size - 1.
  BuildIDSection->addPatch(*BuildIDOffset + BuildID.size() - 1, Patch);
  BC.outs() << "BOLT-INFO: patched build-id (flipped last bit)\n";

  return Error::success();
}
} // namespace

/// Factory for the build ID rewriter; the rewriter is registered under the
/// name "build-id-rewriter".
std::unique_ptr<MetadataRewriter>
llvm::bolt::createBuildIDRewriter(BinaryContext &BC) {
  std::unique_ptr<MetadataRewriter> Rewriter =
      std::make_unique<BuildIDRewriter>("build-id-rewriter", BC);
  return Rewriter;
}
1 change: 1 addition & 0 deletions bolt/lib/Rewrite/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ add_llvm_library(LLVMBOLTRewrite
LinuxKernelRewriter.cpp
MachORewriteInstance.cpp
MetadataManager.cpp
BuildIDRewriter.cpp
PseudoProbeRewriter.cpp
RewriteInstance.cpp
SDTRewriter.cpp
Expand Down
5 changes: 2 additions & 3 deletions bolt/lib/Rewrite/DWARFRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,7 @@ static void printDie(DWARFUnit &DU, uint64_t DIEOffset) {
DWARFDataExtractor DebugInfoData = DU.getDebugInfoExtractor();
DWARFDebugInfoEntry DIEEntry;
if (DIEEntry.extractFast(DU, &DIEOffset, DebugInfoData, NextCUOffset, 0)) {
if (const DWARFAbbreviationDeclaration *AbbrDecl =
DIEEntry.getAbbreviationDeclarationPtr()) {
if (DIEEntry.getAbbreviationDeclarationPtr()) {
DWARFDie DDie(&DU, &DIEEntry);
printDie(DDie);
} else {
Expand Down Expand Up @@ -353,7 +352,7 @@ static cl::opt<bool> CreateDebugNames(

static cl::opt<bool>
DebugSkeletonCu("debug-skeleton-cu",
cl::desc("prints out offsetrs for abbrev and debu_info of "
cl::desc("prints out offsets for abbrev and debug_info of "
"Skeleton CUs that get patched."),
cl::ZeroOrMore, cl::Hidden, cl::init(false),
cl::cat(BoltCategory));
Expand Down
2 changes: 1 addition & 1 deletion bolt/lib/Rewrite/LinuxKernelRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ void LinuxKernelRewriter::processLKKSymtab(bool IsGPL) {

for (uint64_t I = 0; I < SectionSize; I += 4) {
const uint64_t EntryAddress = SectionAddress + I;
ErrorOr<uint64_t> Offset = BC.getSignedValueAtAddress(EntryAddress, 4);
ErrorOr<int64_t> Offset = BC.getSignedValueAtAddress(EntryAddress, 4);
assert(Offset && "Reading valid PC-relative offset for a ksymtab entry");
const int32_t SignedOffset = *Offset;
const uint64_t RefAddress = EntryAddress + SignedOffset;
Expand Down
12 changes: 12 additions & 0 deletions bolt/lib/Rewrite/MetadataManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,18 @@ void MetadataManager::registerRewriter(
Rewriters.emplace_back(std::move(Rewriter));
}

void MetadataManager::runSectionInitializers() {
for (auto &Rewriter : Rewriters) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invoking " << Rewriter->getName()
<< " after reading sections\n");
if (Error E = Rewriter->sectionInitializer()) {
errs() << "BOLT-ERROR: while running " << Rewriter->getName()
<< " after reading sections: " << toString(std::move(E)) << '\n';
exit(1);
}
}
}

void MetadataManager::runInitializersPreCFG() {
for (auto &Rewriter : Rewriters) {
LLVM_DEBUG(dbgs() << "BOLT-DEBUG: invoking " << Rewriter->getName()
Expand Down
187 changes: 59 additions & 128 deletions bolt/lib/Rewrite/RewriteInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "bolt/Core/MCPlusBuilder.h"
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Core/Relocation.h"
#include "bolt/Passes/BinaryPasses.h"
#include "bolt/Passes/CacheMetrics.h"
#include "bolt/Passes/ReorderFunctions.h"
#include "bolt/Profile/BoltAddressTranslation.h"
Expand Down Expand Up @@ -54,7 +55,6 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
Expand Down Expand Up @@ -86,6 +86,7 @@ extern cl::list<std::string> ReorderData;
extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
extern cl::opt<bool> TerminalTrap;
extern cl::opt<bool> TimeBuild;
extern cl::opt<bool> TimeRewrite;

cl::opt<bool> AllowStripped("allow-stripped",
cl::desc("allow processing of stripped binaries"),
Expand Down Expand Up @@ -235,11 +236,6 @@ UseGnuStack("use-gnu-stack",
cl::ZeroOrMore,
cl::cat(BoltCategory));

static cl::opt<bool>
TimeRewrite("time-rewrite",
cl::desc("print time spent in rewriting passes"), cl::Hidden,
cl::cat(BoltCategory));

static cl::opt<bool>
SequentialDisassembly("sequential-disassembly",
cl::desc("performs disassembly sequentially"),
Expand Down Expand Up @@ -647,82 +643,6 @@ Error RewriteInstance::discoverStorage() {
return Error::success();
}

void RewriteInstance::parseBuildID() {
if (!BuildIDSection)
return;

StringRef Buf = BuildIDSection->getContents();

// Reading notes section (see Portable Formats Specification, Version 1.1,
// pg 2-5, section "Note Section").
DataExtractor DE =
DataExtractor(Buf,
/*IsLittleEndian=*/true, InputFile->getBytesInAddress());
uint64_t Offset = 0;
if (!DE.isValidOffset(Offset))
return;
uint32_t NameSz = DE.getU32(&Offset);
if (!DE.isValidOffset(Offset))
return;
uint32_t DescSz = DE.getU32(&Offset);
if (!DE.isValidOffset(Offset))
return;
uint32_t Type = DE.getU32(&Offset);

LLVM_DEBUG(dbgs() << "NameSz = " << NameSz << "; DescSz = " << DescSz
<< "; Type = " << Type << "\n");

// Type 3 is a GNU build-id note section
if (Type != 3)
return;

StringRef Name = Buf.slice(Offset, Offset + NameSz);
Offset = alignTo(Offset + NameSz, 4);
if (Name.substr(0, 3) != "GNU")
return;

BuildID = Buf.slice(Offset, Offset + DescSz);
}

std::optional<std::string> RewriteInstance::getPrintableBuildID() const {
if (BuildID.empty())
return std::nullopt;

std::string Str;
raw_string_ostream OS(Str);
const unsigned char *CharIter = BuildID.bytes_begin();
while (CharIter != BuildID.bytes_end()) {
if (*CharIter < 0x10)
OS << "0";
OS << Twine::utohexstr(*CharIter);
++CharIter;
}
return OS.str();
}

void RewriteInstance::patchBuildID() {
raw_fd_ostream &OS = Out->os();

if (BuildID.empty())
return;

size_t IDOffset = BuildIDSection->getContents().rfind(BuildID);
assert(IDOffset != StringRef::npos && "failed to patch build-id");

uint64_t FileOffset = getFileOffsetForAddress(BuildIDSection->getAddress());
if (!FileOffset) {
BC->errs()
<< "BOLT-WARNING: Non-allocatable build-id will not be updated.\n";
return;
}

char LastIDByte = BuildID[BuildID.size() - 1];
LastIDByte ^= 1;
OS.pwrite(&LastIDByte, 1, FileOffset + IDOffset + BuildID.size() - 1);

BC->outs() << "BOLT-INFO: patched build-id (flipped last bit)\n";
}

Error RewriteInstance::run() {
assert(BC && "failed to create a binary context");

Expand Down Expand Up @@ -948,9 +868,6 @@ void RewriteInstance::discoverFileObjects() {
BinaryFunction *PreviousFunction = nullptr;
unsigned AnonymousId = 0;

// Regex object for matching cold fragments.
const Regex ColdFragment(".*\\.cold(\\.[0-9]+)?");

const auto SortedSymbolsEnd =
LastSymbol == SortedSymbols.end() ? LastSymbol : std::next(LastSymbol);
for (auto Iter = SortedSymbols.begin(); Iter != SortedSymbolsEnd; ++Iter) {
Expand Down Expand Up @@ -1232,7 +1149,7 @@ void RewriteInstance::discoverFileObjects() {
}

// Check if it's a cold function fragment.
if (ColdFragment.match(SymName)) {
if (FunctionFragmentTemplate.match(SymName)) {
static bool PrintedWarning = false;
if (!PrintedWarning) {
PrintedWarning = true;
Expand Down Expand Up @@ -1463,10 +1380,10 @@ void RewriteInstance::registerFragments() {
for (StringRef Name : Function.getNames()) {
StringRef BaseName = NR.restore(Name);
const bool IsGlobal = BaseName == Name;
const size_t ColdSuffixPos = BaseName.find(".cold");
if (ColdSuffixPos == StringRef::npos)
SmallVector<StringRef> Matches;
if (!FunctionFragmentTemplate.match(BaseName, &Matches))
continue;
StringRef ParentName = BaseName.substr(0, ColdSuffixPos);
StringRef ParentName = Matches[1];
const BinaryData *BD = BC->getBinaryDataByName(ParentName);
const uint64_t NumPossibleLocalParents =
NR.getUniquifiedNameCount(ParentName);
Expand Down Expand Up @@ -1500,7 +1417,7 @@ void RewriteInstance::registerFragments() {
if (!BC->hasSymbolsWithFileName()) {
BC->errs() << "BOLT-ERROR: input file has split functions but does not "
"have FILE symbols. If the binary was stripped, preserve "
"FILE symbols with --keep-file-symbols strip option";
"FILE symbols with --keep-file-symbols strip option\n";
exit(1);
}

Expand Down Expand Up @@ -1984,10 +1901,10 @@ Error RewriteInstance::readSpecialSections() {
".rela" + std::string(BC->getMainCodeSectionName()));
HasSymbolTable = (bool)BC->getUniqueSectionByName(".symtab");
EHFrameSection = BC->getUniqueSectionByName(".eh_frame");
BuildIDSection = BC->getUniqueSectionByName(".note.gnu.build-id");

if (ErrorOr<BinarySection &> BATSec =
BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) {
BC->HasBATSection = true;
// Do not read BAT when plotting a heatmap
if (!opts::HeatmapMode) {
if (std::error_code EC = BAT->parse(BC->outs(), BATSec->getContents())) {
Expand Down Expand Up @@ -2041,10 +1958,7 @@ Error RewriteInstance::readSpecialSections() {
report_error("expected valid eh_frame section", EHFrameOrError.takeError());
CFIRdWrt.reset(new CFIReaderWriter(*BC, *EHFrameOrError.get()));

// Parse build-id
parseBuildID();
if (std::optional<std::string> FileBuildID = getPrintableBuildID())
BC->setFileBuildID(*FileBuildID);
processSectionMetadata();

// Read .dynamic/PT_DYNAMIC.
return readELFDynamic();
Expand Down Expand Up @@ -3208,12 +3122,14 @@ void RewriteInstance::preprocessProfileData() {
if (Error E = ProfileReader->preprocessProfile(*BC.get()))
report_error("cannot pre-process profile", std::move(E));

if (!BC->hasSymbolsWithFileName() && ProfileReader->hasLocalsWithFileName()) {
if (!BC->hasSymbolsWithFileName() && ProfileReader->hasLocalsWithFileName() &&
!opts::AllowStripped) {
BC->errs()
<< "BOLT-ERROR: input binary does not have local file symbols "
"but profile data includes function names with embedded file "
"names. It appears that the input binary was stripped while a "
"profiled binary was not\n";
"profiled binary was not. If you know what you are doing and "
"wish to proceed, use -allow-stripped option.\n";
exit(1);
}
}
Expand All @@ -3222,14 +3138,20 @@ void RewriteInstance::initializeMetadataManager() {
if (BC->IsLinuxKernel)
MetadataManager.registerRewriter(createLinuxKernelRewriter(*BC));

MetadataManager.registerRewriter(createBuildIDRewriter(*BC));

MetadataManager.registerRewriter(createPseudoProbeRewriter(*BC));

MetadataManager.registerRewriter(createSDTRewriter(*BC));
}

void RewriteInstance::processMetadataPreCFG() {
void RewriteInstance::processSectionMetadata() {
initializeMetadataManager();

MetadataManager.runSectionInitializers();
}

void RewriteInstance::processMetadataPreCFG() {
MetadataManager.runInitializersPreCFG();

processProfileDataPreCFG();
Expand Down Expand Up @@ -3284,8 +3206,11 @@ void RewriteInstance::processProfileData() {
// Release memory used by profile reader.
ProfileReader.reset();

if (opts::AggregateOnly)
if (opts::AggregateOnly) {
PrintProgramStats PPS(&*BAT);
BC->logBOLTErrorsAndQuitOnFatal(PPS.runOnFunctions(*BC));
exit(0);
}
}

void RewriteInstance::disassembleFunctions() {
Expand Down Expand Up @@ -4808,6 +4733,40 @@ void RewriteInstance::updateELFSymbolTable(
// Create a new symbol based on the existing symbol.
ELFSymTy NewSymbol = Symbol;

// Handle special symbols based on their name.
Expected<StringRef> SymbolName = Symbol.getName(StringSection);
assert(SymbolName && "cannot get symbol name");

auto updateSymbolValue = [&](const StringRef Name,
std::optional<uint64_t> Value = std::nullopt) {
NewSymbol.st_value = Value ? *Value : getNewValueForSymbol(Name);
NewSymbol.st_shndx = ELF::SHN_ABS;
BC->outs() << "BOLT-INFO: setting " << Name << " to 0x"
<< Twine::utohexstr(NewSymbol.st_value) << '\n';
};

if (*SymbolName == "__hot_start" || *SymbolName == "__hot_end") {
if (opts::HotText) {
updateSymbolValue(*SymbolName);
++NumHotTextSymsUpdated;
}
goto registerSymbol;
}

if (*SymbolName == "__hot_data_start" || *SymbolName == "__hot_data_end") {
if (opts::HotData) {
updateSymbolValue(*SymbolName);
++NumHotDataSymsUpdated;
}
goto registerSymbol;
}

if (*SymbolName == "_end") {
if (NextAvailableAddress > Symbol.st_value)
updateSymbolValue(*SymbolName, NextAvailableAddress);
goto registerSymbol;
}

if (Function) {
// If the symbol matched a function that was not emitted, update the
// corresponding section index but otherwise leave it unchanged.
Expand Down Expand Up @@ -4904,33 +4863,7 @@ void RewriteInstance::updateELFSymbolTable(
}
}

// Handle special symbols based on their name.
Expected<StringRef> SymbolName = Symbol.getName(StringSection);
assert(SymbolName && "cannot get symbol name");

auto updateSymbolValue = [&](const StringRef Name,
std::optional<uint64_t> Value = std::nullopt) {
NewSymbol.st_value = Value ? *Value : getNewValueForSymbol(Name);
NewSymbol.st_shndx = ELF::SHN_ABS;
BC->outs() << "BOLT-INFO: setting " << Name << " to 0x"
<< Twine::utohexstr(NewSymbol.st_value) << '\n';
};

if (opts::HotText &&
(*SymbolName == "__hot_start" || *SymbolName == "__hot_end")) {
updateSymbolValue(*SymbolName);
++NumHotTextSymsUpdated;
}

if (opts::HotData && (*SymbolName == "__hot_data_start" ||
*SymbolName == "__hot_data_end")) {
updateSymbolValue(*SymbolName);
++NumHotDataSymsUpdated;
}

if (*SymbolName == "_end" && NextAvailableAddress > Symbol.st_value)
updateSymbolValue(*SymbolName, NextAvailableAddress);

registerSymbol:
if (IsDynSym)
Write((&Symbol - cantFail(Obj.symbols(&SymTabSection)).begin()) *
sizeof(ELFSymTy),
Expand Down Expand Up @@ -5765,8 +5698,6 @@ void RewriteInstance::rewriteFile() {
// Update symbol tables.
patchELFSymTabs();

patchBuildID();

if (opts::EnableBAT)
encodeBATSection();

Expand Down
Loading