26 changes: 19 additions & 7 deletions .github/workflows/pr-code-format.yml
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
name: "Check code formatting"

permissions:
contents: read

on:
pull_request_target:
pull_request:
branches:
- main

permissions:
pull-requests: write

jobs:
code_formatter:
runs-on: ubuntu-latest
Expand All @@ -31,12 +32,13 @@ jobs:
separator: ","
skip_initial_fetch: true

# We need to make sure that we aren't executing/using any code from the
# PR for security reasons as we're using pull_request_target. Checkout
# the target branch with the necessary files.
# We need to pull the script from the main branch, so that we ensure
# we get the latest version of this script.
- name: Fetch code formatting utils
uses: actions/checkout@v4
with:
repository: ${{ github.repository }}
ref: ${{ github.base_ref }}
sparse-checkout: |
llvm/utils/git/requirements_formatting.txt
llvm/utils/git/code-format-helper.py
Expand Down Expand Up @@ -75,10 +77,20 @@ jobs:
# to take advantage of the new --diff_from_common_commit option
# explicitly in code-format-helper.py and not have to diff starting at
# the merge base.
# Create an empty comments file so the pr-write job doesn't fail.
run: |
echo "[]" > comments &&
python ./code-format-tools/llvm/utils/git/code-format-helper.py \
--write-comment-to-file \
--token ${{ secrets.GITHUB_TOKEN }} \
--issue-number $GITHUB_PR_NUMBER \
--start-rev $(git merge-base $START_REV $END_REV) \
--end-rev $END_REV \
--changed-files "$CHANGED_FILES"
- uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 #v4.3.0
if: always()
with:
name: workflow-args
path: |
comments
2 changes: 1 addition & 1 deletion .github/workflows/release-lit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ jobs:
cd llvm/utils/lit
# Remove 'dev' suffix from lit version.
sed -i 's/ + "dev"//g' lit/__init__.py
python3 setup.py sdist
python3 setup.py sdist bdist_wheel
- name: Upload lit to test.pypi.org
uses: pypa/gh-action-pypi-publish@release/v1
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/scorecard.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
persist-credentials: false

- name: "Run analysis"
uses: ossf/scorecard-action@e38b1902ae4f44df626f11ba0734b14fb91f8f86 # v2.1.2
uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1
with:
results_file: results.sarif
results_format: sarif
Expand Down
71 changes: 42 additions & 29 deletions bolt/docs/BAT.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,21 @@ and [BoltAddressTranslation.cpp](/bolt/lib/Profile/BoltAddressTranslation.cpp).
### Layout
The general layout is as follows:
```
Hot functions table header
|------------------|
| Function entry |
| |--------------| |
| | OutOff InOff | |
| |--------------| |
~~~~~~~~~~~~~~~~~~~~
Hot functions table
Cold functions table
Cold functions table header
Functions table:
|------------------|
| Function entry |
| |--------------| |
| | OutOff InOff | |
| |--------------| |
~~~~~~~~~~~~~~~~~~~~
| |
| Address |
| translation |
| table |
| |
| Secondary entry |
| points |
|------------------|
```

### Functions table
Expand All @@ -74,30 +74,43 @@ internal offsets, and between hot and cold fragments, to better spread deltas
and save space.

Hot indices are delta encoded, implicitly starting at zero.
| Entry | Encoding | Description |
| ------ | ------| ----------- |
| `Address` | Continuous, Delta, ULEB128 | Function address in the output binary |
| `HotIndex` | Delta, ULEB128 | Cold functions only: index of corresponding hot function in hot functions table |
| `FuncHash` | 8b | Hot functions only: function hash for input function |
| `NumEntries` | ULEB128 | Number of address translation entries for a function |
| `EqualElems` | ULEB128 | Hot functions only: number of equal offsets in the beginning of a function |
| `BranchEntries` | Bitmask, `alignTo(EqualElems, 8)` bits | Hot functions only: if `EqualElems` is non-zero, bitmask denoting entries with `BRANCHENTRY` bit |

Function header is followed by `EqualElems` offsets (hot functions only) and
`NumEntries-EqualElems` (`NumEntries` for cold functions) pairs of offsets for
current function.
| Entry | Encoding | Description | Hot/Cold |
| ------ | ------| ----------- | ------ |
| `Address` | Continuous, Delta, ULEB128 | Function address in the output binary | Both |
| `HotIndex` | Delta, ULEB128 | Index of corresponding hot function in hot functions table | Cold |
| `FuncHash` | 8b | Function hash for input function | Hot |
| `NumBlocks` | ULEB128 | Number of basic blocks in the original function | Hot |
| `NumSecEntryPoints` | ULEB128 | Number of secondary entry points in the original function | Hot |
| `NumEntries` | ULEB128 | Number of address translation entries for a function | Both |
| `EqualElems` | ULEB128 | Number of equal offsets in the beginning of a function | Hot |
| `BranchEntries` | Bitmask, `alignTo(EqualElems, 8)` bits | If `EqualElems` is non-zero, bitmask denoting entries with `BRANCHENTRY` bit | Hot |

Function header is followed by *Address Translation Table* with `NumEntries`
total entries, and *Secondary Entry Points* table with `NumSecEntryPoints`
entries (hot functions only).

### Address translation table
Delta encoding means that only the difference with the previous corresponding
entry is encoded. Input offsets implicitly start at zero.
| Entry | Encoding | Description |
| ------ | ------| ----------- |
| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary |
| `InputOffset` | Optional, Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit |
| `BBHash` | Optional, 8b | Basic block entries only: basic block hash in input binary |
| Entry | Encoding | Description | Branch/BB |
| ------ | ------| ----------- | ------ |
| `OutputOffset` | Continuous, Delta, ULEB128 | Function offset in output binary | Both |
| `InputOffset` | Optional, Delta, SLEB128 | Function offset in input binary with `BRANCHENTRY` LSB bit | Both |
| `BBHash` | Optional, 8b | Basic block hash in input binary | BB |
| `BBIdx` | Optional, Delta, ULEB128 | Basic block index in input binary | BB |

For hot fragments, the table omits the first `EqualElems` input offsets
where the input offset equals output offset.

`BRANCHENTRY` bit denotes whether a given offset pair is a control flow source
(branch or call instruction). If not set, it signifies a control flow target
(basic block offset).
`InputOffset` is omitted when the input and output offsets are equal. In this
case, `BRANCHENTRY` bits are encoded separately in a `BranchEntries` bitvector.

### Secondary Entry Points table
The table is emitted for hot fragments only. It contains `NumSecEntryPoints`
offsets denoting secondary entry points, delta encoded, implicitly starting at zero.
| Entry | Encoding | Description |
| ----- | -------- | ----------- |
| `SecEntryPoint` | Delta, ULEB128 | Secondary entry point offset |
1 change: 0 additions & 1 deletion bolt/include/bolt/Core/AddressMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#ifndef BOLT_CORE_ADDRESS_MAP_H
#define BOLT_CORE_ADDRESS_MAP_H

#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCSymbol.h"

#include <optional>
Expand Down
3 changes: 2 additions & 1 deletion bolt/include/bolt/Core/BinaryContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,8 @@ class BinaryContext {

public:
static Expected<std::unique_ptr<BinaryContext>>
createBinaryContext(const ObjectFile *File, bool IsPIC,
createBinaryContext(Triple TheTriple, StringRef InputFileName,
SubtargetFeatures *Features, bool IsPIC,
std::unique_ptr<DWARFContext> DwCtx,
JournalingStreams Logger);

Expand Down
1 change: 0 additions & 1 deletion bolt/include/bolt/Core/BinaryData.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <string>
#include <vector>

Expand Down
1 change: 0 additions & 1 deletion bolt/include/bolt/Core/BinaryDomTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

#include "bolt/Core/BinaryBasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Support/GenericDomTreeConstruction.h"

namespace llvm {
namespace bolt {
Expand Down
16 changes: 13 additions & 3 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryDomTree.h"
#include "bolt/Core/BinaryLoop.h"
#include "bolt/Core/BinarySection.h"
#include "bolt/Core/DebugData.h"
Expand All @@ -51,7 +52,6 @@
#include <iterator>
#include <limits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

Expand Down Expand Up @@ -266,6 +266,7 @@ class BinaryFunction {
BinaryContext &BC;

std::unique_ptr<BinaryLoopInfo> BLI;
std::unique_ptr<BinaryDominatorTree> BDT;

/// All labels in the function that are referenced via relocations from
/// data objects. Typically these are jump table destinations and computed
Expand Down Expand Up @@ -838,6 +839,14 @@ class BinaryFunction {
/// stats.
void calculateMacroOpFusionStats();

/// Returns true if BinaryDominatorTree has been constructed for this function.
bool hasDomTree() const { return BDT != nullptr; }

/// Return the dominator tree built for this function. BDT is dereferenced
/// unconditionally, so this is only valid when hasDomTree() returns true.
BinaryDominatorTree &getDomTree() { return *BDT; }

/// Constructs DomTree for this function.
void constructDomTree();

/// Returns true if loop detection has been run for this function.
bool hasLoopInfo() const { return BLI != nullptr; }

Expand Down Expand Up @@ -1159,7 +1168,7 @@ class BinaryFunction {
/// Pass an offset of the entry point in the input binary and a corresponding
/// global symbol to the callback function.
///
/// Return true of all callbacks returned true, false otherwise.
/// Return true if all callbacks returned true, false otherwise.
bool forEachEntryPoint(EntryPointCallbackTy Callback) const;

/// Return MC symbol associated with the end of the function.
Expand Down Expand Up @@ -1393,7 +1402,8 @@ class BinaryFunction {

/// Return true if the function has CFI instructions
bool hasCFI() const {
return !FrameInstructions.empty() || !CIEFrameInstructions.empty();
return !FrameInstructions.empty() || !CIEFrameInstructions.empty() ||
IsInjected;
}

/// Return unique number associated with the function.
Expand Down
2 changes: 1 addition & 1 deletion bolt/include/bolt/Core/BinaryLoop.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#ifndef BOLT_CORE_BINARY_LOOP_H
#define BOLT_CORE_BINARY_LOOP_H

#include "llvm/Support/GenericLoopInfoImpl.h"
#include "llvm/Support/GenericLoopInfo.h"

namespace llvm {
namespace bolt {
Expand Down
1 change: 0 additions & 1 deletion bolt/include/bolt/Core/BinarySection.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include "bolt/Core/DebugData.h"
#include "bolt/Core/Relocation.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/MachO.h"
Expand Down
1 change: 0 additions & 1 deletion bolt/include/bolt/Core/DebugData.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
#include <mutex>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

Expand Down
13 changes: 12 additions & 1 deletion bolt/include/bolt/Core/DebugNames.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#ifndef BOLT_CORE_DEBUG_NAMES_H
#define BOLT_CORE_DEBUG_NAMES_H

#include "DebugData.h"
#include "bolt/Core/DebugData.h"
#include "llvm/CodeGen/AccelTable.h"

namespace llvm {
Expand Down Expand Up @@ -68,6 +68,16 @@ class DWARF5AcceleratorTable {
std::unique_ptr<DebugBufferVector> releaseBuffer() {
return std::move(FullTableBuffer);
}
/// Record \p Die as a DIE that is referenced from another CU. The DIE is
/// stored in CrossCUDies under its own offset.
void addCrossCUDie(const DIE *Die) {
  const uint64_t DieOffset = Die->getOffset();
  CrossCUDies.insert({DieOffset, Die});
}
/// Returns true if the DIE can generate an entry for a cross cu reference.
/// This only checks TAGs of a DIE because when this is invoked DIE might not
/// be fully constructed.
bool canGenerateEntryWithCrossCUReference(
const DWARFUnit &Unit, const DIE &Die,
const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec);

private:
BinaryContext &BC;
Expand Down Expand Up @@ -128,6 +138,7 @@ class DWARF5AcceleratorTable {
llvm::DenseMap<uint64_t, uint32_t> CUOffsetsToPatch;
// Contains a map of Entry ID to Entry relative offset.
llvm::DenseMap<uint64_t, uint32_t> EntryRelativeOffsets;
llvm::DenseMap<uint64_t, const DIE *> CrossCUDies;
/// Adds Unit to either CUList, LocalTUList or ForeignTUList.
/// Input Unit being processed, and DWO ID if Unit is being processed comes
/// from a DWO section.
Expand Down
1 change: 0 additions & 1 deletion bolt/include/bolt/Core/FunctionLayout.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include <iterator>
#include <utility>

namespace llvm {
namespace bolt {
Expand Down
2 changes: 0 additions & 2 deletions bolt/include/bolt/Core/MCPlus.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,8 @@
#ifndef BOLT_CORE_MCPLUS_H
#define BOLT_CORE_MCPLUS_H

#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Casting.h"
#include <vector>

namespace llvm {
Expand Down
6 changes: 3 additions & 3 deletions bolt/include/bolt/Core/MCPlusBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
#include "llvm/MC/MCExpr.h"
Expand All @@ -27,6 +28,7 @@
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/RWMutex.h"
Expand Down Expand Up @@ -533,9 +535,7 @@ class MCPlusBuilder {
return Analysis->isReturn(Inst);
}

virtual bool isTerminator(const MCInst &Inst) const {
return Analysis->isTerminator(Inst);
}
virtual bool isTerminator(const MCInst &Inst) const;

virtual bool isNoop(const MCInst &Inst) const {
llvm_unreachable("not implemented");
Expand Down
1 change: 0 additions & 1 deletion bolt/include/bolt/Passes/BinaryPasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include "bolt/Core/DynoStats.h"
#include "llvm/Support/CommandLine.h"
#include <atomic>
#include <map>
#include <set>
#include <string>
#include <unordered_set>
Expand Down
1 change: 0 additions & 1 deletion bolt/include/bolt/Passes/CacheMetrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
#ifndef BOLT_PASSES_CACHEMETRICS_H
#define BOLT_PASSES_CACHEMETRICS_H

#include <cstdint>
#include <vector>

namespace llvm {
Expand Down
1 change: 0 additions & 1 deletion bolt/include/bolt/Passes/DominatorAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

#include "bolt/Passes/DataflowAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Timer.h"

namespace opts {
extern llvm::cl::opt<bool> TimeOpts;
Expand Down
2 changes: 0 additions & 2 deletions bolt/include/bolt/Passes/ReachingDefOrUse.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@

#include "bolt/Passes/DataflowAnalysis.h"
#include "bolt/Passes/RegAnalysis.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Timer.h"
#include <optional>

namespace opts {
Expand Down
1 change: 0 additions & 1 deletion bolt/include/bolt/Passes/ReachingInsns.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

#include "bolt/Passes/DataflowAnalysis.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Timer.h"

namespace opts {
extern llvm::cl::opt<bool> TimeOpts;
Expand Down
1 change: 0 additions & 1 deletion bolt/include/bolt/Passes/ReorderUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#ifndef BOLT_PASSES_REORDER_UTILS_H
#define BOLT_PASSES_REORDER_UTILS_H

#include <memory>
#include <vector>

#include "llvm/ADT/BitVector.h"
Expand Down
137 changes: 127 additions & 10 deletions bolt/include/bolt/Profile/BoltAddressTranslation.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <unordered_map>

namespace llvm {
class MCSymbol;
class raw_ostream;

namespace object {
Expand Down Expand Up @@ -115,23 +116,23 @@ class BoltAddressTranslation {
/// Save function and basic block hashes used for metadata dump.
void saveMetadata(BinaryContext &BC);

/// Returns BB hash by function output address (after BOLT) and basic block
/// input offset.
size_t getBBHash(uint64_t FuncOutputAddress, uint32_t BBInputOffset) const;

/// Returns BF hash by function output address (after BOLT).
size_t getBFHash(uint64_t OutputAddress) const;

/// True if a given \p Address is a function with translation table entry.
bool isBATFunction(uint64_t Address) const { return Maps.count(Address); }

/// For a given \p Symbol in the output binary and known \p InputOffset
/// return a corresponding pair of parent BinaryFunction and secondary entry
/// point in it.
std::pair<const BinaryFunction *, unsigned>
translateSymbol(const BinaryContext &BC, const MCSymbol &Symbol,
uint32_t InputOffset) const;

private:
/// Helper to update \p Map by inserting one or more BAT entries reflecting
/// \p BB for function located at \p FuncAddress. At least one entry will be
/// emitted for the start of the BB. More entries may be emitted to cover
/// the location of calls or any instruction that may change control flow.
void writeEntriesForBB(MapTy &Map, const BinaryBasicBlock &BB,
uint64_t FuncAddress);
uint64_t FuncInputAddress, uint64_t FuncOutputAddress);

/// Write the serialized address translation table for a function.
template <bool Cold>
Expand All @@ -154,8 +155,15 @@ class BoltAddressTranslation {

std::map<uint64_t, MapTy> Maps;

using BBHashMap = std::unordered_map<uint32_t, size_t>;
std::unordered_map<uint64_t, std::pair<size_t, BBHashMap>> FuncHashes;
/// Map a function to its basic blocks count
std::unordered_map<uint64_t, size_t> NumBasicBlocksMap;

/// Map a function to its secondary entry points vector
std::unordered_map<uint64_t, std::vector<uint32_t>> SecondaryEntryPointsMap;

/// Return a secondary entry point ID for a function located at \p Address and
/// \p Offset within that function.
unsigned getSecondaryEntryPointId(uint64_t Address, uint32_t Offset) const;

/// Links outlined cold blocks to their original function
std::map<uint64_t, uint64_t> ColdPartSource;
Expand All @@ -166,6 +174,115 @@ class BoltAddressTranslation {
/// Identifies the address of a control-flow changing instruction in a
/// translation map entry
const static uint32_t BRANCHENTRY = 0x1;

public:
/// Lookup table keyed by basic block input offset. Each entry stores the
/// index and the hash of the basic block located at that offset.
class BBHashMapTy {
  /// Index/hash pair recorded for a single basic block.
  class EntryTy {
    unsigned Index;
    size_t Hash;

  public:
    EntryTy(unsigned BBIndex, size_t BBHash) : Index(BBIndex), Hash(BBHash) {}
    unsigned getBBIndex() const { return Index; }
    size_t getBBHash() const { return Hash; }
  };

  /// Entries ordered by basic block input offset.
  std::map<uint32_t, EntryTy> Map;

  /// Fetch the entry stored for \p BBInputOffset. The offset must have been
  /// added before; this is only checked by an assert.
  const EntryTy &getEntry(uint32_t BBInputOffset) const {
    const auto Found = Map.find(BBInputOffset);
    assert(Found != Map.end());
    return Found->second;
  }

public:
  /// Return true if an entry exists for \p InputOffset.
  bool isInputBlock(uint32_t InputOffset) const {
    return Map.find(InputOffset) != Map.end();
  }

  /// Return the index of the basic block at \p BBInputOffset.
  unsigned getBBIndex(uint32_t BBInputOffset) const {
    const EntryTy &Entry = getEntry(BBInputOffset);
    return Entry.getBBIndex();
  }

  /// Return the hash of the basic block at \p BBInputOffset.
  size_t getBBHash(uint32_t BBInputOffset) const {
    const EntryTy &Entry = getEntry(BBInputOffset);
    return Entry.getBBHash();
  }

  /// Register a basic block. An already existing entry for
  /// \p BBInputOffset is left unchanged.
  void addEntry(uint32_t BBInputOffset, unsigned BBIndex, size_t BBHash) {
    EntryTy Entry(BBIndex, BBHash);
    Map.emplace(BBInputOffset, Entry);
  }

  /// Number of basic blocks recorded in this map.
  size_t getNumBasicBlocks() const { return Map.size(); }

  /// Read-only iteration over the entries in increasing offset order.
  auto begin() const { return Map.begin(); }
  auto end() const { return Map.end(); }

  /// Iterator to the first entry whose offset is greater than \p Offset.
  auto upper_bound(uint32_t Offset) const { return Map.upper_bound(Offset); }
};

/// Map function output address to its hash and basic blocks hash map.
class FuncHashesTy {
  /// Hash of one function together with the hash map of its basic blocks.
  class EntryTy {
    size_t Hash;
    BBHashMapTy BBHashMap;

  public:
    size_t getBFHash() const { return Hash; }
    const BBHashMapTy &getBBHashMap() const { return BBHashMap; }
    EntryTy(size_t Hash) : Hash(Hash) {}
  };

  std::unordered_map<uint64_t, EntryTy> Map;

  /// Return the entry stored for \p FuncOutputAddress. The address must have
  /// been registered with addEntry(); this is only checked by an assert.
  const EntryTy &getEntry(uint64_t FuncOutputAddress) const {
    auto It = Map.find(FuncOutputAddress);
    assert(It != Map.end());
    return It->second;
  }

public:
  /// Return the function hash recorded for \p FuncOutputAddress.
  size_t getBFHash(uint64_t FuncOutputAddress) const {
    return getEntry(FuncOutputAddress).getBFHash();
  }

  /// Return the basic block hash map recorded for \p FuncOutputAddress.
  const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const {
    return getEntry(FuncOutputAddress).getBBHashMap();
  }

  /// Register a function with hash \p BFHash and an empty basic block hash
  /// map. An already existing entry for \p FuncOutputAddress is kept as is.
  void addEntry(uint64_t FuncOutputAddress, size_t BFHash) {
    Map.emplace(FuncOutputAddress, EntryTy(BFHash));
  }

  /// Number of functions recorded in this map.
  size_t getNumFunctions() const { return Map.size(); }

  /// Total number of basic blocks summed over all recorded functions.
  size_t getNumBasicBlocks() const {
    size_t NumBasicBlocks{0};
    for (const auto &I : Map)
      NumBasicBlocks += I.second.getBBHashMap().getNumBasicBlocks();
    return NumBasicBlocks;
  }
};

/// Returns BF hash by function output address (after BOLT).
size_t getBFHash(uint64_t FuncOutputAddress) const {
return FuncHashes.getBFHash(FuncOutputAddress);
}

/// Returns BBHashMap by function output address (after BOLT).
const BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) const {
return FuncHashes.getBBHashMap(FuncOutputAddress);
}

/// Non-const overload of getBBHashMap(): forwards to the const overload via
/// std::as_const and casts away the constness of the returned reference, so
/// the caller can modify the basic block hash map of the function.
BBHashMapTy &getBBHashMap(uint64_t FuncOutputAddress) {
return const_cast<BBHashMapTy &>(
std::as_const(*this).getBBHashMap(FuncOutputAddress));
}

/// Returns the number of basic blocks in a function.
/// \p OutputAddress must already be a key of NumBasicBlocksMap: the lookup
/// uses at(), which never inserts a missing key.
size_t getNumBasicBlocks(uint64_t OutputAddress) const {
return NumBasicBlocksMap.at(OutputAddress);
}

private:
FuncHashesTy FuncHashes;
};
} // namespace bolt

Expand Down
16 changes: 9 additions & 7 deletions bolt/include/bolt/Profile/DataAggregator.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,10 @@ class DataAggregator : public DataReader {
/// Aggregation statistics
uint64_t NumInvalidTraces{0};
uint64_t NumLongRangeTraces{0};
/// Specifies how many samples were recorded in cold areas if we are dealing
/// with profiling data collected in a bolted binary. For LBRs, incremented
/// for the source of the branch to avoid counting cold activity twice (one
/// for source and another for destination).
uint64_t NumColdSamples{0};

/// Looks into system PATH for Linux Perf and set up the aggregator to use it
Expand All @@ -245,14 +249,12 @@ class DataAggregator : public DataReader {
/// disassembled BinaryFunctions
BinaryFunction *getBinaryFunctionContainingAddress(uint64_t Address) const;

/// Perform BAT translation for a given \p Func and return the parent
/// BinaryFunction or nullptr.
BinaryFunction *getBATParentFunction(const BinaryFunction &Func) const;

/// Retrieve the location name to be used for samples recorded in \p Func.
/// If doing BAT translation, link cold parts to the hot part names (used by
/// the original binary). \p Count specifies how many samples were recorded
/// at that location, so we can tally total activity in cold areas if we are
/// dealing with profiling data collected in a bolted binary. For LBRs,
/// \p Count should only be used for the source of the branch to avoid
/// counting cold activity twice (one for source and another for destination).
StringRef getLocationName(BinaryFunction &Func, uint64_t Count);
StringRef getLocationName(const BinaryFunction &Func) const;

/// Semantic actions - parser hooks to interpret parsed perf samples
/// Register a sample (non-LBR mode), i.e. a new hit at \p Address
Expand Down
2 changes: 1 addition & 1 deletion bolt/include/bolt/Profile/ProfileReaderBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class ProfileReaderBase {
/// Return true if the function \p BF may have a profile available.
/// The result is based on the name(s) of the function alone and the profile
/// match is not guaranteed.
virtual bool mayHaveProfileData(const BinaryFunction &BF);
virtual bool mayHaveProfileData(const BinaryFunction &BF) { return true; }

/// Return true if the profile contains an entry for a local object
/// that has an associated file name.
Expand Down
1 change: 0 additions & 1 deletion bolt/include/bolt/Profile/ProfileYAMLMapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#define BOLT_PROFILE_PROFILEYAMLMAPPING_H

#include "bolt/Core/BinaryFunction.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/YAMLTraits.h"
#include <vector>

Expand Down
13 changes: 11 additions & 2 deletions bolt/include/bolt/Profile/YAMLProfileWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

namespace llvm {
namespace bolt {
class BoltAddressTranslation;
class RewriteInstance;

class YAMLProfileWriter {
Expand All @@ -31,8 +32,16 @@ class YAMLProfileWriter {
/// Save execution profile for that instance.
std::error_code writeProfile(const RewriteInstance &RI);

static yaml::bolt::BinaryFunctionProfile convert(const BinaryFunction &BF,
bool UseDFS);
static yaml::bolt::BinaryFunctionProfile
convert(const BinaryFunction &BF, bool UseDFS,
const BoltAddressTranslation *BAT = nullptr);

/// Set CallSiteInfo destination fields from \p Symbol and return a target
/// BinaryFunction for that symbol.
static const BinaryFunction *
setCSIDestination(const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI,
const MCSymbol *Symbol, const BoltAddressTranslation *BAT,
uint32_t Offset = 0);
};

} // namespace bolt
Expand Down
2 changes: 0 additions & 2 deletions bolt/include/bolt/Rewrite/DWARFRewriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@
#include <memory>
#include <mutex>
#include <optional>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <vector>

namespace llvm {
Expand Down
1 change: 0 additions & 1 deletion bolt/include/bolt/Rewrite/MetadataManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

#include "bolt/Rewrite/MetadataRewriter.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Error.h"

namespace llvm {
namespace bolt {
Expand Down
1 change: 0 additions & 1 deletion bolt/include/bolt/Rewrite/RewriteInstance.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#include "bolt/Core/Linker.h"
#include "bolt/Rewrite/MetadataManager.h"
#include "bolt/Utils/NameResolver.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
Expand Down
1 change: 0 additions & 1 deletion bolt/include/bolt/RuntimeLibs/RuntimeLibrary.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

#include "bolt/Core/Linker.h"
#include "llvm/ADT/StringRef.h"
#include <functional>
#include <vector>

namespace llvm {
Expand Down
1 change: 0 additions & 1 deletion bolt/include/bolt/Utils/NameShortener.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#define BOLT_UTILS_NAME_SHORTENER_H

#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"

namespace llvm {
namespace bolt {
Expand Down
41 changes: 20 additions & 21 deletions bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#include "bolt/Core/BinaryEmitter.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "bolt/Utils/NameResolver.h"
#include "bolt/Utils/Utils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
Expand All @@ -39,7 +38,6 @@
#include <algorithm>
#include <functional>
#include <iterator>
#include <numeric>
#include <unordered_set>

using namespace llvm;
Expand Down Expand Up @@ -162,28 +160,30 @@ BinaryContext::~BinaryContext() {

/// Create BinaryContext for the requested target architecture and target
/// triple.
Expected<std::unique_ptr<BinaryContext>>
BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
std::unique_ptr<DWARFContext> DwCtx,
JournalingStreams Logger) {
Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
Triple TheTriple, StringRef InputFileName, SubtargetFeatures *Features,
bool IsPIC, std::unique_ptr<DWARFContext> DwCtx, JournalingStreams Logger) {
StringRef ArchName = "";
std::string FeaturesStr = "";
switch (File->getArch()) {
switch (TheTriple.getArch()) {
case llvm::Triple::x86_64:
if (Features)
return createFatalBOLTError(
"x86_64 target does not use SubtargetFeatures");
ArchName = "x86-64";
FeaturesStr = "+nopl";
break;
case llvm::Triple::aarch64:
if (Features)
return createFatalBOLTError(
"AArch64 target does not use SubtargetFeatures");
ArchName = "aarch64";
FeaturesStr = "+all";
break;
case llvm::Triple::riscv64: {
ArchName = "riscv64";
Expected<SubtargetFeatures> Features = File->getFeatures();

if (auto E = Features.takeError())
return std::move(E);

if (!Features)
return createFatalBOLTError("RISCV target needs SubtargetFeatures");
// We rely on relaxation for some transformations (e.g., promoting all calls
// to PseudoCALL and then making JITLink relax them). Since the relax
// feature is not stored in the object file, we manually enable it.
Expand All @@ -196,12 +196,11 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
"BOLT-ERROR: Unrecognized machine in ELF file");
}

auto TheTriple = std::make_unique<Triple>(File->makeTriple());
const std::string TripleName = TheTriple->str();
const std::string TripleName = TheTriple.str();

std::string Error;
const Target *TheTarget =
TargetRegistry::lookupTarget(std::string(ArchName), *TheTriple, Error);
TargetRegistry::lookupTarget(std::string(ArchName), TheTriple, Error);
if (!TheTarget)
return createStringError(make_error_code(std::errc::not_supported),
Twine("BOLT-ERROR: ", Error));
Expand Down Expand Up @@ -240,13 +239,13 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
Twine("BOLT-ERROR: no instruction info for target ", TripleName));

std::unique_ptr<MCContext> Ctx(
new MCContext(*TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
new MCContext(TheTriple, AsmInfo.get(), MRI.get(), STI.get()));
std::unique_ptr<MCObjectFileInfo> MOFI(
TheTarget->createMCObjectFileInfo(*Ctx, IsPIC));
Ctx->setObjectFileInfo(MOFI.get());
// We do not support X86 Large code model. Change this in the future.
bool Large = false;
if (TheTriple->getArch() == llvm::Triple::aarch64)
if (TheTriple.getArch() == llvm::Triple::aarch64)
Large = true;
unsigned LSDAEncoding =
Large ? dwarf::DW_EH_PE_absptr : dwarf::DW_EH_PE_udata4;
Expand All @@ -273,7 +272,7 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,

int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
std::unique_ptr<MCInstPrinter> InstructionPrinter(
TheTarget->createMCInstPrinter(*TheTriple, AsmPrinterVariant, *AsmInfo,
TheTarget->createMCInstPrinter(TheTriple, AsmPrinterVariant, *AsmInfo,
*MII, *MRI));
if (!InstructionPrinter)
return createStringError(
Expand All @@ -285,8 +284,8 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
TheTarget->createMCCodeEmitter(*MII, *Ctx));

auto BC = std::make_unique<BinaryContext>(
std::move(Ctx), std::move(DwCtx), std::move(TheTriple), TheTarget,
std::string(TripleName), std::move(MCE), std::move(MOFI),
std::move(Ctx), std::move(DwCtx), std::make_unique<Triple>(TheTriple),
TheTarget, std::string(TripleName), std::move(MCE), std::move(MOFI),
std::move(AsmInfo), std::move(MII), std::move(STI),
std::move(InstructionPrinter), std::move(MIA), nullptr, std::move(MRI),
std::move(DisAsm), Logger);
Expand All @@ -296,7 +295,7 @@ BinaryContext::createBinaryContext(const ObjectFile *File, bool IsPIC,
BC->MAB = std::unique_ptr<MCAsmBackend>(
BC->TheTarget->createMCAsmBackend(*BC->STI, *BC->MRI, MCTargetOptions()));

BC->setFilename(File->getFileName());
BC->setFilename(InputFileName);

BC->HasFixedLoadAddress = !IsPIC;

Expand Down
20 changes: 13 additions & 7 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@

#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryDomTree.h"
#include "bolt/Core/DynoStats.h"
#include "bolt/Core/HashUtilities.h"
#include "bolt/Core/MCPlusBuilder.h"
Expand All @@ -35,6 +34,8 @@
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GenericDomTreeConstruction.h"
#include "llvm/Support/GenericLoopInfoImpl.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/Regex.h"
Expand Down Expand Up @@ -3547,7 +3548,7 @@ MCSymbol *BinaryFunction::getSymbolForEntryID(uint64_t EntryID) {
if (!isMultiEntry())
return nullptr;

uint64_t NumEntries = 0;
uint64_t NumEntries = 1;
if (hasCFG()) {
for (BinaryBasicBlock *BB : BasicBlocks) {
MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
Expand Down Expand Up @@ -3580,7 +3581,7 @@ uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
return 0;

// Check all secondary entries available as either basic blocks or labels.
uint64_t NumEntries = 0;
uint64_t NumEntries = 1;
for (const BinaryBasicBlock *BB : BasicBlocks) {
MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB);
if (!EntrySymbol)
Expand All @@ -3589,7 +3590,7 @@ uint64_t BinaryFunction::getEntryIDForSymbol(const MCSymbol *Symbol) const {
return NumEntries;
++NumEntries;
}
NumEntries = 0;
NumEntries = 1;
for (const std::pair<const uint32_t, MCSymbol *> &KV : Labels) {
MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(KV.second);
if (!EntrySymbol)
Expand Down Expand Up @@ -4076,12 +4077,17 @@ BinaryFunction::~BinaryFunction() {
delete BB;
}

/// Construct the dominator tree for this function: point BDT at a newly
/// allocated BinaryDominatorTree and recalculate it for this function.
/// NOTE(review): presumably any tree previously held by BDT is released by
/// the reset() call; BDT's declaration is not visible here -- confirm.
void BinaryFunction::constructDomTree() {
BDT.reset(new BinaryDominatorTree);
// The tree is computed from the function object itself (*this).
BDT->recalculate(*this);
}

void BinaryFunction::calculateLoopInfo() {
if (!hasDomTree())
constructDomTree();
// Discover loops.
BinaryDominatorTree DomTree;
DomTree.recalculate(*this);
BLI.reset(new BinaryLoopInfo());
BLI->analyze(DomTree);
BLI->analyze(getDomTree());

// Traverse discovered loops and add depth and profile information.
std::stack<BinaryLoop *> St;
Expand Down
7 changes: 5 additions & 2 deletions bolt/lib/Core/DIEBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/LEB128.h"

#include <algorithm>
Expand Down Expand Up @@ -545,6 +544,10 @@ void DIEBuilder::cloneDieReferenceAttribute(
NewRefDie = DieInfo.Die;

if (AttrSpec.Form == dwarf::DW_FORM_ref_addr) {
// Adding referenced DIE to DebugNames to be used when entries are created
// that contain cross cu references.
if (DebugNamesTable.canGenerateEntryWithCrossCUReference(U, Die, AttrSpec))
DebugNamesTable.addCrossCUDie(DieInfo.Die);
// no matter forward reference or backward reference, we are supposed
// to calculate them in `finish` due to the possible modification of
// the DIE.
Expand All @@ -554,7 +557,7 @@ void DIEBuilder::cloneDieReferenceAttribute(
std::make_pair(CurDieInfo, AddrReferenceInfo(&DieInfo, AttrSpec)));

Die.addValue(getState().DIEAlloc, AttrSpec.Attr, dwarf::DW_FORM_ref_addr,
DIEInteger(0xDEADBEEF));
DIEInteger(DieInfo.Die->getOffset()));
return;
}

Expand Down
3 changes: 0 additions & 3 deletions bolt/lib/Core/DebugData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
#include "bolt/Core/DebugData.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/DIEBuilder.h"
#include "bolt/Rewrite/RewriteInstance.h"
#include "bolt/Utils/Utils.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/DIE.h"
Expand All @@ -23,7 +22,6 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/LEB128.h"
Expand All @@ -32,7 +30,6 @@
#include <cassert>
#include <cstdint>
#include <functional>
#include <limits>
#include <memory>
#include <unordered_map>
#include <vector>
Expand Down
Loading