Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
181 changes: 181 additions & 0 deletions bolt/include/bolt/Core/MCInstUtils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
//===- bolt/Core/MCInstUtils.h ----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef BOLT_CORE_MCINSTUTILS_H
#define BOLT_CORE_MCINSTUTILS_H

#include "bolt/Core/BinaryBasicBlock.h"
#include <map>
#include <variant>

namespace llvm {
class MCCodeEmitter;
}

namespace llvm {
namespace bolt {

class BinaryFunction;

/// MCInstReference represents a reference to a constant MCInst as stored either
/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
/// (after a CFG is created).
///
/// The reference may be invalidated when the function containing the referenced
/// instruction is modified.
class MCInstReference {
public:
using nocfg_const_iterator = std::map<uint32_t, MCInst>::const_iterator;

/// Constructs an empty reference.
MCInstReference() : Reference(RefInBB(nullptr, /*Index=*/0)) {}

/// Constructs a reference to the instruction inside the basic block.
MCInstReference(const BinaryBasicBlock &BB, const MCInst &Inst)
: Reference(RefInBB(&BB, getInstIndexInBB(BB, Inst))) {}
/// Constructs a reference to the instruction inside the basic block.
MCInstReference(const BinaryBasicBlock &BB, unsigned Index)
: Reference(RefInBB(&BB, Index)) {}

/// Constructs a reference to the instruction inside the function without
/// CFG information.
MCInstReference(const BinaryFunction &BF, nocfg_const_iterator It)
: Reference(RefInBF(&BF, It)) {}

/// Locates an instruction inside a function and returns a reference.
static MCInstReference get(const MCInst &Inst, const BinaryFunction &BF);

bool operator==(const MCInstReference &Other) const {
return Reference == Other.Reference;
}

const MCInst &getMCInst() const {
assert(!empty() && "Empty reference");
if (auto *Ref = tryGetRefInBB()) {
[[maybe_unused]] unsigned NumInstructions = Ref->BB->size();
assert(Ref->Index < NumInstructions && "Invalid reference");
return Ref->BB->getInstructionAtIndex(Ref->Index);
}
return getRefInBF().It->second;
}

operator const MCInst &() const { return getMCInst(); }

bool empty() const {
if (auto *Ref = tryGetRefInBB())
return Ref->BB == nullptr;
return getRefInBF().BF == nullptr;
}

bool hasCFG() const { return !empty() && tryGetRefInBB() != nullptr; }

const BinaryFunction *getFunction() const {
assert(!empty() && "Empty reference");
if (auto *Ref = tryGetRefInBB())
return Ref->BB->getFunction();
return getRefInBF().BF;
}

const BinaryBasicBlock *getBasicBlock() const {
assert(!empty() && "Empty reference");
if (auto *Ref = tryGetRefInBB())
return Ref->BB;
return nullptr;
}

/// Computes the original address of the instruction (or offset from base
/// for PIC), assuming the containing function was not modified.
///
/// This function is intended for the use cases like debug printing, as it
/// is only as precise as BinaryContext::computeCodeSize() is and requires
/// iterating over the prefix of the basic block (when CFG is available).
///
/// MCCodeEmitter is not thread safe and the default instance from
/// BinaryContext is used by default, thus pass an instance explicitly if
/// this function may be called from multithreaded code.
uint64_t computeAddress(const MCCodeEmitter *Emitter = nullptr) const;

raw_ostream &print(raw_ostream &OS) const;

private:
static unsigned getInstIndexInBB(const BinaryBasicBlock &BB,
const MCInst &Inst) {
// Usage of pointer arithmetic assumes the instructions are stored in a
// vector, see BasicBlockStorageIsVector in MCInstUtils.cpp.
const MCInst *FirstInstInBB = &*BB.begin();
return &Inst - FirstInstInBB;
}

// Two cases are possible:
// * functions with CFG reconstructed - a function stores a collection of
// basic blocks, each basic block stores a contiguous vector of MCInst
// * functions without CFG - there are no basic blocks created,
// the instructions are directly stored in std::map in BinaryFunction
//
// In both cases, the direct parent of MCInst is stored together with an
// index or iterator pointing to the instruction.

// Helper struct: CFG is available, the direct parent is a basic block.
struct RefInBB {
RefInBB(const BinaryBasicBlock *BB, unsigned Index)
: BB(BB), Index(Index) {}
RefInBB(const RefInBB &Other) = default;
RefInBB &operator=(const RefInBB &Other) = default;

const BinaryBasicBlock *BB;
unsigned Index;

bool operator==(const RefInBB &Other) const {
return BB == Other.BB && Index == Other.Index;
}
};

// Helper struct: CFG is *not* available, the direct parent is a function,
// iterator's type is std::map<uint32_t, MCInst>::iterator (the mapped value
// is an instruction's offset).
struct RefInBF {
RefInBF(const BinaryFunction *BF, nocfg_const_iterator It)
: BF(BF), It(It) {}
RefInBF(const RefInBF &Other) = default;
RefInBF &operator=(const RefInBF &Other) = default;

const BinaryFunction *BF;
nocfg_const_iterator It;

bool operator==(const RefInBF &Other) const {
return BF == Other.BF && It->first == Other.It->first;
}
};

std::variant<RefInBB, RefInBF> Reference;

// Utility methods to be used like this:
//
// if (auto *Ref = tryGetRefInBB())
// return Ref->doSomething(...);
// return getRefInBF().doSomethingElse(...);
const RefInBB *tryGetRefInBB() const {
assert(std::get_if<RefInBB>(&Reference) ||
std::get_if<RefInBF>(&Reference));
return std::get_if<RefInBB>(&Reference);
}
const RefInBF &getRefInBF() const {
assert(std::get_if<RefInBF>(&Reference));
return *std::get_if<RefInBF>(&Reference);
}
};

/// Stream insertion for MCInstReference: forwards to MCInstReference::print()
/// and returns the stream reference that call returns.
static inline raw_ostream &operator<<(raw_ostream &OS,
const MCInstReference &Ref) {
return Ref.print(OS);
}

} // namespace bolt
} // namespace llvm

#endif
176 changes: 1 addition & 175 deletions bolt/include/bolt/Passes/PAuthGadgetScanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,187 +11,13 @@

#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/MCInstUtils.h"
#include "bolt/Passes/BinaryPasses.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>

namespace llvm {
namespace bolt {

/// @brief MCInstReference represents a reference to an MCInst as stored either
/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock
/// (after a CFG is created). It aims to store the necessary information to be
/// able to find the specific MCInst in either the BinaryFunction or
/// BinaryBasicBlock data structures later, so that e.g. the InputAddress of
/// the corresponding instruction can be computed.

struct MCInstInBBReference {
BinaryBasicBlock *BB;
int64_t BBIndex;
MCInstInBBReference(BinaryBasicBlock *BB, int64_t BBIndex)
: BB(BB), BBIndex(BBIndex) {}
MCInstInBBReference() : BB(nullptr), BBIndex(0) {}
static MCInstInBBReference get(const MCInst *Inst, BinaryFunction &BF) {
for (BinaryBasicBlock &BB : BF)
for (size_t I = 0; I < BB.size(); ++I)
if (Inst == &BB.getInstructionAtIndex(I))
return MCInstInBBReference(&BB, I);
return {};
}
bool operator==(const MCInstInBBReference &RHS) const {
return BB == RHS.BB && BBIndex == RHS.BBIndex;
}
bool operator<(const MCInstInBBReference &RHS) const {
return std::tie(BB, BBIndex) < std::tie(RHS.BB, RHS.BBIndex);
}
operator MCInst &() const {
assert(BB != nullptr);
return BB->getInstructionAtIndex(BBIndex);
}
uint64_t getAddress() const {
// 4 bytes per instruction on AArch64.
// FIXME: the assumption of 4 byte per instruction needs to be fixed before
// this method gets used on any non-AArch64 binaries (but should be fine for
// pac-ret analysis, as that is an AArch64-specific feature).
return BB->getFunction()->getAddress() + BB->getOffset() + BBIndex * 4;
}
};

raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &);

/// Reference to an instruction of a function without a CFG: the instruction
/// is identified by its parent function and its offset inside that function.
struct MCInstInBFReference {
  BinaryFunction *BF;
  uint64_t Offset;

  /// Creates a reference to the instruction of \p BF located at \p Offset.
  MCInstInBFReference(BinaryFunction *BF, uint64_t Offset)
      : BF(BF), Offset(Offset) {}

  /// Scans the instructions of \p BF for the instruction object located at
  /// address \p Inst. Returns an empty reference if there is no such
  /// instruction.
  static MCInstInBFReference get(const MCInst *Inst, BinaryFunction &BF) {
    for (auto &Entry : BF.instrs()) {
      if (&Entry.second != Inst)
        continue;
      return MCInstInBFReference(&BF, Entry.first);
    }
    return MCInstInBFReference();
  }

  /// Creates an empty reference (null function, offset 0).
  MCInstInBFReference() : BF(nullptr), Offset(0) {}

  /// Equal when both the function and the offset match.
  bool operator==(const MCInstInBFReference &RHS) const {
    return !(BF != RHS.BF || Offset != RHS.Offset);
  }

  /// Lexicographic order on (function pointer, offset).
  bool operator<(const MCInstInBFReference &RHS) const {
    if (BF < RHS.BF)
      return true;
    if (RHS.BF < BF)
      return false;
    return Offset < RHS.Offset;
  }

  /// Converts to the referenced instruction; the reference must not be empty.
  operator MCInst &() const {
    assert(BF != nullptr);
    auto *Referenced = BF->getInstructionAtOffset(Offset);
    return *Referenced;
  }

  /// Returns the offset of the instruction inside its function.
  uint64_t getOffset() const { return Offset; }

  /// Returns function address + instruction offset.
  uint64_t getAddress() const {
    const auto FunctionAddress = BF->getAddress();
    return FunctionAddress + getOffset();
  }
};

raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &);

/// Tagged union over the two concrete reference kinds: ParentKind records
/// which member of U is the one that was initialized.
struct MCInstReference {
  enum Kind { FunctionParent, BasicBlockParent };
  Kind ParentKind;
  union U {
    MCInstInBBReference BBRef;
    MCInstInBFReference BFRef;
    U(MCInstInBBReference BBRef) : BBRef(BBRef) {}
    U(MCInstInBFReference BFRef) : BFRef(BFRef) {}
  } U;

  /// Wraps a basic-block-based reference.
  MCInstReference(MCInstInBBReference BBRef)
      : ParentKind(BasicBlockParent), U(BBRef) {}
  /// Wraps a function-based reference.
  MCInstReference(MCInstInBFReference BFRef)
      : ParentKind(FunctionParent), U(BFRef) {}
  /// Refers to the instruction at position \p BBIndex of \p BB.
  MCInstReference(BinaryBasicBlock *BB, int64_t BBIndex)
      : MCInstReference(MCInstInBBReference(BB, BBIndex)) {}
  /// Refers to the instruction of \p BF located at \p Offset.
  MCInstReference(BinaryFunction *BF, uint32_t Offset)
      : MCInstReference(MCInstInBFReference(BF, Offset)) {}

  /// Looks \p Inst up in \p BF, searching the basic blocks when \p BF has a
  /// CFG and the function-level instruction list otherwise.
  static MCInstReference get(const MCInst *Inst, BinaryFunction &BF) {
    if (!BF.hasCFG())
      return MCInstInBFReference::get(Inst, BF);
    return MCInstInBBReference::get(Inst, BF);
  }

  /// Orders by kind first, then by the ordering of the active member.
  bool operator<(const MCInstReference &RHS) const {
    if (ParentKind == RHS.ParentKind) {
      switch (ParentKind) {
      case FunctionParent:
        return U.BFRef < RHS.U.BFRef;
      case BasicBlockParent:
        return U.BBRef < RHS.U.BBRef;
      }
      llvm_unreachable("");
    }
    return ParentKind < RHS.ParentKind;
  }

  /// Equal when the kinds match and the active members compare equal.
  bool operator==(const MCInstReference &RHS) const {
    if (ParentKind == RHS.ParentKind) {
      switch (ParentKind) {
      case FunctionParent:
        return U.BFRef == RHS.U.BFRef;
      case BasicBlockParent:
        return U.BBRef == RHS.U.BBRef;
      }
      llvm_unreachable("");
    }
    return false;
  }

  /// Converts to the referenced instruction via the active member.
  operator MCInst &() const {
    switch (ParentKind) {
    case FunctionParent:
      return U.BFRef;
    case BasicBlockParent:
      return U.BBRef;
    }
    llvm_unreachable("");
  }

  /// True when the active member has a non-null parent.
  operator bool() const {
    switch (ParentKind) {
    case FunctionParent:
      return U.BFRef.BF != nullptr;
    case BasicBlockParent:
      return U.BBRef.BB != nullptr;
    }
    llvm_unreachable("");
  }

  /// Returns the address reported by the active member.
  uint64_t getAddress() const {
    switch (ParentKind) {
    case FunctionParent:
      return U.BFRef.getAddress();
    case BasicBlockParent:
      return U.BBRef.getAddress();
    }
    llvm_unreachable("");
  }

  /// Returns the function containing the referenced instruction.
  BinaryFunction *getFunction() const {
    switch (ParentKind) {
    case BasicBlockParent:
      return U.BBRef.BB->getFunction();
    case FunctionParent:
      return U.BFRef.BF;
    }
    llvm_unreachable("");
  }

  /// Returns the parent basic block, or nullptr for a function-based
  /// reference.
  BinaryBasicBlock *getBasicBlock() const {
    switch (ParentKind) {
    case BasicBlockParent:
      return U.BBRef.BB;
    case FunctionParent:
      return nullptr;
    }
    llvm_unreachable("");
  }
};

raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &);

namespace PAuthGadgetScanner {

// The report classes are designed to be used in an immutable manner.
Expand Down
1 change: 1 addition & 0 deletions bolt/lib/Core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ add_llvm_library(LLVMBOLTCore
GDBIndex.cpp
HashUtilities.cpp
JumpTable.cpp
MCInstUtils.cpp
MCPlusBuilder.cpp
ParallelUtilities.cpp
Relocation.cpp
Expand Down
Loading
Loading