diff --git a/bolt/include/bolt/Core/MCInstUtils.h b/bolt/include/bolt/Core/MCInstUtils.h deleted file mode 100644 index 2e93dfaf4c275..0000000000000 --- a/bolt/include/bolt/Core/MCInstUtils.h +++ /dev/null @@ -1,155 +0,0 @@ -//===- bolt/Core/MCInstUtils.h ----------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef BOLT_CORE_MCINSTUTILS_H -#define BOLT_CORE_MCINSTUTILS_H - -#include "bolt/Core/BinaryBasicBlock.h" -#include -#include - -namespace llvm { -namespace bolt { - -class BinaryFunction; - -/// MCInstReference represents a reference to a constant MCInst as stored either -/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock -/// (after a CFG is created). -class MCInstReference { - using nocfg_const_iterator = std::map::const_iterator; - - // Two cases are possible: - // * functions with CFG reconstructed - a function stores a collection of - // basic blocks, each basic block stores a contiguous vector of MCInst - // * functions without CFG - there are no basic blocks created, - // the instructions are directly stored in std::map in BinaryFunction - // - // In both cases, the direct parent of MCInst is stored together with an - // iterator pointing to the instruction. - - // Helper struct: CFG is available, the direct parent is a basic block, - // iterator's type is `MCInst *`. - struct RefInBB { - RefInBB(const BinaryBasicBlock *BB, const MCInst *Inst) - : BB(BB), It(Inst) {} - RefInBB(const RefInBB &Other) = default; - RefInBB &operator=(const RefInBB &Other) = default; - - const BinaryBasicBlock *BB; - BinaryBasicBlock::const_iterator It; - - bool operator==(const RefInBB &Other) const { - return BB == Other.BB && It == Other.It; - } - }; - - // Helper struct: CFG is *not* available, the direct parent is a function, - // iterator's type is std::map::iterator (the mapped value - // is an instruction's offset). - struct RefInBF { - RefInBF(const BinaryFunction *BF, nocfg_const_iterator It) - : BF(BF), It(It) {} - RefInBF(const RefInBF &Other) = default; - RefInBF &operator=(const RefInBF &Other) = default; - - const BinaryFunction *BF; - nocfg_const_iterator It; - - bool operator==(const RefInBF &Other) const { - return BF == Other.BF && It->first == Other.It->first; - } - }; - - std::variant Reference; - - // Utility methods to be used like this: - // - // if (auto *Ref = tryGetRefInBB()) - // return Ref->doSomething(...); - // return getRefInBF().doSomethingElse(...); - const RefInBB *tryGetRefInBB() const { - assert(std::get_if(&Reference) || - std::get_if(&Reference)); - return std::get_if(&Reference); - } - const RefInBF &getRefInBF() const { - assert(std::get_if(&Reference)); - return *std::get_if(&Reference); - } - -public: - /// Constructs an empty reference. - MCInstReference() : Reference(RefInBB(nullptr, nullptr)) {} - /// Constructs a reference to the instruction inside the basic block. - MCInstReference(const BinaryBasicBlock *BB, const MCInst *Inst) - : Reference(RefInBB(BB, Inst)) { - assert(BB && Inst && "Neither BB nor Inst should be nullptr"); - } - /// Constructs a reference to the instruction inside the basic block. - MCInstReference(const BinaryBasicBlock *BB, unsigned Index) - : Reference(RefInBB(BB, &BB->getInstructionAtIndex(Index))) { - assert(BB && "Basic block should not be nullptr"); - } - /// Constructs a reference to the instruction inside the function without - /// CFG information. - MCInstReference(const BinaryFunction *BF, nocfg_const_iterator It) - : Reference(RefInBF(BF, It)) { - assert(BF && "Function should not be nullptr"); - } - - /// Locates an instruction inside a function and returns a reference. - static MCInstReference get(const MCInst *Inst, const BinaryFunction &BF); - - bool operator==(const MCInstReference &Other) const { - return Reference == Other.Reference; - } - - const MCInst &getMCInst() const { - assert(!empty() && "Empty reference"); - if (auto *Ref = tryGetRefInBB()) - return *Ref->It; - return getRefInBF().It->second; - } - - operator const MCInst &() const { return getMCInst(); } - - bool empty() const { - if (auto *Ref = tryGetRefInBB()) - return Ref->BB == nullptr; - return getRefInBF().BF == nullptr; - } - - bool hasCFG() const { return !empty() && tryGetRefInBB() != nullptr; } - - const BinaryFunction *getFunction() const { - assert(!empty() && "Empty reference"); - if (auto *Ref = tryGetRefInBB()) - return Ref->BB->getFunction(); - return getRefInBF().BF; - } - - const BinaryBasicBlock *getBasicBlock() const { - assert(!empty() && "Empty reference"); - if (auto *Ref = tryGetRefInBB()) - return Ref->BB; - return nullptr; - } - - raw_ostream &print(raw_ostream &OS) const; -}; - -static inline raw_ostream &operator<<(raw_ostream &OS, - const MCInstReference &Ref) { - return Ref.print(OS); -} - -} // namespace bolt -} // namespace llvm - -#endif diff --git a/bolt/include/bolt/Passes/PAuthGadgetScanner.h b/bolt/include/bolt/Passes/PAuthGadgetScanner.h index cb865a725d72a..721fd664a3253 100644 --- a/bolt/include/bolt/Passes/PAuthGadgetScanner.h +++ b/bolt/include/bolt/Passes/PAuthGadgetScanner.h @@ -11,13 +11,187 @@ #include "bolt/Core/BinaryContext.h" #include "bolt/Core/BinaryFunction.h" -#include "bolt/Core/MCInstUtils.h" #include "bolt/Passes/BinaryPasses.h" #include "llvm/Support/raw_ostream.h" #include namespace llvm { namespace bolt { + +/// @brief MCInstReference represents a reference to an MCInst as stored either +/// in a BinaryFunction (i.e. before a CFG is created), or in a BinaryBasicBlock +/// (after a CFG is created). It aims to store the necessary information to be +/// able to find the specific MCInst in either the BinaryFunction or +/// BinaryBasicBlock data structures later, so that e.g. the InputAddress of +/// the corresponding instruction can be computed. + +struct MCInstInBBReference { + BinaryBasicBlock *BB; + int64_t BBIndex; + MCInstInBBReference(BinaryBasicBlock *BB, int64_t BBIndex) + : BB(BB), BBIndex(BBIndex) {} + MCInstInBBReference() : BB(nullptr), BBIndex(0) {} + static MCInstInBBReference get(const MCInst *Inst, BinaryFunction &BF) { + for (BinaryBasicBlock &BB : BF) + for (size_t I = 0; I < BB.size(); ++I) + if (Inst == &BB.getInstructionAtIndex(I)) + return MCInstInBBReference(&BB, I); + return {}; + } + bool operator==(const MCInstInBBReference &RHS) const { + return BB == RHS.BB && BBIndex == RHS.BBIndex; + } + bool operator<(const MCInstInBBReference &RHS) const { + return std::tie(BB, BBIndex) < std::tie(RHS.BB, RHS.BBIndex); + } + operator MCInst &() const { + assert(BB != nullptr); + return BB->getInstructionAtIndex(BBIndex); + } + uint64_t getAddress() const { + // 4 bytes per instruction on AArch64. + // FIXME: the assumption of 4 byte per instruction needs to be fixed before + // this method gets used on any non-AArch64 binaries (but should be fine for + // pac-ret analysis, as that is an AArch64-specific feature). + return BB->getFunction()->getAddress() + BB->getOffset() + BBIndex * 4; + } +}; + +raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &); + +struct MCInstInBFReference { + BinaryFunction *BF; + uint64_t Offset; + MCInstInBFReference(BinaryFunction *BF, uint64_t Offset) + : BF(BF), Offset(Offset) {} + + static MCInstInBFReference get(const MCInst *Inst, BinaryFunction &BF) { + for (auto &I : BF.instrs()) + if (Inst == &I.second) + return MCInstInBFReference(&BF, I.first); + return {}; + } + + MCInstInBFReference() : BF(nullptr), Offset(0) {} + bool operator==(const MCInstInBFReference &RHS) const { + return BF == RHS.BF && Offset == RHS.Offset; + } + bool operator<(const MCInstInBFReference &RHS) const { + return std::tie(BF, Offset) < std::tie(RHS.BF, RHS.Offset); + } + operator MCInst &() const { + assert(BF != nullptr); + return *BF->getInstructionAtOffset(Offset); + } + + uint64_t getOffset() const { return Offset; } + + uint64_t getAddress() const { return BF->getAddress() + getOffset(); } +}; + +raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &); + +struct MCInstReference { + enum Kind { FunctionParent, BasicBlockParent }; + Kind ParentKind; + union U { + MCInstInBBReference BBRef; + MCInstInBFReference BFRef; + U(MCInstInBBReference BBRef) : BBRef(BBRef) {} + U(MCInstInBFReference BFRef) : BFRef(BFRef) {} + } U; + MCInstReference(MCInstInBBReference BBRef) + : ParentKind(BasicBlockParent), U(BBRef) {} + MCInstReference(MCInstInBFReference BFRef) + : ParentKind(FunctionParent), U(BFRef) {} + MCInstReference(BinaryBasicBlock *BB, int64_t BBIndex) + : MCInstReference(MCInstInBBReference(BB, BBIndex)) {} + MCInstReference(BinaryFunction *BF, uint32_t Offset) + : MCInstReference(MCInstInBFReference(BF, Offset)) {} + + static MCInstReference get(const MCInst *Inst, BinaryFunction &BF) { + if (BF.hasCFG()) + return MCInstInBBReference::get(Inst, BF); + return MCInstInBFReference::get(Inst, BF); + } + + bool operator<(const MCInstReference &RHS) const { + if (ParentKind != RHS.ParentKind) + return ParentKind < RHS.ParentKind; + switch (ParentKind) { + case BasicBlockParent: + return U.BBRef < RHS.U.BBRef; + case FunctionParent: + return U.BFRef < RHS.U.BFRef; + } + llvm_unreachable(""); + } + + bool operator==(const MCInstReference &RHS) const { + if (ParentKind != RHS.ParentKind) + return false; + switch (ParentKind) { + case BasicBlockParent: + return U.BBRef == RHS.U.BBRef; + case FunctionParent: + return U.BFRef == RHS.U.BFRef; + } + llvm_unreachable(""); + } + + operator MCInst &() const { + switch (ParentKind) { + case BasicBlockParent: + return U.BBRef; + case FunctionParent: + return U.BFRef; + } + llvm_unreachable(""); + } + + operator bool() const { + switch (ParentKind) { + case BasicBlockParent: + return U.BBRef.BB != nullptr; + case FunctionParent: + return U.BFRef.BF != nullptr; + } + llvm_unreachable(""); + } + + uint64_t getAddress() const { + switch (ParentKind) { + case BasicBlockParent: + return U.BBRef.getAddress(); + case FunctionParent: + return U.BFRef.getAddress(); + } + llvm_unreachable(""); + } + + BinaryFunction *getFunction() const { + switch (ParentKind) { + case FunctionParent: + return U.BFRef.BF; + case BasicBlockParent: + return U.BBRef.BB->getFunction(); + } + llvm_unreachable(""); + } + + BinaryBasicBlock *getBasicBlock() const { + switch (ParentKind) { + case FunctionParent: + return nullptr; + case BasicBlockParent: + return U.BBRef.BB; + } + llvm_unreachable(""); + } +}; + +raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &); + namespace PAuthGadgetScanner { // The report classes are designed to be used in an immutable manner. diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt index 58cfcab370f16..fc72dc023c590 100644 --- a/bolt/lib/Core/CMakeLists.txt +++ b/bolt/lib/Core/CMakeLists.txt @@ -32,7 +32,6 @@ add_llvm_library(LLVMBOLTCore GDBIndex.cpp HashUtilities.cpp JumpTable.cpp - MCInstUtils.cpp MCPlusBuilder.cpp ParallelUtilities.cpp Relocation.cpp diff --git a/bolt/lib/Core/MCInstUtils.cpp b/bolt/lib/Core/MCInstUtils.cpp deleted file mode 100644 index 3cdb9673d4dc0..0000000000000 --- a/bolt/lib/Core/MCInstUtils.cpp +++ /dev/null @@ -1,56 +0,0 @@ -//===- bolt/Passes/MCInstUtils.cpp ----------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "bolt/Core/MCInstUtils.h" -#include "bolt/Core/BinaryBasicBlock.h" -#include "bolt/Core/BinaryFunction.h" - -#include - -using namespace llvm; -using namespace llvm::bolt; - -MCInstReference MCInstReference::get(const MCInst *Inst, - const BinaryFunction &BF) { - if (BF.hasCFG()) { - for (BinaryBasicBlock &BB : BF) - for (MCInst &MI : BB) - if (&MI == Inst) - return MCInstReference(&BB, Inst); - return {}; - } - - for (auto I = BF.instrs().begin(), E = BF.instrs().end(); I != E; ++I) { - if (&I->second == Inst) - return MCInstReference(&BF, I); - } - return {}; -} - -raw_ostream &MCInstReference::print(raw_ostream &OS) const { - if (const RefInBB *Ref = tryGetRefInBB()) { - OS << "MCInstBBRef<"; - if (Ref->BB == nullptr) { - OS << "BB:(null)"; - } else { - unsigned IndexInBB = std::distance(Ref->BB->begin(), Ref->It); - OS << "BB:" << Ref->BB->getName() << ":" << IndexInBB; - } - OS << ">"; - return OS; - } - - const RefInBF &Ref = getRefInBF(); - OS << "MCInstBFRef<"; - if (Ref.BF == nullptr) - OS << "BF:(null)"; - else - OS << "BF:" << Ref.BF->getPrintName() << ":" << Ref.It->first; - OS << ">"; - return OS; -} diff --git a/bolt/lib/Passes/PAuthGadgetScanner.cpp b/bolt/lib/Passes/PAuthGadgetScanner.cpp index 5d884e2d37354..65c84ebc8c4f4 100644 --- a/bolt/lib/Passes/PAuthGadgetScanner.cpp +++ b/bolt/lib/Passes/PAuthGadgetScanner.cpp @@ -24,6 +24,39 @@ namespace llvm { namespace bolt { + +raw_ostream &operator<<(raw_ostream &OS, const MCInstInBBReference &Ref) { + OS << "MCInstBBRef<"; + if (Ref.BB == nullptr) + OS << "BB:(null)"; + else + OS << "BB:" << Ref.BB->getName() << ":" << Ref.BBIndex; + OS << ">"; + return OS; +} + +raw_ostream &operator<<(raw_ostream &OS, const MCInstInBFReference &Ref) { + OS << "MCInstBFRef<"; + if (Ref.BF == nullptr) + OS << "BF:(null)"; + else + OS << "BF:" << Ref.BF->getPrintName() << ":" << Ref.getOffset(); + OS << ">"; + return OS; +} + +raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &Ref) { + switch (Ref.ParentKind) { + case MCInstReference::BasicBlockParent: + OS << Ref.U.BBRef; + return OS; + case MCInstReference::FunctionParent: + OS << Ref.U.BFRef; + return OS; + } + llvm_unreachable(""); +} + namespace PAuthGadgetScanner { [[maybe_unused]] static void traceInst(const BinaryContext &BC, StringRef Label, @@ -58,10 +91,10 @@ template static void iterateOverInstrs(BinaryFunction &BF, T Fn) { if (BF.hasCFG()) { for (BinaryBasicBlock &BB : BF) for (int64_t I = 0, E = BB.size(); I < E; ++I) - Fn(MCInstReference(&BB, I)); + Fn(MCInstInBBReference(&BB, I)); } else { - for (auto I = BF.instrs().begin(), E = BF.instrs().end(); I != E; ++I) - Fn(MCInstReference(&BF, I)); + for (auto I : BF.instrs()) + Fn(MCInstInBFReference(&BF, I.first)); } } @@ -533,7 +566,7 @@ class SrcSafetyAnalysis { std::vector Result; for (const MCInst *Inst : lastWritingInsts(S, ClobberedReg)) { MCInstReference Ref = MCInstReference::get(Inst, BF); - assert(!Ref.empty() && "Expected Inst to be found"); + assert(Ref && "Expected Inst to be found"); Result.push_back(Ref); } return Result; @@ -1105,7 +1138,7 @@ class DstSafetyAnalysis { std::vector Result; for (const MCInst *Inst : firstLeakingInsts(S, LeakedReg)) { MCInstReference Ref = MCInstReference::get(Inst, BF); - assert(!Ref.empty() && "Expected Inst to be found"); + assert(Ref && "Expected Inst to be found"); Result.push_back(Ref); } return Result; @@ -1312,7 +1345,8 @@ static bool shouldAnalyzeTailCallInst(const BinaryContext &BC, // (such as isBranch at the time of writing this comment), some don't (such // as isCall). For that reason, call MCInstrDesc's methods explicitly when // it is important. - const MCInstrDesc &Desc = BC.MII->get(Inst.getMCInst().getOpcode()); + const MCInstrDesc &Desc = + BC.MII->get(static_cast(Inst).getOpcode()); // Tail call should be a branch (but not necessarily an indirect one). if (!Desc.isBranch()) return false; @@ -1507,7 +1541,7 @@ void FunctionAnalysisContext::findUnsafeUses( // This is printed as "[message] in function [name], basic block ..., // at address ..." when the issue is reported to the user. Reports.push_back(make_generic_report( - MCInstReference(&BB, FirstInst), + MCInstReference::get(FirstInst, BF), "Warning: possibly imprecise CFG, the analysis quality may be " "degraded in this function. According to BOLT, unreachable code is " "found" /* in function [name]... */)); @@ -1671,49 +1705,31 @@ void Analysis::runOnFunction(BinaryFunction &BF, } } -// Compute the instruction address for printing (may be slow). -static uint64_t getAddress(const MCInstReference &Inst) { - const BinaryFunction *BF = Inst.getFunction(); - - if (Inst.hasCFG()) { - const BinaryBasicBlock *BB = Inst.getBasicBlock(); - - auto It = static_cast(&Inst.getMCInst()); - unsigned IndexInBB = std::distance(BB->begin(), It); - - // FIXME: this assumes all instructions are 4 bytes in size. This is true - // for AArch64, but it might be good to extract this function so it can be - // used elsewhere and for other targets too. - return BF->getAddress() + BB->getOffset() + IndexInBB * 4; - } - - for (auto I = BF->instrs().begin(), E = BF->instrs().end(); I != E; ++I) { - if (&I->second == &Inst.getMCInst()) - return BF->getAddress() + I->first; - } - llvm_unreachable("Instruction not found in function"); -} - static void printBB(const BinaryContext &BC, const BinaryBasicBlock *BB, size_t StartIndex = 0, size_t EndIndex = -1) { if (EndIndex == (size_t)-1) EndIndex = BB->size() - 1; const BinaryFunction *BF = BB->getFunction(); for (unsigned I = StartIndex; I <= EndIndex; ++I) { - MCInstReference Inst(BB, I); + // FIXME: this assumes all instructions are 4 bytes in size. This is true + // for AArch64, but it might be good to extract this function so it can be + // used elsewhere and for other targets too. + uint64_t Address = BB->getOffset() + BF->getAddress() + 4 * I; + const MCInst &Inst = BB->getInstructionAtIndex(I); if (BC.MIB->isCFI(Inst)) continue; - BC.printInstruction(outs(), Inst, getAddress(Inst), BF); + BC.printInstruction(outs(), Inst, Address, BF); } } static void reportFoundGadgetInSingleBBSingleRelatedInst( raw_ostream &OS, const BinaryContext &BC, const MCInstReference RelatedInst, const MCInstReference Location) { - const BinaryBasicBlock *BB = Location.getBasicBlock(); - assert(RelatedInst.hasCFG()); - assert(Location.hasCFG()); - if (BB == RelatedInst.getBasicBlock()) { + BinaryBasicBlock *BB = Location.getBasicBlock(); + assert(RelatedInst.ParentKind == MCInstReference::BasicBlockParent); + assert(Location.ParentKind == MCInstReference::BasicBlockParent); + MCInstInBBReference RelatedInstBB = RelatedInst.U.BBRef; + if (BB == RelatedInstBB.BB) { OS << " This happens in the following basic block:\n"; printBB(BC, BB); } @@ -1721,16 +1737,16 @@ static void reportFoundGadgetInSingleBBSingleRelatedInst( void Diagnostic::printBasicInfo(raw_ostream &OS, const BinaryContext &BC, StringRef IssueKind) const { - const BinaryBasicBlock *BB = Location.getBasicBlock(); - const BinaryFunction *BF = Location.getFunction(); + BinaryFunction *BF = Location.getFunction(); + BinaryBasicBlock *BB = Location.getBasicBlock(); OS << "\nGS-PAUTH: " << IssueKind; OS << " in function " << BF->getPrintName(); if (BB) OS << ", basic block " << BB->getName(); - OS << ", at address " << llvm::format("%x", getAddress(Location)) << "\n"; + OS << ", at address " << llvm::format("%x", Location.getAddress()) << "\n"; OS << " The instruction is "; - BC.printInstruction(OS, Location, getAddress(Location), BF); + BC.printInstruction(OS, Location, Location.getAddress(), BF); } void GadgetDiagnostic::generateReport(raw_ostream &OS, @@ -1746,19 +1762,19 @@ static void printRelatedInstrs(raw_ostream &OS, const MCInstReference Location, // Sort by address to ensure output is deterministic. SmallVector RI(RelatedInstrs); llvm::sort(RI, [](const MCInstReference &A, const MCInstReference &B) { - return getAddress(A) < getAddress(B); + return A.getAddress() < B.getAddress(); }); for (unsigned I = 0; I < RI.size(); ++I) { MCInstReference InstRef = RI[I]; OS << " " << (I + 1) << ". "; - BC.printInstruction(OS, InstRef, getAddress(InstRef), &BF); + BC.printInstruction(OS, InstRef, InstRef.getAddress(), &BF); }; if (RelatedInstrs.size() == 1) { const MCInstReference RelatedInst = RelatedInstrs[0]; // Printing the details for the MCInstReference::FunctionParent case // is not implemented not to overcomplicate the code, as most functions // are expected to have CFG information. - if (RelatedInst.hasCFG()) + if (RelatedInst.ParentKind == MCInstReference::BasicBlockParent) reportFoundGadgetInSingleBBSingleRelatedInst(OS, BC, RelatedInst, Location); }