From 860fc0cdc8aff6a7e3a5a1ad9c9d39f6fe227581 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Tue, 19 Nov 2024 09:43:25 +0100 Subject: [PATCH 01/31] [BOLT] Recognize paciasp and autiasp instructions --- bolt/include/bolt/Core/MCPlusBuilder.h | 7 +++++++ bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 90129d475d870..4bb127c7f14f7 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -834,6 +834,13 @@ class MCPlusBuilder { llvm_unreachable("not implemented"); return false; } + virtual bool isPAuth(MCInst &Inst) const { + llvm_unreachable("not implemented"); + } + + virtual bool isPSign(MCInst &Inst) const { + llvm_unreachable("not implemented"); + } virtual bool isCleanRegXOR(const MCInst &Inst) const { llvm_unreachable("not implemented"); diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index f972646aa12ea..8aa6b7457f222 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -893,6 +893,12 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { } return false; } + bool isPAuth(MCInst &Inst) const override { + return Inst.getOpcode() == AArch64::AUTIASP; + } + bool isPSign(MCInst &Inst) const override { + return Inst.getOpcode() == AArch64::PACIASP; + } bool isRegToRegMove(const MCInst &Inst, MCPhysReg &From, MCPhysReg &To) const override { From 8fe324499d1ab29d7bc38f0a5adda8644c6042f2 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Mon, 16 Dec 2024 10:45:41 +0100 Subject: [PATCH 02/31] [BOLT] Support for OpNegateRAState - save OpNegateRAState, RememberState and RestoreState locations when parsing input - determine the RA states from these before other optimizations in MarkRAStates Pass - after optimizations, we can insert OpNegateRAState at state boundaries and other needed locations (e.g. split functions) in InsertNegateRAStatePass --- bolt/include/bolt/Core/BinaryFunction.h | 45 ++++++ bolt/include/bolt/Core/MCPlus.h | 9 +- bolt/include/bolt/Core/MCPlusBuilder.h | 59 ++++++++ .../bolt/Passes/InsertNegateRAStatePass.h | 44 ++++++ bolt/include/bolt/Passes/MarkRAStates.h | 33 ++++ bolt/lib/Core/BinaryBasicBlock.cpp | 6 +- bolt/lib/Core/BinaryFunction.cpp | 1 + bolt/lib/Core/Exceptions.cpp | 24 ++- bolt/lib/Core/MCPlusBuilder.cpp | 68 +++++++++ bolt/lib/Passes/CMakeLists.txt | 2 + bolt/lib/Passes/InsertNegateRAStatePass.cpp | 142 ++++++++++++++++++ bolt/lib/Passes/MarkRAStates.cpp | 133 ++++++++++++++++ bolt/lib/Rewrite/BinaryPassManager.cpp | 7 + 13 files changed, 567 insertions(+), 6 deletions(-) create mode 100644 bolt/include/bolt/Passes/InsertNegateRAStatePass.h create mode 100644 bolt/include/bolt/Passes/MarkRAStates.h create mode 100644 bolt/lib/Passes/InsertNegateRAStatePass.cpp create mode 100644 bolt/lib/Passes/MarkRAStates.cpp diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 51b139a15e1a0..4534bbcb1b640 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -1643,6 +1643,51 @@ class BinaryFunction { void setHasInferredProfile(bool Inferred) { HasInferredProfile = Inferred; } + /// Find corrected offset the same way addCFIInstruction does it to skip NOPs. + std::optional getCorrectedCFIOffset(uint64_t Offset) { + assert(!Instructions.empty()); + auto I = Instructions.lower_bound(Offset); + if (Offset == getSize()) { + assert(I == Instructions.end() && "unexpected iterator value"); + // Sometimes compiler issues restore_state after all instructions + // in the function (even after nop). + --I; + Offset = I->first; + } + assert(I->first == Offset && "CFI pointing to unknown instruction"); + if (I == Instructions.begin()) + return {}; + + --I; + while (I != Instructions.begin() && BC.MIB->isNoop(I->second)) { + Offset = I->first; + --I; + } + return Offset; + } + + void setInstModifiesRAState(uint8_t CFIOpcode, uint64_t Offset) { + std::optional CorrectedOffset = getCorrectedCFIOffset(Offset); + if (CorrectedOffset) { + auto I = Instructions.lower_bound(*CorrectedOffset); + I--; + + switch (CFIOpcode) { + case dwarf::DW_CFA_AARCH64_negate_ra_state: + BC.MIB->setNegateRAState(I->second); + break; + case dwarf::DW_CFA_remember_state: + BC.MIB->setRememberState(I->second); + break; + case dwarf::DW_CFA_restore_state: + BC.MIB->setRestoreState(I->second); + break; + default: + assert(0 && "CFI Opcode not covered by function"); + } + } + } + void addCFIInstruction(uint64_t Offset, MCCFIInstruction &&Inst) { assert(!Instructions.empty()); diff --git a/bolt/include/bolt/Core/MCPlus.h b/bolt/include/bolt/Core/MCPlus.h index 601d709712864..a95bba36c5a6e 100644 --- a/bolt/include/bolt/Core/MCPlus.h +++ b/bolt/include/bolt/Core/MCPlus.h @@ -72,7 +72,14 @@ class MCAnnotation { kLabel, /// MCSymbol pointing to this instruction. kSize, /// Size of the instruction. kDynamicBranch, /// Jit instruction patched at runtime. - kGeneric /// First generic annotation. + kSigning, /// Inst is a signing instruction (paciasp, etc.). + kSigned, /// Inst is in a range where RA is signed. + kAuthenticating, /// Authenticating inst (e.g. autiasp). + kUnsigned, /// Inst is in a range where RA is unsigned. + kRememberState, /// Inst has rememberState CFI. + kRestoreState, /// Inst has restoreState CFI. + kNegateState, /// Inst has OpNegateRAState CFI. + kGeneric, /// First generic annotation. }; virtual void print(raw_ostream &OS) const = 0; diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 4bb127c7f14f7..ce659f39a6ab3 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -69,6 +69,20 @@ class MCPlusBuilder { public: using AllocatorIdTy = uint16_t; + std::optional getAnnotationAtOpIndex(const MCInst &Inst, + unsigned OpIndex) const { + std::optional FirstAnnotationOp = getFirstAnnotationOpIndex(Inst); + if (!FirstAnnotationOp) + return std::nullopt; + + if (*FirstAnnotationOp > OpIndex || Inst.getNumOperands() < OpIndex) + return std::nullopt; + + auto Op = Inst.begin() + OpIndex; + const int64_t ImmValue = Op->getImm(); + return extractAnnotationIndex(ImmValue); + } + private: /// A struct that represents a single annotation allocator struct AnnotationAllocator { @@ -1315,6 +1329,51 @@ class MCPlusBuilder { /// Return true if the instruction is a tail call. bool isTailCall(const MCInst &Inst) const; + /// Stores NegateRAState annotation on \p Inst. + void setNegateRAState(MCInst &Inst) const; + + /// Return true if \p Inst has NegateRAState annotation. + bool hasNegateRAState(const MCInst &Inst) const; + + /// Sets RememberState annotation on \p Inst. + void setRememberState(MCInst &Inst) const; + + /// Return true if \p Inst has RememberState annotation. + bool hasRememberState(const MCInst &Inst) const; + + /// Stores RestoreState annotation on \p Inst. + void setRestoreState(MCInst &Inst) const; + + /// Return true if \p Inst has RestoreState annotation. + bool hasRestoreState(const MCInst &Inst) const; + + /// Stores RA Signed annotation on \p Inst. + void setRASigned(MCInst &Inst) const; + + /// Return true if \p Inst has Signed RA annotation. + bool isRASigned(const MCInst &Inst) const; + + /// Stores RA Signing annotation on \p Inst. + void setRASigning(MCInst &Inst) const; + + /// Return true if \p Inst has Signing RA annotation. + bool isRASigning(const MCInst &Inst) const; + + /// Stores Authenticating annotation on \p Inst. + void setAuthenticating(MCInst &Inst) const; + + /// Return true if \p Inst has Authenticating annotation. + bool isAuthenticating(const MCInst &Inst) const; + + /// Stores RA Unsigned annotation on \p Inst. + void setRAUnsigned(MCInst &Inst) const; + + /// Return true if \p Inst has Unsigned RA annotation. + bool isRAUnsigned(const MCInst &Inst) const; + + /// Return true if \p Inst doesn't have any annotation related to RA state. + bool isRAStateUnknown(const MCInst &Inst) const; + /// Return true if the instruction is a call with an exception handling info. virtual bool isInvoke(const MCInst &Inst) const { return isCall(Inst) && getEHInfo(Inst); diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h new file mode 100644 index 0000000000000..e62006baa2eff --- /dev/null +++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h @@ -0,0 +1,44 @@ +//===- bolt/Passes/InsertNegateRAStatePass.cpp ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the InsertNegateRAStatePass class. +// +//===----------------------------------------------------------------------===// +#ifndef BOLT_PASSES_INSERT_NEGATE_RA_STATE_PASS +#define BOLT_PASSES_INSERT_NEGATE_RA_STATE_PASS + +#include "bolt/Passes/BinaryPasses.h" +#include + +namespace llvm { +namespace bolt { + +class InsertNegateRAState : public BinaryFunctionPass { +public: + explicit InsertNegateRAState() : BinaryFunctionPass(false) {} + + const char *getName() const override { return "insert-negate-ra-state-pass"; } + + /// Pass entry point + Error runOnFunctions(BinaryContext &BC) override; + void runOnFunction(BinaryFunction &BF); + +private: + /// Loops over all instructions and adds OpNegateRAState CFI + /// after any pointer signing or authenticating instructions. + /// Returns true, if any OpNegateRAState CFIs were added. + bool addNegateRAStateAfterPacOrAuth(BinaryFunction &BF); + /// Because states are tracked as MCAnnotations on individual instructions, + /// newly inserted instructions do not have a state associated with them. + /// New states are "inherited" from the last known state. + void fixUnknownStates(BinaryFunction &BF); +}; + +} // namespace bolt +} // namespace llvm +#endif diff --git a/bolt/include/bolt/Passes/MarkRAStates.h b/bolt/include/bolt/Passes/MarkRAStates.h new file mode 100644 index 0000000000000..e7a49f813b6a7 --- /dev/null +++ b/bolt/include/bolt/Passes/MarkRAStates.h @@ -0,0 +1,33 @@ +//===- bolt/Passes/MarkRAStates.cpp ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the MarkRAStates class. +// +//===----------------------------------------------------------------------===// +#ifndef BOLT_PASSES_MARK_RA_STATES +#define BOLT_PASSES_MARK_RA_STATES + +#include "bolt/Passes/BinaryPasses.h" + +namespace llvm { +namespace bolt { + +class MarkRAStates : public BinaryFunctionPass { +public: + explicit MarkRAStates() : BinaryFunctionPass(false) {} + + const char *getName() const override { return "mark-ra-states"; } + + /// Pass entry point + Error runOnFunctions(BinaryContext &BC) override; + void runOnFunction(BinaryFunction &BF); +}; + +} // namespace bolt +} // namespace llvm +#endif diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp index eeab1ed4d7cff..d680850bf2ea9 100644 --- a/bolt/lib/Core/BinaryBasicBlock.cpp +++ b/bolt/lib/Core/BinaryBasicBlock.cpp @@ -210,7 +210,11 @@ int32_t BinaryBasicBlock::getCFIStateAtInstr(const MCInst *Instr) const { InstrSeen = (&Inst == Instr); continue; } - if (Function->getBinaryContext().MIB->isCFI(Inst)) { + // Ignoring OpNegateRAState CFIs here, as they dont have a "State" + // number associated with them. + if (Function->getBinaryContext().MIB->isCFI(Inst) && + (Function->getCFIFor(Inst)->getOperation() != + MCCFIInstruction::OpNegateRAState)) { LastCFI = &Inst; break; } diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 7102054a366fa..d622e26959b64 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -2809,6 +2809,7 @@ struct CFISnapshot { void advanceTo(int32_t State) { for (int32_t I = CurState, E = State; I != E; ++I) { const MCCFIInstruction &Instr = FDE[I]; + assert(Instr.getOperation() != MCCFIInstruction::OpNegateRAState); if (Instr.getOperation() != MCCFIInstruction::OpRestoreState) { update(Instr, I); continue; diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp index 874419f592cc9..2480cd5bf38ca 100644 --- a/bolt/lib/Core/Exceptions.cpp +++ b/bolt/lib/Core/Exceptions.cpp @@ -568,10 +568,21 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { case DW_CFA_remember_state: Function.addCFIInstruction( Offset, MCCFIInstruction::createRememberState(nullptr)); + + if (Function.getBinaryContext().isAArch64()) + // Support for pointer authentication: + // We need to annotate instructions that modify the RA State, to work + // out the state of each instruction in MarkRAStates Pass. + Function.setInstModifiesRAState(DW_CFA_remember_state, Offset); break; case DW_CFA_restore_state: Function.addCFIInstruction(Offset, MCCFIInstruction::createRestoreState(nullptr)); + if (Function.getBinaryContext().isAArch64()) + // Support for pointer authentication: + // We need to annotate instructions that modify the RA State, to work + // out the state of each instruction in MarkRAStates Pass. + Function.setInstModifiesRAState(DW_CFA_restore_state, Offset); break; case DW_CFA_def_cfa: Function.addCFIInstruction( @@ -629,11 +640,16 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { BC.errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n"; return false; case DW_CFA_GNU_window_save: - // DW_CFA_GNU_window_save and DW_CFA_GNU_NegateRAState just use the same - // id but mean different things. The latter is used in AArch64. + // DW_CFA_GNU_window_save and DW_CFA_AARCH64_negate_ra_state just use the + // same id but mean different things. The latter is used in AArch64. if (Function.getBinaryContext().isAArch64()) { - Function.addCFIInstruction( - Offset, MCCFIInstruction::createNegateRAState(nullptr)); + // The location OpNegateRAState CFIs are needed + // depends on the order of BasicBlocks, which changes during + // optimizations. Instead of adding OpNegateRAState CFIs, an annotation + // is added to the instruction, to mark that the instruction modifies + // the RA State. The actual state for instructions are worked out in + // MarkRAStates based on these annotations. + Function.setInstModifiesRAState(DW_CFA_AARCH64_negate_ra_state, Offset); break; } if (opts::Verbosity >= 1) diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp index 52475227eb32f..4a7b1d73479c1 100644 --- a/bolt/lib/Core/MCPlusBuilder.cpp +++ b/bolt/lib/Core/MCPlusBuilder.cpp @@ -159,6 +159,74 @@ bool MCPlusBuilder::isTailCall(const MCInst &Inst) const { return false; } +void MCPlusBuilder::setNegateRAState(MCInst &Inst) const { + assert(!hasAnnotation(Inst, MCAnnotation::kNegateState)); + setAnnotationOpValue(Inst, MCAnnotation::kNegateState, true); +} + +bool MCPlusBuilder::hasNegateRAState(const MCInst &Inst) const { + return hasAnnotation(Inst, MCAnnotation::kNegateState); +} + +void MCPlusBuilder::setRememberState(MCInst &Inst) const { + assert(!hasAnnotation(Inst, MCAnnotation::kRememberState)); + setAnnotationOpValue(Inst, MCAnnotation::kRememberState, true); +} + +bool MCPlusBuilder::hasRememberState(const MCInst &Inst) const { + return hasAnnotation(Inst, MCAnnotation::kRememberState); +} + +void MCPlusBuilder::setRestoreState(MCInst &Inst) const { + assert(!hasAnnotation(Inst, MCAnnotation::kRestoreState)); + setAnnotationOpValue(Inst, MCAnnotation::kRestoreState, true); +} + +bool MCPlusBuilder::hasRestoreState(const MCInst &Inst) const { + return hasAnnotation(Inst, MCAnnotation::kRestoreState); +} + +void MCPlusBuilder::setRASigned(MCInst &Inst) const { + assert(!hasAnnotation(Inst, MCAnnotation::kSigned)); + setAnnotationOpValue(Inst, MCAnnotation::kSigned, true); +} + +bool MCPlusBuilder::isRASigned(const MCInst &Inst) const { + return hasAnnotation(Inst, MCAnnotation::kSigned); +} + +void MCPlusBuilder::setRASigning(MCInst &Inst) const { + assert(!hasAnnotation(Inst, MCAnnotation::kSigning)); + setAnnotationOpValue(Inst, MCAnnotation::kSigning, true); +} + +bool MCPlusBuilder::isRASigning(const MCInst &Inst) const { + return hasAnnotation(Inst, MCAnnotation::kSigning); +} + +void MCPlusBuilder::setAuthenticating(MCInst &Inst) const { + assert(!hasAnnotation(Inst, MCAnnotation::kAuthenticating)); + setAnnotationOpValue(Inst, MCAnnotation::kAuthenticating, true); +} + +bool MCPlusBuilder::isAuthenticating(const MCInst &Inst) const { + return hasAnnotation(Inst, MCAnnotation::kAuthenticating); +} + +void MCPlusBuilder::setRAUnsigned(MCInst &Inst) const { + assert(!hasAnnotation(Inst, MCAnnotation::kUnsigned)); + setAnnotationOpValue(Inst, MCAnnotation::kUnsigned, true); +} + +bool MCPlusBuilder::isRAUnsigned(const MCInst &Inst) const { + return hasAnnotation(Inst, MCAnnotation::kUnsigned); +} + +bool MCPlusBuilder::isRAStateUnknown(const MCInst &Inst) const { + return !(isRAUnsigned(Inst) || isRASigned(Inst) || isRASigning(Inst) || + isAuthenticating(Inst)); +} + std::optional MCPlusBuilder::getEHInfo(const MCInst &Inst) const { if (!isCall(Inst)) return std::nullopt; diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt index 77d2bb9c2bcb5..d7519518f186f 100644 --- a/bolt/lib/Passes/CMakeLists.txt +++ b/bolt/lib/Passes/CMakeLists.txt @@ -17,12 +17,14 @@ add_llvm_library(LLVMBOLTPasses IdenticalCodeFolding.cpp IndirectCallPromotion.cpp Inliner.cpp + InsertNegateRAStatePass.cpp Instrumentation.cpp JTFootprintReduction.cpp LongJmp.cpp LoopInversionPass.cpp LivenessAnalysis.cpp MCF.cpp + MarkRAStates.cpp PatchEntries.cpp PAuthGadgetScanner.cpp PettisAndHansen.cpp diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp new file mode 100644 index 0000000000000..098ebf8f953b4 --- /dev/null +++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp @@ -0,0 +1,142 @@ +//===- bolt/Passes/InsertNegateRAStatePass.cpp ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the InsertNegateRAStatePass class. It inserts +// OpNegateRAState CFIs to places where the state of two consecutive +// instructions are different. +// +//===----------------------------------------------------------------------===// +#include "bolt/Passes/InsertNegateRAStatePass.h" +#include "bolt/Core/BinaryFunction.h" +#include "bolt/Core/ParallelUtilities.h" +#include "bolt/Utils/CommandLineOpts.h" +#include +#include +#include + +using namespace llvm; + +namespace llvm { +namespace bolt { + +void InsertNegateRAState::runOnFunction(BinaryFunction &BF) { + BinaryContext &BC = BF.getBinaryContext(); + + if (BF.getState() == BinaryFunction::State::Empty) + return; + + if (BF.getState() != BinaryFunction::State::CFG && + BF.getState() != BinaryFunction::State::CFG_Finalized) { + BC.outs() << "BOLT-INFO: No CFG for " << BF.getPrintName() + << " in InsertNegateRAStatePass\n"; + return; + } + + // If none is inserted, the function doesn't need more work. + if (!addNegateRAStateAfterPacOrAuth(BF)) + return; + + fixUnknownStates(BF); + + bool FirstIter = true; + MCInst PrevInst; + BinaryBasicBlock *PrevBB = nullptr; + auto *Begin = BF.getLayout().block_begin(); + auto *End = BF.getLayout().block_end(); + for (auto *BB = Begin; BB != End; BB++) { + + // Support for function splitting: + // if two consecutive BBs are going to end up in different functions, + // we have to negate the RA State, so the new function starts with a Signed + // state. + if (PrevBB != nullptr && + PrevBB->getFragmentNum() != (*BB)->getFragmentNum() && + BC.MIB->isRASigned(*((*BB)->begin()))) { + BF.addCFIInstruction(*BB, (*BB)->begin(), + MCCFIInstruction::createNegateRAState(nullptr)); + } + + for (auto It = (*BB)->begin(); It != (*BB)->end(); ++It) { + + MCInst &Inst = *It; + if (BC.MIB->isCFI(Inst)) + continue; + + if (!FirstIter) { + if ((BC.MIB->isRASigned(PrevInst) && BC.MIB->isRAUnsigned(Inst)) || + (BC.MIB->isRAUnsigned(PrevInst) && BC.MIB->isRASigned(Inst))) { + + It = BF.addCFIInstruction( + *BB, It, MCCFIInstruction::createNegateRAState(nullptr)); + } + + } else { + FirstIter = false; + } + PrevInst = *It; + } + PrevBB = *BB; + } +} + +bool InsertNegateRAState::addNegateRAStateAfterPacOrAuth(BinaryFunction &BF) { + BinaryContext &BC = BF.getBinaryContext(); + bool FoundAny = false; + for (BinaryBasicBlock &BB : BF) { + for (auto Iter = BB.begin(); Iter != BB.end(); ++Iter) { + MCInst &Inst = *Iter; + if (BC.MIB->isPSign(Inst) || BC.MIB->isPAuth(Inst)) { + Iter = BF.addCFIInstruction( + &BB, Iter + 1, MCCFIInstruction::createNegateRAState(nullptr)); + FoundAny = true; + } + } + } + return FoundAny; +} + +void InsertNegateRAState::fixUnknownStates(BinaryFunction &BF) { + BinaryContext &BC = BF.getBinaryContext(); + bool FirstIter = true; + MCInst PrevInst; + for (BinaryBasicBlock &BB : BF) { + for (auto It = BB.begin(); It != BB.end(); ++It) { + + MCInst &Inst = *It; + if (BC.MIB->isCFI(Inst)) + continue; + + if (!FirstIter && BC.MIB->isRAStateUnknown(Inst)) { + if (BC.MIB->isRASigned(PrevInst) || BC.MIB->isRASigning(PrevInst)) { + BC.MIB->setRASigned(Inst); + } else if (BC.MIB->isRAUnsigned(PrevInst) || + BC.MIB->isAuthenticating(PrevInst)) { + BC.MIB->setRAUnsigned(Inst); + } + } else { + FirstIter = false; + } + PrevInst = Inst; + } + } +} + +Error InsertNegateRAState::runOnFunctions(BinaryContext &BC) { + ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { + runOnFunction(BF); + }; + + ParallelUtilities::runOnEachFunction( + BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, nullptr, + "InsertNegateRAStatePass"); + + return Error::success(); +} + +} // end namespace bolt +} // end namespace llvm diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp new file mode 100644 index 0000000000000..adccf2090c36f --- /dev/null +++ b/bolt/lib/Passes/MarkRAStates.cpp @@ -0,0 +1,133 @@ +//===- bolt/Passes/MarkRAStates.cpp ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the MarkRAStates class. +// Three CFIs have an influence on the RA State of an instruction: +// - NegateRAState flips the RA State, +// - RememberState pushes the RA State to a stack, +// - RestoreState pops the RA State from the stack. +// These are saved as MCAnnotations on instructions they refer to at CFI +// reading (in CFIReaderWriter::fillCFIInfoFor). In this pass, we can work out +// the RA State of each instruction, and save it as new MCAnnotations. The new +// annotations are Signing, Signed, Authenticating and Unsigned. After +// optimizations, .cfi_negate_ra_state CFIs are added to the places where the +// state changes in InsertNegateRAStatePass. +// +//===----------------------------------------------------------------------===// +#include "bolt/Passes/MarkRAStates.h" +#include "bolt/Core/BinaryFunction.h" +#include "bolt/Core/ParallelUtilities.h" +#include "bolt/Utils/CommandLineOpts.h" +#include +#include +#include + +#include +#include +#include + +using namespace llvm; + +namespace llvm { +namespace bolt { + +void MarkRAStates::runOnFunction(BinaryFunction &BF) { + + if (BF.isIgnored()) + return; + + BinaryContext &BC = BF.getBinaryContext(); + + for (BinaryBasicBlock &BB : BF) { + for (auto It = BB.begin(); It != BB.end(); ++It) { + MCInst &Inst = *It; + if ((BC.MIB->isPSign(Inst) || BC.MIB->isPAuth(Inst)) && + !BC.MIB->hasNegateRAState(Inst)) { + // no .cfi_negate_ra_state attached to signing or authenticating instr + // means, that this is a function with handwritten assembly, which might + // not respect Clang's conventions (e.g. tailcalls are always + // authenticated, so functions always start with unsigned RAState when + // working with compiler-generated code) + BF.setIgnored(); + BC.outs() << "BOLT-INFO: inconsistent RAStates in function " + << BF.getPrintName() << "\n"; + return; + } + } + } + + bool RAState = false; + std::stack RAStateStack; + + for (BinaryBasicBlock &BB : BF) { + for (auto It = BB.begin(); It != BB.end(); ++It) { + + MCInst &Inst = *It; + if (BC.MIB->isCFI(Inst)) + continue; + + if (BC.MIB->isPSign(Inst)) { + if (RAState) { + // RA signing instructions should only follow unsigned RA state. + BC.outs() << "BOLT-INFO: inconsistent RAStates in function " + << BF.getPrintName() << "\n"; + BF.setIgnored(); + return; + } + BC.MIB->setRASigning(Inst); + } else if (BC.MIB->isPAuth(Inst)) { + if (!RAState) { + // RA authenticating instructions should only follow signed RA state. + BC.outs() << "BOLT-INFO: inconsistent RAStates in function " + << BF.getPrintName() << "\n"; + BF.setIgnored(); + return; + } + BC.MIB->setAuthenticating(Inst); + } else if (RAState) { + BC.MIB->setRASigned(Inst); + } else { + BC.MIB->setRAUnsigned(Inst); + } + + // Updating RAState. All updates are valid from the next instruction. + // Because the same instruction can have remember and restore, the order + // here is relevant. This is the reason to loop over Annotations instead + // of just checking each in a predefined order. + for (unsigned int Idx = 0; Idx < Inst.getNumOperands(); Idx++) { + std::optional Annotation = + BC.MIB->getAnnotationAtOpIndex(Inst, Idx); + if (!Annotation) + continue; + if (Annotation == MCPlus::MCAnnotation::kNegateState) + RAState = !RAState; + else if (Annotation == MCPlus::MCAnnotation::kRememberState) + RAStateStack.push(RAState); + else if (Annotation == MCPlus::MCAnnotation::kRestoreState) { + RAState = RAStateStack.top(); + RAStateStack.pop(); + } + } + } + } +} + +Error MarkRAStates::runOnFunctions(BinaryContext &BC) { + ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { + runOnFunction(BF); + }; + + ParallelUtilities::runOnEachFunction( + BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, nullptr, + "MarkRAStates"); + + return Error::success(); +} + +} // end namespace bolt +} // end namespace llvm diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index d9b7a2bd9a14c..0a45183a51966 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -19,11 +19,13 @@ #include "bolt/Passes/IdenticalCodeFolding.h" #include "bolt/Passes/IndirectCallPromotion.h" #include "bolt/Passes/Inliner.h" +#include "bolt/Passes/InsertNegateRAStatePass.h" #include "bolt/Passes/Instrumentation.h" #include "bolt/Passes/JTFootprintReduction.h" #include "bolt/Passes/LongJmp.h" #include "bolt/Passes/LoopInversionPass.h" #include "bolt/Passes/MCF.h" +#include "bolt/Passes/MarkRAStates.h" #include "bolt/Passes/PLTCall.h" #include "bolt/Passes/PatchEntries.h" #include "bolt/Passes/ProfileQualityStats.h" @@ -353,6 +355,9 @@ Error BinaryFunctionPassManager::runPasses() { Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { BinaryFunctionPassManager Manager(BC); + if (BC.isAArch64()) + Manager.registerPass(std::make_unique()); + Manager.registerPass( std::make_unique(PrintEstimateEdgeCounts)); @@ -512,6 +517,8 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) { // targets. No extra instructions after this pass, otherwise we may have // relocations out of range and crash during linking. Manager.registerPass(std::make_unique(PrintLongJmp)); + + Manager.registerPass(std::make_unique()); } // This pass should always run last.* From e81445cf4786bf7773c72ff0be2713eb9c5a6cfd Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Mon, 7 Apr 2025 18:19:20 +0200 Subject: [PATCH 03/31] [BOLT][AArch64] Fix which PSign and PAuth variants are used (#120064) - only the ones operating on LR should be marked with .cfi_cfi_negate_ra_state - added support for fused PtrAuth and Ret instructions, e.g. RETAA. --- bolt/include/bolt/Core/MCPlusBuilder.h | 22 ++++++++++----- .../bolt/Passes/InsertNegateRAStatePass.h | 4 ++- bolt/lib/Passes/InsertNegateRAStatePass.cpp | 13 ++++++--- bolt/lib/Passes/MarkRAStates.cpp | 7 +++-- .../Target/AArch64/AArch64MCPlusBuilder.cpp | 28 +++++++++++++++---- 5 files changed, 53 insertions(+), 21 deletions(-) diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index ce659f39a6ab3..288d8b75afa1c 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -611,6 +611,21 @@ class MCPlusBuilder { return std::nullopt; } + virtual bool isPSignOnLR(const MCInst &Inst) const { + llvm_unreachable("not implemented"); + return false; + } + + virtual bool isPAuthOnLR(const MCInst &Inst) const { + llvm_unreachable("not implemented"); + return false; + } + + virtual bool isPAuthAndRet(const MCInst &Inst) const { + llvm_unreachable("not implemented"); + return false; + } + /// Returns the register used as a return address. Returns std::nullopt if /// not applicable, such as reading the return address from a system register /// or from the stack. @@ -848,13 +863,6 @@ class MCPlusBuilder { llvm_unreachable("not implemented"); return false; } - virtual bool isPAuth(MCInst &Inst) const { - llvm_unreachable("not implemented"); - } - - virtual bool isPSign(MCInst &Inst) const { - llvm_unreachable("not implemented"); - } virtual bool isCleanRegXOR(const MCInst &Inst) const { llvm_unreachable("not implemented"); diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h index e62006baa2eff..ce73b5a152d12 100644 --- a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h +++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h @@ -30,7 +30,9 @@ class InsertNegateRAState : public BinaryFunctionPass { private: /// Loops over all instructions and adds OpNegateRAState CFI - /// after any pointer signing or authenticating instructions. + /// after any pointer signing or authenticating instructions, + /// which operate on the LR, except fused ptrauth + ret instructions + /// (such as RETAA). /// Returns true, if any OpNegateRAState CFIs were added. bool addNegateRAStateAfterPacOrAuth(BinaryFunction &BF); /// Because states are tracked as MCAnnotations on individual instructions, diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp index 098ebf8f953b4..05d4d58c7e4cf 100644 --- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp +++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp @@ -46,14 +46,16 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) { bool FirstIter = true; MCInst PrevInst; BinaryBasicBlock *PrevBB = nullptr; + // We need to iterate on BBs in the Layout order + // not in the order they are stored in the BF class. auto *Begin = BF.getLayout().block_begin(); auto *End = BF.getLayout().block_end(); for (auto *BB = Begin; BB != End; BB++) { // Support for function splitting: - // if two consecutive BBs are going to end up in different functions, - // we have to negate the RA State, so the new function starts with a Signed - // state. + // if two consecutive BBs with Signed state are going to end up in different + // functions, we have to add a OpNegateRAState to the beginning of the newly + // split function, so it starts with a Signed state. if (PrevBB != nullptr && PrevBB->getFragmentNum() != (*BB)->getFragmentNum() && BC.MIB->isRASigned(*((*BB)->begin()))) { @@ -68,6 +70,8 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) { continue; if (!FirstIter) { + // Consecutive instructions with different RAState means we need to add + // a OpNegateRAState. if ((BC.MIB->isRASigned(PrevInst) && BC.MIB->isRAUnsigned(Inst)) || (BC.MIB->isRAUnsigned(PrevInst) && BC.MIB->isRASigned(Inst))) { @@ -90,7 +94,8 @@ bool InsertNegateRAState::addNegateRAStateAfterPacOrAuth(BinaryFunction &BF) { for (BinaryBasicBlock &BB : BF) { for (auto Iter = BB.begin(); Iter != BB.end(); ++Iter) { MCInst &Inst = *Iter; - if (BC.MIB->isPSign(Inst) || BC.MIB->isPAuth(Inst)) { + if (BC.MIB->isPSignOnLR(Inst) || + (BC.MIB->isPAuthOnLR(Inst) && !BC.MIB->isPAuthAndRet(Inst))) { Iter = BF.addCFIInstruction( &BB, Iter + 1, MCCFIInstruction::createNegateRAState(nullptr)); FoundAny = true; diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp index adccf2090c36f..8121fffb93c9f 100644 --- a/bolt/lib/Passes/MarkRAStates.cpp +++ b/bolt/lib/Passes/MarkRAStates.cpp @@ -46,7 +46,8 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) { for (BinaryBasicBlock &BB : BF) { for (auto It = BB.begin(); It != BB.end(); ++It) { MCInst &Inst = *It; - if ((BC.MIB->isPSign(Inst) || BC.MIB->isPAuth(Inst)) && + if ((BC.MIB->isPSignOnLR(Inst) || + (BC.MIB->isPAuthOnLR(Inst) && !BC.MIB->isPAuthAndRet(Inst))) && !BC.MIB->hasNegateRAState(Inst)) { // no .cfi_negate_ra_state attached to signing or authenticating instr // means, that this is a function with handwritten assembly, which might @@ -71,7 +72,7 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) { if (BC.MIB->isCFI(Inst)) continue; - if (BC.MIB->isPSign(Inst)) { + if (BC.MIB->isPSignOnLR(Inst)) { if (RAState) { // RA signing instructions should only follow unsigned RA state. BC.outs() << "BOLT-INFO: inconsistent RAStates in function " @@ -80,7 +81,7 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) { return; } BC.MIB->setRASigning(Inst); - } else if (BC.MIB->isPAuth(Inst)) { + } else if (BC.MIB->isPAuthOnLR(Inst)) { if (!RAState) { // RA authenticating instructions should only follow signed RA state. BC.outs() << "BOLT-INFO: inconsistent RAStates in function " diff --git a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp index 8aa6b7457f222..d902ba57e7e01 100644 --- a/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp +++ b/bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp @@ -231,6 +231,28 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { } } + bool isPSignOnLR(const MCInst &Inst) const override { + std::optional SignReg = getSignedReg(Inst); + return SignReg && *SignReg == AArch64::LR; + } + + bool isPAuthOnLR(const MCInst &Inst) const override { + // LDR(A|B) should not be covered. + bool IsChecked; + std::optional AuthReg = + getWrittenAuthenticatedReg(Inst, IsChecked); + return !IsChecked && AuthReg && *AuthReg == AArch64::LR; + } + + bool isPAuthAndRet(const MCInst &Inst) const override { + return Inst.getOpcode() == AArch64::RETAA || + Inst.getOpcode() == AArch64::RETAB || + Inst.getOpcode() == AArch64::RETAASPPCi || + Inst.getOpcode() == AArch64::RETABSPPCi || + Inst.getOpcode() == AArch64::RETAASPPCr || + Inst.getOpcode() == AArch64::RETABSPPCr; + } + std::optional getSignedReg(const MCInst &Inst) const override { switch (Inst.getOpcode()) { case AArch64::PACIA: @@ -893,12 +915,6 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { } return false; } - bool isPAuth(MCInst &Inst) const override { - return Inst.getOpcode() == AArch64::AUTIASP; - } - bool isPSign(MCInst &Inst) const override { - return Inst.getOpcode() == AArch64::PACIASP; - } bool isRegToRegMove(const MCInst &Inst, MCPhysReg &From, MCPhysReg &To) const override { From de4b9899f0f271f5f038ead0560277d7cffffad1 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Fri, 4 Apr 2025 11:23:22 +0200 Subject: [PATCH 04/31] [BOLT] Add unit tests for negate_ra_state cfi handling - also add match_dwarf.py, a tool used by the unit tests. --- bolt/lib/Core/BinaryFunction.cpp | 24 +-- bolt/test/AArch64/negate-ra-state-incorrect.s | 44 ++++++ bolt/test/AArch64/negate-ra-state.s | 42 ++++++ bolt/test/lit.cfg.py | 7 + bolt/test/match_dwarf.py | 137 ++++++++++++++++++ 5 files changed, 233 insertions(+), 21 deletions(-) create mode 100644 bolt/test/AArch64/negate-ra-state-incorrect.s create mode 100644 bolt/test/AArch64/negate-ra-state.s create mode 100755 bolt/test/match_dwarf.py diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index d622e26959b64..914d54a9dffc5 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -2787,14 +2787,8 @@ struct CFISnapshot { case MCCFIInstruction::OpLLVMDefAspaceCfa: case MCCFIInstruction::OpLabel: case MCCFIInstruction::OpValOffset: - llvm_unreachable("unsupported CFI opcode"); - break; case MCCFIInstruction::OpNegateRAState: - if (!(opts::BinaryAnalysisMode || opts::HeatmapMode)) { - llvm_unreachable("BOLT-ERROR: binaries using pac-ret hardening (e.g. " - "as produced by '-mbranch-protection=pac-ret') are " - "currently not supported by BOLT."); - } + llvm_unreachable("unsupported CFI opcode"); break; case MCCFIInstruction::OpRememberState: case MCCFIInstruction::OpRestoreState: @@ -2934,15 +2928,9 @@ struct CFISnapshotDiff : public CFISnapshot { case MCCFIInstruction::OpLLVMDefAspaceCfa: case MCCFIInstruction::OpLabel: case MCCFIInstruction::OpValOffset: + case MCCFIInstruction::OpNegateRAState: llvm_unreachable("unsupported CFI opcode"); return false; - case MCCFIInstruction::OpNegateRAState: - if (!(opts::BinaryAnalysisMode || opts::HeatmapMode)) { - llvm_unreachable("BOLT-ERROR: binaries using pac-ret hardening (e.g. " - "as produced by '-mbranch-protection=pac-ret') are " - "currently not supported by BOLT."); - } - break; case MCCFIInstruction::OpRememberState: case MCCFIInstruction::OpRestoreState: case MCCFIInstruction::OpGnuArgsSize: @@ -3091,14 +3079,8 @@ BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState, case MCCFIInstruction::OpLLVMDefAspaceCfa: case MCCFIInstruction::OpLabel: case MCCFIInstruction::OpValOffset: - llvm_unreachable("unsupported CFI opcode"); - break; case MCCFIInstruction::OpNegateRAState: - if (!(opts::BinaryAnalysisMode || opts::HeatmapMode)) { - llvm_unreachable("BOLT-ERROR: binaries using pac-ret hardening (e.g. " - "as produced by '-mbranch-protection=pac-ret') are " - "currently not supported by BOLT."); - } + llvm_unreachable("unsupported CFI opcode"); break; case MCCFIInstruction::OpGnuArgsSize: // do not affect CFI state diff --git a/bolt/test/AArch64/negate-ra-state-incorrect.s b/bolt/test/AArch64/negate-ra-state-incorrect.s new file mode 100644 index 0000000000000..c6b8b36939f4d --- /dev/null +++ b/bolt/test/AArch64/negate-ra-state-incorrect.s @@ -0,0 +1,44 @@ +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.exe.bolt | FileCheck %s + +# check that the output is listing foo as incorrect +# CHECK: BOLT-INFO: inconsistent RAStates in function foo + +# check that foo got Ignored, so it's not in the new .text section +# RUN: llvm-objdump %t.exe.bolt -d -j .text > %t.exe.dump +# RUN: not grep ":" %t.exe.dump + + +# How is this test incorrect? +# There is an extra .cfi_negate_ra_state in foo. +# Because of this, we will get to the autiasp (hint #29) +# in a (seemingly) unsigned state. That is incorrect. + .text + .globl foo + .p2align 2 + .type foo,@function +foo: + .cfi_startproc + hint #25 + .cfi_negate_ra_state + sub sp, sp, #16 + stp x29, x30, [sp, #16] // 16-byte Folded Spill + .cfi_def_cfa_offset 16 + str w0, [sp, #12] + ldr w8, [sp, #12] + .cfi_negate_ra_state + add w0, w8, #1 + ldp x29, x30, [sp, #16] // 16-byte Folded Reload + add sp, sp, #16 + hint #29 + .cfi_negate_ra_state + ret +.Lfunc_end1: + .size foo, .Lfunc_end1-foo + .cfi_endproc + + .global _start + .type _start, %function +_start: + b foo diff --git a/bolt/test/AArch64/negate-ra-state.s b/bolt/test/AArch64/negate-ra-state.s new file mode 100644 index 0000000000000..11c511a254c71 --- /dev/null +++ b/bolt/test/AArch64/negate-ra-state.s @@ -0,0 +1,42 @@ +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q + +# RUN: llvm-objdump %t.exe -d > %t.exe.dump +# RUN: llvm-objdump --dwarf=frames %t.exe > %t.exe.dump-dwarf +# RUN: match-dwarf %t.exe.dump %t.exe.dump-dwarf foo > %t.match-dwarf.txt + +# RUN: llvm-bolt %t.exe -o %t.exe.bolt + +# RUN: llvm-objdump %t.exe.bolt -d > %t.exe.bolt.dump +# RUN: llvm-objdump --dwarf=frames %t.exe.bolt > %t.exe.bolt.dump-dwarf +# RUN: match-dwarf %t.exe.bolt.dump %t.exe.bolt.dump-dwarf foo > %t.bolt.match-dwarf.txt + +# RUN: diff %t.match-dwarf.txt %t.bolt.match-dwarf.txt + + .text + .globl foo + .p2align 2 + .type foo,@function +foo: + .cfi_startproc + hint #25 + .cfi_negate_ra_state + sub sp, sp, #16 + stp x29, x30, [sp, #16] // 16-byte Folded Spill + .cfi_def_cfa_offset 16 + str w0, [sp, #12] + ldr w8, [sp, #12] + add w0, w8, #1 + ldp x29, x30, [sp, #16] // 16-byte Folded Reload + add sp, sp, #16 + hint #29 + .cfi_negate_ra_state + ret +.Lfunc_end1: + .size foo, .Lfunc_end1-foo + .cfi_endproc + + .global _start + .type _start, %function +_start: + b foo diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py index 3299051db4983..c88c055f3f6e4 100644 --- a/bolt/test/lit.cfg.py +++ b/bolt/test/lit.cfg.py @@ -100,6 +100,7 @@ config.substitutions.append(("%cxxflags", "")) link_fdata_cmd = os.path.join(config.test_source_root, "link_fdata.py") +match_dwarf_cmd = os.path.join(config.test_source_root, "match_dwarf.py") tool_dirs = [config.llvm_tools_dir, config.test_source_root] @@ -143,6 +144,12 @@ ToolSubst("llvm-readobj", unresolved="fatal"), ToolSubst("llvm-dwp", unresolved="fatal"), ToolSubst("split-file", unresolved="fatal"), + ToolSubst( + "match-dwarf", + command=sys.executable, + unresolved="fatal", + extra_args=[match_dwarf_cmd], + ), ] llvm_config.add_tool_substitutions(tools, tool_dirs) diff --git a/bolt/test/match_dwarf.py b/bolt/test/match_dwarf.py new file mode 100755 index 0000000000000..3d3ab22042d5c --- /dev/null +++ b/bolt/test/match_dwarf.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 + +# This tool helps matching dwarf dumps +# (= the output from running llvm-objdump --dwarf=frames), +# by address to function names (which are parsed from a normal objdump). +# The script is used for checking if .cfi_negate_ra_state CFIs +# are generated by BOLT the same way they are generated by LLVM. +# The script is called twice in unittests: once with the objdumps of +# the BOLT input binary, and once with the output binary from BOLT. +# We output the offsets of .cfi_negate_ra_state instructions from the +# function's start address to see that BOLT can generate them to the same +# locations. +# Because we check the location, this is only useful for testing without +# optimization flags, so `llvm-bolt input.exe -o output.exe` + + +import argparse +import subprocess +import sys +import re + + +class NameDwarfPair(object): + def __init__(self, name, body): + self.name = name + self.body = body + self.finalized = False + + def append(self, body_line): + # only store elements into the body until the first whitespace line is encountered. + if body_line.isspace(): + self.finalized = True + if not self.finalized: + self.body += body_line + + def print(self): + print(self.name) + print(self.body) + + def parse_negate_offsets(self): + """ + Create a list of locations/offsets of the negate_ra_state CFIs in the + dwarf entry. To find offsets for each, we match the DW_CFA_advance_loc + entries, and sum up their values. + """ + negate_offsets = [] + loc = 0 + # TODO: make sure this is not printed in hex + re_advloc = r"DW_CFA_advance_loc: (\d+)" + + for line in self.body.splitlines(): + # if line matches advance_loc int + match = re.search(re_advloc, line) + if match: + loc += int(match.group(1)) + if "DW_CFA_AARCH64_negate_ra_state" in line: + negate_offsets.append(loc) + + self.negate_offsets = negate_offsets + + def __eq__(self, other): + return self.name == other.name and self.negate_offsets == other.negate_offsets + + +def extract_function_addresses(objdump): + """ + Parse and return address-to-name dictionary from objdump file. + Function names in the objdump look like this: + 000123abc : + We create a dict from the addr (000123abc), to the name (foo). + """ + addr_name_dict = dict() + re_function = re.compile(r"^([0-9a-fA-F]+)\s<(.*)>:$") + with open(objdump, "r") as f: + for line in f.readlines(): + match = re_function.match(line) + if not match: + continue + m_addr = match.groups()[0] + m_name = match.groups()[1] + addr_name_dict[int(m_addr, 16)] = m_name + + return addr_name_dict + + +def match_dwarf_to_name(dwarfdump, addr_name_dict): + """ + Parse dwarf dump, and match names to blocks using the dict from the objdump. + Return a list of NameDwarfPairs. + The matched lines look like this: + 000123 000456 000789 FDE cie=000000 pc=0123abc...0456def + We do not have the function name for this, only the PC range it applies to. + We match the pc=0123abc (the start address), and find the matching name from + the addr_name_dict. + The resultint NameDwarfPair will hold the lines this header applied to, and + instead of the header with the addresses, it will just have the function name. + """ + re_address_line = re.compile(r".*pc=([0-9a-fA-F]+)\.\.\.([0-9a-fA-F]+)") + with open(dwarfdump, "r") as dw: + functions = [] + for line in dw.readlines(): + match = re_address_line.match(line) + if not match: + if len(functions) > 0: + functions[-1].append(line) + continue + pc_start_address = match.groups()[0] + name = addr_name_dict.get(int(pc_start_address, 16)) + functions.append(NameDwarfPair(name, "")) + + return functions + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("objdump", help="Objdump file") + parser.add_argument( + "dwarfdump", help="dwarf dump file created with 'llvm-objdump --dwarf=frames'" + ) + parser.add_argument("function", help="Function to search CFIs in.") + + args = parser.parse_args() + + addr_name_dict = extract_function_addresses(args.objdump) + functions = match_dwarf_to_name(args.dwarfdump, addr_name_dict) + + for f in functions: + if f.name == args.function: + f.parse_negate_offsets() + print(f.negate_offsets) + break + else: + print(f"{args.function} not found") + exit(-1) + + +main() From 1ddea7b0a07e691cc44bca7172bb018c549960bf Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Fri, 23 May 2025 14:11:47 +0200 Subject: [PATCH 05/31] [BOLT] Basic exception unwinding test --- bolt/test/runtime/AArch64/negate-ra-state.cpp | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 bolt/test/runtime/AArch64/negate-ra-state.cpp diff --git a/bolt/test/runtime/AArch64/negate-ra-state.cpp b/bolt/test/runtime/AArch64/negate-ra-state.cpp new file mode 100644 index 0000000000000..60b0b08950b58 --- /dev/null +++ b/bolt/test/runtime/AArch64/negate-ra-state.cpp @@ -0,0 +1,26 @@ +// REQUIRES: system-linux,bolt-runtime + +// RUN: %clangxx --target=aarch64-unknown-linux-gnu \ +// RUN: -mbranch-protection=pac-ret -Wl,-q %s -o %t.exe +// RUN: llvm-bolt %t.exe -o %t.bolt.exe +// RUN: %t.bolt.exe | FileCheck %s + +// CHECK: Exception caught: Exception from bar(). + +#include +#include + +void bar() { throw std::runtime_error("Exception from bar()."); } + +void foo() { + try { + bar(); + } catch (const std::exception &e) { + printf("Exception caught: %s\n", e.what()); + } +} + +int main() { + foo(); + return 0; +} From 0cec69a061b620507553a0babd57572ee81d3012 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Tue, 29 Jul 2025 14:28:34 +0000 Subject: [PATCH 06/31] [BOLT] Add OpNegateRAState to printCFI --- bolt/lib/Core/BinaryContext.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 98440cde7cebd..ee13773a49369 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1905,6 +1905,9 @@ void BinaryContext::printCFI(raw_ostream &OS, const MCCFIInstruction &Inst) { case MCCFIInstruction::OpGnuArgsSize: OS << "OpGnuArgsSize"; break; + case MCCFIInstruction::OpNegateRAState: + OS << "OpNegateRAState"; + break; default: OS << "Op#" << Operation; break; From 9e19f0a93e7b0b8763b06df4a4d66abaebc693d7 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Tue, 29 Jul 2025 14:28:57 +0000 Subject: [PATCH 07/31] [BOLT] Improve function splitting at OpNegateRAState handling - Previous version used the Layout API which should not be used. - This version iterates on Fragments to have the same effect. - NegateRAState is added to the first *non-empty* BB of the cold fragment. --- bolt/lib/Passes/InsertNegateRAStatePass.cpp | 44 +++++++++++---------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp index 05d4d58c7e4cf..0eb0e3aef00d4 100644 --- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp +++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp @@ -43,27 +43,32 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) { fixUnknownStates(BF); - bool FirstIter = true; - MCInst PrevInst; - BinaryBasicBlock *PrevBB = nullptr; - // We need to iterate on BBs in the Layout order - // not in the order they are stored in the BF class. - auto *Begin = BF.getLayout().block_begin(); - auto *End = BF.getLayout().block_end(); - for (auto *BB = Begin; BB != End; BB++) { - - // Support for function splitting: - // if two consecutive BBs with Signed state are going to end up in different - // functions, we have to add a OpNegateRAState to the beginning of the newly - // split function, so it starts with a Signed state. - if (PrevBB != nullptr && - PrevBB->getFragmentNum() != (*BB)->getFragmentNum() && - BC.MIB->isRASigned(*((*BB)->begin()))) { - BF.addCFIInstruction(*BB, (*BB)->begin(), + // Support for function splitting: + // if two consecutive BBs with Signed state are going to end up in different + // functions (so are held by different FunctionFragments), we have to add a + // OpNegateRAState to the beginning of the newly split function, so it starts + // with a Signed state. + for (FunctionFragment &FF : BF.getLayout().fragments()) { + // Find the first BB in the FF which has Instructions. + // BOLT can generate empty BBs at function splitting which are only used as + // target labels. We should add the negate-ra-state CFI to the first + // non-empty BB. + auto FirstNonEmpty = + std::find_if(FF.begin(), FF.end(), [](BinaryBasicBlock *BB) { + // getFirstNonPseudo returns BB.end() if it does not find any + // Instructions. + return BB->getFirstNonPseudo() != BB->end(); + }); + if (BC.MIB->isRASigned(*((*FirstNonEmpty)->begin()))) { + BF.addCFIInstruction(*FirstNonEmpty, (*FirstNonEmpty)->begin(), MCCFIInstruction::createNegateRAState(nullptr)); } + } - for (auto It = (*BB)->begin(); It != (*BB)->end(); ++It) { + bool FirstIter = true; + MCInst PrevInst; + for (BinaryBasicBlock &BB : BF) { + for (auto It = BB.begin(); It != BB.end(); ++It) { MCInst &Inst = *It; if (BC.MIB->isCFI(Inst)) @@ -76,7 +81,7 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) { (BC.MIB->isRAUnsigned(PrevInst) && BC.MIB->isRASigned(Inst))) { It = BF.addCFIInstruction( - *BB, It, MCCFIInstruction::createNegateRAState(nullptr)); + &BB, It, MCCFIInstruction::createNegateRAState(nullptr)); } } else { @@ -84,7 +89,6 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) { } PrevInst = *It; } - PrevBB = *BB; } } From 8182c9b645c5c970b98993abab12afb7279993e9 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Thu, 31 Jul 2025 08:42:06 +0000 Subject: [PATCH 08/31] [BOLT] Bugfix: CFIs can be placed before the first Instruction - This caused a crash when trying to Annotate RAState-changing CFIs (RememberState, RestoreState, NegateRAState). - The fix introduces an InitialRAState for each BinaryFunction. - If we have a NegateRAState before the first Instr, we set that to True. - In MarkRAStates, we push the InitialRAState to the RAStateStack: as we may have omitted the RememberState at the function start, its RestoreState pair would try to pop an empty stack otherwise. --- bolt/include/bolt/Core/BinaryFunction.h | 5 +++++ bolt/lib/Core/Exceptions.cpp | 21 ++++++++++++++++----- bolt/lib/Passes/MarkRAStates.cpp | 3 ++- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 4534bbcb1b640..c3b22d821c39c 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -148,6 +148,9 @@ class BinaryFunction { PF_MEMEVENT = 4, /// Profile has mem events. }; + void setInitialRAState(bool State) { InitialRAState = State; } + bool getInitialRAState() { return InitialRAState; } + /// Struct for tracking exception handling ranges. struct CallSite { const MCSymbol *Start; @@ -221,6 +224,8 @@ class BinaryFunction { /// Current state of the function. State CurrentState{State::Empty}; + bool InitialRAState{false}; + /// A list of symbols associated with the function entry point. /// /// Multiple symbols would typically result from identical code-folding diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp index 2480cd5bf38ca..60d74c0f06e30 100644 --- a/bolt/lib/Core/Exceptions.cpp +++ b/bolt/lib/Core/Exceptions.cpp @@ -569,20 +569,24 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { Function.addCFIInstruction( Offset, MCCFIInstruction::createRememberState(nullptr)); - if (Function.getBinaryContext().isAArch64()) + if (Function.getBinaryContext().isAArch64()) { // Support for pointer authentication: // We need to annotate instructions that modify the RA State, to work // out the state of each instruction in MarkRAStates Pass. - Function.setInstModifiesRAState(DW_CFA_remember_state, Offset); + if (Offset != 0) + Function.setInstModifiesRAState(DW_CFA_remember_state, Offset); + } break; case DW_CFA_restore_state: Function.addCFIInstruction(Offset, MCCFIInstruction::createRestoreState(nullptr)); - if (Function.getBinaryContext().isAArch64()) + if (Function.getBinaryContext().isAArch64()) { // Support for pointer authentication: // We need to annotate instructions that modify the RA State, to work // out the state of each instruction in MarkRAStates Pass. - Function.setInstModifiesRAState(DW_CFA_restore_state, Offset); + if (Offset != 0) + Function.setInstModifiesRAState(DW_CFA_restore_state, Offset); + } break; case DW_CFA_def_cfa: Function.addCFIInstruction( @@ -649,7 +653,14 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { // is added to the instruction, to mark that the instruction modifies // the RA State. The actual state for instructions are worked out in // MarkRAStates based on these annotations. - Function.setInstModifiesRAState(DW_CFA_AARCH64_negate_ra_state, Offset); + if (Offset != 0) + Function.setInstModifiesRAState(DW_CFA_AARCH64_negate_ra_state, + Offset); + else + // We cannot Annotate an instruction at Offset == 0. + // Instead, we save the initial (Signed) state, and push it to + // MarkRAStates' RAStateStack. + Function.setInitialRAState(true); break; } if (opts::Verbosity >= 1) diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp index 8121fffb93c9f..d7db5532002bf 100644 --- a/bolt/lib/Passes/MarkRAStates.cpp +++ b/bolt/lib/Passes/MarkRAStates.cpp @@ -62,8 +62,9 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) { } } - bool RAState = false; + bool RAState = BF.getInitialRAState(); std::stack RAStateStack; + RAStateStack.push(RAState); for (BinaryBasicBlock &BB : BF) { for (auto It = BB.begin(); It != BB.end(); ++It) { From 0bfae2c8e9e3d4384f1fd9440a1f75ea466a3ce1 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Fri, 1 Aug 2025 08:21:17 +0000 Subject: [PATCH 09/31] [BOLT] Add function-splitting test --- .../runtime/AArch64/pacret-function-split.cpp | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 bolt/test/runtime/AArch64/pacret-function-split.cpp diff --git a/bolt/test/runtime/AArch64/pacret-function-split.cpp b/bolt/test/runtime/AArch64/pacret-function-split.cpp new file mode 100644 index 0000000000000..208fc5c115571 --- /dev/null +++ b/bolt/test/runtime/AArch64/pacret-function-split.cpp @@ -0,0 +1,42 @@ +/* This test check that the negate-ra-state CFIs are properly emitted in case of + function splitting. The test checks two things: + - we split at the correct location: to test the feature, + we need to split *before* the bl __cxa_throw@PLT call is made, + so the unwinder has to unwind from the split (cold) part. + + - the BOLTed binary runs, and returns the string from foo. + +# REQUIRES: system-linux,bolt-runtime + +# FDATA: 1 main #split# 1 _Z3foov 0 0 1 + +# RUN: %clangxx --target=aarch64-unknown-linux-gnu \ +# RUN: -mbranch-protection=pac-ret %s -o %t.exe -Wl,-q +# RUN: link_fdata %s %t.exe %t.fdata +# RUN: llvm-bolt %t.exe -o %t.bolt --split-functions --split-eh \ +# RUN: --split-strategy=profile2 --split-all-cold --print-split \ +# RUN: --print-only=_Z3foov --data=%t.fdata 2>&1 | FileCheck \ +# RUN: --check-prefix=BOLT-CHECK %s +# RUN: %t.bolt | FileCheck %s --check-prefix=RUN-CHECK + +# BOLT-CHECK-NOT: bl __cxa_throw@PLT +# BOLT-CHECK: ------- HOT-COLD SPLIT POINT ------- +# BOLT-CHECK: bl __cxa_throw@PLT + +# RUN-CHECK: Exception caught: Exception from foo(). +*/ + +#include +#include + +void foo() { throw std::runtime_error("Exception from foo()."); } + +int main() { + try { + __asm__ __volatile__("split:"); + foo(); + } catch (const std::exception &e) { + printf("Exception caught: %s\n", e.what()); + } + return 0; +} From 2a5fd8042d488039d111ecd27c1cb1caba904414 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Mon, 4 Aug 2025 13:48:49 +0000 Subject: [PATCH 10/31] [BOLT] Improve warnings in MarkRAStates - Previously, the same warning was printed in 3 different cases. Changed warnings to be more specific. --- bolt/lib/Passes/MarkRAStates.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp index d7db5532002bf..1b56cd83a2314 100644 --- a/bolt/lib/Passes/MarkRAStates.cpp +++ b/bolt/lib/Passes/MarkRAStates.cpp @@ -57,6 +57,8 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) { BF.setIgnored(); BC.outs() << "BOLT-INFO: inconsistent RAStates in function " << BF.getPrintName() << "\n"; + BC.outs() + << "BOLT-INFO: ptr sign/auth inst without .cfi_negate_ra_state\n"; return; } } @@ -78,6 +80,8 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) { // RA signing instructions should only follow unsigned RA state. BC.outs() << "BOLT-INFO: inconsistent RAStates in function " << BF.getPrintName() << "\n"; + BC.outs() << "BOLT-INFO: ptr signing inst encountered in Signed RA " + "state.\n"; BF.setIgnored(); return; } @@ -87,6 +91,8 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) { // RA authenticating instructions should only follow signed RA state. BC.outs() << "BOLT-INFO: inconsistent RAStates in function " << BF.getPrintName() << "\n"; + BC.outs() << "BOLT-INFO: ptr authenticating inst encountered in " + "Unsigned RA state.\n"; BF.setIgnored(); return; } From 0c287613b1949cdbae4792750af5e588a9de9725 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Tue, 5 Aug 2025 13:33:44 +0000 Subject: [PATCH 11/31] [BOLT] Improve negate-ra-state-incorrect test - check all 3 possibilities that MarkRAStates warns about, - simplify the asm functions in the test, - use FileCheck instead of grep on the disassembly. --- bolt/test/AArch64/negate-ra-state-incorrect.s | 79 ++++++++++++++----- 1 file changed, 59 insertions(+), 20 deletions(-) diff --git a/bolt/test/AArch64/negate-ra-state-incorrect.s b/bolt/test/AArch64/negate-ra-state-incorrect.s index c6b8b36939f4d..ad922d83c6d61 100644 --- a/bolt/test/AArch64/negate-ra-state-incorrect.s +++ b/bolt/test/AArch64/negate-ra-state-incorrect.s @@ -1,19 +1,31 @@ +# This test checks that MarkRAStates pass ignores functions with +# malformed .cfi_negate_ra_state sequences in the input binary. + +# The cases checked are: +# - extra .cfi_negate_ra_state in Signed state: checked in foo, +# - extra .cfi_negate_ra_state in Unsigned state: checked in bar, +# - missing .cfi_negate_ra_state from PSign or PAuth instructions: checked in baz. + # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -# RUN: llvm-bolt %t.exe -o %t.exe.bolt | FileCheck %s +# RUN: llvm-bolt %t.exe -o %t.exe.bolt --no-threads | FileCheck %s --check-prefix=CHECK-BOLT + +# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function foo +# CHECK-BOLT-NEXT: BOLT-INFO: ptr authenticating inst encountered in Unsigned RA state. -# check that the output is listing foo as incorrect -# CHECK: BOLT-INFO: inconsistent RAStates in function foo +# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function bar +# CHECK-BOLT-NEXT: BOLT-INFO: ptr signing inst encountered in Signed RA state -# check that foo got Ignored, so it's not in the new .text section -# RUN: llvm-objdump %t.exe.bolt -d -j .text > %t.exe.dump -# RUN: not grep ":" %t.exe.dump +# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function baz +# CHECK-BOLT-NEXT: BOLT-INFO: ptr sign/auth inst without .cfi_negate_ra_state + +# Check that the incorrect functions got ignored, so they are not in the new .text section +# RUN: llvm-objdump %t.exe.bolt -d -j .text | FileCheck %s --check-prefix=CHECK-OBJDUMP +# CHECK-OBJDUMP-NOT: : +# CHECK-OBJDUMP-NOT: : +# CHECK-OBJDUMP-NOT: : -# How is this test incorrect? -# There is an extra .cfi_negate_ra_state in foo. -# Because of this, we will get to the autiasp (hint #29) -# in a (seemingly) unsigned state. That is incorrect. .text .globl foo .p2align 2 @@ -22,23 +34,50 @@ foo: .cfi_startproc hint #25 .cfi_negate_ra_state - sub sp, sp, #16 - stp x29, x30, [sp, #16] // 16-byte Folded Spill - .cfi_def_cfa_offset 16 - str w0, [sp, #12] - ldr w8, [sp, #12] + mov x1, #0 + .cfi_negate_ra_state // Incorrect CFI in signed state + hint #29 + .cfi_negate_ra_state + ret + .cfi_endproc + .size foo, .-foo + + .text + .globl bar + .p2align 2 + .type bar,@function +bar: + .cfi_startproc + mov x1, #0 + .cfi_negate_ra_state // Incorrect CFI in unsigned state + hint #25 .cfi_negate_ra_state - add w0, w8, #1 - ldp x29, x30, [sp, #16] // 16-byte Folded Reload - add sp, sp, #16 + mov x1, #0 hint #29 .cfi_negate_ra_state ret -.Lfunc_end1: - .size foo, .Lfunc_end1-foo .cfi_endproc + .size bar, .-bar + + .text + .globl baz + .p2align 2 + .type baz,@function +baz: + .cfi_startproc + mov x1, #0 + hint #25 + .cfi_negate_ra_state + mov x1, #0 + hint #29 + // Missing .cfi_negate_ra_state + ret + .cfi_endproc + .size baz, .-baz .global _start .type _start, %function _start: b foo + b bar + b baz From 054f614a0be622bcade6468a6ff51f97f0545fe6 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Wed, 6 Aug 2025 13:57:29 +0000 Subject: [PATCH 12/31] [BOLT] Do run negate-ra-state rewriting passes on all functions - if a BinaryFunction did not contain any .cfi_negate_ra_state, we can skip MarkRAStates and InsertNegateRAStatePass. This can happen with handwritten assembly, or if the binary was compiled in a way that drops DWARF tables. --- bolt/include/bolt/Core/BinaryFunction.h | 6 ++++++ bolt/lib/Core/Exceptions.cpp | 1 + bolt/lib/Passes/InsertNegateRAStatePass.cpp | 8 +++++++- bolt/lib/Passes/MarkRAStates.cpp | 8 +++++++- 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index c3b22d821c39c..3db39e47f9e94 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -148,6 +148,8 @@ class BinaryFunction { PF_MEMEVENT = 4, /// Profile has mem events. }; + void setContainedNegateRAState() { HadNegateRAState = true; } + bool containedNegateRAState() { return HadNegateRAState; } void setInitialRAState(bool State) { InitialRAState = State; } bool getInitialRAState() { return InitialRAState; } @@ -224,6 +226,10 @@ class BinaryFunction { /// Current state of the function. State CurrentState{State::Empty}; + /// Indicates if the Function contained .cfi-negate-ra-state. These are not + /// read from the binary. This boolean is used when deciding to run the + /// .cfi-negate-ra-state rewriting passes on a function or not. + bool HadNegateRAState{false}; bool InitialRAState{false}; /// A list of symbols associated with the function entry point. diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp index 60d74c0f06e30..7a2fba140b48d 100644 --- a/bolt/lib/Core/Exceptions.cpp +++ b/bolt/lib/Core/Exceptions.cpp @@ -647,6 +647,7 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { // DW_CFA_GNU_window_save and DW_CFA_AARCH64_negate_ra_state just use the // same id but mean different things. The latter is used in AArch64. if (Function.getBinaryContext().isAArch64()) { + Function.setContainedNegateRAState(); // The location OpNegateRAState CFIs are needed // depends on the order of BasicBlocks, which changes during // optimizations. Instead of adding OpNegateRAState CFIs, an annotation diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp index 0eb0e3aef00d4..80857204ff53c 100644 --- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp +++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp @@ -137,7 +137,13 @@ void InsertNegateRAState::fixUnknownStates(BinaryFunction &BF) { Error InsertNegateRAState::runOnFunctions(BinaryContext &BC) { ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { - runOnFunction(BF); + if (BF.containedNegateRAState()) { + // We can skip functions which did not include negate-ra-state CFIs. This + // includes code using pac-ret hardening as well, if the binary is + // compiled with `-fno-exceptions -fno-unwind-tables + // -fno-asynchronous-unwind-tables` + runOnFunction(BF); + } }; ParallelUtilities::runOnEachFunction( diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp index 1b56cd83a2314..9e2ab049a67f5 100644 --- a/bolt/lib/Passes/MarkRAStates.cpp +++ b/bolt/lib/Passes/MarkRAStates.cpp @@ -127,7 +127,13 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) { Error MarkRAStates::runOnFunctions(BinaryContext &BC) { ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { - runOnFunction(BF); + if (BF.containedNegateRAState()) { + // We can skip functions which did not include negate-ra-state CFIs. This + // includes code using pac-ret hardening as well, if the binary is + // compiled with `-fno-exceptions -fno-unwind-tables + // -fno-asynchronous-unwind-tables` + runOnFunction(BF); + } }; ParallelUtilities::runOnEachFunction( From 855c74d038419d18e082c279e275039bf5b13d65 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Wed, 6 Aug 2025 13:58:37 +0000 Subject: [PATCH 13/31] [BOLT] Introduce --disallow-pacret flag --- bolt/include/bolt/Utils/CommandLineOpts.h | 1 + bolt/lib/Rewrite/BinaryPassManager.cpp | 9 +++++++ bolt/lib/Rewrite/RewriteInstance.cpp | 11 +++++++++ bolt/test/AArch64/negate-ra-state-disallow.s | 25 ++++++++++++++++++++ 4 files changed, 46 insertions(+) create mode 100644 bolt/test/AArch64/negate-ra-state-disallow.s diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h index 859d6f3bf6774..fb83321ac529a 100644 --- a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -78,6 +78,7 @@ extern llvm::cl::opt OutputFilename; extern llvm::cl::opt PerfData; extern llvm::cl::opt PrintCacheMetrics; extern llvm::cl::opt PrintSections; +extern llvm::cl::opt DisallowPacret; // The format to use with -o in aggregation mode (perf2bolt) enum ProfileFormatKind { PF_Fdata, PF_YAML }; diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 0a45183a51966..bb8697cb4f5ec 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -278,6 +278,15 @@ static cl::opt ShortenInstructions("shorten-instructions", cl::desc("shorten instructions"), cl::init(true), cl::cat(BoltOptCategory)); + +// This flag is used to "gate" the negate-ra-state CFI handling. +// Sometimes, binaries use pac-ret but not contain negate-ra-state CFIs. That +// should cause no issues for BOLT. +cl::opt DisallowPacret( + "disallow-pacret", + cl::desc("Disable processing binaries containing negate-ra-state DWARF " + "CFIs (e.g. binaries using pac-ret hardening)"), + cl::cat(BoltOptCategory)); } // namespace opts namespace llvm { diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index a6e4dbc9c192f..91b7feb59cc14 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -3520,6 +3520,17 @@ void RewriteInstance::disassembleFunctions() { } } + // Check if fillCFIInfoFor removed any OpNegateRAState CFIs from the + // function. + if (Function.containedNegateRAState()) { + if (opts::DisallowPacret) { + BC->errs() << "BOLT-ERROR: --disallow-pacret flag was used, but " + << Function.getPrintName() + << " contains .cfi-negate-ra-state.\n"; + exit(1); + } + } + // Parse LSDA. if (Function.getLSDAAddress() != 0 && !BC->getFragmentsToSkip().count(&Function)) { diff --git a/bolt/test/AArch64/negate-ra-state-disallow.s b/bolt/test/AArch64/negate-ra-state-disallow.s new file mode 100644 index 0000000000000..97488be6eb940 --- /dev/null +++ b/bolt/test/AArch64/negate-ra-state-disallow.s @@ -0,0 +1,25 @@ +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q +# RUN: not llvm-bolt %t.exe -o %t.exe.bolt --disallow-pacret 2>&1 | FileCheck %s + +# CHECK: BOLT-ERROR: --disallow-pacret flag was used, but foo contains .cfi-negate-ra-state. + + .text + .globl foo + .p2align 2 + .type foo,@function +foo: + .cfi_startproc + hint #25 + .cfi_negate_ra_state + mov x1, #0 + hint #29 + .cfi_negate_ra_state + ret + .cfi_endproc + .size foo, .-foo + + .global _start + .type _start, %function +_start: + b foo From 4a976a4743593e6895c93a72ee3efd88d276e7ae Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Fri, 8 Aug 2025 12:32:56 +0000 Subject: [PATCH 14/31] [BOLT][NFC] Review nits - remove unused imports - fix clangd warnings - rename functions - improve comments - reformat lib/Core/Exceptions.cpp --- bolt/include/bolt/Core/MCPlusBuilder.h | 2 +- bolt/include/bolt/Passes/InsertNegateRAStatePass.h | 12 +++++++----- bolt/lib/Core/Exceptions.cpp | 12 ++++++------ bolt/lib/Passes/InsertNegateRAStatePass.cpp | 14 ++++++-------- bolt/lib/Passes/MarkRAStates.cpp | 13 +++---------- 5 files changed, 23 insertions(+), 30 deletions(-) diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 288d8b75afa1c..56eae11389285 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -78,7 +78,7 @@ class MCPlusBuilder { if (*FirstAnnotationOp > OpIndex || Inst.getNumOperands() < OpIndex) return std::nullopt; - auto Op = Inst.begin() + OpIndex; + const auto *Op = Inst.begin() + OpIndex; const int64_t ImmValue = Op->getImm(); return extractAnnotationIndex(ImmValue); } diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h index ce73b5a152d12..2394356401564 100644 --- a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h +++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h @@ -13,7 +13,6 @@ #define BOLT_PASSES_INSERT_NEGATE_RA_STATE_PASS #include "bolt/Passes/BinaryPasses.h" -#include namespace llvm { namespace bolt { @@ -31,14 +30,17 @@ class InsertNegateRAState : public BinaryFunctionPass { private: /// Loops over all instructions and adds OpNegateRAState CFI /// after any pointer signing or authenticating instructions, - /// which operate on the LR, except fused ptrauth + ret instructions - /// (such as RETAA). + /// which operate on the LR, except fused pauth + ret instructions + /// (such as RETAA). Normal pauth and psign instructions are "special cases", + /// meaning they always need an OpNegateRAState CFI after them. + /// Fused pauth + ret instructions are not, they work as any other + /// instruction. /// Returns true, if any OpNegateRAState CFIs were added. - bool addNegateRAStateAfterPacOrAuth(BinaryFunction &BF); + bool addNegateRAStateAfterPSignOrPAuth(BinaryFunction &BF); /// Because states are tracked as MCAnnotations on individual instructions, /// newly inserted instructions do not have a state associated with them. /// New states are "inherited" from the last known state. - void fixUnknownStates(BinaryFunction &BF); + void inferUnknownStates(BinaryFunction &BF); }; } // namespace bolt diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp index 7a2fba140b48d..27656c7b3cadf 100644 --- a/bolt/lib/Core/Exceptions.cpp +++ b/bolt/lib/Core/Exceptions.cpp @@ -648,12 +648,12 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { // same id but mean different things. The latter is used in AArch64. if (Function.getBinaryContext().isAArch64()) { Function.setContainedNegateRAState(); - // The location OpNegateRAState CFIs are needed - // depends on the order of BasicBlocks, which changes during - // optimizations. Instead of adding OpNegateRAState CFIs, an annotation - // is added to the instruction, to mark that the instruction modifies - // the RA State. The actual state for instructions are worked out in - // MarkRAStates based on these annotations. + // The location OpNegateRAState CFIs are needed depends on the order of + // BasicBlocks, which changes during optimizations. Instead of adding + // OpNegateRAState CFIs, an annotation is added to the instruction, to + // mark that the instruction modifies the RA State. The actual state for + // instructions are worked out in MarkRAStates based on these + // annotations. if (Offset != 0) Function.setInstModifiesRAState(DW_CFA_AARCH64_negate_ra_state, Offset); diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp index 80857204ff53c..8393f2b05a16f 100644 --- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp +++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp @@ -14,10 +14,7 @@ #include "bolt/Passes/InsertNegateRAStatePass.h" #include "bolt/Core/BinaryFunction.h" #include "bolt/Core/ParallelUtilities.h" -#include "bolt/Utils/CommandLineOpts.h" #include -#include -#include using namespace llvm; @@ -38,10 +35,10 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) { } // If none is inserted, the function doesn't need more work. - if (!addNegateRAStateAfterPacOrAuth(BF)) + if (!addNegateRAStateAfterPSignOrPAuth(BF)) return; - fixUnknownStates(BF); + inferUnknownStates(BF); // Support for function splitting: // if two consecutive BBs with Signed state are going to end up in different @@ -53,7 +50,7 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) { // BOLT can generate empty BBs at function splitting which are only used as // target labels. We should add the negate-ra-state CFI to the first // non-empty BB. - auto FirstNonEmpty = + auto *FirstNonEmpty = std::find_if(FF.begin(), FF.end(), [](BinaryBasicBlock *BB) { // getFirstNonPseudo returns BB.end() if it does not find any // Instructions. @@ -92,7 +89,8 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) { } } -bool InsertNegateRAState::addNegateRAStateAfterPacOrAuth(BinaryFunction &BF) { +bool InsertNegateRAState::addNegateRAStateAfterPSignOrPAuth( + BinaryFunction &BF) { BinaryContext &BC = BF.getBinaryContext(); bool FoundAny = false; for (BinaryBasicBlock &BB : BF) { @@ -109,7 +107,7 @@ bool InsertNegateRAState::addNegateRAStateAfterPacOrAuth(BinaryFunction &BF) { return FoundAny; } -void InsertNegateRAState::fixUnknownStates(BinaryFunction &BF) { +void InsertNegateRAState::inferUnknownStates(BinaryFunction &BF) { BinaryContext &BC = BF.getBinaryContext(); bool FirstIter = true; MCInst PrevInst; diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp index 9e2ab049a67f5..aacb14fb7dcc9 100644 --- a/bolt/lib/Passes/MarkRAStates.cpp +++ b/bolt/lib/Passes/MarkRAStates.cpp @@ -22,12 +22,7 @@ #include "bolt/Passes/MarkRAStates.h" #include "bolt/Core/BinaryFunction.h" #include "bolt/Core/ParallelUtilities.h" -#include "bolt/Utils/CommandLineOpts.h" #include -#include -#include - -#include #include #include @@ -49,11 +44,9 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) { if ((BC.MIB->isPSignOnLR(Inst) || (BC.MIB->isPAuthOnLR(Inst) && !BC.MIB->isPAuthAndRet(Inst))) && !BC.MIB->hasNegateRAState(Inst)) { - // no .cfi_negate_ra_state attached to signing or authenticating instr - // means, that this is a function with handwritten assembly, which might - // not respect Clang's conventions (e.g. tailcalls are always - // authenticated, so functions always start with unsigned RAState when - // working with compiler-generated code) + // Not all functions have .cfi_negate_ra_state in them. But if one does, + // we expect psign/pauth instructions to have the hasNegateRAState + // annotation. BF.setIgnored(); BC.outs() << "BOLT-INFO: inconsistent RAStates in function " << BF.getPrintName() << "\n"; From 7df2811ad987e4901f949c297161aa80ba29b104 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Fri, 8 Aug 2025 12:39:02 +0000 Subject: [PATCH 15/31] [BOLT] Refactor and improve InsertNegateRAStatePass - separate function splitting code into its own function - change the iteration looking for state-switches between consecutive instructions to only look for changes *inside* FunctionFragments, and skip borders between them. --- .../bolt/Passes/InsertNegateRAStatePass.h | 7 ++ bolt/lib/Passes/InsertNegateRAStatePass.cpp | 91 ++++++++++--------- 2 files changed, 53 insertions(+), 45 deletions(-) diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h index 2394356401564..0b1654c1380b9 100644 --- a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h +++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h @@ -41,6 +41,13 @@ class InsertNegateRAState : public BinaryFunctionPass { /// newly inserted instructions do not have a state associated with them. /// New states are "inherited" from the last known state. void inferUnknownStates(BinaryFunction &BF); + + /// Support for function splitting: + /// if two consecutive BBs with Signed state are going to end up in different + /// functions (so are held by different FunctionFragments), we have to add a + /// OpNegateRAState to the beginning of the newly split function, so it starts + /// with a Signed state. + void coverFunctionFragmentStart(BinaryFunction &BF, FunctionFragment &FF); }; } // namespace bolt diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp index 8393f2b05a16f..8ee2d28d0e379 100644 --- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp +++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp @@ -34,57 +34,35 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) { return; } - // If none is inserted, the function doesn't need more work. - if (!addNegateRAStateAfterPSignOrPAuth(BF)) - return; + // Attach .cfi_negate_ra_state to the "trivial" cases first. + addNegateRAStateAfterPSignOrPAuth(BF); inferUnknownStates(BF); - // Support for function splitting: - // if two consecutive BBs with Signed state are going to end up in different - // functions (so are held by different FunctionFragments), we have to add a - // OpNegateRAState to the beginning of the newly split function, so it starts - // with a Signed state. for (FunctionFragment &FF : BF.getLayout().fragments()) { - // Find the first BB in the FF which has Instructions. - // BOLT can generate empty BBs at function splitting which are only used as - // target labels. We should add the negate-ra-state CFI to the first - // non-empty BB. - auto *FirstNonEmpty = - std::find_if(FF.begin(), FF.end(), [](BinaryBasicBlock *BB) { - // getFirstNonPseudo returns BB.end() if it does not find any - // Instructions. - return BB->getFirstNonPseudo() != BB->end(); - }); - if (BC.MIB->isRASigned(*((*FirstNonEmpty)->begin()))) { - BF.addCFIInstruction(*FirstNonEmpty, (*FirstNonEmpty)->begin(), - MCCFIInstruction::createNegateRAState(nullptr)); - } - } - - bool FirstIter = true; - MCInst PrevInst; - for (BinaryBasicBlock &BB : BF) { - for (auto It = BB.begin(); It != BB.end(); ++It) { - - MCInst &Inst = *It; - if (BC.MIB->isCFI(Inst)) - continue; - - if (!FirstIter) { - // Consecutive instructions with different RAState means we need to add - // a OpNegateRAState. - if ((BC.MIB->isRASigned(PrevInst) && BC.MIB->isRAUnsigned(Inst)) || - (BC.MIB->isRAUnsigned(PrevInst) && BC.MIB->isRASigned(Inst))) { - - It = BF.addCFIInstruction( - &BB, It, MCCFIInstruction::createNegateRAState(nullptr)); + coverFunctionFragmentStart(BF, FF); + bool FirstIter = true; + MCInst PrevInst; + // As this pass runs after function splitting, we should only check + // consecutive instructions inside FunctionFragments. + for (BinaryBasicBlock *BB : FF) { + for (auto It = BB->begin(); It != BB->end(); ++It) { + MCInst &Inst = *It; + if (BC.MIB->isCFI(Inst)) + continue; + if (!FirstIter) { + // Consecutive instructions with different RAState means we need to + // add a OpNegateRAState. + if ((BC.MIB->isRASigned(PrevInst) && BC.MIB->isRAUnsigned(Inst)) || + (BC.MIB->isRAUnsigned(PrevInst) && BC.MIB->isRASigned(Inst))) { + It = BF.addCFIInstruction( + BB, It, MCCFIInstruction::createNegateRAState(nullptr)); + } + } else { + FirstIter = false; } - - } else { - FirstIter = false; + PrevInst = *It; } - PrevInst = *It; } } } @@ -107,6 +85,29 @@ bool InsertNegateRAState::addNegateRAStateAfterPSignOrPAuth( return FoundAny; } +void InsertNegateRAState::coverFunctionFragmentStart(BinaryFunction &BF, + FunctionFragment &FF) { + BinaryContext &BC = BF.getBinaryContext(); + if (FF.empty()) + return; + // Find the first BB in the FF which has Instructions. + // BOLT can generate empty BBs at function splitting which are only used as + // target labels. We should add the negate-ra-state CFI to the first + // non-empty BB. + auto *FirstNonEmpty = + std::find_if(FF.begin(), FF.end(), [](BinaryBasicBlock *BB) { + // getFirstNonPseudo returns BB.end() if it does not find any + // Instructions. + return BB->getFirstNonPseudo() != BB->end(); + }); + // If a function is already split in the input, the first FF can also start + // with Signed state. This covers that scenario as well. + if (BC.MIB->isRASigned(*((*FirstNonEmpty)->begin()))) { + BF.addCFIInstruction(*FirstNonEmpty, (*FirstNonEmpty)->begin(), + MCCFIInstruction::createNegateRAState(nullptr)); + } +} + void InsertNegateRAState::inferUnknownStates(BinaryFunction &BF) { BinaryContext &BC = BF.getBinaryContext(); bool FirstIter = true; From c75645fac3936dfbc2e6e37c9ee872e2c437a4bb Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Mon, 11 Aug 2025 11:20:45 +0000 Subject: [PATCH 16/31] [BOLT] Add function splitting lit test without runtime dependency - Test uses lit and FileCheck to check if OpNegateRAState CFIs are generated in the correct location for function-splitting. - Also fix a bug related to function-splitting. --- bolt/lib/Passes/InsertNegateRAStatePass.cpp | 3 +- bolt/test/AArch64/pacret-split-funcs.s | 56 +++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 bolt/test/AArch64/pacret-split-funcs.s diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp index 8ee2d28d0e379..60af3485ea4d1 100644 --- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp +++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp @@ -102,7 +102,8 @@ void InsertNegateRAState::coverFunctionFragmentStart(BinaryFunction &BF, }); // If a function is already split in the input, the first FF can also start // with Signed state. This covers that scenario as well. - if (BC.MIB->isRASigned(*((*FirstNonEmpty)->begin()))) { + if (BC.MIB->isRASigned(*((*FirstNonEmpty)->begin())) || + BC.MIB->isAuthenticating(*((*FirstNonEmpty)->begin()))) { BF.addCFIInstruction(*FirstNonEmpty, (*FirstNonEmpty)->begin(), MCCFIInstruction::createNegateRAState(nullptr)); } diff --git a/bolt/test/AArch64/pacret-split-funcs.s b/bolt/test/AArch64/pacret-split-funcs.s new file mode 100644 index 0000000000000..6582257948c62 --- /dev/null +++ b/bolt/test/AArch64/pacret-split-funcs.s @@ -0,0 +1,56 @@ +# Checking that we generate an OpNegateRAState CFI after the split point, +# when splitting a region with signed RA state. + +# REQUIRES: system-linux + +# RUN: %clang %cflags -o %t %s +# RUN: %clang %s %cflags -Wl,-q -o %t +# RUN: link_fdata --no-lbr %s %t %t.fdata +# RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions \ +# RUN: --print-only foo --print-split --print-all 2>&1 | FileCheck %s + +# Checking that we don't see any OpNegateRAState CFIs before the insertion pass. +# CHECK-NOT: OpNegateRAState +# CHECK: Binary Function "foo" after insert-negate-ra-state-pass + +# CHECK: paciasp +# CHECK-NEXT: OpNegateRAState + +# CHECK: ------- HOT-COLD SPLIT POINT ------- + +# CHECK: OpNegateRAState +# CHECK-NEXT: autiasp +# CHECK-NEXT: OpNegateRAState +# CHECK-NEXT: ret + +# CHECK: autiasp +# CHECK-NEXT: OpNegateRAState +# CHECK-NEXT: ret + +# End of the insert-negate-ra-state-pass logs +# CHECK: Binary Function "foo" after finalize-functions + + .text + .globl foo + .type foo, %function +foo: +.cfi_startproc +.entry_bb: + paciasp + .cfi_negate_ra_state // indicating that paciasp changed the RA state to signed +# FDATA: 1 foo #.entry_bb# 10 + cmp x0, #0 + b.eq .Lcold_bb1 + autiasp + .cfi_negate_ra_state // indicating that autiasp changed the RA state to unsigned + ret + .cfi_negate_ra_state // ret has unsigned RA state, but the next inst (autiasp) has signed RA state +.Lcold_bb1: + autiasp + .cfi_negate_ra_state // indicating that autiasp changed the RA state to unsigned + ret +.cfi_endproc + .size foo, .-foo + +## Force relocation mode. +.reloc 0, R_AARCH64_NONE From ad2f0290929a2e136031d3335d443a3d1a88e593 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Mon, 11 Aug 2025 13:04:15 +0000 Subject: [PATCH 17/31] [BOLT] Remove unnecessary script, and rewrite unit test using it - the checks that match_dwarf.py helped with can also be done using BOLT logs. - rewrote the negate-ra-state.s test to only rely on logs to verify correct OpNegateRAState CFI generation. --- bolt/test/AArch64/negate-ra-state.s | 100 +++++++++++++------- bolt/test/lit.cfg.py | 7 -- bolt/test/match_dwarf.py | 137 ---------------------------- 3 files changed, 67 insertions(+), 177 deletions(-) delete mode 100755 bolt/test/match_dwarf.py diff --git a/bolt/test/AArch64/negate-ra-state.s b/bolt/test/AArch64/negate-ra-state.s index 11c511a254c71..65483dfaf28e5 100644 --- a/bolt/test/AArch64/negate-ra-state.s +++ b/bolt/test/AArch64/negate-ra-state.s @@ -1,42 +1,76 @@ +# Checking that .cfi-negate_ra_state directives are emitted in the same location as in the input in the case of no optimizations. + # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.exe.bolt --no-threads --print-all | FileCheck %s --check-prefix=CHECK-BOLT + +# Check that the negate-ra-state at the start of bar is not discarded. +# If it was discarded, MarkRAState would report bar as having inconsistent RAStates. +# CHECK-BOLT-NOT: BOLT-INFO: inconsistent RAStates in function foo +# CHECK-BOLT-NOT: BOLT-INFO: inconsistent RAStates in function bar + +# Check that OpNegateRAState CFIs are generated correctly. +# CHECK-BOLT: Binary Function "foo" after insert-negate-ra-state-pass { +# CHECK-BOLT: paciasp +# CHECK-BOLT-NEXT: OpNegateRAState + +# CHECK-BOLT: DWARF CFI Instructions: +# CHECK-BOLT-NEXT: 0: OpNegateRAState +# CHECK-BOLT-NEXT: End of Function "foo" + +# CHECK-BOLT: Binary Function "bar" after insert-negate-ra-state-pass { +# CHECK-BOLT: OpNegateRAState +# CHECK-BOLT-NEXT: mov x1, #0x0 +# CHECK-BOLT-NEXT: mov x1, #0x1 +# CHECK-BOLT-NEXT: autiasp +# CHECK-BOLT-NEXT: OpNegateRAState +# CHECK-BOLT-NEXT: ret -# RUN: llvm-objdump %t.exe -d > %t.exe.dump -# RUN: llvm-objdump --dwarf=frames %t.exe > %t.exe.dump-dwarf -# RUN: match-dwarf %t.exe.dump %t.exe.dump-dwarf foo > %t.match-dwarf.txt +# CHECK-BOLT: DWARF CFI Instructions: +# CHECK-BOLT-NEXT: 0: OpNegateRAState +# CHECK-BOLT-NEXT: 1: OpNegateRAState +# CHECK-BOLT-NEXT: End of Function "bar" -# RUN: llvm-bolt %t.exe -o %t.exe.bolt +# End of negate-ra-state insertion logs for foo and bar. +# CHECK: Binary Function "_start" after insert-negate-ra-state-pass { -# RUN: llvm-objdump %t.exe.bolt -d > %t.exe.bolt.dump -# RUN: llvm-objdump --dwarf=frames %t.exe.bolt > %t.exe.bolt.dump-dwarf -# RUN: match-dwarf %t.exe.bolt.dump %t.exe.bolt.dump-dwarf foo > %t.bolt.match-dwarf.txt +# Check that the functions are in the new .text section +# RUN: llvm-objdump %t.exe.bolt -d -j .text | FileCheck %s --check-prefix=CHECK-OBJDUMP +# CHECK-OBJDUMP: : +# CHECK-OBJDUMP: : -# RUN: diff %t.match-dwarf.txt %t.bolt.match-dwarf.txt - .text - .globl foo - .p2align 2 - .type foo,@function + .text + .globl foo + .p2align 2 + .type foo,@function foo: - .cfi_startproc - hint #25 - .cfi_negate_ra_state - sub sp, sp, #16 - stp x29, x30, [sp, #16] // 16-byte Folded Spill - .cfi_def_cfa_offset 16 - str w0, [sp, #12] - ldr w8, [sp, #12] - add w0, w8, #1 - ldp x29, x30, [sp, #16] // 16-byte Folded Reload - add sp, sp, #16 - hint #29 - .cfi_negate_ra_state - ret -.Lfunc_end1: - .size foo, .Lfunc_end1-foo - .cfi_endproc - - .global _start - .type _start, %function + .cfi_startproc + paciasp + .cfi_negate_ra_state + mov x1, #0 + b bar + .cfi_endproc + .size foo, .-foo + + + + .text + .globl bar + .p2align 2 + .type bar,@function +bar: + .cfi_startproc + .cfi_negate_ra_state // Indicating that RA is signed from the start of bar. + mov x1, #0 + mov x1, #1 + autiasp + .cfi_negate_ra_state + ret + .cfi_endproc + .size bar, .-bar + + .global _start + .type _start, %function _start: - b foo + b foo diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py index c88c055f3f6e4..3299051db4983 100644 --- a/bolt/test/lit.cfg.py +++ b/bolt/test/lit.cfg.py @@ -100,7 +100,6 @@ config.substitutions.append(("%cxxflags", "")) link_fdata_cmd = os.path.join(config.test_source_root, "link_fdata.py") -match_dwarf_cmd = os.path.join(config.test_source_root, "match_dwarf.py") tool_dirs = [config.llvm_tools_dir, config.test_source_root] @@ -144,12 +143,6 @@ ToolSubst("llvm-readobj", unresolved="fatal"), ToolSubst("llvm-dwp", unresolved="fatal"), ToolSubst("split-file", unresolved="fatal"), - ToolSubst( - "match-dwarf", - command=sys.executable, - unresolved="fatal", - extra_args=[match_dwarf_cmd], - ), ] llvm_config.add_tool_substitutions(tools, tool_dirs) diff --git a/bolt/test/match_dwarf.py b/bolt/test/match_dwarf.py deleted file mode 100755 index 3d3ab22042d5c..0000000000000 --- a/bolt/test/match_dwarf.py +++ /dev/null @@ -1,137 +0,0 @@ -#!/usr/bin/env python3 - -# This tool helps matching dwarf dumps -# (= the output from running llvm-objdump --dwarf=frames), -# by address to function names (which are parsed from a normal objdump). -# The script is used for checking if .cfi_negate_ra_state CFIs -# are generated by BOLT the same way they are generated by LLVM. -# The script is called twice in unittests: once with the objdumps of -# the BOLT input binary, and once with the output binary from BOLT. -# We output the offsets of .cfi_negate_ra_state instructions from the -# function's start address to see that BOLT can generate them to the same -# locations. -# Because we check the location, this is only useful for testing without -# optimization flags, so `llvm-bolt input.exe -o output.exe` - - -import argparse -import subprocess -import sys -import re - - -class NameDwarfPair(object): - def __init__(self, name, body): - self.name = name - self.body = body - self.finalized = False - - def append(self, body_line): - # only store elements into the body until the first whitespace line is encountered. - if body_line.isspace(): - self.finalized = True - if not self.finalized: - self.body += body_line - - def print(self): - print(self.name) - print(self.body) - - def parse_negate_offsets(self): - """ - Create a list of locations/offsets of the negate_ra_state CFIs in the - dwarf entry. To find offsets for each, we match the DW_CFA_advance_loc - entries, and sum up their values. - """ - negate_offsets = [] - loc = 0 - # TODO: make sure this is not printed in hex - re_advloc = r"DW_CFA_advance_loc: (\d+)" - - for line in self.body.splitlines(): - # if line matches advance_loc int - match = re.search(re_advloc, line) - if match: - loc += int(match.group(1)) - if "DW_CFA_AARCH64_negate_ra_state" in line: - negate_offsets.append(loc) - - self.negate_offsets = negate_offsets - - def __eq__(self, other): - return self.name == other.name and self.negate_offsets == other.negate_offsets - - -def extract_function_addresses(objdump): - """ - Parse and return address-to-name dictionary from objdump file. - Function names in the objdump look like this: - 000123abc : - We create a dict from the addr (000123abc), to the name (foo). - """ - addr_name_dict = dict() - re_function = re.compile(r"^([0-9a-fA-F]+)\s<(.*)>:$") - with open(objdump, "r") as f: - for line in f.readlines(): - match = re_function.match(line) - if not match: - continue - m_addr = match.groups()[0] - m_name = match.groups()[1] - addr_name_dict[int(m_addr, 16)] = m_name - - return addr_name_dict - - -def match_dwarf_to_name(dwarfdump, addr_name_dict): - """ - Parse dwarf dump, and match names to blocks using the dict from the objdump. - Return a list of NameDwarfPairs. - The matched lines look like this: - 000123 000456 000789 FDE cie=000000 pc=0123abc...0456def - We do not have the function name for this, only the PC range it applies to. - We match the pc=0123abc (the start address), and find the matching name from - the addr_name_dict. - The resultint NameDwarfPair will hold the lines this header applied to, and - instead of the header with the addresses, it will just have the function name. - """ - re_address_line = re.compile(r".*pc=([0-9a-fA-F]+)\.\.\.([0-9a-fA-F]+)") - with open(dwarfdump, "r") as dw: - functions = [] - for line in dw.readlines(): - match = re_address_line.match(line) - if not match: - if len(functions) > 0: - functions[-1].append(line) - continue - pc_start_address = match.groups()[0] - name = addr_name_dict.get(int(pc_start_address, 16)) - functions.append(NameDwarfPair(name, "")) - - return functions - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("objdump", help="Objdump file") - parser.add_argument( - "dwarfdump", help="dwarf dump file created with 'llvm-objdump --dwarf=frames'" - ) - parser.add_argument("function", help="Function to search CFIs in.") - - args = parser.parse_args() - - addr_name_dict = extract_function_addresses(args.objdump) - functions = match_dwarf_to_name(args.dwarfdump, addr_name_dict) - - for f in functions: - if f.name == args.function: - f.parse_negate_offsets() - print(f.negate_offsets) - break - else: - print(f"{args.function} not found") - exit(-1) - - -main() From 8b5b73210f8e8d14d524691cdab484c679767bbe Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Mon, 11 Aug 2025 12:46:30 +0000 Subject: [PATCH 18/31] [BOLT][NFC] Simplify RAState tracking - Remove 'Authenticating' and 'Signing' MCAnnotations. - The same logic can be done using only 'Signed' and 'Unsigned' annotations. - To check if an instruction is signing or authenticating, we can use the PSignOnLR, and PAuthOnLR functions. --- bolt/include/bolt/Core/MCPlus.h | 2 -- bolt/include/bolt/Core/MCPlusBuilder.h | 12 -------- .../bolt/Passes/InsertNegateRAStatePass.h | 9 ------ bolt/lib/Core/MCPlusBuilder.cpp | 21 +------------- bolt/lib/Passes/InsertNegateRAStatePass.cpp | 28 ++----------------- bolt/lib/Passes/MarkRAStates.cpp | 8 ++++-- 6 files changed, 10 insertions(+), 70 deletions(-) diff --git a/bolt/include/bolt/Core/MCPlus.h b/bolt/include/bolt/Core/MCPlus.h index a95bba36c5a6e..8b1f7033ffad1 100644 --- a/bolt/include/bolt/Core/MCPlus.h +++ b/bolt/include/bolt/Core/MCPlus.h @@ -72,9 +72,7 @@ class MCAnnotation { kLabel, /// MCSymbol pointing to this instruction. kSize, /// Size of the instruction. kDynamicBranch, /// Jit instruction patched at runtime. - kSigning, /// Inst is a signing instruction (paciasp, etc.). kSigned, /// Inst is in a range where RA is signed. - kAuthenticating, /// Authenticating inst (e.g. autiasp). kUnsigned, /// Inst is in a range where RA is unsigned. kRememberState, /// Inst has rememberState CFI. kRestoreState, /// Inst has restoreState CFI. diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 56eae11389285..80991f293e5a8 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -1361,18 +1361,6 @@ class MCPlusBuilder { /// Return true if \p Inst has Signed RA annotation. bool isRASigned(const MCInst &Inst) const; - /// Stores RA Signing annotation on \p Inst. - void setRASigning(MCInst &Inst) const; - - /// Return true if \p Inst has Signing RA annotation. - bool isRASigning(const MCInst &Inst) const; - - /// Stores Authenticating annotation on \p Inst. - void setAuthenticating(MCInst &Inst) const; - - /// Return true if \p Inst has Authenticating annotation. - bool isAuthenticating(const MCInst &Inst) const; - /// Stores RA Unsigned annotation on \p Inst. void setRAUnsigned(MCInst &Inst) const; diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h index 0b1654c1380b9..836948bf5e9c0 100644 --- a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h +++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h @@ -28,15 +28,6 @@ class InsertNegateRAState : public BinaryFunctionPass { void runOnFunction(BinaryFunction &BF); private: - /// Loops over all instructions and adds OpNegateRAState CFI - /// after any pointer signing or authenticating instructions, - /// which operate on the LR, except fused pauth + ret instructions - /// (such as RETAA). Normal pauth and psign instructions are "special cases", - /// meaning they always need an OpNegateRAState CFI after them. - /// Fused pauth + ret instructions are not, they work as any other - /// instruction. - /// Returns true, if any OpNegateRAState CFIs were added. - bool addNegateRAStateAfterPSignOrPAuth(BinaryFunction &BF); /// Because states are tracked as MCAnnotations on individual instructions, /// newly inserted instructions do not have a state associated with them. /// New states are "inherited" from the last known state. diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp index 4a7b1d73479c1..fa571c2bc8320 100644 --- a/bolt/lib/Core/MCPlusBuilder.cpp +++ b/bolt/lib/Core/MCPlusBuilder.cpp @@ -195,24 +195,6 @@ bool MCPlusBuilder::isRASigned(const MCInst &Inst) const { return hasAnnotation(Inst, MCAnnotation::kSigned); } -void MCPlusBuilder::setRASigning(MCInst &Inst) const { - assert(!hasAnnotation(Inst, MCAnnotation::kSigning)); - setAnnotationOpValue(Inst, MCAnnotation::kSigning, true); -} - -bool MCPlusBuilder::isRASigning(const MCInst &Inst) const { - return hasAnnotation(Inst, MCAnnotation::kSigning); -} - -void MCPlusBuilder::setAuthenticating(MCInst &Inst) const { - assert(!hasAnnotation(Inst, MCAnnotation::kAuthenticating)); - setAnnotationOpValue(Inst, MCAnnotation::kAuthenticating, true); -} - -bool MCPlusBuilder::isAuthenticating(const MCInst &Inst) const { - return hasAnnotation(Inst, MCAnnotation::kAuthenticating); -} - void MCPlusBuilder::setRAUnsigned(MCInst &Inst) const { assert(!hasAnnotation(Inst, MCAnnotation::kUnsigned)); setAnnotationOpValue(Inst, MCAnnotation::kUnsigned, true); @@ -223,8 +205,7 @@ bool MCPlusBuilder::isRAUnsigned(const MCInst &Inst) const { } bool MCPlusBuilder::isRAStateUnknown(const MCInst &Inst) const { - return !(isRAUnsigned(Inst) || isRASigned(Inst) || isRASigning(Inst) || - isAuthenticating(Inst)); + return !(isRAUnsigned(Inst) || isRASigned(Inst)); } std::optional MCPlusBuilder::getEHInfo(const MCInst &Inst) const { diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp index 60af3485ea4d1..273944b860e36 100644 --- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp +++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp @@ -34,9 +34,6 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) { return; } - // Attach .cfi_negate_ra_state to the "trivial" cases first. - addNegateRAStateAfterPSignOrPAuth(BF); - inferUnknownStates(BF); for (FunctionFragment &FF : BF.getLayout().fragments()) { @@ -67,24 +64,6 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) { } } -bool InsertNegateRAState::addNegateRAStateAfterPSignOrPAuth( - BinaryFunction &BF) { - BinaryContext &BC = BF.getBinaryContext(); - bool FoundAny = false; - for (BinaryBasicBlock &BB : BF) { - for (auto Iter = BB.begin(); Iter != BB.end(); ++Iter) { - MCInst &Inst = *Iter; - if (BC.MIB->isPSignOnLR(Inst) || - (BC.MIB->isPAuthOnLR(Inst) && !BC.MIB->isPAuthAndRet(Inst))) { - Iter = BF.addCFIInstruction( - &BB, Iter + 1, MCCFIInstruction::createNegateRAState(nullptr)); - FoundAny = true; - } - } - } - return FoundAny; -} - void InsertNegateRAState::coverFunctionFragmentStart(BinaryFunction &BF, FunctionFragment &FF) { BinaryContext &BC = BF.getBinaryContext(); @@ -102,8 +81,7 @@ void InsertNegateRAState::coverFunctionFragmentStart(BinaryFunction &BF, }); // If a function is already split in the input, the first FF can also start // with Signed state. This covers that scenario as well. - if (BC.MIB->isRASigned(*((*FirstNonEmpty)->begin())) || - BC.MIB->isAuthenticating(*((*FirstNonEmpty)->begin()))) { + if (BC.MIB->isRASigned(*((*FirstNonEmpty)->begin()))) { BF.addCFIInstruction(*FirstNonEmpty, (*FirstNonEmpty)->begin(), MCCFIInstruction::createNegateRAState(nullptr)); } @@ -121,10 +99,10 @@ void InsertNegateRAState::inferUnknownStates(BinaryFunction &BF) { continue; if (!FirstIter && BC.MIB->isRAStateUnknown(Inst)) { - if (BC.MIB->isRASigned(PrevInst) || BC.MIB->isRASigning(PrevInst)) { + if (BC.MIB->isRASigned(PrevInst) || BC.MIB->isPSignOnLR(PrevInst)) { BC.MIB->setRASigned(Inst); } else if (BC.MIB->isRAUnsigned(PrevInst) || - BC.MIB->isAuthenticating(PrevInst)) { + BC.MIB->isPAuthOnLR(PrevInst)) { BC.MIB->setRAUnsigned(Inst); } } else { diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp index aacb14fb7dcc9..8ce1b67814608 100644 --- a/bolt/lib/Passes/MarkRAStates.cpp +++ b/bolt/lib/Passes/MarkRAStates.cpp @@ -78,7 +78,9 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) { BF.setIgnored(); return; } - BC.MIB->setRASigning(Inst); + // The signing instruction itself is unsinged, but the next will be + // signed. + BC.MIB->setRAUnsigned(Inst); } else if (BC.MIB->isPAuthOnLR(Inst)) { if (!RAState) { // RA authenticating instructions should only follow signed RA state. @@ -89,7 +91,9 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) { BF.setIgnored(); return; } - BC.MIB->setAuthenticating(Inst); + // The authenticating instruction itself is signed, but the next will be + // unsigned. + BC.MIB->setRASigned(Inst); } else if (RAState) { BC.MIB->setRASigned(Inst); } else { From 910c09138dae9de24ad704cf50278f33964bbec2 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Wed, 13 Aug 2025 12:44:50 +0000 Subject: [PATCH 19/31] [BOLT] Add negate-ra-state-reorder test - Test checks that that reordering BBs change the number of negate-ra-state CFIs, and their locations. --- bolt/test/AArch64/negate-ra-state-reorder.s | 73 +++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 bolt/test/AArch64/negate-ra-state-reorder.s diff --git a/bolt/test/AArch64/negate-ra-state-reorder.s b/bolt/test/AArch64/negate-ra-state-reorder.s new file mode 100644 index 0000000000000..2659f75aff9c9 --- /dev/null +++ b/bolt/test/AArch64/negate-ra-state-reorder.s @@ -0,0 +1,73 @@ +# Checking that after reordering BasicBlocks, the generated OpNegateRAState instructions +# are placed where the RA state is different between two consecutive instructions. +# This case demonstrates, that the input might have a different amount than the output: +# input has 4, but output only has 3. + +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.exe.bolt --no-threads --reorder-blocks=reverse \ +# RUN: --print-cfg --print-after-lowering --print-only foo | FileCheck %s + +# Check that the reordering succeeded. +# CHECK: Binary Function "foo" after building cfg { +# CHECK: BB Layout : .LBB00, .Ltmp2, .Ltmp0, .Ltmp1 +# CHECK: Binary Function "foo" after inst-lowering { +# CHECK: BB Layout : .LBB00, .Ltmp1, .Ltmp0, .Ltmp2 + + +# Check the generated CFIs. +# CHECK: OpNegateRAState +# CHECK-NEXT: mov x2, #0x6 + +# CHECK: autiasp +# CHECK-NEXT: OpNegateRAState +# CHECK-NEXT: ret + +# CHECK: paciasp +# CHECK-NEXT: OpNegateRAState + +# CHECK: DWARF CFI Instructions: +# CHECK-NEXT: 0: OpNegateRAState +# CHECK-NEXT: 1: OpNegateRAState +# CHECK-NEXT: 2: OpNegateRAState +# CHECK-NEXT: End of Function "foo" + + .text + .globl foo + .p2align 2 + .type foo,@function +foo: + .cfi_startproc + // RA is unsigned + mov x1, #0 + mov x1, #1 + mov x1, #2 + // jump into the signed "range" + b .Lmiddle +.Lback: +// sign RA + paciasp + .cfi_negate_ra_state + mov x2, #3 + mov x2, #4 + // skip unsigned instructions + b .Lcont + .cfi_negate_ra_state +.Lmiddle: +// RA is unsigned + mov x4, #5 + b .Lback + .cfi_negate_ra_state +.Lcont: +// continue in signed state + mov x2, #6 + autiasp + .cfi_negate_ra_state + ret + .cfi_endproc + .size foo, .-foo + + .global _start + .type _start, %function +_start: + b foo From 3b73dda9273d6fc99b026cbccc49cb7c13e829c9 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Thu, 14 Aug 2025 11:36:14 +0000 Subject: [PATCH 20/31] [BOLT] Print stats in MarkRAStates, InsertNegateRAState - print statistics on the output about function getting Ignored in MarkRAStates, and on rewriting OpNegateRAState in InsertNegateRAStatePass. - improve error messages in MarkRAStates --- .../bolt/Passes/InsertNegateRAStatePass.h | 1 + bolt/include/bolt/Passes/MarkRAStates.h | 5 ++- bolt/lib/Passes/InsertNegateRAStatePass.cpp | 8 +++- bolt/lib/Passes/MarkRAStates.cpp | 44 +++++++++++-------- bolt/test/AArch64/negate-ra-state-incorrect.s | 11 ++--- 5 files changed, 41 insertions(+), 28 deletions(-) diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h index 836948bf5e9c0..9acd28424c59a 100644 --- a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h +++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h @@ -28,6 +28,7 @@ class InsertNegateRAState : public BinaryFunctionPass { void runOnFunction(BinaryFunction &BF); private: + uint64_t FunctionsModified{0}; /// Because states are tracked as MCAnnotations on individual instructions, /// newly inserted instructions do not have a state associated with them. /// New states are "inherited" from the last known state. diff --git a/bolt/include/bolt/Passes/MarkRAStates.h b/bolt/include/bolt/Passes/MarkRAStates.h index e7a49f813b6a7..8870540aa7e9e 100644 --- a/bolt/include/bolt/Passes/MarkRAStates.h +++ b/bolt/include/bolt/Passes/MarkRAStates.h @@ -25,7 +25,10 @@ class MarkRAStates : public BinaryFunctionPass { /// Pass entry point Error runOnFunctions(BinaryContext &BC) override; - void runOnFunction(BinaryFunction &BF); + bool runOnFunction(BinaryFunction &BF); + +private: + uint64_t FunctionsIgnored{0}; }; } // namespace bolt diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp index 273944b860e36..7b93a0ca2b251 100644 --- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp +++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp @@ -115,11 +115,12 @@ void InsertNegateRAState::inferUnknownStates(BinaryFunction &BF) { Error InsertNegateRAState::runOnFunctions(BinaryContext &BC) { ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { - if (BF.containedNegateRAState()) { + if (BF.containedNegateRAState() && !BF.isIgnored()) { // We can skip functions which did not include negate-ra-state CFIs. This // includes code using pac-ret hardening as well, if the binary is // compiled with `-fno-exceptions -fno-unwind-tables // -fno-asynchronous-unwind-tables` + FunctionsModified++; runOnFunction(BF); } }; @@ -128,6 +129,11 @@ Error InsertNegateRAState::runOnFunctions(BinaryContext &BC) { BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, nullptr, "InsertNegateRAStatePass"); + BC.outs() << "BOLT-INFO: rewritten pac-ret DWARF info in " + << FunctionsModified << " out of " << BC.getBinaryFunctions().size() + << " functions " + << format("(%.2lf%%).\n", (100.0 * FunctionsModified) / + BC.getBinaryFunctions().size()); return Error::success(); } diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp index 8ce1b67814608..2e766a6ed9be6 100644 --- a/bolt/lib/Passes/MarkRAStates.cpp +++ b/bolt/lib/Passes/MarkRAStates.cpp @@ -31,10 +31,7 @@ using namespace llvm; namespace llvm { namespace bolt { -void MarkRAStates::runOnFunction(BinaryFunction &BF) { - - if (BF.isIgnored()) - return; +bool MarkRAStates::runOnFunction(BinaryFunction &BF) { BinaryContext &BC = BF.getBinaryContext(); @@ -49,10 +46,9 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) { // annotation. BF.setIgnored(); BC.outs() << "BOLT-INFO: inconsistent RAStates in function " - << BF.getPrintName() << "\n"; - BC.outs() - << "BOLT-INFO: ptr sign/auth inst without .cfi_negate_ra_state\n"; - return; + << BF.getPrintName() + << ": ptr sign/auth inst without .cfi_negate_ra_state\n"; + return false; } } } @@ -72,11 +68,10 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) { if (RAState) { // RA signing instructions should only follow unsigned RA state. BC.outs() << "BOLT-INFO: inconsistent RAStates in function " - << BF.getPrintName() << "\n"; - BC.outs() << "BOLT-INFO: ptr signing inst encountered in Signed RA " - "state.\n"; + << BF.getPrintName() + << ": ptr signing inst encountered in Signed RA state\n"; BF.setIgnored(); - return; + return false; } // The signing instruction itself is unsinged, but the next will be // signed. @@ -85,11 +80,11 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) { if (!RAState) { // RA authenticating instructions should only follow signed RA state. BC.outs() << "BOLT-INFO: inconsistent RAStates in function " - << BF.getPrintName() << "\n"; - BC.outs() << "BOLT-INFO: ptr authenticating inst encountered in " - "Unsigned RA state.\n"; + << BF.getPrintName() + << ": ptr authenticating inst encountered in Unsigned RA " + "state\n"; BF.setIgnored(); - return; + return false; } // The authenticating instruction itself is signed, but the next will be // unsigned. @@ -120,16 +115,19 @@ void MarkRAStates::runOnFunction(BinaryFunction &BF) { } } } + return true; } Error MarkRAStates::runOnFunctions(BinaryContext &BC) { ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { - if (BF.containedNegateRAState()) { + if (BF.containedNegateRAState() && !BF.isIgnored()) { // We can skip functions which did not include negate-ra-state CFIs. This // includes code using pac-ret hardening as well, if the binary is // compiled with `-fno-exceptions -fno-unwind-tables // -fno-asynchronous-unwind-tables` - runOnFunction(BF); + if (!runOnFunction(BF)) { + FunctionsIgnored++; + } } }; @@ -137,6 +135,16 @@ Error MarkRAStates::runOnFunctions(BinaryContext &BC) { BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, nullptr, "MarkRAStates"); + int Total = llvm::count_if( + BC.getBinaryFunctions(), + [&](std::pair &P) { + return P.second.containedNegateRAState() && !P.second.isIgnored(); + }); + BC.outs() << "BOLT-INFO: MarkRAStates ran on " << Total + << " functions. Ignored " << FunctionsIgnored << " functions " + << format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total) + << " because of CFI inconsistencies.\n"; + return Error::success(); } diff --git a/bolt/test/AArch64/negate-ra-state-incorrect.s b/bolt/test/AArch64/negate-ra-state-incorrect.s index ad922d83c6d61..14d2c384a877d 100644 --- a/bolt/test/AArch64/negate-ra-state-incorrect.s +++ b/bolt/test/AArch64/negate-ra-state-incorrect.s @@ -10,14 +10,9 @@ # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q # RUN: llvm-bolt %t.exe -o %t.exe.bolt --no-threads | FileCheck %s --check-prefix=CHECK-BOLT -# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function foo -# CHECK-BOLT-NEXT: BOLT-INFO: ptr authenticating inst encountered in Unsigned RA state. - -# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function bar -# CHECK-BOLT-NEXT: BOLT-INFO: ptr signing inst encountered in Signed RA state - -# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function baz -# CHECK-BOLT-NEXT: BOLT-INFO: ptr sign/auth inst without .cfi_negate_ra_state +# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function foo: ptr authenticating inst encountered in Unsigned RA state +# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function bar: ptr signing inst encountered in Signed RA state +# CHECK-BOLT: BOLT-INFO: inconsistent RAStates in function baz: ptr sign/auth inst without .cfi_negate_ra_state # Check that the incorrect functions got ignored, so they are not in the new .text section # RUN: llvm-objdump %t.exe.bolt -d -j .text | FileCheck %s --check-prefix=CHECK-OBJDUMP From d2141bcde0136d4c8b3279df7d43e7b634fb1028 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Wed, 21 May 2025 18:40:10 +0200 Subject: [PATCH 21/31] [BOLT] Added docs/PacRetDesign.md --- bolt/docs/PacRetDesign.md | 154 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 bolt/docs/PacRetDesign.md diff --git a/bolt/docs/PacRetDesign.md b/bolt/docs/PacRetDesign.md new file mode 100644 index 0000000000000..b6046d6d70077 --- /dev/null +++ b/bolt/docs/PacRetDesign.md @@ -0,0 +1,154 @@ +# Optimizing binaries with pac-ret hardening + +This is a design document about processing the `DW_CFA_AARCH64_negate_ra_state` DWARF instruction in BOLT. As is describes internal design decisions, the intended audience is BOLT developers. The document is an updated version of the [RFC posted on the LLVM Discourse](https://discourse.llvm.org/t/rfc-bolt-aarch64-handle-opnegaterastate-to-enable-optimizing-binaries-with-pac-ret-hardening/86594). + + +`DW_CFA_AARCH64_negate_ra_state` is also referred to as `.cfi_negate_ra_state` in assembly, or `OpNegateRAState` is BOLT sources. In this document, I will use **negate-ra-state** as a shorthand. + +## Introduction + +### Pointer Authentication + +Refer to the [pac-ret section of the BOLT-binary-analysis document](BinaryAnalysis.md#pac-ret-analysis). + +### DW_CFA_AARCH64_negate_ra_state + +The negate-ra-state CFI is a vendor-specific Call Frame Instruction defined in the [Arm ABI](https://github.com/ARM-software/abi-aa/blob/main/aadwarf64/aadwarf64.rst#id1). + +``` +The DW_CFA_AARCH64_negate_ra_state operation negates bit[0] of the RA_SIGN_STATE pseudo-register. +``` + +This bit indicates to the unwinder whether the current return address is signed or not (hence the name). The unwinder uses this information to authenticate the pointer, and remove the Pointer Authentication Code (PAC) bits. Incorrect negate-ra-state placement can lead to the unwinder trying to authenticate an unsigned pointer (which segfaults), or skipping authenticating a signed pointer, and trying to access an incorrect location (also leading to a segfault). + +(Note: not *all* unwinders do this. Some use the `xpac` instruction to strip the PAC bits without authenticating the pointer. This is incorrect, as it allows control-flow modification in the case of unwinding.) + +There are no DWARF instructions to directly set or clear the RA State. However, two other CFIs can also affect the RA state: +- `DW_CFA_remember_state`: this CFI stores register rules onto an implicit stack. +- `DW_CFA_restore_state`: this CFI pops rules from this stack. + +Example: + +| CFI | Effect on RA state | +| ------------------------------ | ------------------------------ | +| (default) | 0 | +| DW_CFA_AARCH64_negate_ra_state | 0 -> 1 | +| DW_CFA_remember_state | 1 pushed to the stack | +| DW_CFA_AARCH64_negate_ra_state | 1 -> 0 | +| DW_CFA_restore_state | 0 -> 1 (popped from the stack) | + +The Arm ABI also defines the DW_CFA_AARCH64_negate_ra_state_with_pc CFI, but it is not widely used, and is [likely to become deprecated](https://github.com/ARM-software/abi-aa/issues/327). + +### Where are these CFIs needed? + +In all locations, where two consecutive instructions have different RA state, this need to be indicated to the unwinder. This happens at pointer signing and authenticating. The other case where two consecutive instructions have different RA state, but neither of them is signing or authenticating means that they are not next to each other in control flow. One is part of an execution path with signed RA, the other is part of a path with an unsigned RA. + +In the example below, the first BasicBlock ends in a conditional branch, and jumps to two different BasicBlocks, each with their own authentication, and return. The instructions on the border of the second and third BasicBlock have different RA states. The `ret` at the end of the second BasicBlock is in unsigned state. The start of the third BasicBlock is after the `paciasp` in the control flow, but before the authentication. In this case, a negate-ra-state is needed at the end of the second BasicBlock. + +``` + +----------------+ + | paciasp | + | | + | b.cc | + +--------+-------+ + | ++----------------+ +| | +| +--------v-------+ +| | | +| | autiasp | +| | ret | // RA: unsigned +| +----------------+ ++----------------+ + | + +--------v-------+ // RA: signed + | | + | autiasp | + | ret | + +----------------+ +``` + +> [!important] +> The unwinder does not follow the control flow graph. It reads unwind information in the layout order. + +Because these locations are dependent on how the function layout looks, negate-ra-state CFIs will become invalid during BasicBlock reordering. + +## Solution design + +The patch introduces two new passes: +1. `MarkRAStatesPass`: assigns the RA state to each instruction based on the CFIs in the input binary +2. `InsertNegateRAStatePass`: reads those assigned instruction RA states after optimizations, and emits `DW_CFA_AARCH64_negate_ra_state` CFIs at the correct places: wherever there is a state change between two consecutive instructions in the layout order. + +To track metadata on individual instructions, the `MCAnnotation` class was extended. These also have helper function in `MCPlusBuilder`. + +### Saving annotations at CFI reading + +CFIs are read and added to BinaryFunctions in `CFIReaderWriter::FillCFIInfoFor`. At this point, we add MCAnnotations about negate-ra-state, remember-state and restore-state CFIs to the instructions they refer to. This is to not interfere with the CFI processing that already happens in BOLT (e.g. remember-state and restore-state CFIs are removed in `normalizeCFIState` for reasons unrelated to PAC). + +As we add the MCAnnotations *to instructions*, we have to account for the case where the function starts with a CFI altering the RA state. If a function starts with a negate-ra-state CFI for example, we cannot save the annotation on the first instruction, because that itself should already be signed. This is why all BinaryFunctions have an `initialRAState` bool. If the `Offset` the CFI refers to is zero, we don't store an annotation, but set the `initialRAState` in `FillCFIInfoFor`. This info is then used in `MarkRAStates`. + +### Binaries without DWARF info + +In some cases, the DWARF tables are stripped from the binary. These programs usually have some other unwind-mechanism. To account for code that uses Pointer Authentication, but does not have DWARF CFIs, the passes only run on functions that had at least one negate-ra-state CFI. This is marked during CFI reading. + +This also makes sure that the passes don't run on functions that do not store the return address to the stack, and don't need Pointer Authentication, saving on runtime overhead. + +In summary: +- pointer auth is not used: no change, the new passes do not run. +- pointer auth is used, but DWARF info is stripped: no change, the new passes do not run. +- pointer auth is used, and we have DWARF CFIs: passes run, and rewrite the negate-ra-state CFI. + +### MarkRAStates Pass + +This pass runs before optimizations reorder anything. + +It processes MCAnnotations generated during the CFI reading stage to check if instructions have either of the three CFIs that can modify RA state: +- negate-ra-state +- remember-state +- restore-state + +Then it adds new MCAnnotations to each instruction, indicating their RA state. Those annotations are: +- Signed +- Unsigned + +Below is a simple example, that shows the two different type of annotations: what we have before the pass, and after it. + +| Instruction | Before | After | +| --------------------------- | --------------- | -------- | +| paciasp | negate-ra-state | unsigned | +| stp x29, x30, [sp, #-0x10]! | | signed | +| mov x29, sp | | signed | +| ldp x29, x30, [sp], #0x10 | | signed | +| autiasp | negate-ra-state | signed | +| ret | | unsigned | + +##### Error handling in MarkRAState Pass: + +Whenever the MarkRAStates pass finds inconsistencies in the current BinaryFunction, it ignores it by calling `BF.setIgnored()`. This prevents BOLT from optimizing that function, but it will still be emitted as part of the original section (`.bolt.org.text`) in its original form. + +The inconsistencies are as follows: +- finding a `pac*` instruction when already in signed state +- finding an `aut*` instruction when already in unsigned state +- finding `pac*` and `aut*` instructions without `.cfi_negate_ra_state`. + +Users will be informed about the number of ignored function in the pass, and the exact functions ignored. + +### InsertNegateRAStatePass + +This pass runs after the optimizations are done. In essence, it does the _inverse_ of MarkRAState pass: +1. it reads the RA state annotations attached to the instructions, and +2. whenever the state changes, it adds a PseudoInstruction that holds an OpNegateRAState CFI. + +##### Covering newly generated instructions: + +Some BOLT passes can add new Instructions. In InsertNegateRAStatePass, we have to know what RA state these have. + +The current solution has the `inferUnknownStates` function to cover these, using a fairly simple strategy: unknown states are inherited from last known state. Testing so far has shown this implementation is sufficient, but to prove correctness, we would need to examine all passes that insert new instructions. + +### Optimizations requiring special attention + +Marking states before optimizations assure that instructions can be moved around freely. The only special case is function splitting. When a function is split, the split part becomes a new function in the emitted binary. For unwinding to work, it needs to "replay" all CFI that lead up to the split point. BOLT does this for other CFIs. As negate-ra-state is not read (only stored as an Annotation), we have to do this "manually" in InsertNegateRAStatePass. Here, if the split part starts with an instruction that has Signed RA state, we add a negate-ra-state CFI to indicate this. + +## Option to disallow the feature + +To aid debugging, we added the `--disallow-pacret` flag. If the flag is used, and a function `containedNegateRAState()` after `FillCFIInfoFor()`, BOLT exits with an error. With this flag, the feature is on by default. From 250f060f9fe578673f3869a07c89c7f03819373f Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Mon, 8 Sep 2025 13:19:33 +0000 Subject: [PATCH 22/31] [BOLT] Review nits - fix spelling in MarkRAStates - improve negate-ra-state.s test - improve pacret-split-funcs.s test --- bolt/lib/Passes/MarkRAStates.cpp | 2 +- bolt/test/AArch64/negate-ra-state.s | 10 +++++----- bolt/test/AArch64/pacret-split-funcs.s | 5 +++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp index 2e766a6ed9be6..57e727e16074d 100644 --- a/bolt/lib/Passes/MarkRAStates.cpp +++ b/bolt/lib/Passes/MarkRAStates.cpp @@ -73,7 +73,7 @@ bool MarkRAStates::runOnFunction(BinaryFunction &BF) { BF.setIgnored(); return false; } - // The signing instruction itself is unsinged, but the next will be + // The signing instruction itself is unsigned, the next will be // signed. BC.MIB->setRAUnsigned(Inst); } else if (BC.MIB->isPAuthOnLR(Inst)) { diff --git a/bolt/test/AArch64/negate-ra-state.s b/bolt/test/AArch64/negate-ra-state.s index 65483dfaf28e5..30786d4ef9f70 100644 --- a/bolt/test/AArch64/negate-ra-state.s +++ b/bolt/test/AArch64/negate-ra-state.s @@ -1,11 +1,16 @@ # Checking that .cfi-negate_ra_state directives are emitted in the same location as in the input in the case of no optimizations. +# The foo and bar functions are a pair, with the first signing the return address, +# and the second authenticating it. We have a tailcall between the two. +# This is testing that BOLT can handle functions starting in signed RA state. + # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q # RUN: llvm-bolt %t.exe -o %t.exe.bolt --no-threads --print-all | FileCheck %s --check-prefix=CHECK-BOLT # Check that the negate-ra-state at the start of bar is not discarded. # If it was discarded, MarkRAState would report bar as having inconsistent RAStates. +# This is testing the handling of initialRAState on the BinaryFunction. # CHECK-BOLT-NOT: BOLT-INFO: inconsistent RAStates in function foo # CHECK-BOLT-NOT: BOLT-INFO: inconsistent RAStates in function bar @@ -69,8 +74,3 @@ bar: ret .cfi_endproc .size bar, .-bar - - .global _start - .type _start, %function -_start: - b foo diff --git a/bolt/test/AArch64/pacret-split-funcs.s b/bolt/test/AArch64/pacret-split-funcs.s index 6582257948c62..a20d2a3688146 100644 --- a/bolt/test/AArch64/pacret-split-funcs.s +++ b/bolt/test/AArch64/pacret-split-funcs.s @@ -36,16 +36,17 @@ foo: .cfi_startproc .entry_bb: +# FDATA: 1 foo #.entry_bb# 10 paciasp .cfi_negate_ra_state // indicating that paciasp changed the RA state to signed -# FDATA: 1 foo #.entry_bb# 10 cmp x0, #0 b.eq .Lcold_bb1 +.Lfallthrough: autiasp .cfi_negate_ra_state // indicating that autiasp changed the RA state to unsigned ret .cfi_negate_ra_state // ret has unsigned RA state, but the next inst (autiasp) has signed RA state -.Lcold_bb1: +.Lcold_bb1: // split point autiasp .cfi_negate_ra_state // indicating that autiasp changed the RA state to unsigned ret From 5cbd4ce0bd74144abe429c96da5378ac1ab1ca25 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Mon, 8 Sep 2025 14:29:47 +0000 Subject: [PATCH 23/31] [BOLT] Fix multithreading in #120064 - use SkipPredicates, - use atomic counters. --- bolt/include/bolt/Core/BinaryFunction.h | 2 +- .../bolt/Passes/InsertNegateRAStatePass.h | 1 - bolt/include/bolt/Passes/MarkRAStates.h | 3 --- bolt/lib/Passes/InsertNegateRAStatePass.cpp | 23 +++++++++++-------- bolt/lib/Passes/MarkRAStates.cpp | 23 +++++++++++-------- 5 files changed, 27 insertions(+), 25 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 3db39e47f9e94..4dfff58177fca 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -149,7 +149,7 @@ class BinaryFunction { }; void setContainedNegateRAState() { HadNegateRAState = true; } - bool containedNegateRAState() { return HadNegateRAState; } + bool containedNegateRAState() const { return HadNegateRAState; } void setInitialRAState(bool State) { InitialRAState = State; } bool getInitialRAState() { return InitialRAState; } diff --git a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h index 9acd28424c59a..836948bf5e9c0 100644 --- a/bolt/include/bolt/Passes/InsertNegateRAStatePass.h +++ b/bolt/include/bolt/Passes/InsertNegateRAStatePass.h @@ -28,7 +28,6 @@ class InsertNegateRAState : public BinaryFunctionPass { void runOnFunction(BinaryFunction &BF); private: - uint64_t FunctionsModified{0}; /// Because states are tracked as MCAnnotations on individual instructions, /// newly inserted instructions do not have a state associated with them. /// New states are "inherited" from the last known state. diff --git a/bolt/include/bolt/Passes/MarkRAStates.h b/bolt/include/bolt/Passes/MarkRAStates.h index 8870540aa7e9e..675ab9727142b 100644 --- a/bolt/include/bolt/Passes/MarkRAStates.h +++ b/bolt/include/bolt/Passes/MarkRAStates.h @@ -26,9 +26,6 @@ class MarkRAStates : public BinaryFunctionPass { /// Pass entry point Error runOnFunctions(BinaryContext &BC) override; bool runOnFunction(BinaryFunction &BF); - -private: - uint64_t FunctionsIgnored{0}; }; } // namespace bolt diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp index 7b93a0ca2b251..de033736a15ff 100644 --- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp +++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp @@ -114,20 +114,23 @@ void InsertNegateRAState::inferUnknownStates(BinaryFunction &BF) { } Error InsertNegateRAState::runOnFunctions(BinaryContext &BC) { + std::atomic FunctionsModified{0}; ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { - if (BF.containedNegateRAState() && !BF.isIgnored()) { - // We can skip functions which did not include negate-ra-state CFIs. This - // includes code using pac-ret hardening as well, if the binary is - // compiled with `-fno-exceptions -fno-unwind-tables - // -fno-asynchronous-unwind-tables` - FunctionsModified++; - runOnFunction(BF); - } + FunctionsModified++; + runOnFunction(BF); + }; + + ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) { + // We can skip functions which did not include negate-ra-state CFIs. This + // includes code using pac-ret hardening as well, if the binary is + // compiled with `-fno-exceptions -fno-unwind-tables + // -fno-asynchronous-unwind-tables` + return !BF.containedNegateRAState() || BF.isIgnored(); }; ParallelUtilities::runOnEachFunction( - BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, nullptr, - "InsertNegateRAStatePass"); + BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, + SkipPredicate, "InsertNegateRAStatePass"); BC.outs() << "BOLT-INFO: rewritten pac-ret DWARF info in " << FunctionsModified << " out of " << BC.getBinaryFunctions().size() diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp index 57e727e16074d..89a8065a6e3f0 100644 --- a/bolt/lib/Passes/MarkRAStates.cpp +++ b/bolt/lib/Passes/MarkRAStates.cpp @@ -119,21 +119,24 @@ bool MarkRAStates::runOnFunction(BinaryFunction &BF) { } Error MarkRAStates::runOnFunctions(BinaryContext &BC) { + std::atomic FunctionsIgnored{0}; ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { - if (BF.containedNegateRAState() && !BF.isIgnored()) { - // We can skip functions which did not include negate-ra-state CFIs. This - // includes code using pac-ret hardening as well, if the binary is - // compiled with `-fno-exceptions -fno-unwind-tables - // -fno-asynchronous-unwind-tables` - if (!runOnFunction(BF)) { - FunctionsIgnored++; - } + if (!runOnFunction(BF)) { + FunctionsIgnored++; } }; + ParallelUtilities::PredicateTy SkipPredicate = [&](const BinaryFunction &BF) { + // We can skip functions which did not include negate-ra-state CFIs. This + // includes code using pac-ret hardening as well, if the binary is + // compiled with `-fno-exceptions -fno-unwind-tables + // -fno-asynchronous-unwind-tables` + return !BF.containedNegateRAState() || BF.isIgnored(); + }; + ParallelUtilities::runOnEachFunction( - BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, nullptr, - "MarkRAStates"); + BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, + SkipPredicate, "MarkRAStates"); int Total = llvm::count_if( BC.getBinaryFunctions(), From 8e03527c60889ea016303dc2148c321d093edd23 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Wed, 10 Sep 2025 07:09:37 +0000 Subject: [PATCH 24/31] [BOLT] Address review - improve docs/PacRetDesign.md - improve pacret-split-funcs test --- bolt/docs/PacRetDesign.md | 146 ++++++++++++++++++------- bolt/test/AArch64/pacret-split-funcs.s | 19 ++-- 2 files changed, 117 insertions(+), 48 deletions(-) diff --git a/bolt/docs/PacRetDesign.md b/bolt/docs/PacRetDesign.md index b6046d6d70077..1461260949486 100644 --- a/bolt/docs/PacRetDesign.md +++ b/bolt/docs/PacRetDesign.md @@ -1,9 +1,14 @@ # Optimizing binaries with pac-ret hardening -This is a design document about processing the `DW_CFA_AARCH64_negate_ra_state` DWARF instruction in BOLT. As is describes internal design decisions, the intended audience is BOLT developers. The document is an updated version of the [RFC posted on the LLVM Discourse](https://discourse.llvm.org/t/rfc-bolt-aarch64-handle-opnegaterastate-to-enable-optimizing-binaries-with-pac-ret-hardening/86594). +This is a design document about processing the `DW_CFA_AARCH64_negate_ra_state` +DWARF instruction in BOLT. As it describes internal design decisions, the +intended audience is BOLT developers. The document is an updated version of the +[RFC posted on the LLVM Discourse](https://discourse.llvm.org/t/rfc-bolt-aarch64-handle-opnegaterastate-to-enable-optimizing-binaries-with-pac-ret-hardening/86594). -`DW_CFA_AARCH64_negate_ra_state` is also referred to as `.cfi_negate_ra_state` in assembly, or `OpNegateRAState` is BOLT sources. In this document, I will use **negate-ra-state** as a shorthand. +`DW_CFA_AARCH64_negate_ra_state` is also referred to as `.cfi_negate_ra_state` +in assembly, or `OpNegateRAState` in BOLT sources. In this document, I will use +**negate-ra-state** as a shorthand. ## Introduction @@ -13,17 +18,26 @@ Refer to the [pac-ret section of the BOLT-binary-analysis document](BinaryAnalys ### DW_CFA_AARCH64_negate_ra_state -The negate-ra-state CFI is a vendor-specific Call Frame Instruction defined in the [Arm ABI](https://github.com/ARM-software/abi-aa/blob/main/aadwarf64/aadwarf64.rst#id1). +The negate-ra-state CFI is a vendor-specific Call Frame Instruction defined in +the [Arm ABI](https://github.com/ARM-software/abi-aa/blob/main/aadwarf64/aadwarf64.rst#id1). ``` The DW_CFA_AARCH64_negate_ra_state operation negates bit[0] of the RA_SIGN_STATE pseudo-register. ``` -This bit indicates to the unwinder whether the current return address is signed or not (hence the name). The unwinder uses this information to authenticate the pointer, and remove the Pointer Authentication Code (PAC) bits. Incorrect negate-ra-state placement can lead to the unwinder trying to authenticate an unsigned pointer (which segfaults), or skipping authenticating a signed pointer, and trying to access an incorrect location (also leading to a segfault). +This bit indicates to the unwinder whether the current return address is signed +or not (hence the name). The unwinder uses this information to authenticate the +pointer, and remove the Pointer Authentication Code (PAC) bits. Incorrect +negate-ra-state placement can lead to the unwinder trying to authenticate an +unsigned pointer (which segfaults), or skipping authenticating a signed pointer, +and trying to access an incorrect location (also leading to a segfault). -(Note: not *all* unwinders do this. Some use the `xpac` instruction to strip the PAC bits without authenticating the pointer. This is incorrect, as it allows control-flow modification in the case of unwinding.) +Note: not *all* unwinders do this. Some use the `xpac` instruction to strip the +PAC bits without authenticating the pointer. This is an incorrect (incomplete) +implementation, as it allows control-flow modification in the case of unwinding. -There are no DWARF instructions to directly set or clear the RA State. However, two other CFIs can also affect the RA state: +There are no DWARF instructions to directly set or clear the RA State. However, +two other CFIs can also affect the RA state: - `DW_CFA_remember_state`: this CFI stores register rules onto an implicit stack. - `DW_CFA_restore_state`: this CFI pops rules from this stack. @@ -37,13 +51,25 @@ Example: | DW_CFA_AARCH64_negate_ra_state | 1 -> 0 | | DW_CFA_restore_state | 0 -> 1 (popped from the stack) | -The Arm ABI also defines the DW_CFA_AARCH64_negate_ra_state_with_pc CFI, but it is not widely used, and is [likely to become deprecated](https://github.com/ARM-software/abi-aa/issues/327). +The Arm ABI also defines the DW_CFA_AARCH64_negate_ra_state_with_pc CFI, but it +is not widely used, and is [likely to become deprecated](https://github.com/ARM-software/abi-aa/issues/327). ### Where are these CFIs needed? -In all locations, where two consecutive instructions have different RA state, this need to be indicated to the unwinder. This happens at pointer signing and authenticating. The other case where two consecutive instructions have different RA state, but neither of them is signing or authenticating means that they are not next to each other in control flow. One is part of an execution path with signed RA, the other is part of a path with an unsigned RA. - -In the example below, the first BasicBlock ends in a conditional branch, and jumps to two different BasicBlocks, each with their own authentication, and return. The instructions on the border of the second and third BasicBlock have different RA states. The `ret` at the end of the second BasicBlock is in unsigned state. The start of the third BasicBlock is after the `paciasp` in the control flow, but before the authentication. In this case, a negate-ra-state is needed at the end of the second BasicBlock. +In all locations, where two consecutive instructions have different RA state, +this needs to be indicated to the unwinder. This happens at pointer signing and +authenticating. The other case where two consecutive instructions have different +RA state, but neither of them is signing or authenticating means that they are +not next to each other in control flow. One is part of an execution path with +signed RA, the other is part of a path with an unsigned RA. + +In the example below, the first BasicBlock ends in a conditional branch, and +jumps to two different BasicBlocks, each with their own authentication, and +return. The instructions on the border of the second and third BasicBlock have +different RA states. The `ret` at the end of the second BasicBlock is in unsigned +state. The start of the third BasicBlock is after the `paciasp` in the control +flow, but before the authentication. In this case, a negate-ra-state is needed +at the end of the second BasicBlock. ``` +----------------+ @@ -69,86 +95,132 @@ In the example below, the first BasicBlock ends in a conditional branch, and jum ``` > [!important] -> The unwinder does not follow the control flow graph. It reads unwind information in the layout order. +> The unwinder does not follow the control flow graph. It reads unwind +> information in the layout order. -Because these locations are dependent on how the function layout looks, negate-ra-state CFIs will become invalid during BasicBlock reordering. +Because these locations are dependent on how the function layout looks, +negate-ra-state CFIs will become invalid during BasicBlock reordering. ## Solution design The patch introduces two new passes: -1. `MarkRAStatesPass`: assigns the RA state to each instruction based on the CFIs in the input binary -2. `InsertNegateRAStatePass`: reads those assigned instruction RA states after optimizations, and emits `DW_CFA_AARCH64_negate_ra_state` CFIs at the correct places: wherever there is a state change between two consecutive instructions in the layout order. +1. `MarkRAStatesPass`: assigns the RA state to each instruction based on the CFIs + in the input binary +2. `InsertNegateRAStatePass`: reads those assigned instruction RA states after + optimizations, and emits `DW_CFA_AARCH64_negate_ra_state` CFIs at the correct + places: wherever there is a state change between two consecutive instructions + in the layout order. -To track metadata on individual instructions, the `MCAnnotation` class was extended. These also have helper function in `MCPlusBuilder`. +To track metadata on individual instructions, the `MCAnnotation` class was +extended. These also have helper functions in `MCPlusBuilder`. ### Saving annotations at CFI reading -CFIs are read and added to BinaryFunctions in `CFIReaderWriter::FillCFIInfoFor`. At this point, we add MCAnnotations about negate-ra-state, remember-state and restore-state CFIs to the instructions they refer to. This is to not interfere with the CFI processing that already happens in BOLT (e.g. remember-state and restore-state CFIs are removed in `normalizeCFIState` for reasons unrelated to PAC). +CFIs are read and added to BinaryFunctions in `CFIReaderWriter::FillCFIInfoFor`. +At this point, we add MCAnnotations about negate-ra-state, remember-state and +restore-state CFIs to the instructions they refer to. This is to not interfere +with the CFI processing that already happens in BOLT (e.g. remember-state and +restore-state CFIs are removed in `normalizeCFIState` for reasons unrelated to PAC). -As we add the MCAnnotations *to instructions*, we have to account for the case where the function starts with a CFI altering the RA state. If a function starts with a negate-ra-state CFI for example, we cannot save the annotation on the first instruction, because that itself should already be signed. This is why all BinaryFunctions have an `initialRAState` bool. If the `Offset` the CFI refers to is zero, we don't store an annotation, but set the `initialRAState` in `FillCFIInfoFor`. This info is then used in `MarkRAStates`. +As we add the MCAnnotations *to instructions*, we have to account for the case +where the function starts with a CFI altering the RA state. If a function starts +with a negate-ra-state CFI for example, we cannot save the annotation on the +first instruction, because that itself should already be signed. This is why all +BinaryFunctions have an `initialRAState` bool. If the `Offset` the CFI refers to +is zero, we don't store an annotation, but set the `initialRAState` in +`FillCFIInfoFor`. This information is then used in `MarkRAStates`. ### Binaries without DWARF info -In some cases, the DWARF tables are stripped from the binary. These programs usually have some other unwind-mechanism. To account for code that uses Pointer Authentication, but does not have DWARF CFIs, the passes only run on functions that had at least one negate-ra-state CFI. This is marked during CFI reading. +In some cases, the DWARF tables are stripped from the binary. These programs +usually have some other unwind-mechanism. To account for code that uses Pointer +Authentication, but does not have DWARF CFIs, the passes only run on functions +that had at least one negate-ra-state CFI. This information is saved on the +functions during CFI reading. -This also makes sure that the passes don't run on functions that do not store the return address to the stack, and don't need Pointer Authentication, saving on runtime overhead. +This also makes sure that the passes don't run on functions that do not store +the return address to the stack, and don't need Pointer Authentication, saving +on runtime overhead. In summary: - pointer auth is not used: no change, the new passes do not run. -- pointer auth is used, but DWARF info is stripped: no change, the new passes do not run. -- pointer auth is used, and we have DWARF CFIs: passes run, and rewrite the negate-ra-state CFI. +- pointer auth is used, but DWARF info is stripped: no change, the new passes + do not run. +- pointer auth is used, and we have DWARF CFIs: passes run, and rewrite the + negate-ra-state CFI. ### MarkRAStates Pass This pass runs before optimizations reorder anything. -It processes MCAnnotations generated during the CFI reading stage to check if instructions have either of the three CFIs that can modify RA state: +It processes MCAnnotations generated during the CFI reading stage to check if +instructions have either of the three CFIs that can modify RA state: - negate-ra-state - remember-state - restore-state -Then it adds new MCAnnotations to each instruction, indicating their RA state. Those annotations are: +Then it adds new MCAnnotations to each instruction, indicating their RA state. +Those annotations are: - Signed - Unsigned -Below is a simple example, that shows the two different type of annotations: what we have before the pass, and after it. +Below is a simple example, that shows the two different type of annotations: +what we have before the pass, and after it. -| Instruction | Before | After | -| --------------------------- | --------------- | -------- | -| paciasp | negate-ra-state | unsigned | +| Instruction | Before | After | +| ----------------------------- | --------------- | -------- | +| paciasp | negate-ra-state | unsigned | | stp x29, x30, [sp, #-0x10]! | | signed | | mov x29, sp | | signed | | ldp x29, x30, [sp], #0x10 | | signed | -| autiasp | negate-ra-state | signed | -| ret | | unsigned | +| autiasp | negate-ra-state | signed | +| ret | | unsigned | ##### Error handling in MarkRAState Pass: -Whenever the MarkRAStates pass finds inconsistencies in the current BinaryFunction, it ignores it by calling `BF.setIgnored()`. This prevents BOLT from optimizing that function, but it will still be emitted as part of the original section (`.bolt.org.text`) in its original form. +Whenever the MarkRAStates pass finds inconsistencies in the current +BinaryFunction, it ignores it by calling `BF.setIgnored()`. This prevents BOLT +from optimizing that function, but it will still be emitted as part of the +original section (`.bolt.org.text`) in its original form. The inconsistencies are as follows: - finding a `pac*` instruction when already in signed state - finding an `aut*` instruction when already in unsigned state - finding `pac*` and `aut*` instructions without `.cfi_negate_ra_state`. -Users will be informed about the number of ignored function in the pass, and the exact functions ignored. +Users will be informed about the number of ignored functions in the pass, the +exact functions ignored, and the found inconsistency. ### InsertNegateRAStatePass -This pass runs after the optimizations are done. In essence, it does the _inverse_ of MarkRAState pass: +This pass runs after the optimizations are done. In essence, it does the _inverse_ +of MarkRAState pass: 1. it reads the RA state annotations attached to the instructions, and -2. whenever the state changes, it adds a PseudoInstruction that holds an OpNegateRAState CFI. +2. whenever the state changes, it adds a PseudoInstruction that holds an + OpNegateRAState CFI. ##### Covering newly generated instructions: -Some BOLT passes can add new Instructions. In InsertNegateRAStatePass, we have to know what RA state these have. +Some BOLT passes can add new Instructions. In InsertNegateRAStatePass, we have +to know what RA state these have. -The current solution has the `inferUnknownStates` function to cover these, using a fairly simple strategy: unknown states are inherited from last known state. Testing so far has shown this implementation is sufficient, but to prove correctness, we would need to examine all passes that insert new instructions. +The current solution has the `inferUnknownStates` function to cover these, using +a fairly simple strategy: unknown states inherit the last known state. Testing so +far has shown that this implementation is sufficient. ### Optimizations requiring special attention -Marking states before optimizations assure that instructions can be moved around freely. The only special case is function splitting. When a function is split, the split part becomes a new function in the emitted binary. For unwinding to work, it needs to "replay" all CFI that lead up to the split point. BOLT does this for other CFIs. As negate-ra-state is not read (only stored as an Annotation), we have to do this "manually" in InsertNegateRAStatePass. Here, if the split part starts with an instruction that has Signed RA state, we add a negate-ra-state CFI to indicate this. +Marking states before optimizations ensure that instructions can be moved around +freely. The only special case is function splitting. When a function is split, +the split part becomes a new function in the emitted binary. For unwinding to +work, it needs to "replay" all CFIs that lead up to the split point. BOLT does +this for other CFIs. As negate-ra-state is not read (only stored as an Annotation), +we have to do this manually in InsertNegateRAStatePass. Here, if the split part +starts with an instruction that has Signed RA state, we add a negate-ra-state CFI +to indicate this. ## Option to disallow the feature -To aid debugging, we added the `--disallow-pacret` flag. If the flag is used, and a function `containedNegateRAState()` after `FillCFIInfoFor()`, BOLT exits with an error. With this flag, the feature is on by default. +To aid debugging, we added the `--disallow-pacret` flag. If the flag is used, +and a function `containedNegateRAState()` after `FillCFIInfoFor()`, BOLT exits +with an error. With this flag, the feature is on by default. diff --git a/bolt/test/AArch64/pacret-split-funcs.s b/bolt/test/AArch64/pacret-split-funcs.s index a20d2a3688146..27b3471045523 100644 --- a/bolt/test/AArch64/pacret-split-funcs.s +++ b/bolt/test/AArch64/pacret-split-funcs.s @@ -1,10 +1,10 @@ # Checking that we generate an OpNegateRAState CFI after the split point, # when splitting a region with signed RA state. +# We split at the fallthrough label. # REQUIRES: system-linux -# RUN: %clang %cflags -o %t %s -# RUN: %clang %s %cflags -Wl,-q -o %t +# RUN: %clang %s %cflags -march=armv8.3-a -Wl,-q -o %t # RUN: link_fdata --no-lbr %s %t %t.fdata # RUN: llvm-bolt %t -o %t.bolt --data %t.fdata -split-functions \ # RUN: --print-only foo --print-split --print-all 2>&1 | FileCheck %s @@ -19,14 +19,11 @@ # CHECK: ------- HOT-COLD SPLIT POINT ------- # CHECK: OpNegateRAState +# CHECK-NEXT: mov x0, #0x1 # CHECK-NEXT: autiasp # CHECK-NEXT: OpNegateRAState # CHECK-NEXT: ret -# CHECK: autiasp -# CHECK-NEXT: OpNegateRAState -# CHECK-NEXT: ret - # End of the insert-negate-ra-state-pass logs # CHECK: Binary Function "foo" after finalize-functions @@ -41,15 +38,15 @@ foo: .cfi_negate_ra_state // indicating that paciasp changed the RA state to signed cmp x0, #0 b.eq .Lcold_bb1 -.Lfallthrough: +.Lfallthrough: // split point + mov x0, #1 autiasp .cfi_negate_ra_state // indicating that autiasp changed the RA state to unsigned ret +.Lcold_bb1: // Instructions below are not important, they are just here so the cold block is not empty. .cfi_negate_ra_state // ret has unsigned RA state, but the next inst (autiasp) has signed RA state -.Lcold_bb1: // split point - autiasp - .cfi_negate_ra_state // indicating that autiasp changed the RA state to unsigned - ret + mov x0, #2 + retaa .cfi_endproc .size foo, .-foo From fd3642ad858e98c87b8d5c39153bc2532b90680c Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Wed, 10 Sep 2025 13:35:22 +0000 Subject: [PATCH 25/31] [BOLT] Change scheduling policy to SP_INT_LINEAR - count the total applicable functions, and only run the passes if it's more than 0. This way we do not get a warning on the scheduling policy change. --- bolt/lib/Passes/InsertNegateRAStatePass.cpp | 29 ++++++++++++++------- bolt/lib/Passes/MarkRAStates.cpp | 21 +++++++++------ 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp index de033736a15ff..355af4a6948a1 100644 --- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp +++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp @@ -128,15 +128,26 @@ Error InsertNegateRAState::runOnFunctions(BinaryContext &BC) { return !BF.containedNegateRAState() || BF.isIgnored(); }; - ParallelUtilities::runOnEachFunction( - BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, - SkipPredicate, "InsertNegateRAStatePass"); - - BC.outs() << "BOLT-INFO: rewritten pac-ret DWARF info in " - << FunctionsModified << " out of " << BC.getBinaryFunctions().size() - << " functions " - << format("(%.2lf%%).\n", (100.0 * FunctionsModified) / - BC.getBinaryFunctions().size()); + int Total = llvm::count_if( + BC.getBinaryFunctions(), + [&](std::pair &P) { + return P.second.containedNegateRAState() && !P.second.isIgnored(); + }); + + // If we attempt to run the pass, but SkipPredicate skips all functions, + // we get a warning that the SchedulingPolicy is changed to trivial. + // This would break bolt/test/AArch64/unmarked-data.test. + if (Total != 0) { + ParallelUtilities::runOnEachFunction( + BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, + SkipPredicate, "InsertNegateRAStatePass"); + + BC.outs() << "BOLT-INFO: rewritten pac-ret DWARF info in " + << FunctionsModified << " out of " + << BC.getBinaryFunctions().size() << " functions " + << format("(%.2lf%%).\n", (100.0 * FunctionsModified) / + BC.getBinaryFunctions().size()); + } return Error::success(); } diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp index 89a8065a6e3f0..f9fe86260e3b1 100644 --- a/bolt/lib/Passes/MarkRAStates.cpp +++ b/bolt/lib/Passes/MarkRAStates.cpp @@ -134,19 +134,24 @@ Error MarkRAStates::runOnFunctions(BinaryContext &BC) { return !BF.containedNegateRAState() || BF.isIgnored(); }; - ParallelUtilities::runOnEachFunction( - BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, - SkipPredicate, "MarkRAStates"); - int Total = llvm::count_if( BC.getBinaryFunctions(), [&](std::pair &P) { return P.second.containedNegateRAState() && !P.second.isIgnored(); }); - BC.outs() << "BOLT-INFO: MarkRAStates ran on " << Total - << " functions. Ignored " << FunctionsIgnored << " functions " - << format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total) - << " because of CFI inconsistencies.\n"; + + // If we attempt to run the pass, but SkipPredicate skips all functions, + // we get a warning that the SchedulingPolicy is changed to trivial. + // This would break bolt/test/AArch64/unmarked-data.test. + if (Total != 0) { + ParallelUtilities::runOnEachFunction( + BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, + SkipPredicate, "MarkRAStates"); + BC.outs() << "BOLT-INFO: MarkRAStates ran on " << Total + << " functions. Ignored " << FunctionsIgnored << " functions " + << format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total) + << " because of CFI inconsistencies.\n"; + } return Error::success(); } From e12cb5b45648f1fa797511dbcfc130cbbc7e1128 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Thu, 11 Sep 2025 10:07:47 +0000 Subject: [PATCH 26/31] [BOLT] Rename pac-ret feature flag - new name is update-branch-protection, which can be extended later to include a "strict mode", and to cover BTI rewriting. --- bolt/include/bolt/Utils/CommandLineOpts.h | 2 +- bolt/lib/Rewrite/BinaryPassManager.cpp | 13 +++++-------- bolt/lib/Rewrite/RewriteInstance.cpp | 8 ++++---- bolt/test/AArch64/negate-ra-state-disallow.s | 4 ++-- 4 files changed, 12 insertions(+), 15 deletions(-) diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h index fb83321ac529a..bcfba4aad53db 100644 --- a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -78,7 +78,7 @@ extern llvm::cl::opt OutputFilename; extern llvm::cl::opt PerfData; extern llvm::cl::opt PrintCacheMetrics; extern llvm::cl::opt PrintSections; -extern llvm::cl::opt DisallowPacret; +extern llvm::cl::opt UpdateBranchProtection; // The format to use with -o in aggregation mode (perf2bolt) enum ProfileFormatKind { PF_Fdata, PF_YAML }; diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index bb8697cb4f5ec..5f3ac22fdf6d0 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -279,14 +279,11 @@ static cl::opt ShortenInstructions("shorten-instructions", cl::init(true), cl::cat(BoltOptCategory)); -// This flag is used to "gate" the negate-ra-state CFI handling. -// Sometimes, binaries use pac-ret but not contain negate-ra-state CFIs. That -// should cause no issues for BOLT. -cl::opt DisallowPacret( - "disallow-pacret", - cl::desc("Disable processing binaries containing negate-ra-state DWARF " - "CFIs (e.g. binaries using pac-ret hardening)"), - cl::cat(BoltOptCategory)); +cl::opt + UpdateBranchProtection("update-branch-protection", + cl::desc("Rewrites pac-ret DWARF CFI instructions " + "(AArch64-only, on by default)"), + cl::init(true), cl::cat(BoltOptCategory)); } // namespace opts namespace llvm { diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 91b7feb59cc14..2b974694142c0 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -3523,10 +3523,10 @@ void RewriteInstance::disassembleFunctions() { // Check if fillCFIInfoFor removed any OpNegateRAState CFIs from the // function. if (Function.containedNegateRAState()) { - if (opts::DisallowPacret) { - BC->errs() << "BOLT-ERROR: --disallow-pacret flag was used, but " - << Function.getPrintName() - << " contains .cfi-negate-ra-state.\n"; + if (!opts::UpdateBranchProtection) { + BC->errs() + << "BOLT-ERROR: --update-branch-protection is set to false, but " + << Function.getPrintName() << " contains .cfi-negate-ra-state.\n"; exit(1); } } diff --git a/bolt/test/AArch64/negate-ra-state-disallow.s b/bolt/test/AArch64/negate-ra-state-disallow.s index 97488be6eb940..391413006330f 100644 --- a/bolt/test/AArch64/negate-ra-state-disallow.s +++ b/bolt/test/AArch64/negate-ra-state-disallow.s @@ -1,8 +1,8 @@ # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -# RUN: not llvm-bolt %t.exe -o %t.exe.bolt --disallow-pacret 2>&1 | FileCheck %s +# RUN: not llvm-bolt %t.exe -o %t.exe.bolt --update-branch-protection=false 2>&1 | FileCheck %s -# CHECK: BOLT-ERROR: --disallow-pacret flag was used, but foo contains .cfi-negate-ra-state. +# CHECK: BOLT-ERROR: --update-branch-protection is set to false, but foo contains .cfi-negate-ra-state. .text .globl foo From 532c150bd6b02ee30cfb875d8780a9d5530f4c2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gergely=20B=C3=A1lint?= Date: Tue, 23 Sep 2025 09:32:46 +0200 Subject: [PATCH 27/31] Update InsertNegateRAStatePass - fix spelling Co-authored-by: Maksim Panchenko --- bolt/lib/Passes/InsertNegateRAStatePass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp index 355af4a6948a1..6e40337a0705b 100644 --- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp +++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp @@ -29,7 +29,7 @@ void InsertNegateRAState::runOnFunction(BinaryFunction &BF) { if (BF.getState() != BinaryFunction::State::CFG && BF.getState() != BinaryFunction::State::CFG_Finalized) { - BC.outs() << "BOLT-INFO: No CFG for " << BF.getPrintName() + BC.outs() << "BOLT-INFO: no CFG for " << BF.getPrintName() << " in InsertNegateRAStatePass\n"; return; } From 35cf13d163de16fa82846055cc838d2c175da493 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Tue, 23 Sep 2025 07:47:18 +0000 Subject: [PATCH 28/31] [BOLT] Review changes - rename kSigned/kUnsigned to kRASigned/kRAUnsigned - const& in MarkRAStates - spelling - change flag to hidden --- bolt/include/bolt/Core/MCPlus.h | 4 ++-- bolt/lib/Core/MCPlusBuilder.cpp | 12 ++++++------ bolt/lib/Passes/InsertNegateRAStatePass.cpp | 4 +--- bolt/lib/Passes/MarkRAStates.cpp | 11 ++++------- bolt/lib/Rewrite/BinaryPassManager.cpp | 2 +- bolt/lib/Rewrite/RewriteInstance.cpp | 2 +- bolt/test/AArch64/negate-ra-state-disallow.s | 2 +- 7 files changed, 16 insertions(+), 21 deletions(-) diff --git a/bolt/include/bolt/Core/MCPlus.h b/bolt/include/bolt/Core/MCPlus.h index 8b1f7033ffad1..ead6ba1470da6 100644 --- a/bolt/include/bolt/Core/MCPlus.h +++ b/bolt/include/bolt/Core/MCPlus.h @@ -72,8 +72,8 @@ class MCAnnotation { kLabel, /// MCSymbol pointing to this instruction. kSize, /// Size of the instruction. kDynamicBranch, /// Jit instruction patched at runtime. - kSigned, /// Inst is in a range where RA is signed. - kUnsigned, /// Inst is in a range where RA is unsigned. + kRASigned, /// Inst is in a range where RA is signed. + kRAUnsigned, /// Inst is in a range where RA is unsigned. kRememberState, /// Inst has rememberState CFI. kRestoreState, /// Inst has restoreState CFI. kNegateState, /// Inst has OpNegateRAState CFI. diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp index fa571c2bc8320..e96de80bfa701 100644 --- a/bolt/lib/Core/MCPlusBuilder.cpp +++ b/bolt/lib/Core/MCPlusBuilder.cpp @@ -187,21 +187,21 @@ bool MCPlusBuilder::hasRestoreState(const MCInst &Inst) const { } void MCPlusBuilder::setRASigned(MCInst &Inst) const { - assert(!hasAnnotation(Inst, MCAnnotation::kSigned)); - setAnnotationOpValue(Inst, MCAnnotation::kSigned, true); + assert(!hasAnnotation(Inst, MCAnnotation::kRASigned)); + setAnnotationOpValue(Inst, MCAnnotation::kRASigned, true); } bool MCPlusBuilder::isRASigned(const MCInst &Inst) const { - return hasAnnotation(Inst, MCAnnotation::kSigned); + return hasAnnotation(Inst, MCAnnotation::kRASigned); } void MCPlusBuilder::setRAUnsigned(MCInst &Inst) const { - assert(!hasAnnotation(Inst, MCAnnotation::kUnsigned)); - setAnnotationOpValue(Inst, MCAnnotation::kUnsigned, true); + assert(!hasAnnotation(Inst, MCAnnotation::kRAUnsigned)); + setAnnotationOpValue(Inst, MCAnnotation::kRAUnsigned, true); } bool MCPlusBuilder::isRAUnsigned(const MCInst &Inst) const { - return hasAnnotation(Inst, MCAnnotation::kUnsigned); + return hasAnnotation(Inst, MCAnnotation::kRAUnsigned); } bool MCPlusBuilder::isRAStateUnknown(const MCInst &Inst) const { diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp index 6e40337a0705b..0a9d5cc8c9125 100644 --- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp +++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp @@ -92,9 +92,7 @@ void InsertNegateRAState::inferUnknownStates(BinaryFunction &BF) { bool FirstIter = true; MCInst PrevInst; for (BinaryBasicBlock &BB : BF) { - for (auto It = BB.begin(); It != BB.end(); ++It) { - - MCInst &Inst = *It; + for (MCInst &Inst : BB) { if (BC.MIB->isCFI(Inst)) continue; diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp index f9fe86260e3b1..0fa8955068435 100644 --- a/bolt/lib/Passes/MarkRAStates.cpp +++ b/bolt/lib/Passes/MarkRAStates.cpp @@ -35,9 +35,8 @@ bool MarkRAStates::runOnFunction(BinaryFunction &BF) { BinaryContext &BC = BF.getBinaryContext(); - for (BinaryBasicBlock &BB : BF) { - for (auto It = BB.begin(); It != BB.end(); ++It) { - MCInst &Inst = *It; + for (const BinaryBasicBlock &BB : BF) { + for (const MCInst &Inst : BB) { if ((BC.MIB->isPSignOnLR(Inst) || (BC.MIB->isPAuthOnLR(Inst) && !BC.MIB->isPAuthAndRet(Inst))) && !BC.MIB->hasNegateRAState(Inst)) { @@ -58,9 +57,7 @@ bool MarkRAStates::runOnFunction(BinaryFunction &BF) { RAStateStack.push(RAState); for (BinaryBasicBlock &BB : BF) { - for (auto It = BB.begin(); It != BB.end(); ++It) { - - MCInst &Inst = *It; + for (MCInst &Inst : BB) { if (BC.MIB->isCFI(Inst)) continue; @@ -150,7 +147,7 @@ Error MarkRAStates::runOnFunctions(BinaryContext &BC) { BC.outs() << "BOLT-INFO: MarkRAStates ran on " << Total << " functions. Ignored " << FunctionsIgnored << " functions " << format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total) - << " because of CFI inconsistencies.\n"; + << " because of CFI inconsistencies\n"; } return Error::success(); diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp index 5f3ac22fdf6d0..782137e807662 100644 --- a/bolt/lib/Rewrite/BinaryPassManager.cpp +++ b/bolt/lib/Rewrite/BinaryPassManager.cpp @@ -283,7 +283,7 @@ cl::opt UpdateBranchProtection("update-branch-protection", cl::desc("Rewrites pac-ret DWARF CFI instructions " "(AArch64-only, on by default)"), - cl::init(true), cl::cat(BoltOptCategory)); + cl::init(true), cl::Hidden, cl::cat(BoltCategory)); } // namespace opts namespace llvm { diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 2b974694142c0..f708d35441a04 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -3526,7 +3526,7 @@ void RewriteInstance::disassembleFunctions() { if (!opts::UpdateBranchProtection) { BC->errs() << "BOLT-ERROR: --update-branch-protection is set to false, but " - << Function.getPrintName() << " contains .cfi-negate-ra-state.\n"; + << Function.getPrintName() << " contains .cfi-negate-ra-state\n"; exit(1); } } diff --git a/bolt/test/AArch64/negate-ra-state-disallow.s b/bolt/test/AArch64/negate-ra-state-disallow.s index 391413006330f..95adb7146cfce 100644 --- a/bolt/test/AArch64/negate-ra-state-disallow.s +++ b/bolt/test/AArch64/negate-ra-state-disallow.s @@ -2,7 +2,7 @@ # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q # RUN: not llvm-bolt %t.exe -o %t.exe.bolt --update-branch-protection=false 2>&1 | FileCheck %s -# CHECK: BOLT-ERROR: --update-branch-protection is set to false, but foo contains .cfi-negate-ra-state. +# CHECK: BOLT-ERROR: --update-branch-protection is set to false, but foo contains .cfi-negate-ra-state .text .globl foo From c0b4df4f2388c57d4b72d6bc2feea46a4329efca Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Wed, 24 Sep 2025 07:39:28 +0000 Subject: [PATCH 29/31] [BOLT] Remove workaround Remove the workaround for the issue that was fixed in #160263. --- bolt/lib/Passes/InsertNegateRAStatePass.cpp | 29 +++++++-------------- bolt/lib/Passes/MarkRAStates.cpp | 19 +++++--------- 2 files changed, 16 insertions(+), 32 deletions(-) diff --git a/bolt/lib/Passes/InsertNegateRAStatePass.cpp b/bolt/lib/Passes/InsertNegateRAStatePass.cpp index 0a9d5cc8c9125..33664e1160a7b 100644 --- a/bolt/lib/Passes/InsertNegateRAStatePass.cpp +++ b/bolt/lib/Passes/InsertNegateRAStatePass.cpp @@ -126,26 +126,15 @@ Error InsertNegateRAState::runOnFunctions(BinaryContext &BC) { return !BF.containedNegateRAState() || BF.isIgnored(); }; - int Total = llvm::count_if( - BC.getBinaryFunctions(), - [&](std::pair &P) { - return P.second.containedNegateRAState() && !P.second.isIgnored(); - }); - - // If we attempt to run the pass, but SkipPredicate skips all functions, - // we get a warning that the SchedulingPolicy is changed to trivial. - // This would break bolt/test/AArch64/unmarked-data.test. - if (Total != 0) { - ParallelUtilities::runOnEachFunction( - BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, - SkipPredicate, "InsertNegateRAStatePass"); - - BC.outs() << "BOLT-INFO: rewritten pac-ret DWARF info in " - << FunctionsModified << " out of " - << BC.getBinaryFunctions().size() << " functions " - << format("(%.2lf%%).\n", (100.0 * FunctionsModified) / - BC.getBinaryFunctions().size()); - } + ParallelUtilities::runOnEachFunction( + BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, + SkipPredicate, "InsertNegateRAStatePass"); + + BC.outs() << "BOLT-INFO: rewritten pac-ret DWARF info in " + << FunctionsModified << " out of " << BC.getBinaryFunctions().size() + << " functions " + << format("(%.2lf%%).\n", (100.0 * FunctionsModified) / + BC.getBinaryFunctions().size()); return Error::success(); } diff --git a/bolt/lib/Passes/MarkRAStates.cpp b/bolt/lib/Passes/MarkRAStates.cpp index 0fa8955068435..2c5ce4aaa72c0 100644 --- a/bolt/lib/Passes/MarkRAStates.cpp +++ b/bolt/lib/Passes/MarkRAStates.cpp @@ -137,18 +137,13 @@ Error MarkRAStates::runOnFunctions(BinaryContext &BC) { return P.second.containedNegateRAState() && !P.second.isIgnored(); }); - // If we attempt to run the pass, but SkipPredicate skips all functions, - // we get a warning that the SchedulingPolicy is changed to trivial. - // This would break bolt/test/AArch64/unmarked-data.test. - if (Total != 0) { - ParallelUtilities::runOnEachFunction( - BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, - SkipPredicate, "MarkRAStates"); - BC.outs() << "BOLT-INFO: MarkRAStates ran on " << Total - << " functions. Ignored " << FunctionsIgnored << " functions " - << format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total) - << " because of CFI inconsistencies\n"; - } + ParallelUtilities::runOnEachFunction( + BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun, + SkipPredicate, "MarkRAStates"); + BC.outs() << "BOLT-INFO: MarkRAStates ran on " << Total + << " functions. Ignored " << FunctionsIgnored << " functions " + << format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total) + << " because of CFI inconsistencies\n"; return Error::success(); } From 11e897deffa6aac23ac533cf83697a9bb8bf8456 Mon Sep 17 00:00:00 2001 From: Gergely Balint Date: Wed, 1 Oct 2025 10:29:00 +0000 Subject: [PATCH 30/31] [BOLT] Update bolt/docs/PacRetDesign.md --- bolt/docs/PacRetDesign.md | 92 ++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 45 deletions(-) diff --git a/bolt/docs/PacRetDesign.md b/bolt/docs/PacRetDesign.md index 1461260949486..136cac707efbc 100644 --- a/bolt/docs/PacRetDesign.md +++ b/bolt/docs/PacRetDesign.md @@ -14,7 +14,7 @@ in assembly, or `OpNegateRAState` in BOLT sources. In this document, I will use ### Pointer Authentication -Refer to the [pac-ret section of the BOLT-binary-analysis document](BinaryAnalysis.md#pac-ret-analysis). +For more information, see the [pac-ret section of the BOLT-binary-analysis document](BinaryAnalysis.md#pac-ret-analysis). ### DW_CFA_AARCH64_negate_ra_state @@ -27,14 +27,14 @@ The DW_CFA_AARCH64_negate_ra_state operation negates bit[0] of the RA_SIGN_STATE This bit indicates to the unwinder whether the current return address is signed or not (hence the name). The unwinder uses this information to authenticate the -pointer, and remove the Pointer Authentication Code (PAC) bits. Incorrect -negate-ra-state placement can lead to the unwinder trying to authenticate an -unsigned pointer (which segfaults), or skipping authenticating a signed pointer, -and trying to access an incorrect location (also leading to a segfault). +pointer, and remove the Pointer Authentication Code (PAC) bits. +Incorrect placment of negate-ra-state CFIs causes the unwinder to either attempt +to authenticate an unsigned pointer (resulting in a segmentation fault), or skip +authentication on a signed pointer, which can also cause a fault. -Note: not *all* unwinders do this. Some use the `xpac` instruction to strip the -PAC bits without authenticating the pointer. This is an incorrect (incomplete) -implementation, as it allows control-flow modification in the case of unwinding. +Note: some unwinders use the `xpac` instruction to strip the PAC bits without +authenticating the pointer. This is an incorrect (incomplete) implementation, +as it allows control-flow modification in the case of unwinding. There are no DWARF instructions to directly set or clear the RA State. However, two other CFIs can also affect the RA state: @@ -56,12 +56,12 @@ is not widely used, and is [likely to become deprecated](https://github.com/ARM- ### Where are these CFIs needed? -In all locations, where two consecutive instructions have different RA state, -this needs to be indicated to the unwinder. This happens at pointer signing and -authenticating. The other case where two consecutive instructions have different -RA state, but neither of them is signing or authenticating means that they are -not next to each other in control flow. One is part of an execution path with -signed RA, the other is part of a path with an unsigned RA. +Whenever two consecutive instructions have different RA states, the unwinder must +be informed of the change. This typically occurs during pointer signing or +authentication. If adjacent instructions differ in RA state but neither signs +nor authenticates the return address, they must belong to different control flow +paths. One is part of an execution path with signed RA, the other is part of a +path with an unsigned RA. In the example below, the first BasicBlock ends in a conditional branch, and jumps to two different BasicBlocks, each with their own authentication, and @@ -103,7 +103,7 @@ negate-ra-state CFIs will become invalid during BasicBlock reordering. ## Solution design -The patch introduces two new passes: +The implementation introduces two new passes: 1. `MarkRAStatesPass`: assigns the RA state to each instruction based on the CFIs in the input binary 2. `InsertNegateRAStatePass`: reads those assigned instruction RA states after @@ -123,24 +123,21 @@ with the CFI processing that already happens in BOLT (e.g. remember-state and restore-state CFIs are removed in `normalizeCFIState` for reasons unrelated to PAC). As we add the MCAnnotations *to instructions*, we have to account for the case -where the function starts with a CFI altering the RA state. If a function starts -with a negate-ra-state CFI for example, we cannot save the annotation on the -first instruction, because that itself should already be signed. This is why all -BinaryFunctions have an `initialRAState` bool. If the `Offset` the CFI refers to -is zero, we don't store an annotation, but set the `initialRAState` in -`FillCFIInfoFor`. This information is then used in `MarkRAStates`. +where the function starts with a CFI altering the RA state. As CFIs modify the RA +state of the instructions before them, we cannot add the annotation to the first +instruction. +This special case is handled by adding an `initialRAState` bool to each BinaryFunction. +If the `Offset` the CFI refers to is zero, we don't store an annotation, but set +the `initialRAState` in `FillCFIInfoFor`. This information is then used in +`MarkRAStates`. ### Binaries without DWARF info In some cases, the DWARF tables are stripped from the binary. These programs -usually have some other unwind-mechanism. To account for code that uses Pointer -Authentication, but does not have DWARF CFIs, the passes only run on functions -that had at least one negate-ra-state CFI. This information is saved on the -functions during CFI reading. - -This also makes sure that the passes don't run on functions that do not store -the return address to the stack, and don't need Pointer Authentication, saving -on runtime overhead. +usually have some other unwind-mechanism. +These passes only run on functions that include at least one negate-ra-state CFI. +This avoids processing functions that do not use Pointer Authentication, or on +functions that use Pointer Authentication, but do not have DWARF info. In summary: - pointer auth is not used: no change, the new passes do not run. @@ -149,20 +146,20 @@ In summary: - pointer auth is used, and we have DWARF CFIs: passes run, and rewrite the negate-ra-state CFI. -### MarkRAStates Pass +### MarkRAStates pass This pass runs before optimizations reorder anything. It processes MCAnnotations generated during the CFI reading stage to check if instructions have either of the three CFIs that can modify RA state: -- negate-ra-state -- remember-state -- restore-state +- negate-ra-state, +- remember-state, +- restore-state. Then it adds new MCAnnotations to each instruction, indicating their RA state. Those annotations are: -- Signed -- Unsigned +- Signed, +- Unsigned. Below is a simple example, that shows the two different type of annotations: what we have before the pass, and after it. @@ -179,9 +176,9 @@ what we have before the pass, and after it. ##### Error handling in MarkRAState Pass: Whenever the MarkRAStates pass finds inconsistencies in the current -BinaryFunction, it ignores it by calling `BF.setIgnored()`. This prevents BOLT -from optimizing that function, but it will still be emitted as part of the -original section (`.bolt.org.text`) in its original form. +BinaryFunction, it marks the function as ignored using `BF.setIgnored()`. BOLT +will not optimize this function but will emit it unchanged in the original section +(`.bolt.org.text`). The inconsistencies are as follows: - finding a `pac*` instruction when already in signed state @@ -193,8 +190,7 @@ exact functions ignored, and the found inconsistency. ### InsertNegateRAStatePass -This pass runs after the optimizations are done. In essence, it does the _inverse_ -of MarkRAState pass: +This pass runs after optimizations. It performns the _inverse_ of MarkRAState pa s: 1. it reads the RA state annotations attached to the instructions, and 2. whenever the state changes, it adds a PseudoInstruction that holds an OpNegateRAState CFI. @@ -205,8 +201,14 @@ Some BOLT passes can add new Instructions. In InsertNegateRAStatePass, we have to know what RA state these have. The current solution has the `inferUnknownStates` function to cover these, using -a fairly simple strategy: unknown states inherit the last known state. Testing so -far has shown that this implementation is sufficient. +a fairly simple strategy: unknown states inherit the last known state. + +This will be updated to a more robust solution. + +> [!important] +> As issue #160989 describes, unwind info is incorrect in stubs with multiple callers. +> For this same reason, we cannot generate correct pac-specific unwind info: the signess +> of the _incorrect_ return address is meaningless. ### Optimizations requiring special attention @@ -221,6 +223,6 @@ to indicate this. ## Option to disallow the feature -To aid debugging, we added the `--disallow-pacret` flag. If the flag is used, -and a function `containedNegateRAState()` after `FillCFIInfoFor()`, BOLT exits -with an error. With this flag, the feature is on by default. +The feature can be guarded with the `--update-branch-prediction` flag, which is +on by default. If the flag is set to false, and a function +`containedNegateRAState()` after `FillCFIInfoFor()`, BOLT exits with an error. From b375f753c5595ee1460b30cd7c460a69f1c5b77f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gergely=20B=C3=A1lint?= Date: Mon, 6 Oct 2025 13:44:13 +0200 Subject: [PATCH 31/31] Update bolt/docs/PacRetDesign.md spelling Co-authored-by: Paschalis Mpeis --- bolt/docs/PacRetDesign.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/docs/PacRetDesign.md b/bolt/docs/PacRetDesign.md index 136cac707efbc..f3fe5fbd522cb 100644 --- a/bolt/docs/PacRetDesign.md +++ b/bolt/docs/PacRetDesign.md @@ -28,7 +28,7 @@ The DW_CFA_AARCH64_negate_ra_state operation negates bit[0] of the RA_SIGN_STATE This bit indicates to the unwinder whether the current return address is signed or not (hence the name). The unwinder uses this information to authenticate the pointer, and remove the Pointer Authentication Code (PAC) bits. -Incorrect placment of negate-ra-state CFIs causes the unwinder to either attempt +Incorrect placement of negate-ra-state CFIs causes the unwinder to either attempt to authenticate an unsigned pointer (resulting in a segmentation fault), or skip authentication on a signed pointer, which can also cause a fault.