Skip to content

Commit

Permalink
[BOLT] Fix instrumentation bug in duplicated JTs
Browse files Browse the repository at this point in the history
Summary:
Fix a bug with instrumentation when trying to instrument
functions that share a jump table with multiple indirect
jumps. Usually, each indirect jump that uses a JT will have its own
copy of it. When this does not happen, we need to duplicate the jump
table safely, so we can split the edges correctly (each copy of the
jump table may have different split edges). For this to happen, we
need to correctly match the sequence of instructions that perform the
indirect jump to identify the base address of the jump table and patch
it to point to the new cloned JT. It was reported to us a case in
which the compiler generated suboptimal code to do an indirect jump
which our matcher failed to identify.

Fixes facebookarchive/BOLT#126

(cherry picked from FBD27065579)
  • Loading branch information
rafaelauler authored and maksfb committed Mar 15, 2021
1 parent b11c826 commit b3c34d5
Show file tree
Hide file tree
Showing 7 changed files with 236 additions and 48 deletions.
87 changes: 53 additions & 34 deletions bolt/src/BinaryFunction.cpp
Expand Up @@ -3864,7 +3864,7 @@ bool BinaryFunction::checkForAmbiguousJumpTables() {

void BinaryFunction::disambiguateJumpTables(
MCPlusBuilder::AllocatorIdTy AllocId) {
assert((opts::JumpTables != JTS_BASIC && isSimple()) || BC.HasRelocations);
assert((opts::JumpTables != JTS_BASIC && isSimple()) || !BC.HasRelocations);
SmallPtrSet<JumpTable *, 4> JumpTables;
for (auto &BB : BasicBlocks) {
for (auto &Inst : *BB) {
Expand All @@ -3881,51 +3881,70 @@ void BinaryFunction::disambiguateJumpTables(
// This instruction is an indirect jump using a jump table, but it is
// using the same jump table of another jump. Try all our tricks to
// extract the jump table symbol and make it point to a new, duplicated JT
MCPhysReg BaseReg1;
uint64_t Scale;
const MCSymbol *Target;
// In case we match if our first matcher, first instruction is the one to
// patch
MCInst *JTLoadInst = &Inst;
// Try a standard indirect jump matcher, scale 8
auto IndJmpMatcher = BC.MIB->matchIndJmp(
BC.MIB->matchReg(), BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
/*Offset=*/BC.MIB->matchSymbol(Target));
if (!BC.MIB->hasPCRelOperand(Inst) ||
!IndJmpMatcher->match(
auto IndJmpMatcher =
BC.MIB->matchIndJmp(BC.MIB->matchReg(BaseReg1),
BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
/*Offset=*/BC.MIB->matchSymbol(Target));
if (!IndJmpMatcher->match(
*BC.MRI, *BC.MIB,
MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
BaseReg1 != BC.MIB->getNoRegister() ||
Scale != 8) {
// Standard JT matching failed. Trying now:
// PIC-style matcher, scale 4
// addq %rdx, %rsi
// addq %rdx, %rdi
// leaq DATAat0x402450(%rip), %r11
// movslq (%r11,%rdx,4), %rcx
// addq %r11, %rcx
// jmpq *%rcx # JUMPTABLE @0x402450
MCPhysReg BaseReg1;
MCPhysReg BaseReg2;
uint64_t Offset;
auto PICIndJmpMatcher = BC.MIB->matchIndJmp(BC.MIB->matchAdd(
BC.MIB->matchReg(BaseReg1),
BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg2),
BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
BC.MIB->matchImm(Offset))));
auto LEAMatcherOwner =
BC.MIB->matchLoadAddr(BC.MIB->matchSymbol(Target));
auto LEAMatcher = LEAMatcherOwner.get();
auto PICBaseAddrMatcher = BC.MIB->matchIndJmp(BC.MIB->matchAdd(
std::move(LEAMatcherOwner), BC.MIB->matchAnyOperand()));
if (!PICIndJmpMatcher->match(
// Standard JT matching failed. Trying now:
// movq "jt.2397/1"(,%rax,8), %rax
// jmpq *%rax
auto LoadMatcherOwner = BC.MIB->matchLoad(
BC.MIB->matchReg(BaseReg1), BC.MIB->matchImm(Scale),
BC.MIB->matchReg(), /*Offset=*/BC.MIB->matchSymbol(Target));
auto LoadMatcher = LoadMatcherOwner.get();
auto IndJmpMatcher2 = BC.MIB->matchIndJmp(std::move(LoadMatcherOwner));
if (!IndJmpMatcher2->match(
*BC.MRI, *BC.MIB,
MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
Scale != 4 || BaseReg1 != BaseReg2 || Offset != 0 ||
!PICBaseAddrMatcher->match(
*BC.MRI, *BC.MIB,
MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1)) {
llvm_unreachable("Failed to extract jump table base");
continue;
BaseReg1 != BC.MIB->getNoRegister() || Scale != 8) {
// JT matching failed. Trying now:
// PIC-style matcher, scale 4
// addq %rdx, %rsi
// addq %rdx, %rdi
// leaq DATAat0x402450(%rip), %r11
// movslq (%r11,%rdx,4), %rcx
// addq %r11, %rcx
// jmpq *%rcx # JUMPTABLE @0x402450
auto PICIndJmpMatcher = BC.MIB->matchIndJmp(BC.MIB->matchAdd(
BC.MIB->matchReg(BaseReg1),
BC.MIB->matchLoad(BC.MIB->matchReg(BaseReg2),
BC.MIB->matchImm(Scale), BC.MIB->matchReg(),
BC.MIB->matchImm(Offset))));
auto LEAMatcherOwner =
BC.MIB->matchLoadAddr(BC.MIB->matchSymbol(Target));
auto LEAMatcher = LEAMatcherOwner.get();
auto PICBaseAddrMatcher = BC.MIB->matchIndJmp(BC.MIB->matchAdd(
std::move(LEAMatcherOwner), BC.MIB->matchAnyOperand()));
if (!PICIndJmpMatcher->match(
*BC.MRI, *BC.MIB,
MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1) ||
Scale != 4 || BaseReg1 != BaseReg2 || Offset != 0 ||
!PICBaseAddrMatcher->match(
*BC.MRI, *BC.MIB,
MutableArrayRef<MCInst>(&*BB->begin(), &Inst + 1), -1)) {
llvm_unreachable("Failed to extract jump table base");
continue;
}
// Matched PIC, identify the instruction with the reference to the JT
JTLoadInst = LEAMatcher->CurInst;
} else {
// Matched non-PIC
JTLoadInst = LoadMatcher->CurInst;
}
// Matched PIC
JTLoadInst = &*LEAMatcher->CurInst;
}

uint64_t NewJumpTableID{0};
Expand Down
8 changes: 5 additions & 3 deletions bolt/src/MachORewriteInstance.cpp
Expand Up @@ -61,9 +61,11 @@ namespace bolt {
MachORewriteInstance::MachORewriteInstance(object::MachOObjectFile *InputFile,
StringRef ToolPath)
: InputFile(InputFile), ToolPath(ToolPath),
BC(BinaryContext::createBinaryContext(
InputFile, /* IsPIC */ true,
DWARFContext::create(*InputFile))) {}
BC(BinaryContext::createBinaryContext(InputFile, /* IsPIC */ true,
DWARFContext::create(*InputFile))) {
if (opts::Instrument)
BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>());
}

Error MachORewriteInstance::setProfile(StringRef Filename) {
if (!sys::fs::exists(Filename))
Expand Down
20 changes: 12 additions & 8 deletions bolt/src/Passes/Instrumentation.cpp
Expand Up @@ -170,16 +170,18 @@ Instrumentation::createInstrumentationSnippet(BinaryContext &BC, bool IsLeaf) {
MCSymbol *Label;
Label = BC.Ctx->createNamedTempSymbol("InstrEntry");
Summary->Counters.emplace_back(Label);
std::vector<MCInst> CounterInstrs(5);
std::vector<MCInst> CounterInstrs;
CounterInstrs.resize(IsLeaf? 5 : 3);
uint32_t I = 0;
// Don't clobber application red zone (ABI dependent)
if (IsLeaf)
BC.MIB->createStackPointerIncrement(CounterInstrs[0], 128,
BC.MIB->createStackPointerIncrement(CounterInstrs[I++], 128,
/*NoFlagsClobber=*/true);
BC.MIB->createPushFlags(CounterInstrs[1], 2);
BC.MIB->createIncMemory(CounterInstrs[2], Label, &*BC.Ctx);
BC.MIB->createPopFlags(CounterInstrs[3], 2);
BC.MIB->createPushFlags(CounterInstrs[I++], 2);
BC.MIB->createIncMemory(CounterInstrs[I++], Label, &*BC.Ctx);
BC.MIB->createPopFlags(CounterInstrs[I++], 2);
if (IsLeaf)
BC.MIB->createStackPointerDecrement(CounterInstrs[4], 128,
BC.MIB->createStackPointerDecrement(CounterInstrs[I++], 128,
/*NoFlagsClobber=*/true);
return CounterInstrs;
}
Expand Down Expand Up @@ -660,8 +662,10 @@ void Instrumentation::setupRuntimeLibrary(BinaryContext &BC) {
outs() << "BOLT-INSTRUMENTER: Profile will be saved to file "
<< opts::InstrumentationFilename << "\n";

BC.setRuntimeLibrary(
std::make_unique<InstrumentationRuntimeLibrary>(std::move(Summary)));
auto *RtLibrary =
static_cast<InstrumentationRuntimeLibrary *>(BC.getRuntimeLibrary());
assert(RtLibrary && "instrumentation runtime library object must be set");
RtLibrary->setSummary(std::move(Summary));
}
}
}
5 changes: 4 additions & 1 deletion bolt/src/RewriteInstance.cpp
Expand Up @@ -494,8 +494,11 @@ RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc,
if (opts::UpdateDebugSections)
DebugInfoRewriter = std::make_unique<DWARFRewriter>(*BC, SectionPatchers);

if (opts::Hugify)
if (opts::Instrument) {
BC->setRuntimeLibrary(std::make_unique<InstrumentationRuntimeLibrary>());
} else if (opts::Hugify) {
BC->setRuntimeLibrary(std::make_unique<HugifyRuntimeLibrary>());
}
}

RewriteInstance::~RewriteInstance() {}
Expand Down
6 changes: 6 additions & 0 deletions bolt/src/RuntimeLibs/InstrumentationRuntimeLibrary.cpp
Expand Up @@ -10,6 +10,7 @@

#include "InstrumentationRuntimeLibrary.h"
#include "BinaryFunction.h"
#include "JumpTable.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/CommandLine.h"
Expand All @@ -26,6 +27,7 @@ extern cl::opt<std::string> InstrumentationFilename;
extern cl::opt<uint32_t> InstrumentationSleepTime;
extern cl::opt<bool> InstrumentationNoCountersClear;
extern cl::opt<bool> InstrumentationWaitForks;
extern cl::opt<JumpTableSupportLevel> JumpTables;

cl::opt<bool>
Instrument("instrument",
Expand All @@ -46,6 +48,10 @@ void InstrumentationRuntimeLibrary::adjustCommandLineOptions(
"relocations\n";
exit(1);
}
if (opts::JumpTables != JTS_MOVE) {
opts::JumpTables = JTS_MOVE;
outs() << "BOLT-INFO: forcing -jump-tables=move for instrumentation\n";
}
if (!BC.StartFunctionAddress) {
errs() << "BOLT-ERROR: instrumentation runtime libraries require a known "
"entry point of "
Expand Down
5 changes: 3 additions & 2 deletions bolt/src/RuntimeLibs/InstrumentationRuntimeLibrary.h
Expand Up @@ -17,8 +17,9 @@ namespace bolt {

class InstrumentationRuntimeLibrary : public RuntimeLibrary {
public:
InstrumentationRuntimeLibrary(std::unique_ptr<InstrumentationSummary> Summary)
: Summary(std::move(Summary)) {}
void setSummary(std::unique_ptr<InstrumentationSummary> &&S) {
Summary.swap(S);
}

void addRuntimeLibSections(std::vector<std::string> &SecNames) const final {
SecNames.push_back(".bolt.instr.counters");
Expand Down
153 changes: 153 additions & 0 deletions bolt/test/X86/instrumentation-dup-jts.s
@@ -0,0 +1,153 @@
# This reproduces a bug with instrumentation when trying to instrument
# functions that share a jump table with multiple indirect jumps. Usually,
# each indirect jump that uses a JT will have its own copy of it. When
# this does not happen, we need to duplicate the jump table safely, so
# we can split the edges correctly (each copy of the jump table may have
# different split edges). For this to happen, we need to correctly match
# the sequence of instructions that perform the indirect jump to identify
# the base address of the jump table and patch it to point to the new
# cloned JT.
#
# Here we test this variant:
# movq jt.2397(,%rax,8), %rax
# jmp *%rax
#
# Which is suboptimal since the compiler could've avoided using an intermediary
# register, but GCC does generate this code and it triggered a bug in our
# matcher. Usual jumps in non-PIC code have this format:
#
# jmp *jt.2397(,%rax,8)
#
# This is the C code fed to GCC:
# #include <stdio.h>
#int interp(char* code) {
# static void* jt[] = { &&op_end, &&op_inc, &&do_dec };
# int pc = 0;
# int res = 0;
# goto *jt[code[pc++] - '0'];
#
#op_inc:
# res += 1;
# printf("%d\n", res);
# goto *jt[code[pc++] - '0'];
#do_dec:
# res -= 1;
# printf("%d\n", res);
# goto *jt[code[pc++] - '0'];
#op_end:
# return res;
#}
#int main(int argc, char** argv) {
# return interp(argv[1]);
#}


# REQUIRES: system-linux

# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
# RUN: %s -o %t.o
# RUN: %host_cc %t.o -o %t.exe -Wl,-q

# RUN: llvm-bolt %t.exe -instrument -instrumentation-file=%t.fdata \
# RUN: -o %t.instrumented

# Instrumented program needs to finish returning zero
# RUN: %t.instrumented 120

# Test that the instrumented data makes sense
# RUN: llvm-bolt %t.exe -o %t.bolted -data %t.fdata \
# RUN: -reorder-blocks=cache+ -reorder-functions=hfsort+ \
# RUN: -print-only=interp -print-finalized | FileCheck %s

# RUN: %t.bolted 120

# Check that our two indirect jumps are recorded in the fdata file and that
# each has its own independent profile
# CHECK: Successors: .Ltmp1 (mispreds: 0, count: 1), .Ltmp0 (mispreds: 0, count: 0), .Ltmp2 (mispreds: 0, count: 0)
# CHECK: Successors: .Ltmp0 (mispreds: 0, count: 1), .Ltmp2 (mispreds: 0, count: 1), .Ltmp1 (mispreds: 0, count: 0)

.file "test.c"
.text
.section .rodata.str1.1,"aMS",@progbits,1
.LC0:
.string "%d\n"
.text
.p2align 4,,15
.globl interp
.type interp, @function
interp:
.LFB11:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
xorl %ebp, %ebp
pushq %rbx
.cfi_def_cfa_offset 24
.cfi_offset 3, -24
leaq 1(%rdi), %rbx
subq $8, %rsp
.cfi_def_cfa_offset 32
movsbl (%rdi), %eax
subl $48, %eax
cltq
movq jt.2397(,%rax,8), %rax
jmp *%rax
.p2align 4,,10
.p2align 3
.L3:
addl $1, %ebp
.L8:
movl %ebp, %esi
movl $.LC0, %edi
xorl %eax, %eax
addq $1, %rbx
call printf
movsbl -1(%rbx), %eax
subl $48, %eax
cltq
movq jt.2397(,%rax,8), %rax
jmp *%rax
.p2align 4,,10
.p2align 3
.L6:
addq $8, %rsp
.cfi_remember_state
.cfi_def_cfa_offset 24
movl %ebp, %eax
popq %rbx
.cfi_def_cfa_offset 16
popq %rbp
.cfi_def_cfa_offset 8
ret
.p2align 4,,10
.p2align 3
.L4:
.cfi_restore_state
subl $1, %ebp
jmp .L8
.cfi_endproc
.LFE11:
.size interp, .-interp
.section .text.startup,"ax",@progbits
.p2align 4,,15
.globl main
.type main, @function
main:
.LFB12:
.cfi_startproc
movq 8(%rsi), %rdi
jmp interp
.cfi_endproc
.LFE12:
.size main, .-main
.section .rodata
.align 16
.type jt.2397, @object
.size jt.2397, 24
jt.2397:
.quad .L6
.quad .L3
.quad .L4
.ident "GCC: (GNU) 8"
.section .note.GNU-stack,"",@progbits

0 comments on commit b3c34d5

Please sign in to comment.