Skip to content

Commit

Permalink
[BOLT] Do not assert on jump table heuristic failure
Browse files Browse the repository at this point in the history
Summary:
During the initial indirect jump analysis, we used to assert that the
discovered jump table type matched the pattern of the corresponding
instruction sequence. E.g., for PIC jump table memory we expected the
PIC jump table instruction sequence. The assertions were too
conservative, as in the case of a mismatch we can mark the indirect jump
as having an unknown control flow. That should be sufficient to either
skip the function processing or rely on relocation information for
possible recovery of the control flow.

(cherry picked from FBD27255816)
  • Loading branch information
maksfb committed Mar 23, 2021
1 parent b3c34d5 commit e7169be
Show file tree
Hide file tree
Showing 2 changed files with 185 additions and 66 deletions.
119 changes: 53 additions & 66 deletions bolt/src/BinaryFunction.cpp
Expand Up @@ -795,11 +795,11 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction,
unsigned Size,
uint64_t Offset,
uint64_t &TargetAddress) {
const auto PtrSize = BC.AsmInfo->getCodePointerSize();
const unsigned PtrSize = BC.AsmInfo->getCodePointerSize();

// An instruction referencing memory used by jump instruction (directly or
// via register). This location could be an array of function pointers
// in case of indirect tail call, or a jump table.
// The instruction referencing memory used by the branch instruction.
// It could be the branch instruction itself or one of the instructions
// setting the value of the register used by the branch.
MCInst *MemLocInstr;

// Address of the table referenced by MemLocInstr. Could be either an
Expand Down Expand Up @@ -830,26 +830,27 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction,
}
}

auto Type = BC.MIB->analyzeIndirectBranch(Instruction,
Begin,
Instructions.end(),
PtrSize,
MemLocInstr,
BaseRegNum,
IndexRegNum,
DispValue,
DispExpr,
PCRelBaseInstr);
IndirectBranchType BranchType =
BC.MIB->analyzeIndirectBranch(Instruction,
Begin,
Instructions.end(),
PtrSize,
MemLocInstr,
BaseRegNum,
IndexRegNum,
DispValue,
DispExpr,
PCRelBaseInstr);

if (Type == IndirectBranchType::UNKNOWN && !MemLocInstr)
return Type;
if (BranchType == IndirectBranchType::UNKNOWN && !MemLocInstr)
return BranchType;

if (MemLocInstr != &Instruction)
IndexRegNum = BC.MIB->getNoRegister();

if (BC.isAArch64()) {
const auto *Sym = BC.MIB->getTargetSymbol(*PCRelBaseInstr, 1);
assert (Sym && "Symbol extraction failed");
const MCSymbol *Sym = BC.MIB->getTargetSymbol(*PCRelBaseInstr, 1);
assert(Sym && "Symbol extraction failed");
auto SymValueOrError = BC.getSymbolValue(*Sym);
if (SymValueOrError) {
PCRelAddr = *SymValueOrError;
Expand Down Expand Up @@ -921,8 +922,8 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction,
return IndirectBranchType::POSSIBLE_TAIL_CALL;
}

if (Type == IndirectBranchType::POSSIBLE_FIXED_BRANCH) {
auto Value = BC.getPointerAtAddress(ArrayStart);
if (BranchType == IndirectBranchType::POSSIBLE_FIXED_BRANCH) {
ErrorOr<uint64_t> Value = BC.getPointerAtAddress(ArrayStart);
if (!Value)
return IndirectBranchType::UNKNOWN;

Expand All @@ -936,63 +937,49 @@ BinaryFunction::processIndirectBranch(MCInst &Instruction,
<< '\n';

TargetAddress = *Value;
return Type;
return BranchType;
}

auto useJumpTableForInstruction = [&](JumpTable::JumpTableType JTType) {
const MCSymbol *JTLabel =
BC.getOrCreateJumpTable(*this, ArrayStart, JTType);

BC.MIB->replaceMemOperandDisp(const_cast<MCInst &>(*MemLocInstr),
JTLabel, BC.Ctx.get());
BC.MIB->setJumpTable(Instruction, ArrayStart, IndexRegNum);

JTSites.emplace_back(Offset, ArrayStart);
};

// Check if there's already a jump table registered at this address.
// At this point, all jump tables are empty.
if (auto *JT = BC.getJumpTableContainingAddress(ArrayStart)) {
// Make sure the type of the table matches the code.
if (Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
assert(JT->Type == JumpTable::JTT_PIC && "PIC jump table expected");
} else {
assert(JT->Type == JumpTable::JTT_NORMAL && "normal jump table expected");
Type = IndirectBranchType::POSSIBLE_JUMP_TABLE;
MemoryContentsType MemType;
if (JumpTable *JT = BC.getJumpTableContainingAddress(ArrayStart)) {
switch (JT->Type) {
case JumpTable::JTT_NORMAL:
MemType = MemoryContentsType::POSSIBLE_JUMP_TABLE;
break;
case JumpTable::JTT_PIC:
MemType = MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE;
break;
}
} else {
MemType = BC.analyzeMemoryAt(ArrayStart, *this);
}

useJumpTableForInstruction(JT->Type);
// Check that jump table type in instruction pattern matches memory contents.
JumpTable::JumpTableType JTType;
if (BranchType == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
if (MemType != MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
return IndirectBranchType::UNKNOWN;
JTType = JumpTable::JTT_PIC;
} else {
if (MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE)
return IndirectBranchType::UNKNOWN;

return Type;
}
if (MemType == MemoryContentsType::UNKNOWN)
return IndirectBranchType::POSSIBLE_TAIL_CALL;

const auto MemType = BC.analyzeMemoryAt(ArrayStart, *this);
if (Type == IndirectBranchType::POSSIBLE_PIC_JUMP_TABLE) {
assert(MemType == MemoryContentsType::POSSIBLE_PIC_JUMP_TABLE &&
"PIC jump table heuristic failure");
useJumpTableForInstruction(JumpTable::JTT_PIC);
return Type;
BranchType = IndirectBranchType::POSSIBLE_JUMP_TABLE;
JTType = JumpTable::JTT_NORMAL;
}

if (MemType == MemoryContentsType::POSSIBLE_JUMP_TABLE) {
assert(Type == IndirectBranchType::UNKNOWN &&
"non-PIC jump table heuristic failure");
useJumpTableForInstruction(JumpTable::JTT_NORMAL);
return IndirectBranchType::POSSIBLE_JUMP_TABLE;
}
// Convert the instruction into jump table branch.
const MCSymbol *JTLabel = BC.getOrCreateJumpTable(*this, ArrayStart, JTType);
BC.MIB->replaceMemOperandDisp(*MemLocInstr, JTLabel, BC.Ctx.get());
BC.MIB->setJumpTable(Instruction, ArrayStart, IndexRegNum);

// We have a possible tail call, so let's add the value read from the possible
// memory location as a reference. Only do that if the address we read is sane
// enough (is inside an allocatable section). It is possible that we read
// garbage if the load instruction we analyzed is in a basic block different
// than the one where the indirect jump is. However, later,
// postProcessIndirectBranches() is going to mark the function as non-simple
// in this case.
auto Value = BC.getPointerAtAddress(ArrayStart);
if (Value && BC.getSectionForAddress(*Value))
InterproceduralReferences.insert(*Value);
JTSites.emplace_back(Offset, ArrayStart);

return IndirectBranchType::POSSIBLE_TAIL_CALL;
return BranchType;
}

MCSymbol *BinaryFunction::getOrCreateLocalLabel(uint64_t Address,
Expand Down
132 changes: 132 additions & 0 deletions bolt/test/X86/false-jump-table.s
@@ -0,0 +1,132 @@
# Check that jump table detection does not fail on a false
# reference to a jump table.

# REQUIRES: system-linux

# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
# RUN: %s -o %t.o
# RUN: %host_cc %t.o -o %t.exe -Wl,-q

# RUN: llvm-bolt %t.exe -print-cfg \
# RUN: -print-only=inc_dup -o %t.out | FileCheck %s

.file "jump_table.c"
.section .rodata
.LC0:
.string "0"
.LC1:
.string "1"
.LC2:
.string "2"
.LC3:
.string "3"
.LC4:
.string "4"
.LC5:
.string "5"
.text
.globl inc_dup
.type inc_dup, @function
inc_dup:
.LFB0:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movl %edi, -4(%rbp)
movl -4(%rbp), %eax
subl $10, %eax
cmpl $5, %eax
ja .L2
# Control flow confusing for JT detection
# CHECK: leaq "JUMP_TABLE{{.*}}"(%rip), %rdx
leaq .L4(%rip), %rdx
jmp .LJT
# CHECK: leaq DATAat{{.*}}(%rip), %rdx
leaq .LC0(%rip), %rdx
jmp .L10
.LJT:
movslq (%rdx,%rax,4), %rax
addq %rdx, %rax
# CHECK: jmpq *%rax # UNKNOWN CONTROL FLOW
jmp *%rax
.section .rodata
.align 4
.align 4
.L4:
.long .L3-.L4
.long .L5-.L4
.long .L6-.L4
.long .L7-.L4
.long .L8-.L4
.long .L9-.L4
.text
.L3:
leaq .LC0(%rip), %rdi
call puts@PLT
movl $1, %eax
jmp .L10
.L5:
leaq .LC1(%rip), %rdi
call puts@PLT
movl $2, %eax
jmp .L10
.L6:
leaq .LC2(%rip), %rdi
call puts@PLT
movl $3, %eax
jmp .L10
.L7:
leaq .LC3(%rip), %rdi
call puts@PLT
movl $4, %eax
jmp .L10
.L8:
leaq .LC4(%rip), %rdi
call puts@PLT
movl $5, %eax
jmp .L10
.L9:
leaq .LC5(%rip), %rdi
call puts@PLT
movl $6, %eax
jmp .L10
.L2:
movl -4(%rbp), %eax
addl $1, %eax
.L10:
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
.size inc_dup, .-inc_dup
.text
.globl main
.type main, @function
main:
.LFB1:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $16, %rsp
movl %edi, -4(%rbp)
movq %rsi, -16(%rbp)
movl -4(%rbp), %eax
addl $9, %eax
movl %eax, %edi
call inc_dup@PLT
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE1:
.size main, .-main
.ident "GCC: (GNU) 6.3.0"
.section .note.GNU-stack,"",@progbits

0 comments on commit e7169be

Please sign in to comment.