Skip to content

Commit

Permalink
[MachineBlockPlacement] Don't make blocks "uneditable"
Browse files Browse the repository at this point in the history
Summary:
This fixes an issue with MachineBlockPlacement due to a badly timed call
to `analyzeBranch` with `AllowModify` set to true.  The timeline is as
follows:

 1. `MachineBlockPlacement::maybeTailDuplicateBlock` calls
    `TailDup.shouldTailDuplicate` on its argument, which in turn calls
    `analyzeBranch` with `AllowModify` set to true.

 2. This `analyzeBranch` call edits the terminator sequence of the block
    based on the physical layout of the machine function, turning an
    unanalyzable non-fallthrough block to a unanalyzable fallthrough
    block.  Normally MBP bails out of rearranging such blocks, but this
    block was unanalyzable non-fallthrough (and thus rearrangeable) the
    first time MBP looked at it, and so it goes ahead and decides where
    it should be placed in the function.

 3. When placing this block MBP fails to analyze and thus update the
    block in keeping with the new physical layout.

Concretely, before (1) we have something like:

```
LBL0:
  < unknown terminator op that may branch to LBL1 >
  jmp LBL1

LBL1:
  ... A

LBL2:
  ... B
```

In (2), analyze branch simplifies this to

```
LBL0:
  < unknown terminator op that may branch to LBL2 >
  ;; jmp LBL1 <- redundant jump removed

LBL1:
  ... A

LBL2:
  ... B
```

In (3), MachineBlockPlacement goes ahead with its plan of putting LBL2
after the first block since that is profitable.

```
LBL0:
  < unknown terminator op that may branch to LBL2 >
  ;; jmp LBL1 <- redundant jump

LBL2:
  ... B

LBL1:
  ... A
```

and the program now has incorrect behavior (we no longer fall-through
from `LBL0` to `LBL1`) because MBP can no longer edit LBL0.

There are several possible solutions, but I went with removing the teeth
off of the `analyzeBranch` calls in TailDuplicator.  That makes thinking
about the result of these calls easier, and breaks nothing in the lit
test suite.

I've also added some bookkeeping to the MachineBlockPlacement pass and
used that to write an assert that would have caught this.

Reviewers: chandlerc, gberry, MatzeB, iteratee

Subscribers: mcrosier, llvm-commits

Differential Revision: https://reviews.llvm.org/D27783

llvm-svn: 289764
  • Loading branch information
sanjoy committed Dec 15, 2016
1 parent ba80a83 commit d7389d6
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 7 deletions.
22 changes: 22 additions & 0 deletions llvm/lib/CodeGen/MachineBlockPlacement.cpp
Expand Up @@ -324,6 +324,14 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// between basic blocks.
DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;

#ifndef NDEBUG
/// The set of basic blocks that have terminators that cannot be fully
/// analyzed. These basic blocks cannot be re-ordered safely by
/// MachineBlockPlacement, and we must preserve physical layout of these
/// blocks and their successors through the pass.
SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;
#endif

/// Decrease the UnscheduledPredecessors count for all blocks in chain, and
/// if the count goes to 0, add them to the appropriate work list.
void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
Expand Down Expand Up @@ -1589,6 +1597,7 @@ void MachineBlockPlacement::buildCFGChains() {
<< getBlockName(BB) << " -> " << getBlockName(NextBB)
<< "\n");
Chain->merge(NextBB, nullptr);
BlocksWithUnanalyzableExits.insert(&*BB);
FI = NextFI;
BB = NextBB;
}
Expand Down Expand Up @@ -1661,6 +1670,19 @@ void MachineBlockPlacement::buildCFGChains() {
Cond.clear();
MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.

#ifndef NDEBUG
if (!BlocksWithUnanalyzableExits.count(PrevBB)) {
// Given the exact block placement we chose, we may actually not _need_ to
// be able to edit PrevBB's terminator sequence, but not being _able_ to
// do that at this point is a bug.
assert((!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond) ||
!PrevBB->canFallThrough()) &&
"Unexpected block with un-analyzable fallthrough!");
Cond.clear();
TBB = FBB = nullptr;
}
#endif

// The "PrevBB" is not yet updated to reflect current code layout, so,
// o. it may fall-through to a block without explicit "goto" instruction
// before layout, and no longer fall-through it after layout; or
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/CodeGen/TailDuplicator.cpp
Expand Up @@ -550,8 +550,8 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
// blocks with unanalyzable fallthrough get layed out contiguously.
MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond, true)
&& TailBB.canFallThrough())
if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond) &&
TailBB.canFallThrough())
return false;

// If the target has hardware branch prediction that can handle indirect
Expand Down Expand Up @@ -660,7 +660,7 @@ bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) {

MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
return false;

if (!PredCond.empty())
Expand All @@ -687,7 +687,7 @@ bool TailDuplicator::duplicateSimpleBB(

MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
continue;

Changed = true;
Expand Down Expand Up @@ -750,7 +750,7 @@ bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB,

MachineBasicBlock *PredTBB, *PredFBB;
SmallVector<MachineOperand, 4> PredCond;
if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
return false;
if (!PredCond.empty())
return false;
Expand Down Expand Up @@ -833,7 +833,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
// Simplify
MachineBasicBlock *PredTBB, *PredFBB;
SmallVector<MachineOperand, 4> PredCond;
TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond);

NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch

Expand Down Expand Up @@ -861,7 +861,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
if (PrevBB->succ_size() == 1 &&
// Layout preds are not always CFG preds. Check.
*PrevBB->succ_begin() == TailBB &&
!TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
!TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) &&
PriorCond.empty() &&
(!PriorTBB || PriorTBB == TailBB) &&
TailBB->pred_size() == 1 &&
Expand Down
87 changes: 87 additions & 0 deletions llvm/test/CodeGen/X86/block-placement.mir
@@ -0,0 +1,87 @@
# RUN: llc -O3 -run-pass=block-placement -o - %s | FileCheck %s

--- |
; ModuleID = 'test.ll'
source_filename = "test.ll"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

declare void @stub(i32*)

define i32 @f(i32* %ptr, i1 %cond) {
entry:
br i1 %cond, label %left, label %right

left: ; preds = %entry
%is_null = icmp eq i32* %ptr, null
br i1 %is_null, label %null, label %not_null, !prof !0, !make.implicit !1

not_null: ; preds = %left
%val = load i32, i32* %ptr
ret i32 %val

null: ; preds = %left
call void @stub(i32* %ptr)
unreachable

right: ; preds = %entry
call void @stub(i32* null)
unreachable
}

; Function Attrs: nounwind
declare void @llvm.stackprotector(i8*, i8**) #0

attributes #0 = { nounwind }

!0 = !{!"branch_weights", i32 1048575, i32 1}
!1 = !{}

...
---
# CHECK: name: f
name: f
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '%rdi' }
- { reg: '%esi' }

# CHECK: %eax = FAULTING_LOAD_OP %bb.3.null, 1684, killed %rdi, 1, _, 0, _ :: (load 4 from %ir.ptr)
# CHECK-NEXT: JMP_1 %bb.2.not_null
# CHECK: bb.3.null:
# CHECK: bb.4.right:
# CHECK: bb.2.not_null:

body: |
bb.0.entry:
successors: %bb.1.left(0x7ffff800), %bb.3.right(0x00000800)
liveins: %esi, %rdi
frame-setup PUSH64r undef %rax, implicit-def %rsp, implicit %rsp
CFI_INSTRUCTION def_cfa_offset 16
TEST8ri %sil, 1, implicit-def %eflags, implicit killed %esi
JE_1 %bb.3.right, implicit killed %eflags
bb.1.left:
successors: %bb.2.null(0x7ffff800), %bb.4.not_null(0x00000800)
liveins: %rdi
%eax = FAULTING_LOAD_OP %bb.2.null, 1684, killed %rdi, 1, _, 0, _ :: (load 4 from %ir.ptr)
JMP_1 %bb.4.not_null
bb.4.not_null:
liveins: %rdi, %eax
%rcx = POP64r implicit-def %rsp, implicit %rsp
RETQ %eax
bb.2.null:
liveins: %rdi
CALL64pcrel32 @stub, csr_64, implicit %rsp, implicit %rdi, implicit-def %rsp
bb.3.right:
dead %edi = XOR32rr undef %edi, undef %edi, implicit-def dead %eflags, implicit-def %rdi
CALL64pcrel32 @stub, csr_64, implicit %rsp, implicit %rdi, implicit-def %rsp
...

0 comments on commit d7389d6

Please sign in to comment.