diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 6d026796e93b7..71739278cf513 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -1040,7 +1040,11 @@ class MachineBasicBlock
   /// Succ, can be split. If this returns true a subsequent call to
   /// SplitCriticalEdge is guaranteed to return a valid basic block if
   /// no changes occurred in the meantime.
-  LLVM_ABI bool canSplitCriticalEdge(const MachineBasicBlock *Succ) const;
+  /// On targets that require a structured CFG, the edge is only splittable
+  /// when \p MLI is provided and \p Succ is the header of a loop.
+  LLVM_ABI bool
+  canSplitCriticalEdge(const MachineBasicBlock *Succ,
+                       const MachineLoopInfo *MLI = nullptr) const;
 
   void pop_front() { Insts.pop_front(); }
   void pop_back() { Insts.pop_back(); }
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index bc1df26db2684..1cb57a4fa4258 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -1180,7 +1180,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
 MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
     MachineBasicBlock *Succ, const SplitCriticalEdgeAnalyses &Analyses,
     std::vector<SparseBitVector<>> *LiveInSets, MachineDomTreeUpdater *MDTU) {
-  if (!canSplitCriticalEdge(Succ))
+  if (!canSplitCriticalEdge(Succ, Analyses.MLI))
     return nullptr;
 
   MachineFunction *MF = getParent();
@@ -1408,8 +1408,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
   return NMBB;
 }
 
-bool MachineBasicBlock::canSplitCriticalEdge(
-    const MachineBasicBlock *Succ) const {
+bool MachineBasicBlock::canSplitCriticalEdge(const MachineBasicBlock *Succ,
+                                             const MachineLoopInfo *MLI) const {
   // Splitting the critical edge to a landing pad block is non-trivial. Don't do
   // it in this generic function.
   if (Succ->isEHPad())
@@ -1423,8 +1423,14 @@ bool MachineBasicBlock::canSplitCriticalEdge(
   const MachineFunction *MF = getParent();
   // Performance might be harmed on HW that implements branching using exec mask
   // where both sides of the branches are always executed.
-  if (MF->getTarget().requiresStructuredCFG())
-    return false;
+  if (MF->getTarget().requiresStructuredCFG()) {
+    // Splitting an edge into a loop header does not break the structured CFG;
+    // any other edge must be left alone. Without loop info we cannot tell, so
+    // be conservative. Do not return true here: the checks below still apply.
+    const MachineLoop *L = MLI ? MLI->getLoopFor(Succ) : nullptr;
+    if (!L || L->getHeader() != Succ)
+      return false;
+  }
 
   // Do we have an Indirect jump with a jumptable that we can rewrite?
   int JTI = findJumpTableIndex(*this);
diff --git a/llvm/test/CodeGen/NVPTX/machinelicm-no-preheader.mir b/llvm/test/CodeGen/NVPTX/machinelicm-no-preheader.mir
new file mode 100644
index 0000000000000..0b2d85600a2ef
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/machinelicm-no-preheader.mir
@@ -0,0 +1,80 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=nvptx64 -mcpu=sm_20 -run-pass=early-machinelicm %s -o - | FileCheck %s
+
+# This test checks that the early-machineLICM pass successfully creates a new
+# loop preheader by splitting the critical edge and hoisting the loop invariant
+# value `%8` to the preheader.
+# Since the critical edge successor is a loop header, the splitting does not
+# break the structured CFG, which is a requirement for the NVPTX target.
+
+---
+name:            test_hoist
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: b64, preferred-register: '', flags: [  ] }
+  - { id: 1, class: b32, preferred-register: '', flags: [  ] }
+  - { id: 2, class: b32, preferred-register: '', flags: [  ] }
+  - { id: 3, class: b32, preferred-register: '', flags: [  ] }
+  - { id: 4, class: b32, preferred-register: '', flags: [  ] }
+  - { id: 5, class: b32, preferred-register: '', flags: [  ] }
+  - { id: 6, class: b64, preferred-register: '', flags: [  ] }
+  - { id: 7, class: b1, preferred-register: '', flags: [  ] }
+  - { id: 8, class: b32, preferred-register: '', flags: [  ] }
+  - { id: 9, class: b1, preferred-register: '', flags: [  ] }
+body:             |
+  ; CHECK-LABEL: name: test_hoist
+  ; CHECK: bb.0.entry:
+  ; CHECK-NEXT:   successors: %bb.2(0x30000000), %bb.3(0x50000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[LD_i32_:%[0-9]+]]:b32 = LD_i32 0, 0, 101, 3, 32, &test_hoist_param_1, 0 :: (dereferenceable invariant load (s32), addrspace 101)
+  ; CHECK-NEXT:   [[LD_i64_:%[0-9]+]]:b64 = LD_i64 0, 0, 101, 3, 64, &test_hoist_param_0, 0 :: (dereferenceable invariant load (s64), addrspace 101)
+  ; CHECK-NEXT:   [[ADD64ri:%[0-9]+]]:b64 = nuw ADD64ri killed [[LD_i64_]], 2
+  ; CHECK-NEXT:   [[LD_i32_1:%[0-9]+]]:b32 = LD_i32 0, 0, 1, 3, 32, [[ADD64ri]], 0
+  ; CHECK-NEXT:   [[SETP_i32ri:%[0-9]+]]:b1 = SETP_i32ri [[LD_i32_]], 0, 0
+  ; CHECK-NEXT:   CBranch killed [[SETP_i32ri]], %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[ADD32ri:%[0-9]+]]:b32 = ADD32ri [[LD_i32_]], -1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:b32 = PHI [[LD_i32_1]], %bb.3, %3, %bb.1
+  ; CHECK-NEXT:   [[SREM32rr:%[0-9]+]]:b32 = SREM32rr [[PHI]], [[ADD32ri]]
+  ; CHECK-NEXT:   [[SETP_i32ri1:%[0-9]+]]:b1 = SETP_i32ri [[SREM32rr]], 0, 1
+  ; CHECK-NEXT:   CBranch killed [[SETP_i32ri1]], %bb.1
+  ; CHECK-NEXT:   GOTO %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:b32 = PHI [[LD_i32_1]], %bb.0, [[SREM32rr]], %bb.1
+  ; CHECK-NEXT:   ST_i32 [[PHI1]], 0, 0, 1, 32, [[ADD64ri]], 0
+  ; CHECK-NEXT:   Return
+  bb.0.entry:
+    successors: %bb.2(0x30000000), %bb.1(0x50000000)
+
+    %5:b32 = LD_i32 0, 0, 101, 3, 32, &test_hoist_param_1, 0 :: (dereferenceable invariant load (s32), addrspace 101)
+    %6:b64 = LD_i64 0, 0, 101, 3, 64, &test_hoist_param_0, 0 :: (dereferenceable invariant load (s64), addrspace 101)
+    %0:b64 = nuw ADD64ri killed %6, 2
+    %1:b32 = LD_i32 0, 0, 1, 3, 32, %0, 0
+    %7:b1 = SETP_i32ri %5, 0, 0
+    CBranch killed %7, %bb.2
+    GOTO %bb.1
+
+
+  bb.1:
+    successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+
+    %2:b32 = PHI %1, %bb.0, %3, %bb.1
+    %8:b32 = ADD32ri %5, -1
+    %3:b32 = SREM32rr %2, %8
+    %9:b1 = SETP_i32ri %3, 0, 1
+    CBranch killed %9, %bb.1
+    GOTO %bb.2
+
+  bb.2:
+    %4:b32 = PHI %1, %bb.0, %3, %bb.1
+    ST_i32 %4, 0, 0, 1, 32, %0, 0
+    Return
+...