Skip to content

Commit

Permalink
AMDGPU: Move m0 initializations earlier
Browse files Browse the repository at this point in the history
Summary:
After hoisting and merging m0 initializations, schedule them as early as
possible in the MBB. This helps the scheduler avoid hazards in some
cases.

Reviewers: rampitec, arsenm

Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, arphaman, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D67450

llvm-svn: 371671
  • Loading branch information
kerbowa committed Sep 11, 2019
1 parent 9769a5e commit 666af67
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 25 deletions.
42 changes: 38 additions & 4 deletions llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,7 @@ getFirstNonPrologue(MachineBasicBlock *MBB, const TargetInstrInfo *TII) {
// execution.
static bool hoistAndMergeSGPRInits(unsigned Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo *TRI,
MachineDominatorTree &MDT,
const TargetInstrInfo *TII) {
// List of inits by immediate value.
Expand All @@ -480,7 +481,7 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,

for (auto &MI : MRI.def_instructions(Reg)) {
MachineOperand *Imm = nullptr;
for (auto &MO: MI.operands()) {
for (auto &MO : MI.operands()) {
if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) ||
(!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
Imm = nullptr;
Expand Down Expand Up @@ -585,8 +586,41 @@ static bool hoistAndMergeSGPRInits(unsigned Reg,
}
}

for (auto MI : MergedInstrs)
MI->removeFromParent();
// Remove initializations that were merged into another.
for (auto &Init : Inits) {
auto &Defs = Init.second;
for (auto I = Defs.begin(); I != Defs.end(); ++I)
if (MergedInstrs.count(*I)) {
(*I)->eraseFromParent();
I = Defs.erase(I);
}
}

// Try to schedule SGPR initializations as early as possible in the MBB.
for (auto &Init : Inits) {
auto &Defs = Init.second;
for (auto MI : Defs) {
auto MBB = MI->getParent();
MachineInstr &BoundaryMI = *getFirstNonPrologue(MBB, TII);
MachineBasicBlock::reverse_iterator B(BoundaryMI);
// Check if B should actually be a boundary. If not, set the previous
// instruction as the boundary instead.
if (!TII->isBasicBlockPrologue(*B))
B++;

auto R = std::next(MI->getReverseIterator());
const unsigned Threshold = 50;
// Search until B or Threshold for a place to insert the initialization.
for (unsigned I = 0; R != B && I < Threshold; ++R, ++I)
if (R->readsRegister(Reg, TRI) || R->definesRegister(Reg, TRI) ||
TII->isSchedulingBoundary(*R, MBB, *MBB->getParent()))
break;

// Move to directly after R.
if (&*--R != MI)
MBB->splice(*R, MBB, MI);
}
}

if (Changed)
MRI.clearKillFlags(Reg);
Expand Down Expand Up @@ -755,7 +789,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
}

if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT, TII);
hoistAndMergeSGPRInits(AMDGPU::M0, MRI, TRI, *MDT, TII);

return true;
}
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ define void @func_mov_fi_i32() #0 {

; CI: s_sub_u32 [[SUB0:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
; CI-NEXT: s_sub_u32 [[SUB1:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[SUB1]], 6
; CI-NEXT: v_lshr_b32_e64 v0, [[SUB0]], 6
; CI-NEXT: v_add_i32_e64 v1, s{{\[[0-9]+:[0-9]+\]}}, 4, [[SCALED]]
; CI-DAG: v_lshr_b32_e64 v0, [[SUB0]], 6
; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[SUB1]], 6
; CI-NOT: v_mov
; CI: ds_write_b32 v0, v0
; CI-NEXT: ds_write_b32 v0, v1
; CI-NEXT: v_add_i32_e64 v0, s{{\[[0-9]+:[0-9]+\]}}, 4, [[SCALED]]
; CI-NEXT: ds_write_b32 v0, v0

; GFX9: s_sub_u32 [[SUB0:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
; GFX9-NEXT: s_sub_u32 [[SUB1:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
Expand Down
106 changes: 89 additions & 17 deletions llvm/test/CodeGen/AMDGPU/merge-m0.mir
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
# RUN: llc -march=amdgcn -amdgpu-enable-merge-m0 -verify-machineinstrs -run-pass si-fix-sgpr-copies %s -o - | FileCheck -check-prefix=GCN %s

# GCN-LABEL: name: merge-m0-many-init
# GCN: bb.0.entry:
# GCN: SI_INIT_M0 -1
# GCN-NEXT: IMPLICIT_DEF
# GCN-NEXT: IMPLICIT_DEF
# GCN-NEXT: DS_WRITE_B32
# GCN-NEXT: DS_WRITE_B32
# GCN-NEXT: SI_INIT_M0 65536
Expand Down Expand Up @@ -45,9 +48,8 @@
# GCN-NEXT: DS_WRITE_B32
# GCN-NEXT: SI_INIT_M0 -1
# GCN-NEXT: DS_WRITE_B32

---
name: merge-m0-many-init
name: merge-m0-many-init
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
Expand Down Expand Up @@ -124,22 +126,24 @@ body: |
...

# GCN-LABEL: name: merge-m0-dont-hoist-past-init-with-different-initializer
# GCN: bb.0.entry:
# GCN: SI_INIT_M0 65536
# GCN-NEXT: IMPLICIT_DEF
# GCN-NEXT: IMPLICIT_DEF
# GCN-NEXT: DS_WRITE_B32

#GCN: bb.1:
#GCN-NOT: SI_INIT_M0 65536
#GCN-NOT: SI_INIT_M0 -1

#GCN: bb.2:
#GCN: SI_INIT_M0 -1
# GCN: bb.1:
# GCN-NOT: SI_INIT_M0 65536
# GCN-NOT: SI_INIT_M0 -1

#GCN: bb.3:
#GCN: SI_INIT_M0 -1
# GCN: bb.2:
# GCN: SI_INIT_M0 -1

# GCN: bb.3:
# GCN: SI_INIT_M0 -1
---
name: merge-m0-dont-hoist-past-init-with-different-initializer
name: merge-m0-dont-hoist-past-init-with-different-initializer
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
Expand Down Expand Up @@ -179,19 +183,19 @@ body: |
S_ENDPGM 0
...

# GCN-LABEL: name: merge-m0-after-prologue
# GCN: bb.0.entry:
# GCN-NOT: SI_INIT_M0
# GCN: S_OR_B64
# GCN-NEXT: SI_INIT_M0

#GCN: bb.1:
#GCN-NOT: SI_INIT_M0 -1

#GCN: bb.2:
#GCN-NOT: SI_INIT_MO -1
# GCN: bb.1:
# GCN-NOT: SI_INIT_M0 -1

# GCN: bb.2:
# GCN-NOT: SI_INIT_M0 -1
---
name: merge-m0-after-prologue
name: merge-m0-after-prologue
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
Expand Down Expand Up @@ -223,3 +227,71 @@ body: |
bb.3:
S_ENDPGM 0
...

# GCN-LABEL: name: move-m0-avoid-hazard
# GCN: $m0 = S_MOV_B32 -1
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
# GCN-NEXT: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
---
name: move-m0-avoid-hazard
body: |
bb.0:
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$m0 = S_MOV_B32 -1
DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
...

# GCN-LABEL: name: move-m0-with-prologue
# GCN: $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
# GCN: $m0 = S_MOV_B32 -1
# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
# GCN-NEXT: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
---
name: move-m0-with-prologue
body: |
bb.0:
liveins: $sgpr0_sgpr1
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$m0 = S_MOV_B32 -1
DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
...

# GCN-LABEL: name: move-m0-different-initializer
# GCN: SI_INIT_M0 -1
# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
# GCN: SI_INIT_M0 65536
# GCN-NEXT: S_NOP
---
name: move-m0-different-initializer
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
SI_INIT_M0 -1, implicit-def $m0
DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
S_NOP 0
SI_INIT_M0 65536, implicit-def $m0
DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
...

# GCN-LABEL: name: move-m0-schedule-boundary
# GCN: S_SETREG
# GCN-NEXT: SI_INIT_M0 -1
---
name: move-m0-schedule-boundary
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
body: |
bb.0:
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
S_SETREG_IMM32_B32 0, 1
SI_INIT_M0 -1, implicit-def $m0
DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
...

0 comments on commit 666af67

Please sign in to comment.