Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ static cl::opt<unsigned, false, MFMAPaddingRatioParser>
cl::desc("Fill a percentage of the latency between "
"neighboring MFMA with s_nops."));

// Hidden command-line option -amdgpu-snop-padding (unsigned, default 0).
// GCNHazardRecognizer::PreEmitNoops takes the maximum of its computed noop
// count and this value, so a non-zero setting acts as a lower bound on the
// noops requested ahead of each instruction it is called for.
// NOTE(review): the unit is presumably wait states rather than s_nop
// instructions -- confirm against how the returned count is expanded.
// This is intended for debugging purposes only.
static cl::opt<unsigned>
NopPadding("amdgpu-snop-padding", cl::init(0), cl::Hidden,
cl::desc("Insert a s_nop x before every instruction"));

//===----------------------------------------------------------------------===//
// Hazard Recognizer Implementation
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -300,7 +305,7 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
unsigned W = PreEmitNoopsCommon(MI);
fixHazards(MI);
CurrCycleInstr = nullptr;
return W;
return std::max(W, NopPadding.getValue());
}

unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
Expand Down
132 changes: 132 additions & 0 deletions llvm/test/CodeGen/AMDGPU/amdgpu-snop-padding.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -amdgpu-snop-padding=8 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN8 %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -amdgpu-snop-padding=16 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN16 %s
#
# Exercises the -amdgpu-snop-padding option through the post-RA-hazard-rec pass
# at two settings. With a value of 8 (GCN8 prefix) the checks expect one
# `S_NOP 7` in front of every instruction, including the `S_NOP 0` already
# present in the input; with a value of 16 (GCN16 prefix) they expect two.

---
name: test_snop_padding
tracksRegLiveness: true
frameInfo:
maxAlignment: 4
stack:
- { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
machineFunctionInfo:
isEntryFunction: false
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
stackPtrOffsetReg: '$sgpr32'
frameOffsetReg: '$sgpr33'
hasSpilledSGPRs: true
body: |
; GCN8-LABEL: name: test_snop_padding
; GCN8: bb.0:
; GCN8-NEXT: successors: %bb.1(0x80000000)
; GCN8-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
; GCN8-NEXT: {{ $}}
; GCN8-NEXT: S_NOP 7
; GCN8-NEXT: S_BRANCH %bb.1
; GCN8-NEXT: {{ $}}
; GCN8-NEXT: bb.1:
; GCN8-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; GCN8-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
; GCN8-NEXT: {{ $}}
; GCN8-NEXT: S_NOP 7
; GCN8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
; GCN8-NEXT: S_NOP 7
; GCN8-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
; GCN8-NEXT: {{ $}}
; GCN8-NEXT: bb.2:
; GCN8-NEXT: successors: %bb.3(0x80000000)
; GCN8-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
; GCN8-NEXT: {{ $}}
; GCN8-NEXT: S_NOP 7
; GCN8-NEXT: SI_SPILL_S32_SAVE killed $sgpr6, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
; GCN8-NEXT: S_NOP 7
; GCN8-NEXT: S_NOP 0
; GCN8-NEXT: S_NOP 7
; GCN8-NEXT: renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
; GCN8-NEXT: S_NOP 7
; GCN8-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
; GCN8-NEXT: S_NOP 7
; GCN8-NEXT: S_BRANCH %bb.3
; GCN8-NEXT: {{ $}}
; GCN8-NEXT: bb.3:
; GCN8-NEXT: liveins: $sgpr10_sgpr11
; GCN8-NEXT: {{ $}}
; GCN8-NEXT: S_NOP 7
; GCN8-NEXT: $sgpr5 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec
; GCN8-NEXT: S_NOP 7
; GCN8-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
; GCN8-NEXT: S_NOP 7
; GCN8-NEXT: SI_RETURN
;
; GCN16-LABEL: name: test_snop_padding
; GCN16: bb.0:
; GCN16-NEXT: successors: %bb.1(0x80000000)
; GCN16-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
; GCN16-NEXT: {{ $}}
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: S_BRANCH %bb.1
; GCN16-NEXT: {{ $}}
; GCN16-NEXT: bb.1:
; GCN16-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; GCN16-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
; GCN16-NEXT: {{ $}}
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec
; GCN16-NEXT: {{ $}}
; GCN16-NEXT: bb.2:
; GCN16-NEXT: successors: %bb.3(0x80000000)
; GCN16-NEXT: liveins: $sgpr6, $sgpr10_sgpr11
; GCN16-NEXT: {{ $}}
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: SI_SPILL_S32_SAVE killed $sgpr6, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: S_NOP 0
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: S_BRANCH %bb.3
; GCN16-NEXT: {{ $}}
; GCN16-NEXT: bb.3:
; GCN16-NEXT: liveins: $sgpr10_sgpr11
; GCN16-NEXT: {{ $}}
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: $sgpr5 = V_READFIRSTLANE_B32 [[V_MOV_B32_e32_]], implicit $exec
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: S_NOP 7
; GCN16-NEXT: SI_RETURN
bb.0:
liveins: $sgpr6, $sgpr10_sgpr11
S_BRANCH %bb.1
bb.1:
liveins: $sgpr6, $sgpr10_sgpr11
%0:vgpr_32 = V_MOV_B32_e32 10, implicit $exec
S_CBRANCH_EXECZ %bb.3, implicit $exec
bb.2:
liveins: $sgpr6, $sgpr10_sgpr11
SI_SPILL_S32_SAVE killed $sgpr6, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
S_NOP 0
renamable $sgpr6 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
%0:vgpr_32 = V_MOV_B32_e32 20, implicit $exec
S_BRANCH %bb.3
bb.3:
liveins: $sgpr10_sgpr11
$sgpr5 = V_READFIRSTLANE_B32 %0:vgpr_32, implicit $exec
S_STORE_DWORD_IMM $sgpr5, $sgpr10_sgpr11, 0, 0
SI_RETURN
...
Loading