Skip to content

Commit

Permalink
[AMDGPU] Allow frontends to disable null export for pixel shaders
Browse files Browse the repository at this point in the history
Disable null export (for kills) when a frontend defines a pixel
shader as not exporting using amdgpu-color-export and
amdgpu-depth-export function attributes.
This allows the generation of export free pixel shaders.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D105683
  • Loading branch information
perlfu committed Jul 22, 2021
1 parent 16206d1 commit 9dcd75f
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 83 deletions.
20 changes: 15 additions & 5 deletions llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,19 @@ char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID;

static void generateEndPgm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
const SIInstrInfo *TII, bool IsPS) {
// "null export"
if (IsPS) {
const SIInstrInfo *TII, MachineFunction &MF) {
const Function &F = MF.getFunction();
bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS;

// Check if hardware has been configured to expect color or depth exports.
bool HasExports =
AMDGPU::getHasColorExport(F) || AMDGPU::getHasDepthExport(F);

// Prior to GFX10, hardware always expects at least one export for PS.
bool MustExport = !AMDGPU::isGFX10Plus(TII->getSubtarget());

if (IsPS && (HasExports || MustExport)) {
// Generate "null export" if hardware is expecting PS to export.
BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
.addImm(AMDGPU::Exp::ET_NULL)
.addReg(AMDGPU::VGPR0, RegState::Undef)
Expand All @@ -80,6 +90,7 @@ static void generateEndPgm(MachineBasicBlock &MBB,
.addImm(0) // compr
.addImm(0); // en
}

// s_endpgm
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
}
Expand Down Expand Up @@ -168,8 +179,7 @@ bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
BuildMI(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII->get(MovOpc),
ExecReg)
.addImm(0);
generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII,
MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS);
generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII, MF);

for (MachineInstr *Instr : EarlyTermInstrs) {
// Early termination in GS does nothing
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1344,6 +1344,17 @@ unsigned getInitialPSInputAddr(const Function &F) {
return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool getHasColorExport(const Function &F) {
  // Conservative fallback: when the "amdgpu-color-export" attribute is not
  // given, a pixel shader is reported as having color exports; every other
  // calling convention falls back to "no color export".
  const bool IsPixelShader = F.getCallingConv() == CallingConv::AMDGPU_PS;
  const int FallbackValue = IsPixelShader ? 1 : 0;
  const int ColorExport =
      getIntegerAttribute(F, "amdgpu-color-export", FallbackValue);
  return ColorExport != 0;
}

bool getHasDepthExport(const Function &F) {
  // Any non-zero "amdgpu-depth-export" value counts as "has depth export";
  // 0 is the fallback handed to getIntegerAttribute.
  const int DepthExport = getIntegerAttribute(F, "amdgpu-depth-export", 0);
  return DepthExport != 0;
}

bool isShader(CallingConv::ID cc) {
switch(cc) {
case CallingConv::AMDGPU_VS:
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -689,6 +689,10 @@ uint64_t encodeMsg(uint64_t MsgId,

unsigned getInitialPSInputAddr(const Function &F);

/// \returns true if the "amdgpu-color-export" function attribute of \p F is
/// non-zero. The fallback value passed to the attribute lookup is 1 for
/// functions using the AMDGPU_PS calling convention and 0 otherwise.
bool getHasColorExport(const Function &F);

/// \returns true if the "amdgpu-depth-export" function attribute of \p F is
/// non-zero. The fallback value passed to the attribute lookup is 0.
bool getHasDepthExport(const Function &F);

LLVM_READNONE
bool isShader(CallingConv::ID CC);

Expand Down
202 changes: 124 additions & 78 deletions llvm/test/CodeGen/AMDGPU/early-term.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX9 %s
# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX10 %s

--- |
define amdgpu_ps void @early_term_scc0_end_block() {
Expand All @@ -21,6 +21,12 @@
define amdgpu_cs void @early_term_scc0_cs() {
ret void
}

define amdgpu_ps void @early_term_no_export() #0 {
ret void
}

attributes #0 = { "amdgpu-color-export"="0" "amdgpu-depth-export"="0" }
...

---
Expand All @@ -30,21 +36,21 @@ liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
; GFX10-LABEL: name: early_term_scc0_end_block
; GFX10: bb.0:
; GFX10: successors: %bb.1(0x80000000), %bb.2(0x00000000)
; GFX10: liveins: $sgpr0, $sgpr1
; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GFX10: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GFX10: bb.1:
; GFX10: liveins: $vgpr0
; GFX10: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; GFX10: S_ENDPGM 0
; GFX10: bb.2:
; GFX10: $exec_lo = S_MOV_B32 0
; GFX10: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; GFX10: S_ENDPGM 0
; GCN-LABEL: name: early_term_scc0_end_block
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000), %bb.2(0x00000000)
; GCN: liveins: $sgpr0, $sgpr1
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: bb.1:
; GCN: liveins: $vgpr0
; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; GCN: S_ENDPGM 0
; GCN: bb.2:
; GCN: $exec = S_MOV_B64 0
; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; GCN: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
Expand All @@ -66,25 +72,25 @@ liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
; GFX10-LABEL: name: early_term_scc0_next_terminator
; GFX10: bb.0:
; GFX10: successors: %bb.2(0x80000000), %bb.3(0x00000000)
; GFX10: liveins: $sgpr0, $sgpr1
; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GFX10: S_CBRANCH_SCC0 %bb.3, implicit $scc
; GFX10: S_BRANCH %bb.2
; GFX10: bb.1:
; GFX10: successors: %bb.2(0x80000000)
; GFX10: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
; GFX10: bb.2:
; GFX10: liveins: $vgpr0
; GFX10: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; GFX10: S_ENDPGM 0
; GFX10: bb.3:
; GFX10: $exec_lo = S_MOV_B32 0
; GFX10: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; GFX10: S_ENDPGM 0
; GCN-LABEL: name: early_term_scc0_next_terminator
; GCN: bb.0:
; GCN: successors: %bb.2(0x80000000), %bb.3(0x00000000)
; GCN: liveins: $sgpr0, $sgpr1
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.3, implicit $scc
; GCN: S_BRANCH %bb.2
; GCN: bb.1:
; GCN: successors: %bb.2(0x80000000)
; GCN: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
; GCN: bb.2:
; GCN: liveins: $vgpr0
; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; GCN: S_ENDPGM 0
; GCN: bb.3:
; GCN: $exec = S_MOV_B64 0
; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; GCN: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.2
Expand Down Expand Up @@ -112,26 +118,26 @@ liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
; GFX10-LABEL: name: early_term_scc0_in_block
; GFX10: bb.0:
; GFX10: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; GFX10: liveins: $sgpr0, $sgpr1
; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GFX10: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GFX10: bb.3:
; GFX10: successors: %bb.1(0x80000000)
; GFX10: liveins: $vgpr0, $scc
; GFX10: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; GFX10: bb.1:
; GFX10: liveins: $vgpr0, $vgpr1
; GFX10: EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
; GFX10: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; GFX10: S_ENDPGM 0
; GFX10: bb.2:
; GFX10: $exec_lo = S_MOV_B32 0
; GFX10: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; GFX10: S_ENDPGM 0
; GCN-LABEL: name: early_term_scc0_in_block
; GCN: bb.0:
; GCN: successors: %bb.3(0x40000000), %bb.2(0x40000000)
; GCN: liveins: $sgpr0, $sgpr1
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: bb.3:
; GCN: successors: %bb.1(0x80000000)
; GCN: liveins: $vgpr0, $scc
; GCN: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
; GCN: bb.1:
; GCN: liveins: $vgpr0, $vgpr1
; GCN: EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; GCN: S_ENDPGM 0
; GCN: bb.2:
; GCN: $exec = S_MOV_B64 0
; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; GCN: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
Expand All @@ -155,15 +161,18 @@ liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
; GFX10-LABEL: name: early_term_scc0_gs
; GFX10: bb.0:
; GFX10: successors: %bb.1(0x80000000)
; GFX10: liveins: $sgpr0, $sgpr1
; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GFX10: bb.1:
; GFX10: liveins: $vgpr0
; GFX10: S_ENDPGM 0
; GCN-LABEL: name: early_term_scc0_gs
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000)
; GCN: liveins: $sgpr0, $sgpr1
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GCN: bb.1:
; GCN: liveins: $vgpr0
; GCN: S_ENDPGM 0
; GCN: bb.2:
; GCN: $exec = S_MOV_B64 0
; GCN: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
Expand All @@ -184,19 +193,55 @@ liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
; GFX10-LABEL: name: early_term_scc0_cs
; GFX10: bb.0:
; GFX10: successors: %bb.1(0x80000000), %bb.2(0x00000000)
; GFX10: liveins: $sgpr0, $sgpr1
; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GFX10: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GFX10: bb.1:
; GFX10: liveins: $vgpr0
; GFX10: S_ENDPGM 0
; GFX10: bb.2:
; GFX10: $exec_lo = S_MOV_B32 0
; GFX10: S_ENDPGM 0
; GCN-LABEL: name: early_term_scc0_cs
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000), %bb.2(0x00000000)
; GCN: liveins: $sgpr0, $sgpr1
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: bb.1:
; GCN: liveins: $vgpr0
; GCN: S_ENDPGM 0
; GCN: bb.2:
; GCN: $exec = S_MOV_B64 0
; GCN: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
bb.1:
liveins: $vgpr0
S_ENDPGM 0
...

---
name: early_term_no_export
tracksRegLiveness: true
liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
; GCN-LABEL: name: early_term_no_export
; GCN: bb.0:
; GCN: successors: %bb.1(0x80000000), %bb.2(0x00000000)
; GCN: liveins: $sgpr0, $sgpr1
; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
; GCN: bb.1:
; GCN: liveins: $vgpr0
; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
; GCN: S_ENDPGM 0
; GCN: bb.2:
; GCN: $exec = S_MOV_B64 0
; GFX9: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
; GFX10-NOT: EXP_DONE
; GCN: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
Expand All @@ -207,5 +252,6 @@ body: |
bb.1:
liveins: $vgpr0
EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
S_ENDPGM 0
...

0 comments on commit 9dcd75f

Please sign in to comment.