Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,12 @@ class AMDGPULowerVGPREncoding {
/// instruction to extend it or drop the clause if it cannot be adjusted.
MachineBasicBlock::instr_iterator
handleClause(MachineBasicBlock::instr_iterator I);

/// Pick the insertion point for a mode change needed by instruction \p I.
/// If \p I immediately follows one or more program state instructions which
/// the mode change cannot coissue with, return an iterator positioned ahead of
/// them so that the caller inserts the mode change earlier, to encourage more
/// coissuing. Otherwise return \p I unchanged.
MachineBasicBlock::instr_iterator
handleCoissue(MachineBasicBlock::instr_iterator I);
};

bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
Expand Down Expand Up @@ -167,6 +173,7 @@ bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
int64_t OldModeBits = CurrentMode << ModeWidth;

I = handleClause(I);
I = handleCoissue(I);
MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
.addImm(NewMode | OldModeBits);

Expand Down Expand Up @@ -283,6 +290,31 @@ AMDGPULowerVGPREncoding::handleClause(MachineBasicBlock::instr_iterator I) {
return I;
}

/// Compute where a mode change required by \p I should be inserted.
///
/// If \p I is directly preceded by a contiguous run of barrier, waitcnt or
/// program state SALU instructions, the returned iterator points at the first
/// instruction of that run, so the caller inserts the mode change ahead of the
/// whole run. If there is no such run (or \p I is the end iterator or the
/// first instruction of its block), \p I is returned unchanged.
///
/// \param I Instruction that needs the new mode, or the end iterator.
/// \returns The iterator to insert the mode change before.
MachineBasicBlock::instr_iterator
AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
  // The end iterator cannot be dereferenced to reach its parent block; hand
  // it back untouched.
  if (I.isEnd())
    return I;

  // Instructions the insertion point may be moved above. S_SET_VGPR_MSB is
  // classified as program state by SIInstrInfo but is deliberately excluded
  // here -- presumably so a new mode change is never hoisted above an
  // earlier one (NOTE(review): confirm intent).
  auto IsHoistableOver = [this](const MachineInstr &MI) {
    const unsigned Opc = MI.getOpcode();
    return TII->isBarrier(Opc) || TII->isWaitcnt(Opc) ||
           (SIInstrInfo::isProgramStateSALU(MI) &&
            Opc != AMDGPU::S_SET_VGPR_MSB);
  };

  // Walk backwards while the instruction immediately before the candidate
  // insertion point belongs to the run. Testing the predecessor (rather than
  // stepping onto it first) guarantees we never return a position in front of
  // an instruction that is not part of the run, which would change the mode
  // seen by that unrelated instruction.
  const auto Begin = I->getParent()->begin();
  while (I != Begin && IsHoistableOver(*std::prev(I)))
    --I;
  return I;
}

bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
if (!ST.has1024AddressableVGPRs())
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return get(Opcode).TSFlags & SIInstrFlags::SALU;
}

/// Return true if the opcode of \p MI is one of S_DELAY_ALU, S_SET_VGPR_MSB
/// or ATOMIC_FENCE, the opcodes this helper classifies as "program state"
/// SALU instructions.
static bool isProgramStateSALU(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case AMDGPU::S_DELAY_ALU:
  case AMDGPU::S_SET_VGPR_MSB:
  case AMDGPU::ATOMIC_FENCE:
    return true;
  default:
    return false;
  }
}

/// Return true if the instruction descriptor of \p MI has the
/// SIInstrFlags::VALU bit set in its TSFlags.
static bool isVALU(const MachineInstr &MI) {
  const auto Flags = MI.getDesc().TSFlags;
  return (Flags & SIInstrFlags::VALU) != 0;
}
Expand Down
64 changes: 64 additions & 0 deletions llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=amdgpu-lower-vgpr-encoding -o - %s | FileCheck %s

# A wait and two barrier instructions separate a V_EXP on low VGPRs from a
# V_EXP on $vgpr256/$vgpr257. The checks expect S_SET_VGPR_MSB to be placed
# ahead of the whole wait/barrier run, directly after the first V_EXP.
---
name: multi
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
; CHECK-LABEL: name: multi
; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: S_WAIT_DSCNT 0
; CHECK-NEXT: S_BARRIER_SIGNAL_IMM -1
; CHECK-NEXT: S_BARRIER_WAIT -1
; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
$vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
S_WAIT_DSCNT 0
S_BARRIER_SIGNAL_IMM -1
S_BARRIER_WAIT -1
$vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
S_ENDPGM 0
...

# The block begins with two barrier instructions followed by a V_EXP on
# $vgpr256/$vgpr257. The checks expect S_SET_VGPR_MSB to become the first
# instruction of the block, ahead of both barriers.
---
name: high_vgprs
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
; CHECK-LABEL: name: high_vgprs
; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: S_BARRIER_SIGNAL_IMM -1
; CHECK-NEXT: S_BARRIER_WAIT -1
; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
S_BARRIER_SIGNAL_IMM -1
S_BARRIER_WAIT -1
$vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
S_ENDPGM 0
...

# No wait or barrier instructions sit between the two V_EXP instructions. The
# checks expect S_SET_VGPR_MSB directly before the V_EXP on
# $vgpr256/$vgpr257, i.e. the insertion point is not moved.
---
name: no_control
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
; CHECK-LABEL: name: no_control
; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
$vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
$vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
S_ENDPGM 0
...