Skip to content

Commit

Permalink
[FastRA] Fix handling of bundled MIs
Browse files Browse the repository at this point in the history
The fast register allocator skips bundled MIs, because the main assignment
loop uses MachineBasicBlock::iterator (= MachineInstrBundleIterator).
This was causing a crash in SIInsertWaitcnts, which expects all
instructions to have registers assigned.

This patch makes sure that every instruction inside a bundle receives the
same register assignments that were made on the BUNDLE header.

Reviewed By: qcolombet

Differential Revision: https://reviews.llvm.org/D90369
  • Loading branch information
pdhaliwal-amd committed Dec 21, 2020
1 parent 3183add commit e2303a4
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 0 deletions.
43 changes: 43 additions & 0 deletions llvm/lib/CodeGen/RegAllocFast.cpp
Expand Up @@ -105,6 +105,9 @@ namespace {
/// available in a physical register.
LiveRegMap LiveVirtRegs;

/// Stores assigned virtual registers present in the bundle MI.
DenseMap<Register, MCPhysReg> BundleVirtRegsMap;

DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap;
/// List of DBG_VALUE that we encountered without the vreg being assigned
/// because they were placed after the last use of the vreg.
Expand Down Expand Up @@ -218,6 +221,8 @@ namespace {

void allocateInstruction(MachineInstr &MI);
void handleDebugValue(MachineInstr &MI);
void handleBundle(MachineInstr &MI);

bool usePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
bool definePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
bool displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg);
Expand Down Expand Up @@ -889,6 +894,9 @@ void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
LRI->LiveOut = false;
LRI->Reloaded = false;
}
if (MI.getOpcode() == TargetOpcode::BUNDLE) {
BundleVirtRegsMap[VirtReg] = PhysReg;
}
markRegUsedInInstr(PhysReg);
setPhysReg(MI, MO, PhysReg);
}
Expand Down Expand Up @@ -934,6 +942,10 @@ void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
}

LRI->LastUse = &MI;

if (MI.getOpcode() == TargetOpcode::BUNDLE) {
BundleVirtRegsMap[VirtReg] = LRI->PhysReg;
}
markRegUsedInInstr(LRI->PhysReg);
setPhysReg(MI, MO, LRI->PhysReg);
}
Expand Down Expand Up @@ -1064,6 +1076,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// operands and early-clobbers.

UsedInInstr.clear();
BundleVirtRegsMap.clear();

// Scan for special cases; Apply pre-assigned register defs to state.
bool HasPhysRegUse = false;
Expand Down Expand Up @@ -1382,6 +1395,30 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) {
LiveDbgValueMap[Reg].push_back(&MI);
}

/// Rewrite the virtual register operands of every instruction bundled under
/// the BUNDLE header \p MI so that they use the physical registers recorded
/// in BundleVirtRegsMap.
///
/// BundleVirtRegsMap is filled by defineVirtReg/useVirtReg while the BUNDLE
/// header itself is being allocated, so this must run after
/// allocateInstruction(MI) and before the next instruction is allocated
/// (allocateInstruction clears the map on entry).
///
/// \param MI the BUNDLE header instruction whose bundled instructions are to
///           be updated.
void RegAllocFast::handleBundle(MachineInstr &MI) {
  // Iterate at instruction granularity (instr_iterator) so that the
  // instructions inside the bundle are actually visited.
  MachineBasicBlock::instr_iterator BundledMI = MI.getIterator();
  const MachineBasicBlock::instr_iterator BlockEnd =
      MI.getParent()->instr_end();
  ++BundledMI;

  // Stop at the end of the block as well as at the end of the bundle: when
  // the bundle is the last thing in its block there is no following
  // instruction, and the end iterator must not be dereferenced.
  while (BundledMI != BlockEnd && BundledMI->isBundledWithPred()) {
    for (MachineOperand &MO : BundledMI->operands()) {
      if (!MO.isReg())
        continue;

      Register Reg = MO.getReg();
      if (!Reg.isVirtual())
        continue;

      // Every virtual register used inside the bundle is expected to have
      // been assigned while the BUNDLE header was allocated.
      auto DI = BundleVirtRegsMap.find(Reg);
      assert(DI != BundleVirtRegsMap.end() && "Unassigned virtual register");

      // Note: setPhysReg is handed the BUNDLE header, not the bundled
      // instruction; only the operand MO belongs to the bundled instruction.
      setPhysReg(MI, MO, DI->second);
    }

    ++BundledMI;
  }
}

void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
this->MBB = &MBB;
LLVM_DEBUG(dbgs() << "\nAllocating " << MBB);
Expand Down Expand Up @@ -1411,6 +1448,12 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
}

allocateInstruction(MI);

// Once BUNDLE header is assigned registers, same assignments need to be
// done for bundled MIs.
if (MI.getOpcode() == TargetOpcode::BUNDLE) {
handleBundle(MI);
}
}

LLVM_DEBUG(
Expand Down
26 changes: 26 additions & 0 deletions llvm/test/CodeGen/AMDGPU/fast-regalloc-bundles.mir
@@ -0,0 +1,26 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx902 -verify-machineinstrs -run-pass=regallocfast %s -o - | FileCheck -check-prefixes=GCN,XNACK,GCX9 %s

---
name: fast_regalloc_bundle_handling
tracksRegLiveness: true
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
- { id: 2, class: vgpr_32 }
body: |
bb.0:
; GCN-LABEL: name: fast_regalloc_bundle_handling
; GCN: renamable $vgpr0 = IMPLICIT_DEF
; GCN: renamable $vgpr1 = IMPLICIT_DEF
; GCN: renamable $vgpr0 = BUNDLE implicit killed renamable $vgpr0, implicit killed renamable $vgpr1, implicit $exec {
; GCN: renamable $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
; GCN: }
; GCN: S_ENDPGM 0, implicit killed renamable $vgpr0
%0 = IMPLICIT_DEF
%1 = IMPLICIT_DEF
%2 = BUNDLE implicit %0, implicit %1, implicit $exec {
%2 = V_ADD_U32_e32 %0, %1, implicit $exec
}
S_ENDPGM 0, implicit %2
...
@@ -0,0 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -stop-after=postrapseudos -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MIR %s


; MIR-LABEL: name: gws_barrier_offset0{{$}}
; MIR: BUNDLE implicit{{( killed)?( renamable)?}} $vgpr0, implicit $m0, implicit $exec {
; MIR-NEXT: DS_GWS_BARRIER renamable $vgpr0, 0, implicit $m0, implicit $exec :: (load 4 from custom "GWSResource")
; MIR-NEXT: S_WAITCNT 0
; MIR-NEXT: }
define amdgpu_kernel void @gws_barrier_offset0(i32 %val) #0 {
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 0)
ret void
}


declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { convergent inaccessiblememonly nounwind }

0 comments on commit e2303a4

Please sign in to comment.