Skip to content

Commit

Permalink
[AMDGPU] Don't remove VGPR to AGPR dead spills from frame info
Browse files Browse the repository at this point in the history
Removing dead frame indices for VGPR to AGPR spills is incorrect
when the frame index is shared by multiple objects, which may
occur due to stack slot coloring. The problem is that subsequent
code that processes the other object will assert because the stack
frame index is marked dead.

Removing dead frame indices is needed prior to stack slot
coloring, which is what happens with SGPR to VGPR spills. These
spills are lowered prior to stack slot coloring, but the VGPR
to AGPR spills are processed afterwards during the Prolog/Epilog
Inserter pass. This patch marks the VGPR to AGPR spill slot as
dead if the slot is not used by another object.

Differential Revision: https://reviews.llvm.org/D115996
  • Loading branch information
bcahoon committed Dec 23, 2021
1 parent 4c8becb commit d45a247
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 2 deletions.
12 changes: 11 additions & 1 deletion llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
Expand Up @@ -1167,11 +1167,13 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
if (SpillVGPRToAGPR) {
// To track the spill frame indices handled in this pass.
BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);

bool SeenDbgInstr = false;

for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
int FrameIndex;
if (MI.isDebugInstr())
SeenDbgInstr = true;

Expand All @@ -1191,10 +1193,18 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
SpillFIs.set(FI);
continue;
}
}
} else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
TII->isLoadFromStackSlot(MI, FrameIndex))
NonVGPRSpillFIs.set(FrameIndex);
}
}

// Stack slot coloring may assign different objets to the same stack slot.
// If not, then the VGPR to AGPR spill slot is dead.
for (unsigned FI : SpillFIs.set_bits())
if (!NonVGPRSpillFIs.test(FI))
FuncInfo->setVGPRToAGPRSpillDead(FI);

for (MachineBasicBlock &MBB : MF) {
for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
MBB.addLiveIn(Reg);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
Expand Up @@ -444,7 +444,7 @@ void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
MFI.setStackID(i, TargetStackID::Default);

for (auto &R : VGPRToAGPRSpills) {
if (R.second.FullyAllocated)
if (R.second.IsDead)
MFI.RemoveStackObject(R.first);
}
}
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
Expand Up @@ -465,6 +465,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
struct VGPRSpillToAGPR {
SmallVector<MCPhysReg, 32> Lanes;
bool FullyAllocated = false;
bool IsDead = false;
};

// Map WWM VGPR to a stack slot that is used to save/restore it in the
Expand Down Expand Up @@ -546,6 +547,12 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
: I->second.Lanes[Lane];
}

void setVGPRToAGPRSpillDead(int FrameIndex) {
auto I = VGPRToAGPRSpills.find(FrameIndex);
if (I != VGPRToAGPRSpills.end())
I->second.IsDead = true;
}

bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
unsigned NumLane) const;
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
Expand Down
88 changes: 88 additions & 0 deletions llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir
@@ -0,0 +1,88 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck %s

---
name: same_slot_agpr_sgpr
tracksRegLiveness: true
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4 }
machineFunctionInfo:
hasSpilledVGPRs: true
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
; CHECK-LABEL: name: same_slot_agpr_sgpr
; CHECK: liveins: $agpr0, $agpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF
; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
; CHECK-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit $sgpr4_sgpr5
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5
; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr0
; CHECK-NEXT: S_ENDPGM 0
$vgpr0 = IMPLICIT_DEF
SI_SPILL_AV32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
$sgpr4_sgpr5 = IMPLICIT_DEF
SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
---
name: diff_slot_agpr_sgpr
tracksRegLiveness: true
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
- { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4 }
machineFunctionInfo:
hasSpilledVGPRs: true
scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
; CHECK-LABEL: name: diff_slot_agpr_sgpr
; CHECK: liveins: $agpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF
; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
; CHECK-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit $sgpr4_sgpr5
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr0
; CHECK-NEXT: S_ENDPGM 0
$vgpr0 = IMPLICIT_DEF
SI_SPILL_AV32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
$sgpr4_sgpr5 = IMPLICIT_DEF
SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
S_ENDPGM 0
...
---
name: dead_vgpr_slot
tracksRegLiveness: true
stack:
- { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 }
machineFunctionInfo:
hasSpilledVGPRs: true
stackPtrOffsetReg: $sgpr32
body: |
bb.0:
; CHECK-LABEL: name: dead_vgpr_slot
; CHECK: liveins: $agpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF
; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
$vgpr0 = IMPLICIT_DEF
SI_SPILL_AV32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
S_ENDPGM 0
...

0 comments on commit d45a247

Please sign in to comment.