Skip to content

Commit

Permalink
[AMDGPU] Add pseudo instructions for SGPR spill to VGPR (#69923)
Browse files Browse the repository at this point in the history
For a future patch, is it important to keep the lowered SGPR
spills to be recognized as spill instructions during regalloc.
Directly lowering them into V_WRITELANE/V_READLANE won't allow
us to attach the SPILL flag to their instructions.

This patch introduces the pseudo instructions with the SGPRSpill
flag set in their Desc. They will get lowered to equivalent
instructions later during post RA pseudo expansion.
  • Loading branch information
cdevadas committed Oct 27, 2023
1 parent 9bcb30d commit f9cd789
Show file tree
Hide file tree
Showing 34 changed files with 852 additions and 821 deletions.
13 changes: 6 additions & 7 deletions llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,8 @@ class PrologEpilogSGPRSpillBuilder {
Register SubReg = NumSubRegs == 1
? SuperReg
: Register(TRI.getSubReg(SuperReg, SplitParts[I]));
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill[I].VGPR)
BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
Spill[I].VGPR)
.addReg(SubReg)
.addImm(Spill[I].Lane)
.addReg(Spill[I].VGPR, RegState::Undef);
Expand Down Expand Up @@ -319,7 +320,7 @@ class PrologEpilogSGPRSpillBuilder {
Register SubReg = NumSubRegs == 1
? SuperReg
: Register(TRI.getSubReg(SuperReg, SplitParts[I]));
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
.addReg(Spill[I].VGPR)
.addImm(Spill[I].Lane);
}
Expand Down Expand Up @@ -1554,12 +1555,10 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
// TODO: Handle this elsewhere at an early point. Walking through all MBBs
// here would be a bad heuristic. A better way should be by calling
// allocateWWMSpill during the regalloc pipeline whenever a physical
// register is allocated for the intended virtual registers. That will
// also help excluding the general use of WRITELANE/READLANE intrinsics
// that won't really need any such special handling.
if (MI.getOpcode() == AMDGPU::V_WRITELANE_B32)
// register is allocated for the intended virtual registers.
if (MI.getOpcode() == AMDGPU::SI_SPILL_S32_TO_VGPR)
MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg());
else if (MI.getOpcode() == AMDGPU::V_READLANE_B32)
else if (MI.getOpcode() == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg());
else if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
NeedExecCopyReservedReg = true;
Expand Down
18 changes: 16 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2111,6 +2111,14 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
break;

case AMDGPU::SI_SPILL_S32_TO_VGPR:
MI.setDesc(get(AMDGPU::V_WRITELANE_B32));
break;

case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
MI.setDesc(get(AMDGPU::V_READLANE_B32));
break;

case AMDGPU::V_MOV_B64_PSEUDO: {
Register Dst = MI.getOperand(0).getReg();
Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
Expand Down Expand Up @@ -4014,7 +4022,9 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
// However, executing them with EXEC = 0 causes them to operate on undefined
// data, which we avoid by returning true here.
if (Opcode == AMDGPU::V_READFIRSTLANE_B32 ||
Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32)
Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32 ||
Opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR ||
Opcode == AMDGPU::SI_SPILL_S32_TO_VGPR)
return true;

return false;
Expand Down Expand Up @@ -4408,7 +4418,9 @@ static bool shouldReadExec(const MachineInstr &MI) {
if (SIInstrInfo::isVALU(MI)) {
switch (MI.getOpcode()) {
case AMDGPU::V_READLANE_B32:
case AMDGPU::SI_RESTORE_S32_FROM_VGPR:
case AMDGPU::V_WRITELANE_B32:
case AMDGPU::SI_SPILL_S32_TO_VGPR:
return false;
}

Expand Down Expand Up @@ -9071,7 +9083,9 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
return InstructionUniformity::NeverUniform;

unsigned opcode = MI.getOpcode();
if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
if (opcode == AMDGPU::V_READLANE_B32 ||
opcode == AMDGPU::V_READFIRSTLANE_B32 ||
opcode == AMDGPU::SI_RESTORE_S32_FROM_VGPR)
return InstructionUniformity::AlwaysUniform;

if (isCopyInstr(MI)) {
Expand Down
22 changes: 22 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -875,6 +875,28 @@ defm SI_SPILL_S384 : SI_SPILL_SGPR <SReg_384>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;

let SGPRSpill = 1, VALU = 1, isConvergent = 1 in {
def SI_SPILL_S32_TO_VGPR : PseudoInstSI <(outs VGPR_32:$vdst),
(ins SReg_32:$src0, i32imm:$src1, VGPR_32:$vdst_in)> {
let Size = 4;
let FixedSize = 1;
let IsNeverUniform = 1;
let hasSideEffects = 0;
let mayLoad = 0;
let mayStore = 0;
let Constraints = "$vdst = $vdst_in";
}

def SI_RESTORE_S32_FROM_VGPR : PseudoInstSI <(outs SReg_32:$sdst),
(ins VGPR_32:$src0, i32imm:$src1)> {
let Size = 4;
let FixedSize = 1;
let hasSideEffects = 0;
let mayLoad = 0;
let mayStore = 0;
}
} // End SGPRSpill = 1, VALU = 1, isConvergent = 1

// VGPR or AGPR spill instructions. In case of AGPR spilling a temp register
// needs to be used and an extra instruction to move between VGPR and AGPR.
// UsesTmp adds to the total size of an expanded spill in this case.
Expand Down
12 changes: 6 additions & 6 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1769,7 +1769,7 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
// Mark the "old value of vgpr" input undef only if this is the first sgpr
// spill to this specific vgpr in the first basic block.
auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)
.addReg(SubReg, getKillRegState(UseKill))
.addImm(Spill.Lane)
.addReg(Spill.VGPR);
Expand Down Expand Up @@ -1815,8 +1815,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));

MachineInstrBuilder WriteLane =
BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
SB.TmpVGPR)
BuildMI(*SB.MBB, MI, SB.DL,
SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR)
.addReg(SubReg, SubKillState)
.addImm(i % PVD.PerVGPR)
.addReg(SB.TmpVGPR, TmpVGPRFlags);
Expand Down Expand Up @@ -1877,8 +1877,8 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
: Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));

SpilledReg Spill = VGPRSpills[i];
auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
SubReg)
auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
.addReg(Spill.VGPR)
.addImm(Spill.Lane);
if (SB.NumSubRegs > 1 && i == 0)
Expand Down Expand Up @@ -1911,7 +1911,7 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,

bool LastSubReg = (i + 1 == e);
auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
.addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
.addImm(i);
if (SB.NumSubRegs > 1 && i == 0)
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,8 @@ body: |
; GCN-NEXT: renamable $vgpr46 = COPY $vgpr1, implicit $exec
; GCN-NEXT: renamable $vgpr45 = COPY $vgpr0, implicit $exec
; GCN-NEXT: renamable $sgpr16_sgpr17 = IMPLICIT_DEF
; GCN-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
; GCN-NEXT: $vgpr40 = V_WRITELANE_B32 $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15, implicit $vgpr14_vgpr15 :: (store (s32) into %stack.1, addrspace 5)
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec, implicit killed $vgpr14_vgpr15 :: (store (s32) into %stack.1 + 4, addrspace 5)
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr10_vgpr11, implicit $vgpr10_vgpr11 :: (store (s32) into %stack.2, addrspace 5)
Expand Down Expand Up @@ -124,8 +124,8 @@ body: |
ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
renamable $sgpr16_sgpr17 = IMPLICIT_DEF
$vgpr40 = V_WRITELANE_B32 $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
$vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr40, implicit killed $sgpr30_sgpr31
$vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
$vgpr40 = SI_SPILL_S32_TO_VGPR killed $sgpr31, 1, $vgpr40, implicit killed $sgpr30_sgpr31
dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0
%8:vreg_64 = nofpexcept V_FMA_F64_e64 0, %7, 0, %6, 0, %5, 0, 0, implicit $mode, implicit $exec
ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/bb-prolog-spill-during-regalloc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
; REGALLOC-NEXT: renamable $sgpr6_sgpr7 = COPY $exec, implicit-def $exec
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 renamable $sgpr6_sgpr7, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; REGALLOC-NEXT: renamable $sgpr6_sgpr7 = S_XOR_B64 renamable $sgpr4_sgpr5, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
; REGALLOC-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr6, 0, $vgpr0, implicit-def $sgpr6_sgpr7, implicit $sgpr6_sgpr7
; REGALLOC-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr7, 1, $vgpr0, implicit killed $sgpr6_sgpr7
; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr6, 0, $vgpr0, implicit-def $sgpr6_sgpr7, implicit $sgpr6_sgpr7
; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr7, 1, $vgpr0, implicit killed $sgpr6_sgpr7
; REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr0, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; REGALLOC-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr4_sgpr5
; REGALLOC-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
Expand All @@ -34,13 +34,13 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
; REGALLOC-NEXT: {{ $}}
; REGALLOC-NEXT: $vgpr0 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
; REGALLOC-NEXT: $vgpr1 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
; REGALLOC-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5
; REGALLOC-NEXT: $sgpr5 = V_READLANE_B32 $vgpr0, 1
; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr4_sgpr5
; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def dead $scc, implicit $exec
; REGALLOC-NEXT: SI_SPILL_V32_SAVE killed $vgpr1, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
; REGALLOC-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; REGALLOC-NEXT: renamable $vgpr0 = V_WRITELANE_B32 killed $sgpr4, 2, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
; REGALLOC-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr5, 3, $vgpr0, implicit $sgpr4_sgpr5
; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 2, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
; REGALLOC-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr5, 3, $vgpr0, implicit $sgpr4_sgpr5
; REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr0, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; REGALLOC-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; REGALLOC-NEXT: S_CBRANCH_EXECZ %bb.4, implicit $exec
Expand All @@ -67,8 +67,8 @@ define i32 @prolog_spill(i32 %arg0, i32 %arg1, i32 %arg2) {
; REGALLOC-NEXT: bb.4.bb.3:
; REGALLOC-NEXT: $vgpr1 = SI_SPILL_WWM_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
; REGALLOC-NEXT: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
; REGALLOC-NEXT: $sgpr4 = V_READLANE_B32 $vgpr1, 2, implicit-def $sgpr4_sgpr5
; REGALLOC-NEXT: $sgpr5 = V_READLANE_B32 $vgpr1, 3
; REGALLOC-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 2, implicit-def $sgpr4_sgpr5
; REGALLOC-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr1, 3
; REGALLOC-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
; REGALLOC-NEXT: renamable $sgpr4 = S_MOV_B32 5
; REGALLOC-NEXT: renamable $vgpr0 = V_MUL_LO_U32_e64 killed $vgpr0, killed $sgpr4, implicit $exec
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/csr-sgpr-spill-live-ins.mir
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ body: |
; CHECK-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr42, 0, $vgpr0
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr43, 1, $vgpr0
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr46, 2, $vgpr0
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr47, 3, $vgpr0
; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr42, 0, $vgpr0
; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr43, 1, $vgpr0
; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr46, 2, $vgpr0
; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr47, 3, $vgpr0
; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/extend-wwm-virt-reg-liveness.mir
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ body: |
; GCN: liveins: $sgpr4, $vgpr2_vgpr3
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
; GCN-NEXT: S_NOP 0
; GCN-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0
; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
; GCN-NEXT: KILL killed renamable $vgpr0
Expand Down Expand Up @@ -77,9 +77,9 @@ body: |
; GCN-NEXT: successors: %bb.3(0x80000000)
; GCN-NEXT: liveins: $sgpr6, $vgpr0, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr6, 0, killed $vgpr0
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr6, 0, killed $vgpr0
; GCN-NEXT: S_NOP 0
; GCN-NEXT: $sgpr6 = V_READLANE_B32 $vgpr0, 0
; GCN-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
; GCN-NEXT: S_BRANCH %bb.3
; GCN-NEXT: {{ $}}
Expand Down Expand Up @@ -143,9 +143,9 @@ body: |
; GCN-NEXT: successors: %bb.2(0x80000000)
; GCN-NEXT: liveins: $sgpr4, $vgpr0, $sgpr10_sgpr11
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
; GCN-NEXT: S_NOP 0
; GCN-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0
; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 20, implicit $exec
; GCN-NEXT: S_BRANCH %bb.2
; GCN-NEXT: {{ $}}
Expand Down Expand Up @@ -245,9 +245,9 @@ body: |
; GCN-NEXT: bb.1:
; GCN-NEXT: liveins: $sgpr4, $vgpr0, $vgpr2_vgpr3
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, killed $vgpr0
; GCN-NEXT: renamable $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, killed $vgpr0
; GCN-NEXT: S_NOP 0
; GCN-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0
; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 10, implicit $exec
; GCN-NEXT: GLOBAL_STORE_DWORD $vgpr2_vgpr3, killed renamable $vgpr1, 0, 0, implicit $exec
; GCN-NEXT: KILL killed renamable $vgpr0
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/pei-scavenge-sgpr-carry-out.mir
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ body: |
; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr2
; CHECK-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
; CHECK-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc
; CHECK-NEXT: S_NOP 0, implicit-def $sgpr4, implicit-def $sgpr5, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $sgpr8, implicit-def $sgpr9, implicit-def $sgpr10, implicit-def $sgpr11, implicit-def $sgpr12, implicit-def $sgpr13, implicit-def $sgpr14, implicit-def $sgpr15, implicit-def $sgpr16, implicit-def $sgpr17, implicit-def $sgpr18, implicit-def $sgpr19, implicit-def $sgpr20, implicit-def $sgpr21, implicit-def $sgpr22, implicit-def $sgpr23, implicit-def $sgpr24, implicit-def $sgpr25, implicit-def $sgpr26, implicit-def $sgpr27, implicit-def $sgpr28, implicit-def $sgpr29, implicit-def $sgpr30, implicit-def $sgpr31, implicit-def $vcc
; CHECK-NEXT: $sgpr33 = S_LSHR_B32 $sgpr33, 6, implicit-def $scc
Expand All @@ -50,7 +50,7 @@ body: |
; CHECK-NEXT: $sgpr33 = S_ADD_I32 killed $sgpr33, -16384, implicit-def $scc
; CHECK-NEXT: $sgpr33 = S_LSHL_B32 $sgpr33, 6, implicit-def $scc
; CHECK-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $sgpr16, implicit $sgpr17, implicit $sgpr18, implicit $sgpr19, implicit $sgpr20, implicit $sgpr21, implicit $sgpr22, implicit $sgpr23, implicit $sgpr24, implicit $sgpr25, implicit $sgpr26, implicit $sgpr27, implicit $sgpr28, implicit $sgpr29, implicit $sgpr30, implicit $sgpr31
; CHECK-NEXT: $sgpr4 = V_READLANE_B32 $vgpr2, 0
; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
; CHECK-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
Expand Down
Loading

0 comments on commit f9cd789

Please sign in to comment.