Revert "[fastalloc] Support allocating specific register class in fas…
Browse files Browse the repository at this point in the history
…talloc"

This reverts commit 719658d.
Breaks a few things, see comments on https://reviews.llvm.org/D128437
There's disagreement about the best fix.
So let's keep HEAD green while discussions are happening.
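For background, the reverted patch let callers construct the fast register allocator with a register-class filter, so a target could allocate only some classes in a given run (for example, allocating AMDGPU SGPRs separately from VGPRs). Below is a minimal C++ sketch of that filter pattern, assuming a callback shape matching the ShouldAllocateClass check visible in the diff; it is illustrative, not this commit's verbatim code:

// Sketch only: the register-class filter consulted by the (now removed)
// RegAllocFast::shouldAllocateRegister in the diff below. Signatures are
// assumptions based on that diff, not verbatim LLVM API.
#include <functional>

namespace llvm {
class TargetRegisterInfo;
class TargetRegisterClass;
} // namespace llvm

using RegClassFilterFunc =
    std::function<bool(const llvm::TargetRegisterInfo &TRI,
                       const llvm::TargetRegisterClass &RC)>;

// Default filter: no restriction, every register class is allocated.
inline bool allocateAllRegClasses(const llvm::TargetRegisterInfo &,
                                  const llvm::TargetRegisterClass &) {
  return true;
}

A target would pass such a predicate when creating the allocator, so virtual registers whose class fails the filter are skipped and left for a later allocation pass.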
nico committed Jun 23, 2022
1 parent 7c9a382 commit 851a5ef
Showing 9 changed files with 236 additions and 272 deletions.
46 changes: 6 additions & 40 deletions llvm/lib/CodeGen/RegAllocFast.cpp
@@ -281,7 +281,6 @@ namespace {
   Register traceCopies(Register VirtReg) const;
   Register traceCopyChain(Register Reg) const;
 
-  bool shouldAllocateRegister(const Register Reg) const;
   int getStackSpaceFor(Register VirtReg);
   void spill(MachineBasicBlock::iterator Before, Register VirtReg,
              MCPhysReg AssignedReg, bool Kill, bool LiveOut);
@@ -301,12 +300,6 @@ char RegAllocFast::ID = 0;
 INITIALIZE_PASS(RegAllocFast, "regallocfast", "Fast Register Allocator", false,
                 false)
 
-bool RegAllocFast::shouldAllocateRegister(const Register Reg) const {
-  assert(Register::isVirtualRegister(Reg));
-  const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
-  return ShouldAllocateClass(*TRI, RC);
-}
-
 void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
   for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI)
     RegUnitStates[*UI] = NewState;
@@ -846,8 +839,6 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
   assert(MO.isUndef() && "expected undef use");
   Register VirtReg = MO.getReg();
   assert(Register::isVirtualRegister(VirtReg) && "Expected virtreg");
-  if (!shouldAllocateRegister(VirtReg))
-    return;
 
   LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
   MCPhysReg PhysReg;
@@ -873,8 +864,6 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
 /// (tied or earlyclobber) that may interfere with preassigned uses.
 void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
                                             Register VirtReg) {
-  if (!shouldAllocateRegister(VirtReg))
-    return;
   LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
   if (LRI != LiveVirtRegs.end()) {
     MCPhysReg PrevReg = LRI->PhysReg;
@@ -908,8 +897,6 @@ void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
 void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
                                  Register VirtReg, bool LookAtPhysRegUses) {
   assert(VirtReg.isVirtual() && "Not a virtual register");
-  if (!shouldAllocateRegister(VirtReg))
-    return;
   MachineOperand &MO = MI.getOperand(OpNum);
   LiveRegMap::iterator LRI;
   bool New;
@@ -960,8 +947,6 @@ void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
 void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
                               Register VirtReg) {
   assert(VirtReg.isVirtual() && "Not a virtual register");
-  if (!shouldAllocateRegister(VirtReg))
-    return;
   MachineOperand &MO = MI.getOperand(OpNum);
   LiveRegMap::iterator LRI;
   bool New;
@@ -986,13 +971,8 @@ void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
   Register Hint;
   if (MI.isCopy() && MI.getOperand(1).getSubReg() == 0) {
     Hint = MI.getOperand(0).getReg();
-    if (Hint.isVirtual()) {
-      assert(!shouldAllocateRegister(Hint));
-      Hint = Register();
-    } else {
-      assert(Hint.isPhysical() &&
-             "Copy destination should already be assigned");
-    }
+    assert(Hint.isPhysical() &&
+           "Copy destination should already be assigned");
   }
   allocVirtReg(MI, *LRI, Hint, false);
   if (LRI->Error) {
@@ -1100,8 +1080,6 @@ void RegAllocFast::addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts
   assert(RegClassDefCounts.size() == TRI->getNumRegClasses());
 
   if (Reg.isVirtual()) {
-    if (!shouldAllocateRegister(Reg))
-      return;
     const TargetRegisterClass *OpRC = MRI->getRegClass(Reg);
     for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses();
          RCIdx != RCIdxEnd; ++RCIdx) {
@@ -1161,8 +1139,6 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
     if (MO.isReg()) {
       Register Reg = MO.getReg();
       if (Reg.isVirtual()) {
-        if (!shouldAllocateRegister(Reg))
-          continue;
         if (MO.isDef()) {
           HasDef = true;
           HasVRegDef = true;
@@ -1226,7 +1202,7 @@
       }
 
       if (MO.isDef()) {
-        if (Reg.isVirtual() && shouldAllocateRegister(Reg))
+        if (Reg.isVirtual())
           DefOperandIndexes.push_back(I);
 
         addRegClassDefCounts(RegClassDefCounts, Reg);
@@ -1316,10 +1292,6 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
       Register Reg = MO.getReg();
       if (!Reg)
         continue;
-      if (Reg.isVirtual()) {
-        assert(!shouldAllocateRegister(Reg));
-        continue;
-      }
       assert(Reg.isPhysical());
       if (MRI->isReserved(Reg))
         continue;
@@ -1366,7 +1338,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
     if (!MO.isReg() || !MO.isUse())
       continue;
     Register Reg = MO.getReg();
-    if (!Reg.isVirtual() || !shouldAllocateRegister(Reg))
+    if (!Reg.isVirtual())
       continue;
 
     if (MO.isUndef()) {
@@ -1393,7 +1365,7 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
     if (!MO.isReg() || !MO.isUse())
      continue;
     Register Reg = MO.getReg();
-    if (!Reg.isVirtual() || !shouldAllocateRegister(Reg))
+    if (!Reg.isVirtual())
      continue;
 
     assert(MO.isUndef() && "Should only have undef virtreg uses left");
@@ -1416,10 +1388,6 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
     Register Reg = MO.getReg();
     if (!Reg)
       continue;
-    if (Reg.isVirtual()) {
-      assert(!shouldAllocateRegister(Reg));
-      continue;
-    }
     assert(Reg.isPhysical() && "should have register assigned");
 
     // We sometimes get odd situations like:
@@ -1449,8 +1417,6 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) {
   for (Register Reg : MI.getUsedDebugRegs()) {
     if (!Register::isVirtualRegister(Reg))
       continue;
-    if (!shouldAllocateRegister(Reg))
-      continue;
 
     // Already spilled to a stackslot?
     int SS = StackSlotForVirtReg[Reg];
@@ -1491,7 +1457,7 @@ void RegAllocFast::handleBundle(MachineInstr &MI) {
       continue;
 
     Register Reg = MO.getReg();
-    if (!Reg.isVirtual() || !shouldAllocateRegister(Reg))
+    if (!Reg.isVirtual())
       continue;
 
     DenseMap<Register, MCPhysReg>::iterator DI;
150 changes: 75 additions & 75 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
@@ -8,67 +8,67 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
 ; CHECK-NEXT: s_or_saveexec_b32 s4, -1
-; CHECK-NEXT: buffer_store_dword v8, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v16, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
 ; CHECK-NEXT: s_mov_b32 exec_lo, s4
-; CHECK-NEXT: v_mov_b32_e32 v15, v1
-; CHECK-NEXT: v_mov_b32_e32 v14, v2
-; CHECK-NEXT: v_mov_b32_e32 v13, v3
-; CHECK-NEXT: v_mov_b32_e32 v12, v4
-; CHECK-NEXT: v_mov_b32_e32 v11, v5
-; CHECK-NEXT: v_mov_b32_e32 v10, v6
-; CHECK-NEXT: v_mov_b32_e32 v9, v7
+; CHECK-NEXT: v_mov_b32_e32 v14, v1
+; CHECK-NEXT: v_mov_b32_e32 v13, v2
+; CHECK-NEXT: v_mov_b32_e32 v12, v3
+; CHECK-NEXT: v_mov_b32_e32 v11, v4
+; CHECK-NEXT: v_mov_b32_e32 v10, v5
+; CHECK-NEXT: v_mov_b32_e32 v9, v6
+; CHECK-NEXT: v_mov_b32_e32 v8, v7
 ; CHECK-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 killed $exec
-; CHECK-NEXT: v_mov_b32_e32 v1, v15
-; CHECK-NEXT: v_mov_b32_e32 v2, v14
-; CHECK-NEXT: v_mov_b32_e32 v3, v13
-; CHECK-NEXT: v_mov_b32_e32 v4, v12
-; CHECK-NEXT: v_mov_b32_e32 v5, v11
-; CHECK-NEXT: v_mov_b32_e32 v6, v10
-; CHECK-NEXT: v_mov_b32_e32 v7, v9
-; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
+; CHECK-NEXT: v_mov_b32_e32 v1, v14
+; CHECK-NEXT: v_mov_b32_e32 v2, v13
+; CHECK-NEXT: v_mov_b32_e32 v3, v12
+; CHECK-NEXT: v_mov_b32_e32 v4, v11
+; CHECK-NEXT: v_mov_b32_e32 v5, v10
+; CHECK-NEXT: v_mov_b32_e32 v6, v9
+; CHECK-NEXT: v_mov_b32_e32 v7, v8
+; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
 ; CHECK-NEXT: s_mov_b32 s8, 0
 ; CHECK-NEXT: s_mov_b32 s4, s8
 ; CHECK-NEXT: s_mov_b32 s5, s8
 ; CHECK-NEXT: s_mov_b32 s6, s8
 ; CHECK-NEXT: s_mov_b32 s7, s8
-; CHECK-NEXT: v_writelane_b32 v8, s4, 0
-; CHECK-NEXT: v_writelane_b32 v8, s5, 1
-; CHECK-NEXT: v_writelane_b32 v8, s6, 2
-; CHECK-NEXT: v_writelane_b32 v8, s7, 3
+; CHECK-NEXT: v_writelane_b32 v16, s4, 0
+; CHECK-NEXT: v_writelane_b32 v16, s5, 1
+; CHECK-NEXT: v_writelane_b32 v16, s6, 2
+; CHECK-NEXT: v_writelane_b32 v16, s7, 3
 ; CHECK-NEXT: s_mov_b32 s6, 0
 ; CHECK-NEXT: s_mov_b32 s4, s6
 ; CHECK-NEXT: s_mov_b32 s5, s6
 ; CHECK-NEXT: v_mov_b32_e32 v0, s4
 ; CHECK-NEXT: v_mov_b32_e32 v1, s5
-; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
-; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
 ; CHECK-NEXT: s_mov_b32 s4, exec_lo
-; CHECK-NEXT: v_writelane_b32 v8, s4, 4
+; CHECK-NEXT: v_writelane_b32 v16, s4, 4
 ; CHECK-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
 ; CHECK-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
 ; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
 ; CHECK-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
 ; CHECK-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
 ; CHECK-NEXT: buffer_load_dword v13, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
 ; CHECK-NEXT: buffer_load_dword v14, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
 ; CHECK-NEXT: buffer_load_dword v15, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
 ; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: v_mov_b32_e32 v7, v9
-; CHECK-NEXT: v_mov_b32_e32 v6, v10
-; CHECK-NEXT: v_mov_b32_e32 v5, v11
-; CHECK-NEXT: v_mov_b32_e32 v4, v12
-; CHECK-NEXT: v_mov_b32_e32 v3, v13
-; CHECK-NEXT: v_mov_b32_e32 v2, v14
-; CHECK-NEXT: v_mov_b32_e32 v1, v15
-; CHECK-NEXT: v_mov_b32_e32 v0, v16
+; CHECK-NEXT: v_mov_b32_e32 v7, v8
+; CHECK-NEXT: v_mov_b32_e32 v6, v9
+; CHECK-NEXT: v_mov_b32_e32 v5, v10
+; CHECK-NEXT: v_mov_b32_e32 v4, v11
+; CHECK-NEXT: v_mov_b32_e32 v3, v12
+; CHECK-NEXT: v_mov_b32_e32 v2, v13
+; CHECK-NEXT: v_mov_b32_e32 v1, v14
+; CHECK-NEXT: v_mov_b32_e32 v0, v15
 ; CHECK-NEXT: v_readfirstlane_b32 s12, v7
 ; CHECK-NEXT: v_readfirstlane_b32 s10, v6
 ; CHECK-NEXT: v_readfirstlane_b32 s9, v5
@@ -85,22 +85,22 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
 ; CHECK-NEXT: s_mov_b32 s17, s6
 ; CHECK-NEXT: s_mov_b32 s18, s5
 ; CHECK-NEXT: s_mov_b32 s19, s4
-; CHECK-NEXT: v_writelane_b32 v8, s12, 5
-; CHECK-NEXT: v_writelane_b32 v8, s13, 6
-; CHECK-NEXT: v_writelane_b32 v8, s14, 7
-; CHECK-NEXT: v_writelane_b32 v8, s15, 8
-; CHECK-NEXT: v_writelane_b32 v8, s16, 9
-; CHECK-NEXT: v_writelane_b32 v8, s17, 10
-; CHECK-NEXT: v_writelane_b32 v8, s18, 11
-; CHECK-NEXT: v_writelane_b32 v8, s19, 12
-; CHECK-NEXT: v_mov_b32_e32 v6, v9
-; CHECK-NEXT: v_mov_b32_e32 v7, v10
-; CHECK-NEXT: v_mov_b32_e32 v4, v11
-; CHECK-NEXT: v_mov_b32_e32 v5, v12
-; CHECK-NEXT: v_mov_b32_e32 v2, v13
-; CHECK-NEXT: v_mov_b32_e32 v3, v14
-; CHECK-NEXT: v_mov_b32_e32 v0, v15
-; CHECK-NEXT: v_mov_b32_e32 v1, v16
+; CHECK-NEXT: v_writelane_b32 v16, s12, 5
+; CHECK-NEXT: v_writelane_b32 v16, s13, 6
+; CHECK-NEXT: v_writelane_b32 v16, s14, 7
+; CHECK-NEXT: v_writelane_b32 v16, s15, 8
+; CHECK-NEXT: v_writelane_b32 v16, s16, 9
+; CHECK-NEXT: v_writelane_b32 v16, s17, 10
+; CHECK-NEXT: v_writelane_b32 v16, s18, 11
+; CHECK-NEXT: v_writelane_b32 v16, s19, 12
+; CHECK-NEXT: v_mov_b32_e32 v6, v8
+; CHECK-NEXT: v_mov_b32_e32 v7, v9
+; CHECK-NEXT: v_mov_b32_e32 v4, v10
+; CHECK-NEXT: v_mov_b32_e32 v5, v11
+; CHECK-NEXT: v_mov_b32_e32 v2, v12
+; CHECK-NEXT: v_mov_b32_e32 v3, v13
+; CHECK-NEXT: v_mov_b32_e32 v0, v14
+; CHECK-NEXT: v_mov_b32_e32 v1, v15
 ; CHECK-NEXT: s_mov_b64 s[4:5], s[12:13]
 ; CHECK-NEXT: s_mov_b64 s[10:11], s[14:15]
 ; CHECK-NEXT: s_mov_b64 s[8:9], s[16:17]
@@ -113,40 +113,40 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) {
 ; CHECK-NEXT: v_cmp_eq_u64_e64 s5, s[6:7], v[0:1]
 ; CHECK-NEXT: s_and_b32 s4, s4, s5
 ; CHECK-NEXT: s_and_saveexec_b32 s4, s4
-; CHECK-NEXT: v_writelane_b32 v8, s4, 13
+; CHECK-NEXT: v_writelane_b32 v16, s4, 13
 ; CHECK-NEXT: ; %bb.2: ; in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
-; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
-; CHECK-NEXT: v_readlane_b32 s4, v8, 13
-; CHECK-NEXT: v_readlane_b32 s8, v8, 5
-; CHECK-NEXT: v_readlane_b32 s9, v8, 6
-; CHECK-NEXT: v_readlane_b32 s10, v8, 7
-; CHECK-NEXT: v_readlane_b32 s11, v8, 8
-; CHECK-NEXT: v_readlane_b32 s12, v8, 9
-; CHECK-NEXT: v_readlane_b32 s13, v8, 10
-; CHECK-NEXT: v_readlane_b32 s14, v8, 11
-; CHECK-NEXT: v_readlane_b32 s15, v8, 12
-; CHECK-NEXT: v_readlane_b32 s16, v8, 0
-; CHECK-NEXT: v_readlane_b32 s17, v8, 1
-; CHECK-NEXT: v_readlane_b32 s18, v8, 2
-; CHECK-NEXT: v_readlane_b32 s19, v8, 3
+; CHECK-NEXT: v_readlane_b32 s4, v16, 13
+; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
+; CHECK-NEXT: v_readlane_b32 s8, v16, 5
+; CHECK-NEXT: v_readlane_b32 s9, v16, 6
+; CHECK-NEXT: v_readlane_b32 s10, v16, 7
+; CHECK-NEXT: v_readlane_b32 s11, v16, 8
+; CHECK-NEXT: v_readlane_b32 s12, v16, 9
+; CHECK-NEXT: v_readlane_b32 s13, v16, 10
+; CHECK-NEXT: v_readlane_b32 s14, v16, 11
+; CHECK-NEXT: v_readlane_b32 s15, v16, 12
+; CHECK-NEXT: v_readlane_b32 s16, v16, 0
+; CHECK-NEXT: v_readlane_b32 s17, v16, 1
+; CHECK-NEXT: v_readlane_b32 s18, v16, 2
+; CHECK-NEXT: v_readlane_b32 s19, v16, 3
 ; CHECK-NEXT: s_waitcnt vmcnt(0)
 ; CHECK-NEXT: image_sample v0, v[0:1], s[8:15], s[16:19] dmask:0x1 dim:SQ_RSRC_IMG_2D
 ; CHECK-NEXT: s_waitcnt vmcnt(0)
-; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill
+; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
 ; CHECK-NEXT: s_xor_b32 exec_lo, exec_lo, s4
 ; CHECK-NEXT: s_cbranch_execnz .LBB0_1
 ; CHECK-NEXT: ; %bb.3:
-; CHECK-NEXT: v_readlane_b32 s4, v8, 4
+; CHECK-NEXT: v_readlane_b32 s4, v16, 4
 ; CHECK-NEXT: s_mov_b32 exec_lo, s4
 ; CHECK-NEXT: ; %bb.4:
-; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
 ; CHECK-NEXT: ; implicit-def: $sgpr4
 ; CHECK-NEXT: v_mov_b32_e32 v1, s4
 ; CHECK-NEXT: v_mov_b32_e32 v2, s4
 ; CHECK-NEXT: v_mov_b32_e32 v3, s4
 ; CHECK-NEXT: s_or_saveexec_b32 s4, -1
-; CHECK-NEXT: buffer_load_dword v8, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT: buffer_load_dword v16, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
 ; CHECK-NEXT: s_mov_b32 exec_lo, s4
 ; CHECK-NEXT: s_waitcnt vmcnt(0)
 ; CHECK-NEXT: s_waitcnt_vscnt null, 0x0
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -422,11 +422,11 @@ bb.end: ; preds = %bb.then, %bb
 ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0:[0-9]+]]
 ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1:[0-9]+]]
 ; GCN-O0: [[INNER_LOOP:.LBB[0-9]+_[0-9]+]]:
-; GCN-O0: buffer_load_dword
 ; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_0]]
 ; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_BACK_EDGE_EXEC_SPILL_LANE_1]]
 ; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_0]]
 ; GCN-O0-DAG: v_readlane_b32 s{{[0-9]+}}, [[VGPR]], [[INNER_LOOP_IN_EXEC_SPILL_LANE_1]]
+; GCN-O0: buffer_load_dword
 ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_0:[0-9]+]]
 ; GCN-O0-DAG: v_writelane_b32 [[VGPR]], s{{[0-9]+}}, [[OUTER_LOOP_EXEC_SPILL_LANE_1:[0-9]+]]
 ; GCN-O0: s_or_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}]
