diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h index 73d49002b7528..d55f88dd50e57 100644 --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -1028,12 +1028,6 @@ class TargetRegisterInfo : public MCRegisterInfo { return false; } - /// Process frame indices in reverse block order. This changes the behavior of - /// the RegScavenger passed to eliminateFrameIndex. If this is true targets - /// should scavengeRegisterBackwards in eliminateFrameIndex. New targets - /// should prefer reverse scavenging behavior. - virtual bool supportsBackwardScavenger() const { return false; } - /// This method must be overriden to eliminate abstract frame indices from /// instructions which may use them. The instruction referenced by the /// iterator contains an MO_FrameIndex operand which must be eliminated by diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 5b80a40a2f45b..be0dd7fe4a528 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -128,16 +128,8 @@ class PEI : public MachineFunctionPass { void replaceFrameIndices(MachineFunction &MF); void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, int &SPAdj); - // Frame indices in debug values are encoded in a target independent - // way with simply the frame index and offset rather than any - // target-specific addressing mode. bool replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI, unsigned OpIdx, int SPAdj = 0); - // Does same as replaceFrameIndices but using the backward MIR walk and - // backward register scavenger walk. Does not yet support call sequence - // processing. - void replaceFrameIndicesBackward(MachineBasicBlock *BB, MachineFunction &MF, - int &SPAdj); void insertPrologEpilogCode(MachineFunction &MF); void insertZeroCallUsedRegs(MachineFunction &MF); @@ -1446,70 +1438,6 @@ bool PEI::replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI, return false; } -void PEI::replaceFrameIndicesBackward(MachineBasicBlock *BB, - MachineFunction &MF, int &SPAdj) { - assert(MF.getSubtarget().getRegisterInfo() && - "getRegisterInfo() must be implemented!"); - - const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - - RS->enterBasicBlockEnd(*BB); - - for (MachineInstr &MI : make_early_inc_range(reverse(*BB))) { - - // Register scavenger backward step - MachineBasicBlock::iterator Step(MI); - for (unsigned i = 0; i != MI.getNumOperands(); ++i) { - if (!MI.getOperand(i).isFI()) - continue; - - if (replaceFrameIndexDebugInstr(MF, MI, i, SPAdj)) - continue; - - // If this instruction has a FrameIndex operand, we need to - // use that target machine register info object to eliminate - // it. - - // TRI.eliminateFrameIndex may lower the frame index to a sequence of - // instructions. It also can remove/change instructions passed by the - // iterator and invalidate the iterator. We have to take care of this. For - // that we support two iterators: *Step* - points to the position up to - // which the scavenger should scan by the next iteration to have liveness - // information up to date. *Curr* - keeps track of the correct RS->MBBI - - // the scan start point. It points to the currently processed instruction - // right before the frame lowering. - // - // ITERATORS WORK AS FOLLOWS: - // *Step* is shifted one step back right before the frame lowering and - // one step forward right after it. No matter how many instructions were - // inserted, *Step* will be right after the position which is going to be - // processed in the next iteration, thus, in the correct position for the - // scavenger to go up to. - // *Curr* is shifted one step forward right before calling - // TRI.eliminateFrameIndex and one step backward after. Thus, we make sure - // it points right to the position that is the correct starting point for - // the scavenger to scan. - MachineBasicBlock::iterator Curr = ++RS->getCurrentPosition(); - - // Shift back - Step--; - - TRI.eliminateFrameIndex(MI, SPAdj, i, RS); - // Restore to unify logic with a shift back that happens in the end of - // the outer loop. - Step++; - RS->skipTo(--Curr); - } - - // Shift it to make RS collect reg info up to the current instruction. - if (Step != BB->begin()) - Step--; - - // Update register states. - RS->backward(Step); - } -} - void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, int &SPAdj) { assert(MF.getSubtarget().getRegisterInfo() && @@ -1518,9 +1446,6 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - if (RS && TRI.supportsBackwardScavenger()) - return replaceFrameIndicesBackward(BB, MF, SPAdj); - if (RS && FrameIndexEliminationScavenging) RS->enterBasicBlock(*BB); @@ -1541,6 +1466,9 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, if (!MI.getOperand(i).isFI()) continue; + // Frame indices in debug values are encoded in a target independent + // way with simply the frame index and offset rather than any + // target-specific addressing mode. if (replaceFrameIndexDebugInstr(MF, MI, i, SPAdj)) continue; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 2b21e7e5794ee..249de0d3892b6 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1364,7 +1364,7 @@ void SIRegisterInfo::buildSpillLoadStore( // TODO: Clobbering SCC is not necessary for scratch instructions in the // entry. if (RS) { - SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI, false, 0, false); + SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0, false); // Piggy back on the liveness scan we just did see if SCC is dead. CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC); @@ -1385,7 +1385,7 @@ void SIRegisterInfo::buildSpillLoadStore( UseVGPROffset = true; if (RS) { - TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0); + TmpOffsetVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); } else { assert(LiveRegs); for (MCRegister Reg : AMDGPU::VGPR_32RegClass) { @@ -2249,8 +2249,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, // Convert to a swizzled stack address by scaling by the wave size. // In an entry function/kernel the offset is already swizzled. bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum)); - bool LiveSCC = - RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC); + bool LiveSCC = RS->isRegUsed(AMDGPU::SCC); const TargetRegisterClass *RC = IsSALU && !LiveSCC ? &AMDGPU::SReg_32RegClass : &AMDGPU::VGPR_32RegClass; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index d78d75a97dccf..0e260c8016fcf 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -149,10 +149,6 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { MachineBasicBlock &RestoreMBB, Register SGPR, RegScavenger *RS) const; - bool supportsBackwardScavenger() const override { - return true; - } - void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll index a607ccb6946a0..54fc4ddd72ff6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll @@ -155,52 +155,52 @@ define amdgpu_kernel void @kernel_caller_byval() { ; FLATSCR-NEXT: v_mov_b32_e32 v0, 0 ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 ; FLATSCR-NEXT: v_mov_b32_e32 v1, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 ; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_lo offset:8 +; FLATSCR-NEXT: s_mov_b32 s33, 0 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:8 +; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:72 +; FLATSCR-NEXT: s_mov_b32 s33, 0 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:16 -; FLATSCR-NEXT: s_mov_b32 s11, 0 -; FLATSCR-NEXT: s_mov_b32 s10, 0 -; FLATSCR-NEXT: s_mov_b32 s9, 0 -; FLATSCR-NEXT: s_mov_b32 s8, 0 -; FLATSCR-NEXT: s_mov_b32 s7, 0 -; FLATSCR-NEXT: s_mov_b32 s6, 0 -; FLATSCR-NEXT: s_mov_b32 s5, 0 -; FLATSCR-NEXT: s_mov_b32 s1, 0 -; FLATSCR-NEXT: s_mov_b32 s0, 0 -; FLATSCR-NEXT: s_mov_b32 s4, 0 -; FLATSCR-NEXT: s_mov_b32 s3, 0 -; FLATSCR-NEXT: s_mov_b32 s2, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 ; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s11 offset:24 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s10 offset:32 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s9 offset:40 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s8 offset:48 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s7 offset:56 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s6 offset:64 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s5 offset:72 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s1 offset:80 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 offset:88 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s4 offset:96 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s3 offset:104 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s2 offset:112 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_lo offset:120 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:128 -; FLATSCR-NEXT: s_mov_b32 s40, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s40 offset:8 -; FLATSCR-NEXT: s_mov_b32 s39, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s39 offset:16 -; FLATSCR-NEXT: s_mov_b32 s38, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[4:5], off, s38 offset:24 -; FLATSCR-NEXT: s_mov_b32 s37, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[6:7], off, s37 offset:32 -; FLATSCR-NEXT: s_mov_b32 s36, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s36 offset:40 -; FLATSCR-NEXT: s_mov_b32 s35, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s35 offset:48 -; FLATSCR-NEXT: s_mov_b32 s34, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[12:13], off, s34 offset:56 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:80 +; FLATSCR-NEXT: s_mov_b32 s33, 0 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:24 +; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:88 +; FLATSCR-NEXT: s_mov_b32 s33, 0 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:32 +; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:96 +; FLATSCR-NEXT: s_mov_b32 s33, 0 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:40 +; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:104 +; FLATSCR-NEXT: s_mov_b32 s33, 0 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:48 +; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:112 +; FLATSCR-NEXT: s_mov_b32 s33, 0 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:56 +; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:120 +; FLATSCR-NEXT: s_mov_b32 s33, 0 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:64 +; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s33 offset:128 +; FLATSCR-NEXT: s_mov_b32 s33, 0 +; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s33 offset:8 +; FLATSCR-NEXT: s_mov_b32 s33, 0 +; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s33 offset:16 +; FLATSCR-NEXT: s_mov_b32 s33, 0 +; FLATSCR-NEXT: scratch_load_dwordx2 v[4:5], off, s33 offset:24 +; FLATSCR-NEXT: s_mov_b32 s33, 0 +; FLATSCR-NEXT: scratch_load_dwordx2 v[6:7], off, s33 offset:32 +; FLATSCR-NEXT: s_mov_b32 s33, 0 +; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s33 offset:40 +; FLATSCR-NEXT: s_mov_b32 s33, 0 +; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s33 offset:48 +; FLATSCR-NEXT: s_mov_b32 s33, 0 +; FLATSCR-NEXT: scratch_load_dwordx2 v[12:13], off, s33 offset:56 ; FLATSCR-NEXT: s_mov_b32 s33, 0 ; FLATSCR-NEXT: scratch_load_dwordx2 v[14:15], off, s33 offset:64 ; FLATSCR-NEXT: s_movk_i32 s32, 0x50 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll index 1e4438526779a..cfa56f05fac7a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll @@ -308,10 +308,10 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload ; GCN-NEXT: v_bfe_u32 v0, v6, 1, 6 -; GCN-NEXT: v_lshrrev_b32_e64 v2, 6, s33 +; GCN-NEXT: v_lshrrev_b32_e64 v5, 6, s33 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GCN-NEXT: v_add_u32_e32 v2, 0x100, v2 -; GCN-NEXT: v_add_u32_e32 v0, v2, v0 +; GCN-NEXT: v_add_u32_e32 v5, 0x100, v5 +; GCN-NEXT: v_add_u32_e32 v0, v5, v0 ; GCN-NEXT: v_and_b32_e32 v1, 1, v6 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 4, v1 ; GCN-NEXT: s_waitcnt vmcnt(0) @@ -487,10 +487,10 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: buffer_load_dword v30, off, s[0:3], s33 offset:568 ; 4-byte Folded Reload ; GCN-NEXT: buffer_load_dword v31, off, s[0:3], s33 offset:572 ; 4-byte Folded Reload ; GCN-NEXT: v_and_b32_e32 v0, 31, v6 -; GCN-NEXT: v_lshrrev_b32_e64 v2, 6, s33 +; GCN-NEXT: v_lshrrev_b32_e64 v5, 6, s33 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 3, v0 -; GCN-NEXT: v_add_u32_e32 v2, 0x100, v2 -; GCN-NEXT: v_add_u32_e32 v1, v2, v0 +; GCN-NEXT: v_add_u32_e32 v5, 0x100, v5 +; GCN-NEXT: v_add_u32_e32 v1, v5, v0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_mov_b32_e32 v16, v20 ; GCN-NEXT: v_mov_b32_e32 v17, v21 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll index 7d0029e9efa56..8d0733ea0037f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -458,9 +458,9 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) { ; GFX9-NEXT: scratch_load_dword v1, off, s32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX9-NEXT: s_add_i32 vcc_lo, s32, 0x100 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x100 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX9-NEXT: v_add_u32_e32 v1, vcc_lo, v1 +; GFX9-NEXT: v_add_u32_e32 v1, vcc_hi, v1 ; GFX9-NEXT: v_mov_b32_e32 v2, 15 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x100 @@ -477,13 +477,13 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) { ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-NEXT: s_add_i32 s0, s32, 0x100 ; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 -; GFX10-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX10-NEXT: scratch_load_dword v3, off, s32 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v0, vcc_lo, v0 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100 ; GFX10-NEXT: v_add_nc_u32_e32 v1, vcc_lo, v1 ; GFX10-NEXT: scratch_store_dword v0, v2, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -585,16 +585,16 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { ; GFX940-NEXT: scratch_load_dword v0, off, off offset:4 sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_mov_b32_e32 v0, 15 -; GFX940-NEXT: s_movk_i32 vcc_lo, 0x4004 ; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4004 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: s_lshl_b32 s1, s0, 2 ; GFX940-NEXT: s_and_b32 s0, s0, 15 ; GFX940-NEXT: v_mov_b32_e32 v1, s1 ; GFX940-NEXT: s_lshl_b32 s0, s0, 2 -; GFX940-NEXT: scratch_store_dword v1, v0, vcc_lo sc0 sc1 +; GFX940-NEXT: scratch_store_dword v1, v0, vcc_hi sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_mov_b32_e32 v0, s0 +; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4004 ; GFX940-NEXT: scratch_load_dword v0, v0, vcc_hi sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_endpgm @@ -613,9 +613,9 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { ; GFX11-NEXT: s_lshl_b32 s0, s0, 2 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v2, s0 -; GFX11-NEXT: s_movk_i32 s0, 0x4004 -; GFX11-NEXT: scratch_store_b32 v0, v1, s0 dlc +; GFX11-NEXT: scratch_store_b32 v0, v1, vcc_lo dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_movk_i32 vcc_lo, 0x4004 ; GFX11-NEXT: scratch_load_b32 v0, v2, vcc_lo glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_endpgm @@ -681,9 +681,9 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() { ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX940-NEXT: v_mov_b32_e32 v2, 15 -; GFX940-NEXT: s_movk_i32 vcc_lo, 0x4004 +; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4004 ; GFX940-NEXT: v_sub_u32_e32 v0, 0, v0 -; GFX940-NEXT: scratch_store_dword v1, v2, vcc_lo sc0 sc1 +; GFX940-NEXT: scratch_store_dword v1, v2, vcc_hi sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4004 @@ -696,13 +696,13 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() { ; GFX11-NEXT: v_sub_nc_u32_e32 v1, 0, v0 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-NEXT: v_mov_b32_e32 v2, 15 -; GFX11-NEXT: s_movk_i32 s0, 0x4004 ; GFX11-NEXT: s_movk_i32 vcc_lo, 0x4004 -; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v1 ; GFX11-NEXT: scratch_load_b32 v3, off, off offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b32 v0, v2, s0 dlc +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-NEXT: scratch_store_b32 v0, v2, vcc_lo dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_movk_i32 vcc_lo, 0x4004 ; GFX11-NEXT: scratch_load_b32 v0, v1, vcc_lo offset:124 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_endpgm @@ -731,9 +731,9 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { ; GFX9-NEXT: scratch_load_dword v1, off, s32 offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 -; GFX9-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4004 ; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX9-NEXT: v_add_u32_e32 v1, vcc_lo, v1 +; GFX9-NEXT: v_add_u32_e32 v1, vcc_hi, v1 ; GFX9-NEXT: v_mov_b32_e32 v2, 15 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4004 @@ -750,13 +750,13 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 -; GFX10-NEXT: v_add_nc_u32_e32 v0, s0, v0 ; GFX10-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX10-NEXT: v_add_nc_u32_e32 v0, vcc_lo, v0 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX10-NEXT: v_add_nc_u32_e32 v1, vcc_lo, v1 ; GFX10-NEXT: scratch_store_dword v0, v2, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -771,9 +771,9 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX940-NEXT: v_mov_b32_e32 v2, 15 -; GFX940-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4004 ; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX940-NEXT: scratch_store_dword v1, v2, vcc_lo sc0 sc1 +; GFX940-NEXT: scratch_store_dword v1, v2, vcc_hi sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4004 @@ -787,14 +787,13 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v1 ; GFX11-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b32 v0, v2, s0 dlc +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-NEXT: scratch_store_b32 v0, v2, vcc_lo dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX11-NEXT: scratch_load_b32 v0, v1, vcc_lo glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir index 152e73d1b803b..1efddc59e0454 100644 --- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir @@ -36,8 +36,6 @@ body: | ; GFX908-NEXT: S_NOP 0 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: - ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 - ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; GFX90A-LABEL: name: agpr32_restore_clobber_scc ; GFX90A: bb.0: @@ -282,7 +280,7 @@ body: | ; GFX90A-NEXT: S_NOP 0 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: - ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5) ; GFX90A-NEXT: $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5) @@ -529,8 +527,6 @@ body: | ; GFX908-FLATSCR-NEXT: S_NOP 0 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: bb.2: - ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 - ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; GFX90A-FLATSCR-LABEL: name: agpr32_restore_clobber_scc ; GFX90A-FLATSCR: bb.0: @@ -776,7 +772,7 @@ body: | ; GFX90A-FLATSCR-NEXT: S_NOP 0 ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.2: - ; GFX90A-FLATSCR-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX90A-FLATSCR-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55 ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5) @@ -1014,7 +1010,6 @@ body: | S_NOP 0 bb.2: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 S_ENDPGM 0, amdgpu_allvgprs ... @@ -1053,8 +1048,6 @@ body: | ; GFX908-NEXT: S_NOP 0 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: - ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 - ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; GFX90A-LABEL: name: agpr64_restore_clobber_scc ; GFX90A: bb.0: @@ -1300,7 +1293,7 @@ body: | ; GFX90A-NEXT: S_NOP 0 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: - ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5) ; GFX90A-NEXT: $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5) @@ -1549,8 +1542,6 @@ body: | ; GFX908-FLATSCR-NEXT: S_NOP 0 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: bb.2: - ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 - ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; GFX90A-FLATSCR-LABEL: name: agpr64_restore_clobber_scc ; GFX90A-FLATSCR: bb.0: @@ -1796,7 +1787,7 @@ body: | ; GFX90A-FLATSCR-NEXT: S_NOP 0 ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.2: - ; GFX90A-FLATSCR-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX90A-FLATSCR-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55 ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5) @@ -2034,7 +2025,6 @@ body: | S_NOP 0 bb.2: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 S_ENDPGM 0, amdgpu_allvgprs ... @@ -2075,8 +2065,6 @@ body: | ; GFX908-NEXT: S_NOP 0 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: - ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 - ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; GFX90A-LABEL: name: agpr96_restore_clobber_scc ; GFX90A: bb.0: @@ -2323,7 +2311,7 @@ body: | ; GFX90A-NEXT: S_NOP 0 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: - ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5) ; GFX90A-NEXT: $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5) @@ -2574,8 +2562,6 @@ body: | ; GFX908-FLATSCR-NEXT: S_NOP 0 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: bb.2: - ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 - ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; GFX90A-FLATSCR-LABEL: name: agpr96_restore_clobber_scc ; GFX90A-FLATSCR: bb.0: @@ -2821,7 +2807,7 @@ body: | ; GFX90A-FLATSCR-NEXT: S_NOP 0 ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.2: - ; GFX90A-FLATSCR-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX90A-FLATSCR-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55 ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5) @@ -3059,7 +3045,6 @@ body: | S_NOP 0 bb.2: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 S_ENDPGM 0, amdgpu_allvgprs ... @@ -3096,8 +3081,6 @@ body: | ; GFX908-NEXT: S_NOP 0 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: - ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0 - ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; GFX90A-LABEL: name: agpr32_save_clobber_scc ; GFX90A: bb.0: @@ -3342,7 +3325,7 @@ body: | ; GFX90A-NEXT: S_NOP 0 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: - ; GFX90A-NEXT: liveins: $agpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5) ; GFX90A-NEXT: $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5) @@ -3589,8 +3572,6 @@ body: | ; GFX908-FLATSCR-NEXT: S_NOP 0 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: bb.2: - ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0 - ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; GFX90A-FLATSCR-LABEL: name: agpr32_save_clobber_scc ; GFX90A-FLATSCR: bb.0: @@ -3836,7 +3817,7 @@ body: | ; GFX90A-FLATSCR-NEXT: S_NOP 0 ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.2: - ; GFX90A-FLATSCR-NEXT: liveins: $agpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX90A-FLATSCR-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55 ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5) @@ -4074,7 +4055,6 @@ body: | S_NOP 0 bb.2: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0 S_ENDPGM 0, amdgpu_allvgprs ... @@ -4112,8 +4092,6 @@ body: | ; GFX908-NEXT: S_NOP 0 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: - ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 - ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; GFX90A-LABEL: name: agpr64_save_clobber_scc ; GFX90A: bb.0: @@ -4359,7 +4337,7 @@ body: | ; GFX90A-NEXT: S_NOP 0 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: - ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5) ; GFX90A-NEXT: $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5) @@ -4608,8 +4586,6 @@ body: | ; GFX908-FLATSCR-NEXT: S_NOP 0 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: bb.2: - ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 - ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; GFX90A-FLATSCR-LABEL: name: agpr64_save_clobber_scc ; GFX90A-FLATSCR: bb.0: @@ -4855,7 +4831,7 @@ body: | ; GFX90A-FLATSCR-NEXT: S_NOP 0 ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.2: - ; GFX90A-FLATSCR-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX90A-FLATSCR-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55 ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5) @@ -5093,7 +5069,6 @@ body: | S_NOP 0 bb.2: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 S_ENDPGM 0, amdgpu_allvgprs ... --- @@ -5132,8 +5107,6 @@ body: | ; GFX908-NEXT: S_NOP 0 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: bb.2: - ; GFX908-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 - ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; GFX90A-LABEL: name: agpr96_save_clobber_scc ; GFX90A: bb.0: @@ -5380,7 +5353,7 @@ body: | ; GFX90A-NEXT: S_NOP 0 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.2: - ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX90A-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5) ; GFX90A-NEXT: $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5) @@ -5631,8 +5604,6 @@ body: | ; GFX908-FLATSCR-NEXT: S_NOP 0 ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: bb.2: - ; GFX908-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 - ; GFX908-FLATSCR-NEXT: {{ $}} ; GFX908-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs ; GFX90A-FLATSCR-LABEL: name: agpr96_save_clobber_scc ; GFX90A-FLATSCR: bb.0: @@ -5878,7 +5849,7 @@ body: | ; GFX90A-FLATSCR-NEXT: S_NOP 0 ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: bb.2: - ; GFX90A-FLATSCR-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 + ; GFX90A-FLATSCR-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55 ; GFX90A-FLATSCR-NEXT: {{ $}} ; GFX90A-FLATSCR-NEXT: $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5) ; GFX90A-FLATSCR-NEXT: $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5) @@ -6116,6 +6087,5 @@ body: | S_NOP 0 bb.2: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1 S_ENDPGM 0, amdgpu_allvgprs ... diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll index 44e5a0b0000a7..0a234a5c4cb02 100644 --- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll @@ -495,25 +495,25 @@ define amdgpu_kernel void @vload2_private(i16 addrspace(1)* nocapture readonly % ; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 ; FLATSCR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; FLATSCR-NEXT: v_mov_b32_e32 v2, 0 -; FLATSCR-NEXT: s_mov_b32 s5, 0 -; FLATSCR-NEXT: s_mov_b32 s4, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 +; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 ; FLATSCR-NEXT: s_waitcnt lgkmcnt(0) ; FLATSCR-NEXT: global_load_ushort v0, v2, s[0:1] -; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_short off, v0, s5 offset:4 +; FLATSCR-NEXT: scratch_store_short off, v0, vcc_hi offset:4 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: global_load_ushort v0, v2, s[0:1] offset:2 +; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_short off, v0, s4 offset:6 +; FLATSCR-NEXT: scratch_store_short off, v0, vcc_hi offset:6 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: global_load_ushort v0, v2, s[0:1] offset:4 -; FLATSCR-NEXT: s_mov_b32 s0, 0 +; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_short off, v0, s0 offset:8 +; FLATSCR-NEXT: scratch_store_short off, v0, vcc_hi offset:8 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_lo offset:4 +; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 +; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 +; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 ; FLATSCR-NEXT: scratch_load_dword v1, off, vcc_hi offset:6 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] @@ -558,27 +558,29 @@ define amdgpu_kernel void @vload2_private(i16 addrspace(1)* nocapture readonly % ; FLATSCR_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; FLATSCR_GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v2, 0 -; FLATSCR_GFX10-NEXT: s_mov_b32 s5, 0 -; FLATSCR_GFX10-NEXT: s_mov_b32 s4, 0 ; FLATSCR_GFX10-NEXT: s_mov_b32 vcc_lo, 0 ; FLATSCR_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; FLATSCR_GFX10-NEXT: global_load_ushort v0, v2, s[0:1] ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) -; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s5 offset:4 +; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, vcc_lo offset:4 ; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FLATSCR_GFX10-NEXT: global_load_ushort v0, v2, s[0:1] offset:2 +; FLATSCR_GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; FLATSCR_GFX10-NEXT: s_mov_b32 vcc_lo, 0 ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) -; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s4 offset:6 +; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, vcc_lo offset:6 ; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; FLATSCR_GFX10-NEXT: global_load_ushort v0, v2, s[0:1] offset:4 ; FLATSCR_GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; FLATSCR_GFX10-NEXT: s_mov_b32 s1, 0 -; FLATSCR_GFX10-NEXT: s_mov_b32 s0, 0 +; FLATSCR_GFX10-NEXT: s_mov_b32 vcc_lo, 0 ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) -; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, s1 offset:8 +; FLATSCR_GFX10-NEXT: scratch_store_short off, v0, vcc_lo offset:8 ; FLATSCR_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; FLATSCR_GFX10-NEXT: s_clause 0x1 -; FLATSCR_GFX10-NEXT: scratch_load_dword v0, off, s0 offset:4 +; FLATSCR_GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; FLATSCR_GFX10-NEXT: s_mov_b32 vcc_lo, 0 +; FLATSCR_GFX10-NEXT: scratch_load_dword v0, off, vcc_lo offset:4 +; FLATSCR_GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; FLATSCR_GFX10-NEXT: s_mov_b32 vcc_lo, 0 ; FLATSCR_GFX10-NEXT: scratch_load_dword v1, off, vcc_lo offset:6 ; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) ; FLATSCR_GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll index 91f3e3581fa2c..9edfbefa7fcde 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-init.ll @@ -121,16 +121,16 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) { ; FLAT_SCR_OPT-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; FLAT_SCR_OPT-NEXT: s_mov_b32 s104, exec_lo ; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, 3 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s4, 0 -; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v72, s4 +; FLAT_SCR_OPT-NEXT: s_mov_b32 s105, 0 +; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v72, s105 ; FLAT_SCR_OPT-NEXT: s_waitcnt lgkmcnt(0) ; FLAT_SCR_OPT-NEXT: v_writelane_b32 v72, s2, 0 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s4, 4 +; FLAT_SCR_OPT-NEXT: s_mov_b32 s105, 4 ; FLAT_SCR_OPT-NEXT: v_writelane_b32 v72, s3, 1 -; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v72, s4 ; 4-byte Folded Spill +; FLAT_SCR_OPT-NEXT: scratch_store_dword off, v72, s105 ; 4-byte Folded Spill ; FLAT_SCR_OPT-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_OPT-NEXT: s_mov_b32 s4, 0 -; FLAT_SCR_OPT-NEXT: scratch_load_dword v72, off, s4 +; FLAT_SCR_OPT-NEXT: s_mov_b32 s105, 0 +; FLAT_SCR_OPT-NEXT: scratch_load_dword v72, off, s105 ; FLAT_SCR_OPT-NEXT: s_waitcnt vmcnt(0) ; FLAT_SCR_OPT-NEXT: s_waitcnt_depctr 0xffe3 ; FLAT_SCR_OPT-NEXT: s_mov_b32 exec_lo, s104 @@ -255,16 +255,16 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) { ; FLAT_SCR_ARCH-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 ; FLAT_SCR_ARCH-NEXT: s_mov_b32 s104, exec_lo ; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, 3 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s4, 0 -; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v72, s4 +; FLAT_SCR_ARCH-NEXT: s_mov_b32 s105, 0 +; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v72, s105 ; FLAT_SCR_ARCH-NEXT: s_waitcnt lgkmcnt(0) ; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v72, s2, 0 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s4, 4 +; FLAT_SCR_ARCH-NEXT: s_mov_b32 s105, 4 ; FLAT_SCR_ARCH-NEXT: v_writelane_b32 v72, s3, 1 -; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v72, s4 ; 4-byte Folded Spill +; FLAT_SCR_ARCH-NEXT: scratch_store_dword off, v72, s105 ; 4-byte Folded Spill ; FLAT_SCR_ARCH-NEXT: s_waitcnt_depctr 0xffe3 -; FLAT_SCR_ARCH-NEXT: s_mov_b32 s4, 0 -; FLAT_SCR_ARCH-NEXT: scratch_load_dword v72, off, s4 +; FLAT_SCR_ARCH-NEXT: s_mov_b32 s105, 0 +; FLAT_SCR_ARCH-NEXT: scratch_load_dword v72, off, s105 ; FLAT_SCR_ARCH-NEXT: s_waitcnt vmcnt(0) ; FLAT_SCR_ARCH-NEXT: s_waitcnt_depctr 0xffe3 ; FLAT_SCR_ARCH-NEXT: s_mov_b32 exec_lo, s104 diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll index d00a446ae0397..bb3133d99a157 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -21,13 +21,13 @@ define amdgpu_kernel void @zero_init_kernel() { ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_mov_b32 s1, 0 -; GFX9-NEXT: s_mov_b32 s0, 0 -; GFX9-NEXT: s_mov_b32 vcc_lo, 0 ; GFX9-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:52 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:36 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:20 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:52 +; GFX9-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:36 +; GFX9-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:20 +; GFX9-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:4 ; GFX9-NEXT: s_endpgm ; @@ -74,7 +74,6 @@ define amdgpu_kernel void @zero_init_kernel() { ; GFX9-PAL-NEXT: s_mov_b32 s2, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; GFX9-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-PAL-NEXT: s_mov_b32 vcc_lo, 0 ; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff @@ -87,11 +86,12 @@ define amdgpu_kernel void @zero_init_kernel() { ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:52 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:36 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:20 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:52 +; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:36 +; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:20 +; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:4 ; GFX9-PAL-NEXT: s_endpgm ; @@ -129,12 +129,15 @@ define amdgpu_kernel void @zero_init_kernel() { ; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX1010-PAL-NEXT: s_mov_b32 s2, 0 -; GFX1010-PAL-NEXT: s_mov_b32 s1, 0 -; GFX1010-PAL-NEXT: s_mov_b32 s0, 0 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:52 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:36 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:20 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:52 +; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:36 +; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:20 +; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:4 ; GFX1010-PAL-NEXT: s_endpgm ; @@ -968,8 +971,8 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: scratch_load_dword v0, off, s4 offset:4 glc +; GFX9-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: s_mov_b32 s1, s0 @@ -979,13 +982,13 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() { ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_mov_b32 s1, 0 -; GFX9-NEXT: s_mov_b32 s0, 0 -; GFX9-NEXT: s_mov_b32 vcc_lo, 0 ; GFX9-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:260 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:276 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:292 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:260 +; GFX9-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:276 +; GFX9-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:292 +; GFX9-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:308 ; GFX9-NEXT: s_endpgm ; @@ -1035,15 +1038,13 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() { ; GFX9-PAL-NEXT: s_getpc_b64 s[2:3] ; GFX9-PAL-NEXT: s_mov_b32 s2, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 -; GFX9-PAL-NEXT: s_mov_b32 s4, 0 -; GFX9-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-PAL-NEXT: s_mov_b32 vcc_lo, 0 ; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-PAL-NEXT: s_mov_b32 s0, 0 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s1 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-PAL-NEXT: scratch_load_dword v0, off, s4 offset:4 glc +; GFX9-PAL-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_mov_b32 s1, s0 ; GFX9-PAL-NEXT: s_mov_b32 s2, s0 @@ -1052,11 +1053,13 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() { ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:260 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:276 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:292 +; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:260 +; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:276 +; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:292 +; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:308 ; GFX9-PAL-NEXT: s_endpgm ; @@ -1087,9 +1090,9 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() { ; GFX1010-PAL-NEXT: s_addc_u32 s3, s3, 0 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 -; GFX1010-PAL-NEXT: s_mov_b32 s4, 0 +; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 ; GFX1010-PAL-NEXT: s_mov_b32 s0, 0 -; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s4 offset:4 glc dlc +; GFX1010-PAL-NEXT: scratch_load_dword v0, off, vcc_lo offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: s_mov_b32 s1, s0 ; GFX1010-PAL-NEXT: s_mov_b32 s2, s0 @@ -1098,13 +1101,16 @@ define amdgpu_kernel void @zero_init_small_offset_kernel() { ; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX1010-PAL-NEXT: s_mov_b32 s2, 0 -; GFX1010-PAL-NEXT: s_mov_b32 s1, 0 -; GFX1010-PAL-NEXT: s_mov_b32 s0, 0 ; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:260 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:276 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:292 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:260 +; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:276 +; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:292 +; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:308 ; GFX1010-PAL-NEXT: s_endpgm ; @@ -1882,13 +1888,13 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 -; GFX10-NEXT: s_add_i32 s0, s32, 0x100 ; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100 -; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, s0 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-NEXT: v_lshl_add_u32 v1, v1, 2, vcc_lo +; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, vcc_lo +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x100 ; GFX10-NEXT: scratch_load_dword v3, off, s32 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_lshl_add_u32 v1, v1, 2, vcc_lo ; GFX10-NEXT: scratch_store_dword v0, v2, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc @@ -1947,13 +1953,13 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) { ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_and_b32_e32 v1, 15, v0 -; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x100 ; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x100 -; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v1, 2, vcc_lo +; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, vcc_lo +; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x100 ; GFX10-PAL-NEXT: scratch_load_dword v3, off, s32 glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v1, 2, vcc_lo ; GFX10-PAL-NEXT: scratch_store_dword v0, v2, off ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_load_dword v0, v1, off glc dlc @@ -2009,8 +2015,8 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() { ; GFX9: ; %bb.0: ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: scratch_load_dword v0, off, s4 offset:4 glc +; GFX9-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: s_mov_b32 s1, s0 @@ -2020,13 +2026,13 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() { ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_movk_i32 s1, 0x4004 -; GFX9-NEXT: s_movk_i32 s0, 0x4004 -; GFX9-NEXT: s_movk_i32 vcc_lo, 0x4004 ; GFX9-NEXT: s_movk_i32 vcc_hi, 0x4004 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi +; GFX9-NEXT: s_movk_i32 vcc_hi, 0x4004 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16 +; GFX9-NEXT: s_movk_i32 vcc_hi, 0x4004 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32 +; GFX9-NEXT: s_movk_i32 vcc_hi, 0x4004 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 ; GFX9-NEXT: s_endpgm ; @@ -2047,12 +2053,12 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() { ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-NEXT: s_movk_i32 s2, 0x4004 -; GFX10-NEXT: s_movk_i32 s1, 0x4004 -; GFX10-NEXT: s_movk_i32 s0, 0x4004 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s2 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:16 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo +; GFX10-NEXT: s_movk_i32 vcc_lo, 0x4004 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16 +; GFX10-NEXT: s_movk_i32 vcc_lo, 0x4004 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 +; GFX10-NEXT: s_movk_i32 vcc_lo, 0x4004 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 ; GFX10-NEXT: s_endpgm ; @@ -2067,13 +2073,12 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() { ; GFX11-NEXT: s_mov_b32 s3, s0 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-NEXT: s_movk_i32 s2, 0x4004 -; GFX11-NEXT: s_movk_i32 s1, 0x4004 -; GFX11-NEXT: s_movk_i32 s0, 0x4004 -; GFX11-NEXT: s_clause 0x3 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 offset:16 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], vcc_lo +; GFX11-NEXT: s_movk_i32 vcc_lo, 0x4004 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:16 +; GFX11-NEXT: s_movk_i32 vcc_lo, 0x4004 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:32 +; GFX11-NEXT: s_movk_i32 vcc_lo, 0x4004 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:48 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm @@ -2083,15 +2088,13 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() { ; GFX9-PAL-NEXT: s_getpc_b64 s[2:3] ; GFX9-PAL-NEXT: s_mov_b32 s2, s0 ; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 -; GFX9-PAL-NEXT: s_mov_b32 s4, 0 +; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-PAL-NEXT: s_movk_i32 vcc_lo, 0x4004 -; GFX9-PAL-NEXT: s_movk_i32 vcc_hi, 0x4004 ; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s1 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-PAL-NEXT: scratch_load_dword v0, off, s4 offset:4 glc +; GFX9-PAL-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_mov_b32 s1, s0 ; GFX9-PAL-NEXT: s_mov_b32 s2, s0 @@ -2100,11 +2103,13 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() { ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-PAL-NEXT: s_movk_i32 s1, 0x4004 -; GFX9-PAL-NEXT: s_movk_i32 s0, 0x4004 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 +; GFX9-PAL-NEXT: s_movk_i32 vcc_hi, 0x4004 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi +; GFX9-PAL-NEXT: s_movk_i32 vcc_hi, 0x4004 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16 +; GFX9-PAL-NEXT: s_movk_i32 vcc_hi, 0x4004 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32 +; GFX9-PAL-NEXT: s_movk_i32 vcc_hi, 0x4004 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 ; GFX9-PAL-NEXT: s_endpgm ; @@ -2118,13 +2123,13 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() { ; GFX940-NEXT: s_mov_b32 s3, s0 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NEXT: s_movk_i32 s1, 0x4004 -; GFX940-NEXT: s_movk_i32 s0, 0x4004 -; GFX940-NEXT: s_movk_i32 vcc_lo, 0x4004 ; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4004 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi +; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4004 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16 +; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4004 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32 +; GFX940-NEXT: s_movk_i32 vcc_hi, 0x4004 ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 ; GFX940-NEXT: s_endpgm ; @@ -2139,9 +2144,9 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() { ; GFX1010-PAL-NEXT: s_addc_u32 s3, s3, 0 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 -; GFX1010-PAL-NEXT: s_mov_b32 s4, 0 +; GFX1010-PAL-NEXT: s_mov_b32 vcc_lo, 0 ; GFX1010-PAL-NEXT: s_mov_b32 s0, 0 -; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s4 offset:4 glc dlc +; GFX1010-PAL-NEXT: scratch_load_dword v0, off, vcc_lo offset:4 glc dlc ; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX1010-PAL-NEXT: s_mov_b32 s1, s0 ; GFX1010-PAL-NEXT: s_mov_b32 s2, s0 @@ -2150,13 +2155,16 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() { ; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX1010-PAL-NEXT: s_movk_i32 s2, 0x4004 -; GFX1010-PAL-NEXT: s_movk_i32 s1, 0x4004 -; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x4004 ; GFX1010-PAL-NEXT: s_movk_i32 vcc_lo, 0x4004 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:16 -; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo +; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-PAL-NEXT: s_movk_i32 vcc_lo, 0x4004 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16 +; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-PAL-NEXT: s_movk_i32 vcc_lo, 0x4004 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 +; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-PAL-NEXT: s_movk_i32 vcc_lo, 0x4004 ; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 ; GFX1010-PAL-NEXT: s_endpgm ; @@ -2182,12 +2190,12 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() { ; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX1030-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX1030-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX1030-PAL-NEXT: s_movk_i32 s2, 0x4004 -; GFX1030-PAL-NEXT: s_movk_i32 s1, 0x4004 -; GFX1030-PAL-NEXT: s_movk_i32 s0, 0x4004 -; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 -; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:16 -; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo +; GFX1030-PAL-NEXT: s_movk_i32 vcc_lo, 0x4004 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16 +; GFX1030-PAL-NEXT: s_movk_i32 vcc_lo, 0x4004 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 +; GFX1030-PAL-NEXT: s_movk_i32 vcc_lo, 0x4004 ; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 ; GFX1030-PAL-NEXT: s_endpgm ; @@ -2202,13 +2210,12 @@ define amdgpu_kernel void @zero_init_large_offset_kernel() { ; GFX11-PAL-NEXT: s_mov_b32 s3, s0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-PAL-NEXT: s_movk_i32 s2, 0x4004 -; GFX11-PAL-NEXT: s_movk_i32 s1, 0x4004 -; GFX11-PAL-NEXT: s_movk_i32 s0, 0x4004 -; GFX11-PAL-NEXT: s_clause 0x3 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s2 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s1 offset:16 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], vcc_lo +; GFX11-PAL-NEXT: s_movk_i32 vcc_lo, 0x4004 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:16 +; GFX11-PAL-NEXT: s_movk_i32 vcc_lo, 0x4004 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:32 +; GFX11-PAL-NEXT: s_movk_i32 vcc_lo, 0x4004 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:48 ; GFX11-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-PAL-NEXT: s_endpgm @@ -2235,13 +2242,13 @@ define void @zero_init_large_offset_foo() { ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-NEXT: s_add_i32 s1, s32, 0x4004 -; GFX9-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX9-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4004 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s1 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4004 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4004 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32 +; GFX9-NEXT: s_add_i32 vcc_hi, s32, 0x4004 ; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -2253,6 +2260,7 @@ define void @zero_init_large_offset_foo() { ; GFX10-NEXT: scratch_load_dword v0, off, s32 offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_mov_b32 s0, 0 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX10-NEXT: s_mov_b32 s1, s0 ; GFX10-NEXT: s_mov_b32 s2, s0 ; GFX10-NEXT: s_mov_b32 s3, s0 @@ -2260,13 +2268,12 @@ define void @zero_init_large_offset_foo() { ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s2 ; GFX10-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-NEXT: s_add_i32 s2, s32, 0x4004 -; GFX10-NEXT: s_add_i32 s1, s32, 0x4004 -; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16 +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 ; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s2 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:16 -; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 ; GFX10-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -2278,20 +2285,18 @@ define void @zero_init_large_offset_foo() { ; GFX11-NEXT: scratch_load_b32 v0, off, s32 offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, 0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX11-NEXT: s_mov_b32 s1, s0 ; GFX11-NEXT: s_mov_b32 s2, s0 ; GFX11-NEXT: s_mov_b32 s3, s0 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-NEXT: s_add_i32 s2, s32, 0x4004 -; GFX11-NEXT: s_add_i32 s1, s32, 0x4004 -; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], vcc_lo +; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:16 +; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:32 ; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX11-NEXT: s_clause 0x3 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 offset:16 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:48 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -2309,13 +2314,13 @@ define void @zero_init_large_offset_foo() { ; GFX9-PAL-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-PAL-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX9-PAL-NEXT: s_add_i32 s1, s32, 0x4004 -; GFX9-PAL-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX9-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4004 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi +; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4004 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16 +; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4004 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32 +; GFX9-PAL-NEXT: s_add_i32 vcc_hi, s32, 0x4004 ; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_setpc_b64 s[30:31] @@ -2331,41 +2336,69 @@ define void @zero_init_large_offset_foo() { ; GFX940-NEXT: s_mov_b32 s3, s0 ; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX940-NEXT: v_mov_b64_e32 v[2:3], s[2:3] -; GFX940-NEXT: s_add_i32 s1, s32, 0x4004 -; GFX940-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX940-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4004 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s1 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:16 -; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi +; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4004 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:16 +; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4004 +; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:32 +; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4004 ; GFX940-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:48 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-PAL-LABEL: zero_init_large_offset_foo: -; GFX10-PAL: ; %bb.0: -; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-PAL-NEXT: scratch_load_dword v0, off, s32 offset:4 glc dlc -; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-PAL-NEXT: s_mov_b32 s0, 0 -; GFX10-PAL-NEXT: s_mov_b32 s1, s0 -; GFX10-PAL-NEXT: s_mov_b32 s2, s0 -; GFX10-PAL-NEXT: s_mov_b32 s3, s0 -; GFX10-PAL-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-PAL-NEXT: v_mov_b32_e32 v1, s1 -; GFX10-PAL-NEXT: v_mov_b32_e32 v2, s2 -; GFX10-PAL-NEXT: v_mov_b32_e32 v3, s3 -; GFX10-PAL-NEXT: s_add_i32 s2, s32, 0x4004 -; GFX10-PAL-NEXT: s_add_i32 s1, s32, 0x4004 -; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x4004 -; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 -; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:16 -; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s0 offset:32 -; GFX10-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 -; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-PAL-NEXT: s_setpc_b64 s[30:31] +; GFX1010-PAL-LABEL: zero_init_large_offset_foo: +; GFX1010-PAL: ; %bb.0: +; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1010-PAL-NEXT: scratch_load_dword v0, off, s32 offset:4 glc dlc +; GFX1010-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX1010-PAL-NEXT: s_mov_b32 s0, 0 +; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX1010-PAL-NEXT: s_mov_b32 s1, s0 +; GFX1010-PAL-NEXT: s_mov_b32 s2, s0 +; GFX1010-PAL-NEXT: s_mov_b32 s3, s0 +; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, s0 +; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1010-PAL-NEXT: v_mov_b32_e32 v2, s2 +; GFX1010-PAL-NEXT: v_mov_b32_e32 v3, s3 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo +; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16 +; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 +; GFX1010-PAL-NEXT: s_waitcnt_depctr 0xffe3 +; GFX1010-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX1010-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 +; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1010-PAL-NEXT: s_setpc_b64 s[30:31] +; +; GFX1030-PAL-LABEL: zero_init_large_offset_foo: +; GFX1030-PAL: ; %bb.0: +; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1030-PAL-NEXT: scratch_load_dword v0, off, s32 offset:4 glc dlc +; GFX1030-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX1030-PAL-NEXT: s_mov_b32 s0, 0 +; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX1030-PAL-NEXT: s_mov_b32 s1, s0 +; GFX1030-PAL-NEXT: s_mov_b32 s2, s0 +; GFX1030-PAL-NEXT: s_mov_b32 s3, s0 +; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, s0 +; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, s1 +; GFX1030-PAL-NEXT: v_mov_b32_e32 v2, s2 +; GFX1030-PAL-NEXT: v_mov_b32_e32 v3, s3 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo +; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:16 +; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:32 +; GFX1030-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX1030-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:48 +; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX1030-PAL-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-PAL-LABEL: zero_init_large_offset_foo: ; GFX11-PAL: ; %bb.0: @@ -2374,20 +2407,18 @@ define void @zero_init_large_offset_foo() { ; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s32 offset:4 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX11-PAL-NEXT: s_mov_b32 s0, 0 -; GFX11-PAL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX11-PAL-NEXT: s_mov_b32 s1, s0 ; GFX11-PAL-NEXT: s_mov_b32 s2, s0 ; GFX11-PAL-NEXT: s_mov_b32 s3, s0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GFX11-PAL-NEXT: s_add_i32 s2, s32, 0x4004 -; GFX11-PAL-NEXT: s_add_i32 s1, s32, 0x4004 -; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x4004 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], vcc_lo +; GFX11-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:16 +; GFX11-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:32 ; GFX11-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX11-PAL-NEXT: s_clause 0x3 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s2 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s1 offset:16 -; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s0 offset:32 ; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], vcc_lo offset:48 ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] @@ -2955,13 +2986,13 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: v_and_b32_e32 v1, 15, v0 -; GFX10-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, s0 ; GFX10-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-NEXT: v_lshl_add_u32 v1, v1, 2, vcc_lo +; GFX10-NEXT: v_lshl_add_u32 v0, v0, 2, vcc_lo +; GFX10-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX10-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_lshl_add_u32 v1, v1, 2, vcc_lo ; GFX10-NEXT: scratch_store_dword v0, v2, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_load_dword v0, v1, off glc dlc @@ -2974,14 +3005,13 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v1 ; GFX11-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b32 v0, v2, s0 dlc +; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-NEXT: scratch_store_b32 v0, v2, vcc_lo dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX11-NEXT: scratch_load_b32 v0, v1, vcc_lo glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] @@ -3010,9 +3040,9 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX940-NEXT: v_mov_b32_e32 v2, 15 -; GFX940-NEXT: s_add_i32 vcc_lo, s32, 0x4004 +; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4004 ; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 -; GFX940-NEXT: scratch_store_dword v1, v2, vcc_lo sc0 sc1 +; GFX940-NEXT: scratch_store_dword v1, v2, vcc_hi sc0 sc1 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX940-NEXT: s_add_i32 vcc_hi, s32, 0x4004 @@ -3025,13 +3055,13 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: v_and_b32_e32 v1, 15, v0 -; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, s0 ; GFX10-PAL-NEXT: v_mov_b32_e32 v2, 15 -; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v1, 2, vcc_lo +; GFX10-PAL-NEXT: v_lshl_add_u32 v0, v0, 2, vcc_lo +; GFX10-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX10-PAL-NEXT: scratch_load_dword v3, off, s32 offset:4 glc dlc ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX10-PAL-NEXT: v_lshl_add_u32 v1, v1, 2, vcc_lo ; GFX10-PAL-NEXT: scratch_store_dword v0, v2, off ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_load_dword v0, v1, off glc dlc @@ -3044,14 +3074,13 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: v_dual_mov_b32 v2, 15 :: v_dual_and_b32 v1, 15, v0 ; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX11-PAL-NEXT: s_add_i32 s0, s32, 0x4004 ; GFX11-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 -; GFX11-PAL-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v1, 2, v1 ; GFX11-PAL-NEXT: scratch_load_b32 v3, off, s32 offset:4 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: scratch_store_b32 v0, v2, s0 dlc +; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX11-PAL-NEXT: scratch_store_b32 v0, v2, vcc_lo dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-PAL-NEXT: s_add_i32 vcc_lo, s32, 0x4004 ; GFX11-PAL-NEXT: scratch_load_b32 v0, v1, vcc_lo glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX11-PAL-NEXT: s_setpc_b64 s[30:31] @@ -4109,8 +4138,8 @@ define amdgpu_ps void @large_offset() { ; GFX9-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-NEXT: v_mov_b32_e32 v3, v0 -; GFX9-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:3024 +; GFX9-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:3024 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-NEXT: scratch_load_dwordx4 v[0:3], off, vcc_hi offset:3024 glc @@ -4184,8 +4213,8 @@ define amdgpu_ps void @large_offset() { ; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff ; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0 ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:3024 +; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_hi offset:3024 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) ; GFX9-PAL-NEXT: s_mov_b32 vcc_hi, 0 ; GFX9-PAL-NEXT: scratch_load_dwordx4 v[0:3], off, vcc_hi offset:3024 glc diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir index bdf7364b74bed..82bd45d71da14 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir @@ -89,8 +89,8 @@ body: | ; GCN-LABEL: name: func_add_constant_to_fi_uniform_SCC_clobber_i32 ; GCN: liveins: $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $vcc_lo = S_LSHR_B32 6, $sgpr32, implicit-def dead $scc - ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 killed $vcc_lo, 4, implicit-def $scc + ; GCN-NEXT: $vcc_hi = S_LSHR_B32 6, $sgpr32, implicit-def dead $scc + ; GCN-NEXT: renamable $sgpr4 = nuw S_ADD_U32 killed $vcc_hi, 4, implicit-def $scc ; GCN-NEXT: renamable $sgpr5 = S_ADDC_U32 $sgpr4, 1234567, implicit-def $scc, implicit $scc ; GCN-NEXT: $vcc_hi = S_LSHR_B32 $sgpr32, 6, implicit-def $scc ; GCN-NEXT: $vcc_hi = S_ADD_I32 killed $vcc_hi, 8, implicit-def $scc @@ -184,30 +184,3 @@ body: | S_SETPC_B64_return killed renamable $sgpr30_sgpr31 ... ---- -name: func_frame_idx_at_the_end_of_bb -tracksRegLiveness: true -stack: - - { id: 0, type: default, offset: 0, size: 8, alignment: 4, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - local-offset: 0, debug-info-variable: '', debug-info-expression: '', - debug-info-location: '' } -machineFunctionInfo: - stackPtrOffsetReg: '$sgpr32' - -body: | - bb.0: - liveins: $vgpr31 - - ; GCN-LABEL: name: func_frame_idx_at_the_end_of_bb - ; GCN: liveins: $vgpr31 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: renamable $vgpr0 = V_AND_B32_e32 1023, killed $vgpr31, implicit $exec - ; GCN-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec - ; GCN-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec - ; GCN-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 killed $vgpr1, killed $vgpr0, implicit-def dead $vcc, implicit $exec - renamable $vgpr0 = V_AND_B32_e32 1023, killed $vgpr31, implicit $exec - renamable $vgpr0 = V_LSHLREV_B32_e32 2, killed $vgpr0, implicit $exec - renamable $vgpr0 = V_ADD_CO_U32_e32 %stack.0, killed $vgpr0, implicit-def dead $vcc, implicit $exec -... - diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll index 55cd662a0bd8e..450cae8a4a388 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll @@ -42,14 +42,14 @@ define void @callee_with_stack_and_call() #0 { ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; NO-SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s32 ; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x800 -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[10:11], exec +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[8:9], exec ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 -; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v2, s30, 0 -; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:16 +; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 +; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v1, s30, 0 +; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[10:11] +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[8:9] ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[8:9], exec ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 @@ -65,15 +65,15 @@ define void @callee_with_stack_and_call() #0 { ; NO-SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; NO-SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 ; NO-SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5] -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[6:7], exec +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 -; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v2, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 +; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v2, 0 -; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v2, off, s[0:3], s33 offset:16 +; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v1, 0 +; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[6:7] +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll index 16f513dba3c56..776d4ed37e4c3 100644 --- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll @@ -236,29 +236,29 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(<3 x i64> addrspace(1 ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: v_or_b32_e32 v1, 0x12cc, v0 ; MUBUF-NEXT: v_or_b32_e32 v0, 0x12c8, v0 -; MUBUF-NEXT: v_mov_b32_e32 v18, 0x4000 -; MUBUF-NEXT: v_mov_b32_e32 v17, 0x4000 -; MUBUF-NEXT: v_mov_b32_e32 v16, 0x4000 +; MUBUF-NEXT: v_mov_b32_e32 v13, 0x4000 ; MUBUF-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: v_mov_b32_e32 v15, 0x4000 +; MUBUF-NEXT: v_mov_b32_e32 v12, 0 ; MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: v_mov_b32_e32 v14, 0x4000 -; MUBUF-NEXT: buffer_load_dword v8, v18, s[0:3], 0 offen glc +; MUBUF-NEXT: buffer_load_dword v8, v13, s[0:3], 0 offen glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_load_dword v9, v17, s[0:3], 0 offen offset:4 glc +; MUBUF-NEXT: v_mov_b32_e32 v13, 0x4000 +; MUBUF-NEXT: buffer_load_dword v9, v13, s[0:3], 0 offen offset:4 glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_load_dword v2, v16, s[0:3], 0 offen offset:8 glc +; MUBUF-NEXT: v_mov_b32_e32 v13, 0x4000 +; MUBUF-NEXT: buffer_load_dword v2, v13, s[0:3], 0 offen offset:8 glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_load_dword v3, v15, s[0:3], 0 offen offset:12 glc +; MUBUF-NEXT: v_mov_b32_e32 v13, 0x4000 +; MUBUF-NEXT: buffer_load_dword v3, v13, s[0:3], 0 offen offset:12 glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_load_dword v10, v14, s[0:3], 0 offen offset:16 glc +; MUBUF-NEXT: v_mov_b32_e32 v13, 0x4000 +; MUBUF-NEXT: buffer_load_dword v10, v13, s[0:3], 0 offen offset:16 glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: v_mov_b32_e32 v13, 0x4000 ; MUBUF-NEXT: buffer_load_dword v11, v13, s[0:3], 0 offen offset:20 glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: v_mov_b32_e32 v12, 0 ; MUBUF-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2 ; MUBUF-NEXT: v_addc_co_u32_e32 v3, vcc, v1, v3, vcc ; MUBUF-NEXT: v_add_co_u32_e32 v0, vcc, v7, v8 @@ -297,10 +297,10 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(<3 x i64> addrspace(1 ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 offset:704 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_movk_i32 s3, 0x2000 ; FLATSCR-NEXT: s_movk_i32 s2, 0x2000 -; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s3 offset:16 glc +; FLATSCR-NEXT: scratch_load_dwordx2 v[10:11], off, s2 offset:16 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) +; FLATSCR-NEXT: s_movk_i32 s2, 0x2000 ; FLATSCR-NEXT: scratch_load_dwordx4 v[4:7], off, s2 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: v_mov_b32_e32 v12, 0 diff --git a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir index aa4428f3da4eb..f0d0abd31ed52 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir @@ -29,8 +29,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr4, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr4 = S_MOV_B32 524288 - ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 8192, addrspace 5) + ; CHECK-NEXT: $sgpr5 = S_MOV_B32 524288 + ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 8192, addrspace 5) ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir index faf4c16a64ffe..e61e224d32522 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir @@ -20,109 +20,84 @@ machineFunctionInfo: stackPtrOffsetReg: $sgpr32 body: | - ; GFX8-LABEL: name: pei_scavenge_vgpr_spill - ; GFX8: bb.0: - ; GFX8-NEXT: successors: %bb.1(0x80000000) - ; GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 - ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GFX8-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) - ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; GFX8-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 - ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc - ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc - ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc - ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec - ; GFX8-NEXT: $vcc_lo = S_MOV_B32 8192 - ; GFX8-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr0, 0, implicit $exec - ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) - ; GFX8-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec - ; GFX8-NEXT: $vcc_lo = S_MOV_B32 16384 - ; GFX8-NEXT: $vgpr2, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr2, 0, implicit $exec - ; GFX8-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec - ; GFX8-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) - ; GFX8-NEXT: S_BRANCH %bb.1 - ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: bb.1: - ; GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 - ; GFX8-NEXT: {{ $}} - ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc - ; GFX8-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0 - ; GFX8-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GFX8-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc - ; GFX8-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) - ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; GFX8-NEXT: S_ENDPGM 0, amdgpu_allvgprs - ; GFX9-LABEL: name: pei_scavenge_vgpr_spill - ; GFX9: bb.0: - ; GFX9-NEXT: successors: %bb.1(0x80000000) - ; GFX9-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GFX9-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) - ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; GFX9-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 - ; GFX9-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc - ; GFX9-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc - ; GFX9-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc - ; GFX9-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec - ; GFX9-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) - ; GFX9-NEXT: $vgpr2 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec - ; GFX9-NEXT: $vgpr2 = V_ADD_U32_e32 16384, killed $vgpr2, implicit $exec - ; GFX9-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr2, $vgpr1, implicit $exec - ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) - ; GFX9-NEXT: S_BRANCH %bb.1 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: bb.1: - ; GFX9-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 - ; GFX9-NEXT: {{ $}} - ; GFX9-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc - ; GFX9-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0 - ; GFX9-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GFX9-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc - ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) - ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; GFX9-NEXT: S_ENDPGM 0, amdgpu_allvgprs - ; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill - ; GFX9-FLATSCR: bb.0: - ; GFX9-FLATSCR-NEXT: successors: %bb.1(0x80000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 - ; GFX9-FLATSCR-NEXT: {{ $}} - ; GFX9-FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GFX9-FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc - ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) - ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; GFX9-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 - ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc - ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc - ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc - ; GFX9-FLATSCR-NEXT: $vcc_lo = S_ADD_I32 $sgpr33, 8192, implicit-def $scc - ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vcc_lo, implicit $exec - ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADD_I32 $sgpr33, 16384, implicit-def $scc - ; GFX9-FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $vcc_hi, $vgpr1, implicit $exec - ; GFX9-FLATSCR-NEXT: S_BRANCH %bb.1 - ; GFX9-FLATSCR-NEXT: {{ $}} - ; GFX9-FLATSCR-NEXT: bb.1: - ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 - ; GFX9-FLATSCR-NEXT: {{ $}} - ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc - ; GFX9-FLATSCR-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0 - ; GFX9-FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec - ; GFX9-FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc - ; GFX9-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5) - ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 - ; GFX9-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs bb.0: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 + ; GFX8-LABEL: name: pei_scavenge_vgpr_spill + ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GFX8-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GFX8-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 + ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX8-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX8-NEXT: $vcc_lo = S_MOV_B32 8192 + ; GFX8-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr0, 0, implicit $exec + ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GFX8-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX8-NEXT: $vcc_lo = S_MOV_B32 16384 + ; GFX8-NEXT: $vgpr3, dead $vcc = V_ADD_CO_U32_e64 killed $vcc_lo, killed $vgpr3, 0, implicit $exec + ; GFX8-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec + ; GFX8-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX8-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0 + ; GFX8-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GFX8-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc + ; GFX8-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) + ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GFX8-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) + ; GFX8-NEXT: S_ENDPGM 0, amdgpu_allvgprs + ; GFX9-LABEL: name: pei_scavenge_vgpr_spill + ; GFX9: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GFX9-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GFX9-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 + ; GFX9-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc + ; GFX9-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc + ; GFX9-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 2097152, implicit-def dead $scc + ; GFX9-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX9-NEXT: $vgpr0 = V_ADD_U32_e32 8192, killed $vgpr0, implicit $exec + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GFX9-NEXT: $vgpr3 = V_LSHRREV_B32_e64 6, $sgpr33, implicit $exec + ; GFX9-NEXT: $vgpr3 = V_ADD_U32_e32 16384, killed $vgpr3, implicit $exec + ; GFX9-NEXT: $vgpr0 = V_OR_B32_e32 killed $vgpr3, $vgpr1, implicit $exec + ; GFX9-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -2097152, implicit-def dead $scc + ; GFX9-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0 + ; GFX9-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GFX9-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 1048832, implicit-def dead $scc + ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr6, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) + ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GFX9-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) + ; GFX9-NEXT: S_ENDPGM 0, amdgpu_allvgprs + ; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill + ; GFX9-FLATSCR: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr2 + ; GFX9-FLATSCR-NEXT: {{ $}} + ; GFX9-FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GFX9-FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc + ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GFX9-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr33, 0, undef $vgpr2 + ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc + ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc + ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc + ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADD_I32 $sgpr33, 8192, implicit-def $scc + ; GFX9-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vcc_hi, implicit $exec + ; GFX9-FLATSCR-NEXT: $vcc_hi = S_ADD_I32 $sgpr33, 16384, implicit-def $scc + ; GFX9-FLATSCR-NEXT: $vgpr0 = V_OR_B32_e32 killed $vcc_hi, $vgpr1, implicit $exec + ; GFX9-FLATSCR-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -32768, implicit-def dead $scc + ; GFX9-FLATSCR-NEXT: $sgpr33 = V_READLANE_B32 $vgpr2, 0 + ; GFX9-FLATSCR-NEXT: $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GFX9-FLATSCR-NEXT: $sgpr6 = S_ADD_I32 $sgpr32, 16388, implicit-def dead $scc + ; GFX9-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr6, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5) + ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GFX9-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs $vgpr0 = V_MOV_B32_e32 %stack.0, implicit $exec $vgpr0 = V_OR_B32_e32 %stack.1, $vgpr1, implicit $exec - S_BRANCH %bb.1 - - bb.1: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 S_ENDPGM 0, amdgpu_allvgprs ... diff --git a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll index b877900236278..9d8397f640ac3 100644 --- a/llvm/test/CodeGen/AMDGPU/scratch-simple.ll +++ b/llvm/test/CodeGen/AMDGPU/scratch-simple.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,SI,SIVI,MUBUF %s ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx803 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,VI,SIVI,MUBUF %s ; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-use-divergent-register-indexing -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9_10,MUBUF,GFX9-MUBUF,GFX9_10-MUBUF %s @@ -13,5939 +12,303 @@ ; RELS: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD0 ; RELS: R_AMDGPU_ABS32_LO SCRATCH_RSRC_DWORD1 +; This used to fail due to a v_add_i32 instruction with an illegal immediate +; operand that was created during Local Stack Slot Allocation. Test case derived +; from https://bugs.freedesktop.org/show_bug.cgi?id=96602 +; +; GCN-LABEL: {{^}}ps_main: + +; GFX9-FLATSCR-DAG: s_add_u32 flat_scratch_lo, s0, s2 +; GFX9-FLATSCR-DAG: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-FLATSCR-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0 + +; GFX10-FLATSCR: s_add_u32 s0, s0, s2 +; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 +; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 + +; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3] +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0 +; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 +; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff +; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0 +; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0 +; GFX9-FLATSCR-PAL-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0 + +; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3] +; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0 +; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0) +; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff +; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0 +; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0 +; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 + +; MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 +; MUBUF-DAG: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 +; MUBUF-DAG: s_mov_b32 s2, -1 +; SI-DAG: s_mov_b32 s3, 0xe8f000 +; VI-DAG: s_mov_b32 s3, 0xe80000 +; GFX9-MUBUF-DAG: s_mov_b32 s3, 0xe00000 +; GFX10_W32-MUBUF-DAG: s_mov_b32 s3, 0x31c16000 +; GFX10_W64-MUBUF-DAG: s_mov_b32 s3, 0x31e16000 + +; FLATSCR-NOT: SCRATCH_RSRC_DWORD +; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0 +; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SP]] offset: + +; GFX10-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], off offset: + +; MUBUF-DAG: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0 +; MUBUF-DAG: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, [[BYTES]] +; GFX10-FLATSCR: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0 +; GFX10-FLATSCR-PAL: v_and_b32_e32 [[CLAMP_IDX:v[0-9]+]], 0x1fc, v0 +; GCN-NOT: s_mov_b32 s0 + +; GCN-DAG: v_add{{_|_nc_}}{{i|u}}32_e32 [[HI_OFF:v[0-9]+]],{{.*}} 0x280, [[CLAMP_IDX]] +; GCN-DAG: v_add{{_|_nc_}}{{i|u}}32_e32 [[LO_OFF:v[0-9]+]],{{.*}} {{v2|0x80}}, [[CLAMP_IDX]] + +; MUBUF: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen +; MUBUF: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen +; FLATSCR: scratch_load_dword {{v[0-9]+}}, [[LO_OFF]], off define amdgpu_ps float @ps_main(i32 %idx) { -; SI-LABEL: ps_main: -; SI: ; %bb.0: -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; SI-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_mov_b32 s3, 0xe8f000 -; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; SI-NEXT: s_add_u32 s0, s0, s4 -; SI-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; SI-NEXT: v_mov_b32_e32 v2, 0x80 -; SI-NEXT: s_addc_u32 s1, s1, 0 -; SI-NEXT: v_add_i32_e32 v1, vcc, 0x280, v0 -; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0 -; SI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; SI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; SI-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; SI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; SI-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:448 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:444 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:440 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:436 -; SI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:432 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:428 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:424 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:420 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:416 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:412 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; SI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; SI-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:408 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:404 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:400 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:392 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:388 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:384 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; SI-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:380 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:364 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; SI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; SI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; SI-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; SI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; SI-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:396 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:376 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:368 -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:360 -; SI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:356 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:352 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; SI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; SI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; SI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:328 -; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:324 -; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; SI-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; SI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:960 -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:956 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:952 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:948 -; SI-NEXT: s_waitcnt expcnt(3) -; SI-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; SI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; SI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:944 -; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:940 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:936 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:932 -; SI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:928 -; SI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:924 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:920 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:916 -; SI-NEXT: s_waitcnt expcnt(2) -; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; SI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:912 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:908 -; SI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; SI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:900 -; SI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:892 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:888 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:884 -; SI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:880 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:876 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:872 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:868 -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:864 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:860 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:856 -; SI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:852 -; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:848 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:844 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:840 -; SI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:836 -; SI-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: ; return to shader part epilog -; -; VI-LABEL: ps_main: -; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s4, s0 -; VI-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_mov_b32 s3, 0xe80000 -; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: s_add_u32 s0, s0, s4 -; VI-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; VI-NEXT: v_mov_b32_e32 v2, 0x80 -; VI-NEXT: s_addc_u32 s1, s1, 0 -; VI-NEXT: v_add_u32_e32 v1, vcc, 0x280, v0 -; VI-NEXT: v_add_u32_e32 v0, vcc, v2, v0 -; VI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; VI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; VI-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; VI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; VI-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:448 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:444 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:440 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:436 -; VI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:432 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:428 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:424 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:420 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:416 -; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:412 -; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; VI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; VI-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:408 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:404 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:400 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:392 -; VI-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:388 -; VI-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:384 -; VI-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; VI-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:380 -; VI-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:364 -; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; VI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; VI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; VI-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; VI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; VI-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:396 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:376 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:368 -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:360 -; VI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:356 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:352 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; VI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; VI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; VI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:328 -; VI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:324 -; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; VI-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; VI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:960 -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:956 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:952 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:948 -; VI-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; VI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; VI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:944 -; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:940 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:936 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:932 -; VI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:928 -; VI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:924 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:920 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:916 -; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; VI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:912 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:908 -; VI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; VI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:900 -; VI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:892 -; VI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:888 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:884 -; VI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:880 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:876 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:872 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:868 -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:864 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:860 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:856 -; VI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:852 -; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:848 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:844 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:840 -; VI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:836 -; VI-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_add_f32_e32 v0, v0, v1 -; VI-NEXT: ; return to shader part epilog -; -; GFX9-MUBUF-LABEL: ps_main: -; GFX9-MUBUF: ; %bb.0: -; GFX9-MUBUF-NEXT: s_mov_b32 s4, s0 -; GFX9-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; GFX9-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; GFX9-MUBUF-NEXT: s_mov_b32 s2, -1 -; GFX9-MUBUF-NEXT: s_mov_b32 s3, 0xe00000 -; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s4 -; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:448 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:444 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:440 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:436 -; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:432 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:428 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:424 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:420 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:416 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:412 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:408 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:404 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:400 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:392 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:388 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:384 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; GFX9-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:380 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:364 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX9-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:396 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:376 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:368 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:360 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:356 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:352 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:328 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; GFX9-MUBUF-NEXT: v_add_u32_e32 v1, 0x280, v0 -; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 0x80, v0 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:324 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GFX9-MUBUF-NEXT: s_nop 0 -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:960 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:956 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:952 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:948 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:944 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:940 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:936 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:932 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:928 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:924 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:920 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:916 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:912 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:908 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:900 -; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:892 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:888 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:884 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:880 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:876 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:872 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:868 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:864 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:860 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:856 -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:852 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:848 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:844 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:840 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:836 -; GFX9-MUBUF-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX9-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX9-MUBUF-NEXT: ; return to shader part epilog -; -; GFX10_W32-MUBUF-LABEL: ps_main: -; GFX10_W32-MUBUF: ; %bb.0: -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s4, s0 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s2, -1 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s3, 0x31c16000 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2 -; GFX10_W32-MUBUF-NEXT: s_add_u32 s0, s0, s4 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3 -; GFX10_W32-MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:448 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:444 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:440 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:436 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:432 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:428 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:424 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:420 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:416 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:412 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:408 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:404 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:400 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f -; GFX10_W32-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:396 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:392 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:388 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:384 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:380 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f -; GFX10_W32-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:376 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:368 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:364 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:360 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x280, v0 -; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v0, 0x80, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:356 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:352 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:328 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:324 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499 -; GFX10_W32-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:960 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:956 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:952 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:948 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:944 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:940 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:936 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:932 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:928 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:924 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:920 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:916 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:912 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:908 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:900 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:892 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:888 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:884 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:880 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:876 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:872 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:868 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:864 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:860 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:856 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:852 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:848 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:844 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:840 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:836 -; GFX10_W32-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen -; GFX10_W32-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX10_W32-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10_W32-MUBUF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10_W32-MUBUF-NEXT: ; return to shader part epilog -; -; GFX10_W64-MUBUF-LABEL: ps_main: -; GFX10_W64-MUBUF: ; %bb.0: -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s4, s0 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s2, -1 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s3, 0x31e16000 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2 -; GFX10_W64-MUBUF-NEXT: s_add_u32 s0, s0, s4 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3 -; GFX10_W64-MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:448 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:444 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:440 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:436 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:432 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:428 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:424 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:420 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:416 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:412 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:408 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:404 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:400 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f -; GFX10_W64-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:396 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:392 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:388 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:384 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:380 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f -; GFX10_W64-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:376 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:368 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:364 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:360 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x280, v0 -; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v0, 0x80, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:356 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:352 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:328 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:324 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499 -; GFX10_W64-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:960 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:956 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:952 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:948 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:944 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:940 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:936 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:932 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:928 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:924 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:920 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:916 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:912 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:908 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:900 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:892 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:888 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:884 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:880 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:876 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:872 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:868 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:864 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:860 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:856 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:852 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:848 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:844 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:840 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:836 -; GFX10_W64-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen -; GFX10_W64-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX10_W64-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10_W64-MUBUF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10_W64-MUBUF-NEXT: ; return to shader part epilog -; -; GFX9-FLATSCR-LABEL: ps_main: -; GFX9-FLATSCR: ; %bb.0: -; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s2 -; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-FLATSCR-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:416 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:400 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:384 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18 -; GFX9-FLATSCR-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s2 offset:336 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s1 offset:320 -; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x80, v23 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:960 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912 -; GFX9-FLATSCR-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17 -; GFX9-FLATSCR-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s3 offset:880 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s2 offset:864 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s1 offset:944 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:928 -; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x280, v23 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832 -; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0 -; GFX9-FLATSCR-NEXT: ; return to shader part epilog -; -; GFX10-FLATSCR-LABEL: ps_main: -; GFX10-FLATSCR: ; %bb.0: -; GFX10-FLATSCR-NEXT: s_add_u32 s0, s0, s2 -; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e -; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:448 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8 -; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:432 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:416 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:400 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f -; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x280, v0 -; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v35, 0x80, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:384 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:368 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:352 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:336 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:320 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16 -; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:960 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:944 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:928 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:912 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:896 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:880 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:864 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:848 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:832 -; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0 -; GFX10-FLATSCR-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLATSCR-NEXT: ; return to shader part epilog -; -; GFX9-FLATSCR-PAL-LABEL: ps_main: -; GFX9-FLATSCR-PAL: ; %bb.0: -; GFX9-FLATSCR-PAL-NEXT: s_getpc_b64 s[2:3] -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, s0 -; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 -; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s3, s3, 0xffff -; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0 -; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:416 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:400 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:384 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s2 offset:336 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s1 offset:320 -; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x80, v23 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:960 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s3 offset:880 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s2 offset:864 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s1 offset:944 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:928 -; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x280, v23 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832 -; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off -; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0 -; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog -; -; GFX10-FLATSCR-PAL-LABEL: ps_main: -; GFX10-FLATSCR-PAL: ; %bb.0: -; GFX10-FLATSCR-PAL-NEXT: s_getpc_b64 s[2:3] -; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s2, s0 -; GFX10-FLATSCR-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-FLATSCR-PAL-NEXT: s_and_b32 s3, s3, 0xffff -; GFX10-FLATSCR-PAL-NEXT: s_add_u32 s2, s2, s0 -; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s3, s3, 0 -; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 -; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e -; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:448 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8 -; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:432 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:416 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:400 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f -; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x280, v0 -; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v35, 0x80, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:384 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:368 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:352 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:336 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:320 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16 -; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:960 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:944 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:928 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:912 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:896 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:880 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:864 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:848 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:832 -; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0 -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog %v1 = extractelement <81 x float> , i32 %idx %v2 = extractelement <81 x float> , i32 %idx %r = fadd float %v1, %v2 ret float %r } +; GCN-LABEL: {{^}}vs_main: +; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s2 +; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0 + +; GFX10-FLATSCR: s_add_u32 s0, s0, s2 +; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 +; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 + +; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3] +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0 +; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 +; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff +; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0 +; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0 + +; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3] +; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0 +; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0) +; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff +; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0 +; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0 +; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 + +; MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 +; GCN-NOT: s_mov_b32 s0 + +; FLATSCR-NOT: SCRATCH_RSRC_DWORD + +; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen +; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen + +; GFX9-FLATSCR: s_mov_b32 [[SP:[^,]+]], 0 +; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SP]] offset: + +; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off +; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off + define amdgpu_vs float @vs_main(i32 %idx) { -; SI-LABEL: vs_main: -; SI: ; %bb.0: -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; SI-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_mov_b32 s3, 0xe8f000 -; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; SI-NEXT: s_add_u32 s0, s0, s4 -; SI-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; SI-NEXT: v_mov_b32_e32 v2, 0x80 -; SI-NEXT: s_addc_u32 s1, s1, 0 -; SI-NEXT: v_add_i32_e32 v1, vcc, 0x280, v0 -; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0 -; SI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; SI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; SI-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; SI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; SI-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:448 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:444 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:440 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:436 -; SI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:432 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:428 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:424 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:420 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:416 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:412 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; SI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; SI-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:408 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:404 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:400 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:392 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:388 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:384 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; SI-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:380 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:364 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; SI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; SI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; SI-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; SI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; SI-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:396 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:376 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:368 -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:360 -; SI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:356 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:352 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; SI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; SI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; SI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:328 -; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:324 -; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; SI-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; SI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:960 -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:956 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:952 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:948 -; SI-NEXT: s_waitcnt expcnt(3) -; SI-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; SI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; SI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:944 -; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:940 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:936 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:932 -; SI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:928 -; SI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:924 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:920 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:916 -; SI-NEXT: s_waitcnt expcnt(2) -; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; SI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:912 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:908 -; SI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; SI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:900 -; SI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:892 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:888 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:884 -; SI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:880 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:876 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:872 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:868 -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:864 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:860 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:856 -; SI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:852 -; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:848 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:844 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:840 -; SI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:836 -; SI-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: ; return to shader part epilog -; -; VI-LABEL: vs_main: -; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s4, s0 -; VI-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_mov_b32 s3, 0xe80000 -; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: s_add_u32 s0, s0, s4 -; VI-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; VI-NEXT: v_mov_b32_e32 v2, 0x80 -; VI-NEXT: s_addc_u32 s1, s1, 0 -; VI-NEXT: v_add_u32_e32 v1, vcc, 0x280, v0 -; VI-NEXT: v_add_u32_e32 v0, vcc, v2, v0 -; VI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; VI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; VI-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; VI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; VI-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:448 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:444 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:440 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:436 -; VI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:432 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:428 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:424 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:420 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:416 -; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:412 -; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; VI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; VI-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:408 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:404 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:400 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:392 -; VI-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:388 -; VI-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:384 -; VI-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; VI-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:380 -; VI-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:364 -; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; VI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; VI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; VI-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; VI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; VI-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:396 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:376 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:368 -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:360 -; VI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:356 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:352 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; VI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; VI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; VI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:328 -; VI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:324 -; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; VI-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; VI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:960 -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:956 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:952 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:948 -; VI-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; VI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; VI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:944 -; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:940 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:936 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:932 -; VI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:928 -; VI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:924 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:920 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:916 -; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; VI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:912 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:908 -; VI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; VI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:900 -; VI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:892 -; VI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:888 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:884 -; VI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:880 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:876 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:872 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:868 -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:864 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:860 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:856 -; VI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:852 -; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:848 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:844 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:840 -; VI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:836 -; VI-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_add_f32_e32 v0, v0, v1 -; VI-NEXT: ; return to shader part epilog -; -; GFX9-MUBUF-LABEL: vs_main: -; GFX9-MUBUF: ; %bb.0: -; GFX9-MUBUF-NEXT: s_mov_b32 s4, s0 -; GFX9-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; GFX9-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; GFX9-MUBUF-NEXT: s_mov_b32 s2, -1 -; GFX9-MUBUF-NEXT: s_mov_b32 s3, 0xe00000 -; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s4 -; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:448 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:444 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:440 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:436 -; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:432 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:428 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:424 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:420 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:416 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:412 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:408 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:404 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:400 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:392 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:388 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:384 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; GFX9-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:380 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:364 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX9-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:396 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:376 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:368 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:360 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:356 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:352 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:328 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; GFX9-MUBUF-NEXT: v_add_u32_e32 v1, 0x280, v0 -; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 0x80, v0 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:324 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GFX9-MUBUF-NEXT: s_nop 0 -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:960 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:956 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:952 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:948 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:944 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:940 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:936 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:932 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:928 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:924 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:920 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:916 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:912 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:908 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:900 -; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:892 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:888 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:884 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:880 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:876 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:872 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:868 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:864 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:860 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:856 -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:852 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:848 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:844 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:840 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:836 -; GFX9-MUBUF-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX9-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX9-MUBUF-NEXT: ; return to shader part epilog -; -; GFX10_W32-MUBUF-LABEL: vs_main: -; GFX10_W32-MUBUF: ; %bb.0: -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s4, s0 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s2, -1 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s3, 0x31c16000 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2 -; GFX10_W32-MUBUF-NEXT: s_add_u32 s0, s0, s4 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3 -; GFX10_W32-MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:448 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:444 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:440 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:436 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:432 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:428 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:424 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:420 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:416 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:412 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:408 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:404 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:400 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f -; GFX10_W32-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:396 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:392 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:388 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:384 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:380 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f -; GFX10_W32-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:376 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:368 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:364 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:360 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x280, v0 -; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v0, 0x80, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:356 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:352 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:328 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:324 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499 -; GFX10_W32-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:960 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:956 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:952 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:948 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:944 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:940 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:936 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:932 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:928 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:924 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:920 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:916 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:912 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:908 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:900 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:892 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:888 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:884 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:880 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:876 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:872 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:868 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:864 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:860 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:856 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:852 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:848 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:844 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:840 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:836 -; GFX10_W32-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen -; GFX10_W32-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX10_W32-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10_W32-MUBUF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10_W32-MUBUF-NEXT: ; return to shader part epilog -; -; GFX10_W64-MUBUF-LABEL: vs_main: -; GFX10_W64-MUBUF: ; %bb.0: -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s4, s0 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s2, -1 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s3, 0x31e16000 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2 -; GFX10_W64-MUBUF-NEXT: s_add_u32 s0, s0, s4 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3 -; GFX10_W64-MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:448 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:444 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:440 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:436 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:432 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:428 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:424 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:420 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:416 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:412 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:408 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:404 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:400 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f -; GFX10_W64-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:396 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:392 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:388 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:384 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:380 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f -; GFX10_W64-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:376 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:368 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:364 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:360 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x280, v0 -; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v0, 0x80, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:356 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:352 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:328 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:324 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499 -; GFX10_W64-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:960 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:956 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:952 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:948 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:944 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:940 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:936 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:932 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:928 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:924 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:920 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:916 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:912 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:908 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:900 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:892 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:888 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:884 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:880 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:876 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:872 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:868 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:864 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:860 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:856 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:852 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:848 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:844 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:840 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:836 -; GFX10_W64-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen -; GFX10_W64-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX10_W64-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10_W64-MUBUF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10_W64-MUBUF-NEXT: ; return to shader part epilog -; -; GFX9-FLATSCR-LABEL: vs_main: -; GFX9-FLATSCR: ; %bb.0: -; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s2 -; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-FLATSCR-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:416 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:400 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:384 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18 -; GFX9-FLATSCR-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s2 offset:336 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s1 offset:320 -; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x80, v23 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:960 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912 -; GFX9-FLATSCR-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17 -; GFX9-FLATSCR-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s3 offset:880 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s2 offset:864 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s1 offset:944 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:928 -; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x280, v23 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832 -; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0 -; GFX9-FLATSCR-NEXT: ; return to shader part epilog -; -; GFX10-FLATSCR-LABEL: vs_main: -; GFX10-FLATSCR: ; %bb.0: -; GFX10-FLATSCR-NEXT: s_add_u32 s0, s0, s2 -; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e -; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:448 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8 -; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:432 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:416 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:400 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f -; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x280, v0 -; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v35, 0x80, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:384 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:368 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:352 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:336 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:320 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16 -; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:960 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:944 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:928 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:912 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:896 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:880 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:864 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:848 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:832 -; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0 -; GFX10-FLATSCR-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLATSCR-NEXT: ; return to shader part epilog -; -; GFX9-FLATSCR-PAL-LABEL: vs_main: -; GFX9-FLATSCR-PAL: ; %bb.0: -; GFX9-FLATSCR-PAL-NEXT: s_getpc_b64 s[2:3] -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, s0 -; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 -; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s3, s3, 0xffff -; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0 -; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:416 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:400 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:384 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s2 offset:336 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s1 offset:320 -; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x80, v23 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:960 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s3 offset:880 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s2 offset:864 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s1 offset:944 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:928 -; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x280, v23 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832 -; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off -; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0 -; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog -; -; GFX10-FLATSCR-PAL-LABEL: vs_main: -; GFX10-FLATSCR-PAL: ; %bb.0: -; GFX10-FLATSCR-PAL-NEXT: s_getpc_b64 s[2:3] -; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s2, s0 -; GFX10-FLATSCR-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-FLATSCR-PAL-NEXT: s_and_b32 s3, s3, 0xffff -; GFX10-FLATSCR-PAL-NEXT: s_add_u32 s2, s2, s0 -; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s3, s3, 0 -; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 -; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e -; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:448 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8 -; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:432 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:416 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:400 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f -; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x280, v0 -; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v35, 0x80, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:384 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:368 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:352 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:336 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:320 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16 -; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:960 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:944 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:928 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:912 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:896 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:880 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:864 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:848 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:832 -; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0 -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog %v1 = extractelement <81 x float> , i32 %idx %v2 = extractelement <81 x float> , i32 %idx %r = fadd float %v1, %v2 ret float %r } +; GCN-LABEL: {{^}}cs_main: +; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s2 +; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0 + +; GFX10-FLATSCR: s_add_u32 s0, s0, s2 +; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 +; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 + +; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[2:3] +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s2, s0 +; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[2:3], s[2:3], 0x10 +; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 +; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-PAL-DAG: s_and_b32 s3, s3, 0xffff +; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s2, s0 +; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s3, 0 + +; GFX10-FLATSCR-PAL: s_getpc_b64 s[2:3] +; GFX10-FLATSCR-PAL: s_mov_b32 s2, s0 +; GFX10-FLATSCR-PAL: s_load_dwordx2 s[2:3], s[2:3], 0x10 +; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0) +; GFX10-FLATSCR-PAL: s_and_b32 s3, s3, 0xffff +; GFX10-FLATSCR-PAL: s_add_u32 s2, s2, s0 +; GFX10-FLATSCR-PAL: s_addc_u32 s3, s3, 0 +; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 + +; MUBUF-DAG: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 + +; FLATSCR-NOT: SCRATCH_RSRC_DWORD + +; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen +; MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen + +; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off +; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off define amdgpu_cs float @cs_main(i32 %idx) { -; SI-LABEL: cs_main: -; SI: ; %bb.0: -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; SI-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_mov_b32 s3, 0xe8f000 -; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; SI-NEXT: s_add_u32 s0, s0, s4 -; SI-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; SI-NEXT: v_mov_b32_e32 v2, 0x80 -; SI-NEXT: s_addc_u32 s1, s1, 0 -; SI-NEXT: v_add_i32_e32 v1, vcc, 0x280, v0 -; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0 -; SI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; SI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; SI-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; SI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; SI-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:448 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:444 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:440 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:436 -; SI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:432 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:428 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:424 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:420 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:416 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:412 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; SI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; SI-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:408 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:404 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:400 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:392 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:388 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:384 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; SI-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:380 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:364 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; SI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; SI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; SI-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; SI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; SI-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:396 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:376 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:368 -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:360 -; SI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:356 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:352 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; SI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; SI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; SI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:328 -; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:324 -; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; SI-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; SI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:960 -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:956 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:952 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:948 -; SI-NEXT: s_waitcnt expcnt(3) -; SI-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; SI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; SI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:944 -; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:940 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:936 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:932 -; SI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:928 -; SI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:924 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:920 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:916 -; SI-NEXT: s_waitcnt expcnt(2) -; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; SI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:912 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:908 -; SI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; SI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:900 -; SI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:892 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:888 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:884 -; SI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:880 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:876 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:872 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:868 -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:864 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:860 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:856 -; SI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:852 -; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:848 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:844 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:840 -; SI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:836 -; SI-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: ; return to shader part epilog -; -; VI-LABEL: cs_main: -; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s4, s0 -; VI-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_mov_b32 s3, 0xe80000 -; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: s_add_u32 s0, s0, s4 -; VI-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; VI-NEXT: v_mov_b32_e32 v2, 0x80 -; VI-NEXT: s_addc_u32 s1, s1, 0 -; VI-NEXT: v_add_u32_e32 v1, vcc, 0x280, v0 -; VI-NEXT: v_add_u32_e32 v0, vcc, v2, v0 -; VI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; VI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; VI-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; VI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; VI-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:448 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:444 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:440 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:436 -; VI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:432 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:428 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:424 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:420 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:416 -; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:412 -; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; VI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; VI-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:408 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:404 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:400 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:392 -; VI-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:388 -; VI-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:384 -; VI-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; VI-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:380 -; VI-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:364 -; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; VI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; VI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; VI-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; VI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; VI-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:396 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:376 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:368 -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:360 -; VI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:356 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:352 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; VI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; VI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; VI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:328 -; VI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:324 -; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; VI-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; VI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:960 -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:956 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:952 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:948 -; VI-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; VI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; VI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:944 -; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:940 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:936 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:932 -; VI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:928 -; VI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:924 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:920 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:916 -; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; VI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:912 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:908 -; VI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; VI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:900 -; VI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:892 -; VI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:888 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:884 -; VI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:880 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:876 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:872 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:868 -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:864 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:860 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:856 -; VI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:852 -; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:848 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:844 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:840 -; VI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:836 -; VI-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_add_f32_e32 v0, v0, v1 -; VI-NEXT: ; return to shader part epilog -; -; GFX9-MUBUF-LABEL: cs_main: -; GFX9-MUBUF: ; %bb.0: -; GFX9-MUBUF-NEXT: s_mov_b32 s4, s0 -; GFX9-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; GFX9-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; GFX9-MUBUF-NEXT: s_mov_b32 s2, -1 -; GFX9-MUBUF-NEXT: s_mov_b32 s3, 0xe00000 -; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s4 -; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:448 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:444 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:440 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:436 -; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:432 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:428 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:424 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:420 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:416 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:412 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:408 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:404 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:400 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:392 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:388 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:384 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; GFX9-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:380 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:364 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX9-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:396 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:376 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:368 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:360 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:356 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:352 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:328 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; GFX9-MUBUF-NEXT: v_add_u32_e32 v1, 0x280, v0 -; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 0x80, v0 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:324 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GFX9-MUBUF-NEXT: s_nop 0 -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:960 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:956 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:952 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:948 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:944 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:940 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:936 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:932 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:928 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:924 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:920 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:916 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:912 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:908 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:900 -; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:892 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:888 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:884 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:880 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:876 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:872 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:868 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:864 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:860 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:856 -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:852 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:848 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:844 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:840 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:836 -; GFX9-MUBUF-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX9-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX9-MUBUF-NEXT: ; return to shader part epilog -; -; GFX10_W32-MUBUF-LABEL: cs_main: -; GFX10_W32-MUBUF: ; %bb.0: -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s4, s0 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s2, -1 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s3, 0x31c16000 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2 -; GFX10_W32-MUBUF-NEXT: s_add_u32 s0, s0, s4 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3 -; GFX10_W32-MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:448 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:444 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:440 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:436 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:432 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:428 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:424 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:420 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:416 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:412 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:408 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:404 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:400 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f -; GFX10_W32-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:396 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:392 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:388 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:384 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:380 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f -; GFX10_W32-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:376 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:368 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:364 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:360 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x280, v0 -; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v0, 0x80, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:356 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:352 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:328 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:324 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499 -; GFX10_W32-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:960 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:956 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:952 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:948 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:944 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:940 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:936 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:932 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:928 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:924 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:920 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:916 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:912 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:908 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:900 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:892 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:888 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:884 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:880 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:876 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:872 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:868 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:864 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:860 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:856 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:852 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:848 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:844 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:840 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:836 -; GFX10_W32-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen -; GFX10_W32-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX10_W32-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10_W32-MUBUF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10_W32-MUBUF-NEXT: ; return to shader part epilog -; -; GFX10_W64-MUBUF-LABEL: cs_main: -; GFX10_W64-MUBUF: ; %bb.0: -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s4, s0 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s2, -1 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s3, 0x31e16000 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2 -; GFX10_W64-MUBUF-NEXT: s_add_u32 s0, s0, s4 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3 -; GFX10_W64-MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:448 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:444 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:440 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:436 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:432 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:428 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:424 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:420 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:416 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:412 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:408 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:404 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:400 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f -; GFX10_W64-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:396 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:392 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:388 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:384 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:380 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f -; GFX10_W64-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:376 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:368 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:364 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:360 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x280, v0 -; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v0, 0x80, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:356 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:352 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:328 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:324 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499 -; GFX10_W64-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:960 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:956 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:952 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:948 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:944 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:940 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:936 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:932 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:928 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:924 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:920 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:916 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:912 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:908 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:900 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:892 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:888 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:884 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:880 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:876 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:872 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:868 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:864 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:860 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:856 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:852 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:848 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:844 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:840 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:836 -; GFX10_W64-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen -; GFX10_W64-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX10_W64-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10_W64-MUBUF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10_W64-MUBUF-NEXT: ; return to shader part epilog -; -; GFX9-FLATSCR-LABEL: cs_main: -; GFX9-FLATSCR: ; %bb.0: -; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s2 -; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-FLATSCR-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:416 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:400 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:384 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18 -; GFX9-FLATSCR-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s2 offset:336 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s1 offset:320 -; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x80, v23 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:960 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912 -; GFX9-FLATSCR-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17 -; GFX9-FLATSCR-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s3 offset:880 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s2 offset:864 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s1 offset:944 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:928 -; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x280, v23 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832 -; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0 -; GFX9-FLATSCR-NEXT: ; return to shader part epilog -; -; GFX10-FLATSCR-LABEL: cs_main: -; GFX10-FLATSCR: ; %bb.0: -; GFX10-FLATSCR-NEXT: s_add_u32 s0, s0, s2 -; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e -; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:448 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8 -; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:432 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:416 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:400 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f -; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x280, v0 -; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v35, 0x80, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:384 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:368 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:352 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:336 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:320 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16 -; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:960 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:944 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:928 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:912 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:896 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:880 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:864 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:848 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:832 -; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0 -; GFX10-FLATSCR-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLATSCR-NEXT: ; return to shader part epilog -; -; GFX9-FLATSCR-PAL-LABEL: cs_main: -; GFX9-FLATSCR-PAL: ; %bb.0: -; GFX9-FLATSCR-PAL-NEXT: s_getpc_b64 s[2:3] -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, s0 -; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x10 -; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s3, s3, 0xffff -; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s0 -; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:416 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:400 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:384 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s2 offset:336 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s1 offset:320 -; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x80, v23 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:960 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s3 offset:880 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s2 offset:864 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s1 offset:944 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:928 -; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x280, v23 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832 -; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off -; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0 -; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog -; -; GFX10-FLATSCR-PAL-LABEL: cs_main: -; GFX10-FLATSCR-PAL: ; %bb.0: -; GFX10-FLATSCR-PAL-NEXT: s_getpc_b64 s[2:3] -; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s2, s0 -; GFX10-FLATSCR-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x10 -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-FLATSCR-PAL-NEXT: s_and_b32 s3, s3, 0xffff -; GFX10-FLATSCR-PAL-NEXT: s_add_u32 s2, s2, s0 -; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s3, s3, 0 -; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 -; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e -; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:448 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8 -; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:432 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:416 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:400 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f -; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x280, v0 -; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v35, 0x80, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:384 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:368 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:352 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:336 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:320 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16 -; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:960 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:944 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:928 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:912 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:896 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:880 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:864 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:848 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:832 -; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0 -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog %v1 = extractelement <81 x float> , i32 %idx %v2 = extractelement <81 x float> , i32 %idx %r = fadd float %v1, %v2 ret float %r } +; GCN-LABEL: {{^}}hs_main: +; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5 +; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0 + +; GFX10-FLATSCR: s_add_u32 s0, s0, s5 +; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 +; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 + +; SIVI: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 +; SIVI-NOT: s_mov_b32 s0 +; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen +; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen + +; GFX9_10-MUBUF: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 +; GFX9_10-NOT: s_mov_b32 s5 +; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen +; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen + +; FLATSCR-NOT: SCRATCH_RSRC_DWORD +; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off +; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off define amdgpu_hs float @hs_main(i32 %idx) { -; SI-LABEL: hs_main: -; SI: ; %bb.0: -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; SI-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_mov_b32 s3, 0xe8f000 -; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; SI-NEXT: s_add_u32 s0, s0, s4 -; SI-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; SI-NEXT: v_mov_b32_e32 v2, 0x80 -; SI-NEXT: s_addc_u32 s1, s1, 0 -; SI-NEXT: v_add_i32_e32 v1, vcc, 0x280, v0 -; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0 -; SI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; SI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; SI-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; SI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; SI-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:448 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:444 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:440 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:436 -; SI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:432 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:428 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:424 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:420 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:416 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:412 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; SI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; SI-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:408 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:404 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:400 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:392 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:388 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:384 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; SI-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:380 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:364 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; SI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; SI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; SI-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; SI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; SI-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:396 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:376 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:368 -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:360 -; SI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:356 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:352 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; SI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; SI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; SI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:328 -; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:324 -; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; SI-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; SI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:960 -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:956 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:952 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:948 -; SI-NEXT: s_waitcnt expcnt(3) -; SI-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; SI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; SI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:944 -; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:940 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:936 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:932 -; SI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:928 -; SI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:924 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:920 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:916 -; SI-NEXT: s_waitcnt expcnt(2) -; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; SI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:912 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:908 -; SI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; SI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:900 -; SI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:892 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:888 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:884 -; SI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:880 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:876 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:872 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:868 -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:864 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:860 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:856 -; SI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:852 -; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:848 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:844 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:840 -; SI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:836 -; SI-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: ; return to shader part epilog -; -; VI-LABEL: hs_main: -; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s4, s0 -; VI-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_mov_b32 s3, 0xe80000 -; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: s_add_u32 s0, s0, s4 -; VI-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; VI-NEXT: v_mov_b32_e32 v2, 0x80 -; VI-NEXT: s_addc_u32 s1, s1, 0 -; VI-NEXT: v_add_u32_e32 v1, vcc, 0x280, v0 -; VI-NEXT: v_add_u32_e32 v0, vcc, v2, v0 -; VI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; VI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; VI-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; VI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; VI-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:448 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:444 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:440 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:436 -; VI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:432 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:428 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:424 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:420 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:416 -; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:412 -; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; VI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; VI-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:408 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:404 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:400 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:392 -; VI-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:388 -; VI-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:384 -; VI-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; VI-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:380 -; VI-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:364 -; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; VI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; VI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; VI-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; VI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; VI-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:396 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:376 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:368 -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:360 -; VI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:356 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:352 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; VI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; VI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; VI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:328 -; VI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:324 -; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; VI-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; VI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:960 -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:956 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:952 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:948 -; VI-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; VI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; VI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:944 -; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:940 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:936 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:932 -; VI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:928 -; VI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:924 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:920 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:916 -; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; VI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:912 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:908 -; VI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; VI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:900 -; VI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:892 -; VI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:888 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:884 -; VI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:880 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:876 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:872 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:868 -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:864 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:860 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:856 -; VI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:852 -; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:848 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:844 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:840 -; VI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:836 -; VI-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_add_f32_e32 v0, v0, v1 -; VI-NEXT: ; return to shader part epilog -; -; GFX9-MUBUF-LABEL: hs_main: -; GFX9-MUBUF: ; %bb.0: -; GFX9-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; GFX9-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; GFX9-MUBUF-NEXT: s_mov_b32 s2, -1 -; GFX9-MUBUF-NEXT: s_mov_b32 s3, 0xe00000 -; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s5 -; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:448 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:444 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:440 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:436 -; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:432 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:428 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:424 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:420 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:416 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:412 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:408 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:404 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:400 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:392 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:388 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:384 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; GFX9-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:380 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:364 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX9-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:396 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:376 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:368 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:360 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:356 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:352 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:328 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; GFX9-MUBUF-NEXT: v_add_u32_e32 v1, 0x280, v0 -; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 0x80, v0 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:324 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GFX9-MUBUF-NEXT: s_nop 0 -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:960 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:956 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:952 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:948 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:944 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:940 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:936 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:932 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:928 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:924 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:920 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:916 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:912 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:908 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:900 -; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:892 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:888 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:884 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:880 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:876 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:872 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:868 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:864 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:860 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:856 -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:852 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:848 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:844 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:840 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:836 -; GFX9-MUBUF-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX9-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX9-MUBUF-NEXT: ; return to shader part epilog -; -; GFX10_W32-MUBUF-LABEL: hs_main: -; GFX10_W32-MUBUF: ; %bb.0: -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s2, -1 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s3, 0x31c16000 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2 -; GFX10_W32-MUBUF-NEXT: s_add_u32 s0, s0, s5 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3 -; GFX10_W32-MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:448 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:444 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:440 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:436 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:432 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:428 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:424 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:420 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:416 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:412 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:408 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:404 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:400 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f -; GFX10_W32-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:396 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:392 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:388 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:384 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:380 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f -; GFX10_W32-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:376 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:368 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:364 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:360 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x280, v0 -; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v0, 0x80, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:356 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:352 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:328 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:324 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499 -; GFX10_W32-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:960 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:956 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:952 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:948 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:944 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:940 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:936 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:932 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:928 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:924 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:920 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:916 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:912 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:908 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:900 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:892 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:888 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:884 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:880 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:876 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:872 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:868 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:864 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:860 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:856 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:852 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:848 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:844 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:840 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:836 -; GFX10_W32-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen -; GFX10_W32-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX10_W32-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10_W32-MUBUF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10_W32-MUBUF-NEXT: ; return to shader part epilog -; -; GFX10_W64-MUBUF-LABEL: hs_main: -; GFX10_W64-MUBUF: ; %bb.0: -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s2, -1 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s3, 0x31e16000 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2 -; GFX10_W64-MUBUF-NEXT: s_add_u32 s0, s0, s5 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3 -; GFX10_W64-MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:448 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:444 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:440 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:436 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:432 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:428 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:424 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:420 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:416 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:412 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:408 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:404 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:400 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f -; GFX10_W64-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:396 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:392 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:388 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:384 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:380 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f -; GFX10_W64-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:376 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:368 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:364 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:360 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x280, v0 -; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v0, 0x80, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:356 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:352 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:328 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:324 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499 -; GFX10_W64-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:960 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:956 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:952 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:948 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:944 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:940 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:936 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:932 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:928 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:924 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:920 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:916 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:912 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:908 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:900 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:892 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:888 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:884 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:880 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:876 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:872 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:868 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:864 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:860 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:856 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:852 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:848 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:844 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:840 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:836 -; GFX10_W64-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen -; GFX10_W64-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX10_W64-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10_W64-MUBUF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10_W64-MUBUF-NEXT: ; return to shader part epilog -; -; GFX9-FLATSCR-LABEL: hs_main: -; GFX9-FLATSCR: ; %bb.0: -; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s5 -; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-FLATSCR-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:416 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:400 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:384 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18 -; GFX9-FLATSCR-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s2 offset:336 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s1 offset:320 -; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x80, v23 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:960 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912 -; GFX9-FLATSCR-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17 -; GFX9-FLATSCR-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s3 offset:880 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s2 offset:864 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s1 offset:944 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:928 -; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x280, v23 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832 -; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0 -; GFX9-FLATSCR-NEXT: ; return to shader part epilog -; -; GFX10-FLATSCR-LABEL: hs_main: -; GFX10-FLATSCR: ; %bb.0: -; GFX10-FLATSCR-NEXT: s_add_u32 s0, s0, s5 -; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e -; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:448 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8 -; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:432 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:416 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:400 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f -; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x280, v0 -; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v35, 0x80, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:384 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:368 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:352 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:336 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:320 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16 -; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:960 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:944 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:928 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:912 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:896 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:880 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:864 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:848 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:832 -; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0 -; GFX10-FLATSCR-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLATSCR-NEXT: ; return to shader part epilog -; -; GFX9-FLATSCR-PAL-LABEL: hs_main: -; GFX9-FLATSCR-PAL: ; %bb.0: -; GFX9-FLATSCR-PAL-NEXT: s_getpc_b64 s[0:1] -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8 -; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff -; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s0, s5 -; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:416 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:400 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:384 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s2 offset:336 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s1 offset:320 -; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x80, v23 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:960 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s3 offset:880 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s2 offset:864 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s1 offset:944 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:928 -; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x280, v23 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832 -; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off -; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0 -; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog -; -; GFX10-FLATSCR-PAL-LABEL: hs_main: -; GFX10-FLATSCR-PAL: ; %bb.0: -; GFX10-FLATSCR-PAL-NEXT: s_getpc_b64 s[0:1] -; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8 -; GFX10-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff -; GFX10-FLATSCR-PAL-NEXT: s_add_u32 s0, s0, s5 -; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e -; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:448 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8 -; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:432 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:416 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:400 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f -; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x280, v0 -; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v35, 0x80, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:384 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:368 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:352 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:336 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:320 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16 -; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:960 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:944 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:928 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:912 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:896 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:880 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:864 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:848 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:832 -; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0 -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog %v1 = extractelement <81 x float> , i32 %idx %v2 = extractelement <81 x float> , i32 %idx %r = fadd float %v1, %v2 ret float %r } +; GCN-LABEL: {{^}}gs_main: +; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5 +; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0 + +; GFX10-FLATSCR: s_add_u32 s0, s0, s5 +; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 +; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 + +; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1] +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8 +; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 +; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff +; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5 +; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0 + +; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1] +; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8 +; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0) +; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff +; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5 +; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0 +; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 + +; SIVI: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 +; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen +; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen + +; GFX9_10-MUBUF: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 +; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen +; GFX9_10-MUBUF: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen + +; FLATSCR-NOT: SCRATCH_RSRC_DWORD +; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off +; FLATSCR: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off define amdgpu_gs float @gs_main(i32 %idx) { -; SI-LABEL: gs_main: -; SI: ; %bb.0: -; SI-NEXT: s_mov_b32 s4, s0 -; SI-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; SI-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: s_mov_b32 s3, 0xe8f000 -; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; SI-NEXT: s_add_u32 s0, s0, s4 -; SI-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; SI-NEXT: v_mov_b32_e32 v2, 0x80 -; SI-NEXT: s_addc_u32 s1, s1, 0 -; SI-NEXT: v_add_i32_e32 v1, vcc, 0x280, v0 -; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0 -; SI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; SI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; SI-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; SI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; SI-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:448 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:444 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:440 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:436 -; SI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:432 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:428 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:424 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:420 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:416 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:412 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; SI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; SI-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:408 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:404 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:400 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:392 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:388 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:384 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; SI-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:380 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:364 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; SI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; SI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; SI-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; SI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; SI-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:396 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:376 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:368 -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:360 -; SI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:356 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:352 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; SI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; SI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; SI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:328 -; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:324 -; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; SI-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; SI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:960 -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:956 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:952 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:948 -; SI-NEXT: s_waitcnt expcnt(3) -; SI-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; SI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; SI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:944 -; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:940 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:936 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:932 -; SI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:928 -; SI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:924 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:920 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:916 -; SI-NEXT: s_waitcnt expcnt(2) -; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; SI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:912 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:908 -; SI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; SI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:900 -; SI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:892 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; SI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:888 -; SI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:884 -; SI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:880 -; SI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:876 -; SI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:872 -; SI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:868 -; SI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:864 -; SI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:860 -; SI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:856 -; SI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:852 -; SI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:848 -; SI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:844 -; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:840 -; SI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:836 -; SI-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: ; return to shader part epilog -; -; VI-LABEL: gs_main: -; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s4, s0 -; VI-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s2, -1 -; VI-NEXT: s_mov_b32 s3, 0xe80000 -; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: s_add_u32 s0, s0, s4 -; VI-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; VI-NEXT: v_mov_b32_e32 v2, 0x80 -; VI-NEXT: s_addc_u32 s1, s1, 0 -; VI-NEXT: v_add_u32_e32 v1, vcc, 0x280, v0 -; VI-NEXT: v_add_u32_e32 v0, vcc, v2, v0 -; VI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; VI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; VI-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; VI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; VI-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:448 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:444 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:440 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:436 -; VI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:432 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:428 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:424 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:420 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:416 -; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:412 -; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; VI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; VI-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:408 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:404 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:400 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:392 -; VI-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:388 -; VI-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:384 -; VI-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; VI-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:380 -; VI-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:364 -; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; VI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; VI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; VI-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; VI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; VI-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:396 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:376 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:368 -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:360 -; VI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:356 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:352 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; VI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; VI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; VI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:328 -; VI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:324 -; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; VI-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; VI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:960 -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:956 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:952 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:948 -; VI-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; VI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; VI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:944 -; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:940 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:936 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:932 -; VI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:928 -; VI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:924 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:920 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:916 -; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; VI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:912 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:908 -; VI-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; VI-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:900 -; VI-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:892 -; VI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; VI-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:888 -; VI-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:884 -; VI-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:880 -; VI-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:876 -; VI-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:872 -; VI-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:868 -; VI-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:864 -; VI-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:860 -; VI-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:856 -; VI-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:852 -; VI-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:848 -; VI-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:844 -; VI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:840 -; VI-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:836 -; VI-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_add_f32_e32 v0, v0, v1 -; VI-NEXT: ; return to shader part epilog -; -; GFX9-MUBUF-LABEL: gs_main: -; GFX9-MUBUF: ; %bb.0: -; GFX9-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; GFX9-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; GFX9-MUBUF-NEXT: s_mov_b32 s2, -1 -; GFX9-MUBUF-NEXT: s_mov_b32 s3, 0xe00000 -; GFX9-MUBUF-NEXT: s_add_u32 s0, s0, s5 -; GFX9-MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:448 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:444 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:440 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:436 -; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:432 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:428 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:424 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:420 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:416 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:412 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:408 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:404 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:400 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:392 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:388 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:384 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; GFX9-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:380 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:364 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX9-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:396 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:376 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:368 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:360 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:356 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:352 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:328 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; GFX9-MUBUF-NEXT: v_add_u32_e32 v1, 0x280, v0 -; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 0x80, v0 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:324 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GFX9-MUBUF-NEXT: s_nop 0 -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:960 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:956 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:952 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:948 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:944 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:940 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:936 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:932 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:928 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:924 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:920 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:916 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:912 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:908 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[0:3], 0 offset:900 -; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:892 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:888 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:884 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:880 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:876 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:872 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:868 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:864 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:860 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:856 -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:852 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:848 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:844 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:840 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:836 -; GFX9-MUBUF-NEXT: buffer_load_dword v1, v1, s[0:3], 0 offen -; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX9-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX9-MUBUF-NEXT: ; return to shader part epilog -; -; GFX10_W32-MUBUF-LABEL: gs_main: -; GFX10_W32-MUBUF: ; %bb.0: -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s2, -1 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s3, 0x31c16000 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2 -; GFX10_W32-MUBUF-NEXT: s_add_u32 s0, s0, s5 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3 -; GFX10_W32-MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:448 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:444 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:440 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:436 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:432 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:428 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:424 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:420 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:416 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:412 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:408 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:404 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:400 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f -; GFX10_W32-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:396 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:392 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:388 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:384 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:380 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f -; GFX10_W32-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:376 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:368 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:364 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:360 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x280, v0 -; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v0, 0x80, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:356 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:352 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:328 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:324 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499 -; GFX10_W32-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:960 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:956 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:952 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:948 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:944 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:940 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:936 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:932 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:928 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:924 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:920 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:916 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:912 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:908 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:900 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:892 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:888 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:884 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:880 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:876 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:872 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:868 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:864 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:860 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:856 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:852 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:848 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:844 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:840 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:836 -; GFX10_W32-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen -; GFX10_W32-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX10_W32-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10_W32-MUBUF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10_W32-MUBUF-NEXT: ; return to shader part epilog -; -; GFX10_W64-MUBUF-LABEL: gs_main: -; GFX10_W64-MUBUF: ; %bb.0: -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s0, SCRATCH_RSRC_DWORD0 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s1, SCRATCH_RSRC_DWORD1 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s2, -1 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s3, 0x31e16000 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2 -; GFX10_W64-MUBUF-NEXT: s_add_u32 s0, s0, s5 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3 -; GFX10_W64-MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:448 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:444 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:440 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:436 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:432 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:428 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:424 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:420 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:416 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:412 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:408 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:404 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:400 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f -; GFX10_W64-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:396 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:392 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:388 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:384 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:380 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f -; GFX10_W64-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:376 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:372 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:368 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:364 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:360 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x280, v0 -; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v0, 0x80, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:356 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:352 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:348 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:344 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:340 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:336 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:332 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:328 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:324 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499 -; GFX10_W64-MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:960 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:956 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:952 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:948 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:944 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:940 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:936 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:932 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:928 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:924 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:920 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:916 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:912 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:908 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[0:3], 0 offset:904 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[0:3], 0 offset:900 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[0:3], 0 offset:896 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[0:3], 0 offset:892 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[0:3], 0 offset:888 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[0:3], 0 offset:884 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[0:3], 0 offset:880 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:876 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[0:3], 0 offset:872 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[0:3], 0 offset:868 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[0:3], 0 offset:864 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[0:3], 0 offset:860 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[0:3], 0 offset:856 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[0:3], 0 offset:852 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:848 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[0:3], 0 offset:844 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[0:3], 0 offset:840 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[0:3], 0 offset:836 -; GFX10_W64-MUBUF-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen -; GFX10_W64-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX10_W64-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10_W64-MUBUF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10_W64-MUBUF-NEXT: ; return to shader part epilog -; -; GFX9-FLATSCR-LABEL: gs_main: -; GFX9-FLATSCR: ; %bb.0: -; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s5 -; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-FLATSCR-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:416 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:400 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:384 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18 -; GFX9-FLATSCR-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s2 offset:336 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s1 offset:320 -; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x80, v23 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:960 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912 -; GFX9-FLATSCR-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17 -; GFX9-FLATSCR-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s3 offset:880 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s2 offset:864 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s1 offset:944 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:928 -; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x280, v23 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832 -; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0 -; GFX9-FLATSCR-NEXT: ; return to shader part epilog -; -; GFX10-FLATSCR-LABEL: gs_main: -; GFX10-FLATSCR: ; %bb.0: -; GFX10-FLATSCR-NEXT: s_add_u32 s0, s0, s5 -; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e -; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:448 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8 -; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:432 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:416 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:400 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f -; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x280, v0 -; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v35, 0x80, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:384 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:368 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:352 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:336 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:320 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16 -; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:960 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:944 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:928 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:912 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:896 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:880 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:864 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:848 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:832 -; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0 -; GFX10-FLATSCR-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLATSCR-NEXT: ; return to shader part epilog -; -; GFX9-FLATSCR-PAL-LABEL: gs_main: -; GFX9-FLATSCR-PAL: ; %bb.0: -; GFX9-FLATSCR-PAL-NEXT: s_getpc_b64 s[0:1] -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8 -; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff -; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s0, s5 -; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s2 offset:416 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:400 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:384 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s2 offset:336 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s1 offset:320 -; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x80, v23 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:960 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s3 offset:880 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s2 offset:864 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s1 offset:944 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:928 -; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x280, v23 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832 -; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off -; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0 -; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog -; -; GFX10-FLATSCR-PAL-LABEL: gs_main: -; GFX10-FLATSCR-PAL: ; %bb.0: -; GFX10-FLATSCR-PAL-NEXT: s_getpc_b64 s[0:1] -; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8 -; GFX10-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff -; GFX10-FLATSCR-PAL-NEXT: s_add_u32 s0, s0, s5 -; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e -; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:448 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8 -; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:432 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:416 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:400 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f -; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x280, v0 -; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v35, 0x80, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:384 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:368 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:352 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:336 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:320 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16 -; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:960 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:944 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:928 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:912 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:896 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:880 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:864 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:848 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:832 -; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0 -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog %v1 = extractelement <81 x float> , i32 %idx %v2 = extractelement <81 x float> , i32 %idx %r = fadd float %v1, %v2 ret float %r } +; Mesa GS and HS shaders have the preloaded scratch wave offset SGPR fixed at +; SGPR5, and the inreg implementation is used to reference it in the IR. The +; following tests confirm the shader and anything inserted after the return +; (i.e. SI_RETURN_TO_EPILOG) can access the scratch wave offset. + +; GCN-LABEL: {{^}}hs_ir_uses_scratch_offset: +; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5 +; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0 + +; GFX10-FLATSCR: s_add_u32 s0, s0, s5 +; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 +; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 + +; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1] +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8 +; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 +; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff +; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5 +; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0 + +; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1] +; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8 +; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0) +; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff +; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5 +; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0 +; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 + +; MUBUF: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; FLATSCR-NOT: SCRATCH_RSRC_DWORD + +; SIVI-NOT: s_mov_b32 s6 +; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen +; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen + +; GFX9_10-NOT: s_mov_b32 s5 +; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen +; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen + +; MUBUF-DAG: s_mov_b32 s2, s5 + +; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off +; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) { -; SI-LABEL: hs_ir_uses_scratch_offset: -; SI: ; %bb.0: -; SI-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; SI-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; SI-NEXT: s_mov_b32 s10, -1 -; SI-NEXT: s_mov_b32 s11, 0xe8f000 -; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; SI-NEXT: s_add_u32 s8, s8, s6 -; SI-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; SI-NEXT: v_mov_b32_e32 v2, 0x80 -; SI-NEXT: s_addc_u32 s9, s9, 0 -; SI-NEXT: v_add_i32_e32 v1, vcc, 0x280, v0 -; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0 -; SI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; SI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; SI-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; SI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; SI-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:448 -; SI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:444 -; SI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:440 -; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:436 -; SI-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:432 -; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:428 -; SI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:424 -; SI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:420 -; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:416 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:412 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; SI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; SI-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:408 -; SI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:404 -; SI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:400 -; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:392 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:388 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:384 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; SI-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:380 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; SI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:372 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; SI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:364 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; SI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; SI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; SI-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; SI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; SI-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; SI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:396 -; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:376 -; SI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:368 -; SI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:360 -; SI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:356 -; SI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:352 -; SI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:348 -; SI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:344 -; SI-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:340 -; SI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:336 -; SI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:332 -; SI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:328 -; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; SI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:324 -; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; SI-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen -; SI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:960 -; SI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:956 -; SI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:952 -; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:948 -; SI-NEXT: s_waitcnt expcnt(3) -; SI-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; SI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; SI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:944 -; SI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:940 -; SI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:936 -; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:932 -; SI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:928 -; SI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:924 -; SI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:920 -; SI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:916 -; SI-NEXT: s_waitcnt expcnt(2) -; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; SI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:912 -; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:908 -; SI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:904 -; SI-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:900 -; SI-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:896 -; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:892 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:888 -; SI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:884 -; SI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:880 -; SI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:876 -; SI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:872 -; SI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:868 -; SI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:864 -; SI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:860 -; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:856 -; SI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:852 -; SI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:848 -; SI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:844 -; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:840 -; SI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:836 -; SI-NEXT: buffer_load_dword v1, v1, s[8:11], 0 offen -; SI-NEXT: s_mov_b32 s2, s5 -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: ; return to shader part epilog -; -; VI-LABEL: hs_ir_uses_scratch_offset: -; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s10, -1 -; VI-NEXT: s_mov_b32 s11, 0xe80000 -; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: s_add_u32 s8, s8, s6 -; VI-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; VI-NEXT: v_mov_b32_e32 v2, 0x80 -; VI-NEXT: s_addc_u32 s9, s9, 0 -; VI-NEXT: v_add_u32_e32 v1, vcc, 0x280, v0 -; VI-NEXT: v_add_u32_e32 v0, vcc, v2, v0 -; VI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; VI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; VI-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; VI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; VI-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:448 -; VI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:444 -; VI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:440 -; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:436 -; VI-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:432 -; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:428 -; VI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:424 -; VI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:420 -; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:416 -; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:412 -; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; VI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; VI-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:408 -; VI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:404 -; VI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:400 -; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:392 -; VI-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:388 -; VI-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:384 -; VI-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; VI-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:380 -; VI-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; VI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:372 -; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; VI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:364 -; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; VI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; VI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; VI-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; VI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; VI-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; VI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:396 -; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:376 -; VI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:368 -; VI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:360 -; VI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:356 -; VI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:352 -; VI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:348 -; VI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:344 -; VI-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:340 -; VI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:336 -; VI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:332 -; VI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:328 -; VI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; VI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:324 -; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; VI-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen -; VI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:960 -; VI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:956 -; VI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:952 -; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:948 -; VI-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; VI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; VI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:944 -; VI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:940 -; VI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:936 -; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:932 -; VI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:928 -; VI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:924 -; VI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:920 -; VI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:916 -; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; VI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:912 -; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:908 -; VI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:904 -; VI-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:900 -; VI-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:896 -; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:892 -; VI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:888 -; VI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:884 -; VI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:880 -; VI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:876 -; VI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:872 -; VI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:868 -; VI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:864 -; VI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:860 -; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:856 -; VI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:852 -; VI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:848 -; VI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:844 -; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:840 -; VI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:836 -; VI-NEXT: buffer_load_dword v1, v1, s[8:11], 0 offen -; VI-NEXT: s_mov_b32 s2, s5 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_add_f32_e32 v0, v0, v1 -; VI-NEXT: ; return to shader part epilog -; -; GFX9-MUBUF-LABEL: hs_ir_uses_scratch_offset: -; GFX9-MUBUF: ; %bb.0: -; GFX9-MUBUF-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX9-MUBUF-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX9-MUBUF-NEXT: s_mov_b32 s10, -1 -; GFX9-MUBUF-NEXT: s_mov_b32 s11, 0xe00000 -; GFX9-MUBUF-NEXT: s_add_u32 s8, s8, s5 -; GFX9-MUBUF-NEXT: s_addc_u32 s9, s9, 0 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:448 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:444 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:440 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:436 -; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:432 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:428 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:424 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:420 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:416 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:412 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:408 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:404 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:400 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:392 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:388 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:384 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; GFX9-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:380 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:372 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:364 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX9-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:396 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:376 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:368 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:360 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:356 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:352 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:348 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:344 -; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:340 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:336 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:332 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:328 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; GFX9-MUBUF-NEXT: v_add_u32_e32 v1, 0x280, v0 -; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 0x80, v0 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:324 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen -; GFX9-MUBUF-NEXT: s_nop 0 -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:960 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:956 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:952 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:948 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:944 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:940 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:936 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:932 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:928 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:924 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:920 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:916 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:912 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:908 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:904 -; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:900 -; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:896 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:892 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:888 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:884 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:880 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:876 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:872 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:868 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:864 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:860 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:856 -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:852 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:848 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:844 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:840 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:836 -; GFX9-MUBUF-NEXT: buffer_load_dword v1, v1, s[8:11], 0 offen -; GFX9-MUBUF-NEXT: s_mov_b32 s2, s5 -; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX9-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX9-MUBUF-NEXT: ; return to shader part epilog -; -; GFX10_W32-MUBUF-LABEL: hs_ir_uses_scratch_offset: -; GFX10_W32-MUBUF: ; %bb.0: -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s10, -1 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s11, 0x31c16000 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1 -; GFX10_W32-MUBUF-NEXT: s_add_u32 s8, s8, s5 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3 -; GFX10_W32-MUBUF-NEXT: s_addc_u32 s9, s9, 0 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:448 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:444 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:440 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:436 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:432 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:428 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:424 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:420 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:416 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:412 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:408 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:404 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:400 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f -; GFX10_W32-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:396 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:392 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:388 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:384 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:380 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f -; GFX10_W32-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:376 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:372 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:368 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:364 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:360 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x280, v0 -; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v0, 0x80, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:356 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:352 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:348 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:344 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:340 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:336 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:332 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:328 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:324 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499 -; GFX10_W32-MUBUF-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:960 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:956 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:952 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:948 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:944 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:940 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:936 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:932 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:928 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:924 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:920 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:916 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:912 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:908 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:904 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:900 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:896 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:892 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:888 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:884 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:880 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:876 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:872 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:868 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:864 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:860 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:856 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:852 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:848 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:844 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:840 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:836 -; GFX10_W32-MUBUF-NEXT: buffer_load_dword v1, v6, s[8:11], 0 offen -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s2, s5 -; GFX10_W32-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX10_W32-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10_W32-MUBUF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10_W32-MUBUF-NEXT: ; return to shader part epilog -; -; GFX10_W64-MUBUF-LABEL: hs_ir_uses_scratch_offset: -; GFX10_W64-MUBUF: ; %bb.0: -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s10, -1 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s11, 0x31e16000 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1 -; GFX10_W64-MUBUF-NEXT: s_add_u32 s8, s8, s5 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3 -; GFX10_W64-MUBUF-NEXT: s_addc_u32 s9, s9, 0 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:448 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:444 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:440 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:436 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:432 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:428 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:424 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:420 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:416 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:412 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:408 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:404 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:400 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f -; GFX10_W64-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:396 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:392 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:388 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:384 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:380 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f -; GFX10_W64-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:376 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:372 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:368 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:364 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:360 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x280, v0 -; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v0, 0x80, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:356 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:352 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:348 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:344 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:340 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:336 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:332 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:328 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:324 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499 -; GFX10_W64-MUBUF-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:960 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:956 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:952 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:948 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:944 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:940 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:936 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:932 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:928 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:924 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:920 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:916 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:912 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:908 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:904 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:900 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:896 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:892 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:888 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:884 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:880 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:876 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:872 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:868 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:864 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:860 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:856 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:852 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:848 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:844 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:840 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:836 -; GFX10_W64-MUBUF-NEXT: buffer_load_dword v1, v6, s[8:11], 0 offen -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s2, s5 -; GFX10_W64-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX10_W64-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10_W64-MUBUF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10_W64-MUBUF-NEXT: ; return to shader part epilog -; -; GFX9-FLATSCR-LABEL: hs_ir_uses_scratch_offset: -; GFX9-FLATSCR: ; %bb.0: -; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s5 -; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-FLATSCR-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s3 offset:416 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:400 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:384 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18 -; GFX9-FLATSCR-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s3 offset:336 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s1 offset:320 -; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x80, v23 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:960 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912 -; GFX9-FLATSCR-NEXT: s_mov_b32 s4, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17 -; GFX9-FLATSCR-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s4 offset:880 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s3 offset:864 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s1 offset:944 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:928 -; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x280, v23 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832 -; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off -; GFX9-FLATSCR-NEXT: s_mov_b32 s2, s7 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0 -; GFX9-FLATSCR-NEXT: ; return to shader part epilog -; -; GFX10-FLATSCR-LABEL: hs_ir_uses_scratch_offset: -; GFX10-FLATSCR: ; %bb.0: -; GFX10-FLATSCR-NEXT: s_add_u32 s0, s0, s5 -; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e -; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:448 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8 -; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:432 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:416 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:400 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f -; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x280, v0 -; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v35, 0x80, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:384 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:368 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:352 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:336 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:320 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16 -; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:960 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:944 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:928 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:912 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:896 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:880 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:864 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:848 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:832 -; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off -; GFX10-FLATSCR-NEXT: s_mov_b32 s2, s7 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0 -; GFX10-FLATSCR-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLATSCR-NEXT: ; return to shader part epilog -; -; GFX9-FLATSCR-PAL-LABEL: hs_ir_uses_scratch_offset: -; GFX9-FLATSCR-PAL: ; %bb.0: -; GFX9-FLATSCR-PAL-NEXT: s_getpc_b64 s[0:1] -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8 -; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff -; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s0, s5 -; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 offset:416 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:400 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:384 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s3 offset:336 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s1 offset:320 -; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x80, v23 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:960 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s4, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s4 offset:880 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s3 offset:864 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s1 offset:944 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:928 -; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x280, v23 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832 -; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, s5 -; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0 -; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog -; -; GFX10-FLATSCR-PAL-LABEL: hs_ir_uses_scratch_offset: -; GFX10-FLATSCR-PAL: ; %bb.0: -; GFX10-FLATSCR-PAL-NEXT: s_getpc_b64 s[0:1] -; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8 -; GFX10-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff -; GFX10-FLATSCR-PAL-NEXT: s_add_u32 s0, s0, s5 -; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e -; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:448 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8 -; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:432 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:416 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:400 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f -; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x280, v0 -; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v35, 0x80, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:384 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:368 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:352 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:336 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:320 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16 -; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:960 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:944 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:928 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:912 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:896 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:880 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:864 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:848 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:832 -; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off -; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s2, s5 -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0 -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog %v1 = extractelement <81 x float> , i32 %idx %v2 = extractelement <81 x float> , i32 %idx %f = fadd float %v1, %v2 @@ -5954,994 +317,50 @@ define amdgpu_hs <{i32, i32, i32, float}> @hs_ir_uses_scratch_offset(i32 inreg, ret <{i32, i32, i32, float}> %r2 } +; GCN-LABEL: {{^}}gs_ir_uses_scratch_offset: +; GFX9-FLATSCR: s_add_u32 flat_scratch_lo, s0, s5 +; GFX9-FLATSCR: s_addc_u32 flat_scratch_hi, s1, 0 + +; GFX10-FLATSCR: s_add_u32 s0, s0, s5 +; GFX10-FLATSCR: s_addc_u32 s1, s1, 0 +; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-FLATSCR: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 + +; GFX9-FLATSCR-PAL-DAG: s_getpc_b64 s[0:1] +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 s0, s8 +; GFX9-FLATSCR-PAL-DAG: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX9-FLATSCR-PAL-DAG: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-FLATSCR-PAL-DAG: v_mov_b32_e32 v0, 0xbf20e7f4 +; GFX9-FLATSCR-PAL-DAG: s_mov_b32 vcc_hi, 0 +; GFX9-FLATSCR-PAL-DAG: s_waitcnt lgkmcnt(0) +; GFX9-FLATSCR-PAL-DAG: s_and_b32 s1, s1, 0xffff +; GFX9-FLATSCR-PAL-DAG: s_add_u32 flat_scratch_lo, s0, s5 +; GFX9-FLATSCR-PAL-DAG: s_addc_u32 flat_scratch_hi, s1, 0 + +; GFX10-FLATSCR-PAL: s_getpc_b64 s[0:1] +; GFX10-FLATSCR-PAL: s_mov_b32 s0, s8 +; GFX10-FLATSCR-PAL: s_load_dwordx2 s[0:1], s[0:1], 0x0 +; GFX10-FLATSCR-PAL: s_waitcnt lgkmcnt(0) +; GFX10-FLATSCR-PAL: s_and_b32 s1, s1, 0xffff +; GFX10-FLATSCR-PAL: s_add_u32 s0, s0, s5 +; GFX10-FLATSCR-PAL: s_addc_u32 s1, s1, 0 +; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-FLATSCR-PAL: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 + +; MUBUF: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; FLATSCR-NOT: SCRATCH_RSRC_DWORD + +; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen +; SIVI: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen + +; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen +; GFX9_10-MUBUF-DAG: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen + +; MUBUF-DAG: s_mov_b32 s2, s5 + +; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off +; FLATSCR-DAG: scratch_load_dword {{v[0-9]+}}, {{v[0-9]+}}, off define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg %swo, i32 %idx) { -; SI-LABEL: gs_ir_uses_scratch_offset: -; SI: ; %bb.0: -; SI-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; SI-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; SI-NEXT: s_mov_b32 s10, -1 -; SI-NEXT: s_mov_b32 s11, 0xe8f000 -; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; SI-NEXT: s_add_u32 s8, s8, s6 -; SI-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; SI-NEXT: v_mov_b32_e32 v2, 0x80 -; SI-NEXT: s_addc_u32 s9, s9, 0 -; SI-NEXT: v_add_i32_e32 v1, vcc, 0x280, v0 -; SI-NEXT: v_add_i32_e32 v0, vcc, v2, v0 -; SI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; SI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; SI-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; SI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; SI-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:448 -; SI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:444 -; SI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:440 -; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:436 -; SI-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:432 -; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:428 -; SI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:424 -; SI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:420 -; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:416 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:412 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; SI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; SI-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:408 -; SI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:404 -; SI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:400 -; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:392 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:388 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:384 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; SI-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:380 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; SI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:372 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; SI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:364 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; SI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; SI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; SI-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; SI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; SI-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; SI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:396 -; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:376 -; SI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:368 -; SI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:360 -; SI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:356 -; SI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:352 -; SI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:348 -; SI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:344 -; SI-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:340 -; SI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:336 -; SI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:332 -; SI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:328 -; SI-NEXT: s_waitcnt expcnt(1) -; SI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; SI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:324 -; SI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; SI-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen -; SI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:960 -; SI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:956 -; SI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:952 -; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:948 -; SI-NEXT: s_waitcnt expcnt(3) -; SI-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; SI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; SI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:944 -; SI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:940 -; SI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:936 -; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:932 -; SI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:928 -; SI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:924 -; SI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:920 -; SI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:916 -; SI-NEXT: s_waitcnt expcnt(2) -; SI-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; SI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:912 -; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:908 -; SI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:904 -; SI-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:900 -; SI-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:896 -; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:892 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; SI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:888 -; SI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:884 -; SI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:880 -; SI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:876 -; SI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:872 -; SI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:868 -; SI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:864 -; SI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:860 -; SI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:856 -; SI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:852 -; SI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:848 -; SI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:844 -; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:840 -; SI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:836 -; SI-NEXT: buffer_load_dword v1, v1, s[8:11], 0 offen -; SI-NEXT: s_mov_b32 s2, s5 -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_add_f32_e32 v0, v0, v1 -; SI-NEXT: s_waitcnt expcnt(0) -; SI-NEXT: ; return to shader part epilog -; -; VI-LABEL: gs_ir_uses_scratch_offset: -; VI: ; %bb.0: -; VI-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; VI-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; VI-NEXT: s_mov_b32 s10, -1 -; VI-NEXT: s_mov_b32 s11, 0xe80000 -; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; VI-NEXT: s_add_u32 s8, s8, s6 -; VI-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; VI-NEXT: v_mov_b32_e32 v2, 0x80 -; VI-NEXT: s_addc_u32 s9, s9, 0 -; VI-NEXT: v_add_u32_e32 v1, vcc, 0x280, v0 -; VI-NEXT: v_add_u32_e32 v0, vcc, v2, v0 -; VI-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; VI-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; VI-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; VI-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; VI-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:448 -; VI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:444 -; VI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:440 -; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:436 -; VI-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:432 -; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:428 -; VI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:424 -; VI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:420 -; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:416 -; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:412 -; VI-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; VI-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; VI-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:408 -; VI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:404 -; VI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:400 -; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:392 -; VI-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:388 -; VI-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:384 -; VI-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; VI-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:380 -; VI-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; VI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:372 -; VI-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; VI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:364 -; VI-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; VI-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; VI-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; VI-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; VI-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; VI-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; VI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:396 -; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:376 -; VI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:368 -; VI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:360 -; VI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:356 -; VI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:352 -; VI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:348 -; VI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:344 -; VI-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:340 -; VI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:336 -; VI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:332 -; VI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:328 -; VI-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; VI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:324 -; VI-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; VI-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen -; VI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:960 -; VI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:956 -; VI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:952 -; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:948 -; VI-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; VI-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; VI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:944 -; VI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:940 -; VI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:936 -; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:932 -; VI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:928 -; VI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:924 -; VI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:920 -; VI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:916 -; VI-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; VI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:912 -; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:908 -; VI-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:904 -; VI-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:900 -; VI-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:896 -; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:892 -; VI-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; VI-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:888 -; VI-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:884 -; VI-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:880 -; VI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:876 -; VI-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:872 -; VI-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:868 -; VI-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:864 -; VI-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:860 -; VI-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:856 -; VI-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:852 -; VI-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:848 -; VI-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:844 -; VI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:840 -; VI-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:836 -; VI-NEXT: buffer_load_dword v1, v1, s[8:11], 0 offen -; VI-NEXT: s_mov_b32 s2, s5 -; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_add_f32_e32 v0, v0, v1 -; VI-NEXT: ; return to shader part epilog -; -; GFX9-MUBUF-LABEL: gs_ir_uses_scratch_offset: -; GFX9-MUBUF: ; %bb.0: -; GFX9-MUBUF-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX9-MUBUF-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX9-MUBUF-NEXT: s_mov_b32 s10, -1 -; GFX9-MUBUF-NEXT: s_mov_b32 s11, 0xe00000 -; GFX9-MUBUF-NEXT: s_add_u32 s8, s8, s5 -; GFX9-MUBUF-NEXT: s_addc_u32 s9, s9, 0 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbf20e7f4 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f3d349e -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f523be1 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v6, 0x3f638e37 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:448 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:444 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:440 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:436 -; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:432 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:428 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:424 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:420 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:416 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd8a3 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:412 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbeae29dc -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbe31934f -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:408 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:404 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:400 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:392 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:388 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0xb702e758 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:384 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e31934f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89c -; GFX9-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbe319356 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:380 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v9, 0x3e319356 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:372 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29dc -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:364 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3efcd89f -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf20e7f5 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v13, 0xbf3d349e -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX9-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:396 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:376 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:368 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:360 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:356 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:352 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:348 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:344 -; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:340 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:336 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:332 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:328 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f5 -; GFX9-MUBUF-NEXT: v_add_u32_e32 v1, 0x280, v0 -; GFX9-MUBUF-NEXT: v_add_u32_e32 v0, 0x80, v0 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:324 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f4 -; GFX9-MUBUF-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen -; GFX9-MUBUF-NEXT: s_nop 0 -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:960 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:956 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:952 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:948 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3703c499 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3f3d349c -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:944 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:940 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:936 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:932 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:928 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:924 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:920 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:916 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:912 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:908 -; GFX9-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:904 -; GFX9-MUBUF-NEXT: buffer_store_dword v6, off, s[8:11], 0 offset:900 -; GFX9-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:896 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:892 -; GFX9-MUBUF-NEXT: v_mov_b32_e32 v5, 0xbf5f2ee2 -; GFX9-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:888 -; GFX9-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:884 -; GFX9-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:880 -; GFX9-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:876 -; GFX9-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:872 -; GFX9-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:868 -; GFX9-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:864 -; GFX9-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:860 -; GFX9-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:856 -; GFX9-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:852 -; GFX9-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:848 -; GFX9-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:844 -; GFX9-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:840 -; GFX9-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:836 -; GFX9-MUBUF-NEXT: buffer_load_dword v1, v1, s[8:11], 0 offen -; GFX9-MUBUF-NEXT: s_mov_b32 s2, s5 -; GFX9-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX9-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX9-MUBUF-NEXT: ; return to shader part epilog -; -; GFX10_W32-MUBUF-LABEL: gs_ir_uses_scratch_offset: -; GFX10_W32-MUBUF: ; %bb.0: -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s10, -1 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2 -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s11, 0x31c16000 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1 -; GFX10_W32-MUBUF-NEXT: s_add_u32 s8, s8, s5 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3 -; GFX10_W32-MUBUF-NEXT: s_addc_u32 s9, s9, 0 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:448 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:444 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:440 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:436 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:432 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:428 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:424 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:420 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:416 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:412 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:408 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:404 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:400 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f -; GFX10_W32-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:396 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:392 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:388 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:384 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:380 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f -; GFX10_W32-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:376 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:372 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:368 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:364 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:360 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x280, v0 -; GFX10_W32-MUBUF-NEXT: v_add_nc_u32_e32 v0, 0x80, v0 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:356 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:352 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:348 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:344 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:340 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:336 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:332 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:328 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:324 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499 -; GFX10_W32-MUBUF-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:960 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:956 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:952 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:948 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:944 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:940 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:936 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:932 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:928 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:924 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:920 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:916 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:912 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:908 -; GFX10_W32-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:904 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:900 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:896 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:892 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:888 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:884 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:880 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:876 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:872 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:868 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:864 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:860 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:856 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:852 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:848 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:844 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:840 -; GFX10_W32-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:836 -; GFX10_W32-MUBUF-NEXT: buffer_load_dword v1, v6, s[8:11], 0 offen -; GFX10_W32-MUBUF-NEXT: s_mov_b32 s2, s5 -; GFX10_W32-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX10_W32-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10_W32-MUBUF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10_W32-MUBUF-NEXT: ; return to shader part epilog -; -; GFX10_W64-MUBUF-LABEL: gs_ir_uses_scratch_offset: -; GFX10_W64-MUBUF: ; %bb.0: -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s10, -1 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v2, 0x3f3d349e -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v4, 0x3f5f2ee2 -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s11, 0x31e16000 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v3, 0x3f523be1 -; GFX10_W64-MUBUF-NEXT: s_add_u32 s8, s8, s5 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbefcd8a3 -; GFX10_W64-MUBUF-NEXT: s_addc_u32 s9, s9, 0 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v8, 0xbefcd89f -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:448 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:444 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:440 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:436 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:432 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:428 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:424 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:420 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:416 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:412 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:408 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:404 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:400 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v1, 0xbe319356 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0xbe31934f -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xb7043519 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0xb702e758 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3e31934f -; GFX10_W64-MUBUF-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:396 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:392 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:388 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:384 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:380 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v7, 0x3e319356 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0x3eae29d8 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v11, 0x3eae29dc -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0x3efcd89c -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v13, 0x3efcd89f -; GFX10_W64-MUBUF-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:376 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:372 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:368 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:364 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:360 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v10, 0xbf20e7f5 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v15, 0xbf5f2ee3 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v12, 0xbf3d349e -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf523be3 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v16, 0xbf638e39 -; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v6, 0x280, v0 -; GFX10_W64-MUBUF-NEXT: v_add_nc_u32_e32 v0, 0x80, v0 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:356 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:352 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:348 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:344 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:340 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v17, 0x3f20e7f5 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:336 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:332 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f20e7f4 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:328 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:324 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v18, 0x3703c499 -; GFX10_W64-MUBUF-NEXT: buffer_load_dword v0, v0, s[8:11], 0 offen -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:960 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:956 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:952 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:948 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:944 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:940 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0x3f3d349c -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v19, 0xbf523be1 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:936 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:932 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:928 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:924 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:920 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:916 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:912 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:908 -; GFX10_W64-MUBUF-NEXT: v_mov_b32_e32 v14, 0xbf5f2ee2 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v15, off, s[8:11], 0 offset:904 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v5, off, s[8:11], 0 offset:900 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v16, off, s[8:11], 0 offset:896 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v4, off, s[8:11], 0 offset:892 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v14, off, s[8:11], 0 offset:888 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:884 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v19, off, s[8:11], 0 offset:880 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:876 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v12, off, s[8:11], 0 offset:872 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v17, off, s[8:11], 0 offset:868 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v13, off, s[8:11], 0 offset:864 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v11, off, s[8:11], 0 offset:860 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v7, off, s[8:11], 0 offset:856 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v18, off, s[8:11], 0 offset:852 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:848 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v9, off, s[8:11], 0 offset:844 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v8, off, s[8:11], 0 offset:840 -; GFX10_W64-MUBUF-NEXT: buffer_store_dword v10, off, s[8:11], 0 offset:836 -; GFX10_W64-MUBUF-NEXT: buffer_load_dword v1, v6, s[8:11], 0 offen -; GFX10_W64-MUBUF-NEXT: s_mov_b32 s2, s5 -; GFX10_W64-MUBUF-NEXT: s_waitcnt vmcnt(0) -; GFX10_W64-MUBUF-NEXT: v_add_f32_e32 v0, v0, v1 -; GFX10_W64-MUBUF-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10_W64-MUBUF-NEXT: ; return to shader part epilog -; -; GFX9-FLATSCR-LABEL: gs_ir_uses_scratch_offset: -; GFX9-FLATSCR: ; %bb.0: -; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s5 -; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-FLATSCR-NEXT: v_and_b32_e32 v23, 0x1fc, v0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f523be1 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f3d349e -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f638e37 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-FLATSCR-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s3 offset:416 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xbeae29dc -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v15, 0xbf3d349e -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, 0x3efcd89f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89c -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0xb702e758 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, 0xbe31934f -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbe319356 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf523be3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:400 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:384 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, 0xbf638e39 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v18 -; GFX9-FLATSCR-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v15 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v21 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s3 offset:336 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s1 offset:320 -; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x80, v23 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v18 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v4 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v22, v5 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: scratch_load_dword v13, v1, off -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, 0x3f3d349c -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v19, v6 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, v15 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:960 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912 -; GFX9-FLATSCR-NEXT: s_mov_b32 s4, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v21, v5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v7 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v17 -; GFX9-FLATSCR-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v3, 0x3703c499 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v17 -; GFX9-FLATSCR-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], s4 offset:880 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[7:10], s3 offset:864 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[3:6], s1 offset:944 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v8, v16 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v9, v2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v10, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, v11 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v5, v3 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, v12 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v17, v2 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v18, v0 -; GFX9-FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:928 -; GFX9-FLATSCR-NEXT: v_add_u32_e32 v1, 0x280, v23 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832 -; GFX9-FLATSCR-NEXT: scratch_load_dword v0, v1, off -; GFX9-FLATSCR-NEXT: s_mov_b32 s2, s7 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: v_add_f32_e32 v0, v13, v0 -; GFX9-FLATSCR-NEXT: ; return to shader part epilog -; -; GFX10-FLATSCR-LABEL: gs_ir_uses_scratch_offset: -; GFX10-FLATSCR: ; %bb.0: -; GFX10-FLATSCR-NEXT: s_add_u32 s0, s0, s5 -; GFX10-FLATSCR-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, 0x3f3d349e -; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 0x3f523be1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:448 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v8 -; GFX10-FLATSCR-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v7 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, v6 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, 0xbefcd89f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v9 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, 0xbf3d349e -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, 0xbf523be3 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:432 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:416 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:400 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, 0x3efcd89f -; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v39, 0x280, v0 -; GFX10-FLATSCR-NEXT: v_add_nc_u32_e32 v35, 0x80, v0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, 0xb702e758 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, 0xb7043519 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, 0xbe31934f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, 0xbe319356 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, 0x3efcd89c -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, 0xbf638e39 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v20 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v16 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v23 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, 0x3703c499 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v14 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v12 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v18 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:384 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:368 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:352 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:336 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:320 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v17 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v11 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v9 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, 0xbf523be1 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, 0x3f3d349c -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v7 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v16 -; GFX10-FLATSCR-NEXT: scratch_load_dword v10, v35, off -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v21 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v5 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v20 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v6 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:960 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:944 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:928 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:912 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v6 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v18 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v8 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v1, v27 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v2, v14 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v3, v12 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v11 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v9 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:896 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:880 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:864 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:848 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:832 -; GFX10-FLATSCR-NEXT: scratch_load_dword v0, v39, off -; GFX10-FLATSCR-NEXT: s_mov_b32 s2, s7 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: v_add_f32_e32 v0, v10, v0 -; GFX10-FLATSCR-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLATSCR-NEXT: ; return to shader part epilog -; -; GFX9-FLATSCR-PAL-LABEL: gs_ir_uses_scratch_offset: -; GFX9-FLATSCR-PAL: ; %bb.0: -; GFX9-FLATSCR-PAL-NEXT: s_getpc_b64 s[0:1] -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8 -; GFX9-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX9-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-FLATSCR-PAL-NEXT: v_and_b32_e32 v23, 0x1fc, v0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbf20e7f4 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff -; GFX9-FLATSCR-PAL-NEXT: s_add_u32 flat_scratch_lo, s0, s5 -; GFX9-FLATSCR-PAL-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f5f2ee2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f523be1 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f3d349e -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], vcc_lo offset:448 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f638e37 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v7 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v6 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v5 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_hi offset:432 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s3 offset:416 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], vcc_lo offset:368 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xbeae29dc -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0xbf3d349e -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf20e7f5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0x3efcd89f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89c -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbefcd89f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbefcd8a3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0xb702e758 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xb7043519 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, 0xbe31934f -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbe319356 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:352 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf523be3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf5f2ee3 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], s1 offset:400 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:384 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0xbf638e39 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v18 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f20e7f5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v15 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v21 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s3 offset:336 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s1 offset:320 -; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x80, v23 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v18 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v4 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v5 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v13, v1, off -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0x3f20e7f4 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[19:22], vcc_lo offset:896 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0xbf523be1 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0x3f3d349c -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v6 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, v15 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], s0 offset:960 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], vcc_hi offset:912 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s4, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, v5 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v7 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v17 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s3, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0x3703c499 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v14 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v12 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v17 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s1, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[18:21], s4 offset:880 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[7:10], s3 offset:864 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[3:6], s1 offset:944 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, v16 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, v2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s0, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v11 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, v3 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, v14 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, v12 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_lo, 0 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, v2 -; GFX9-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v0 -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 vcc_hi, 0 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[8:11], s0 offset:928 -; GFX9-FLATSCR-PAL-NEXT: v_add_u32_e32 v1, 0x280, v23 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], vcc_lo offset:848 -; GFX9-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[15:18], vcc_hi offset:832 -; GFX9-FLATSCR-PAL-NEXT: scratch_load_dword v0, v1, off -; GFX9-FLATSCR-PAL-NEXT: s_mov_b32 s2, s5 -; GFX9-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v13, v0 -; GFX9-FLATSCR-PAL-NEXT: ; return to shader part epilog -; -; GFX10-FLATSCR-PAL-LABEL: gs_ir_uses_scratch_offset: -; GFX10-FLATSCR-PAL: ; %bb.0: -; GFX10-FLATSCR-PAL-NEXT: s_getpc_b64 s[0:1] -; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s0, s8 -; GFX10-FLATSCR-PAL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-FLATSCR-PAL-NEXT: s_and_b32 s1, s1, 0xffff -; GFX10-FLATSCR-PAL-NEXT: s_add_u32 s0, s0, s5 -; GFX10-FLATSCR-PAL-NEXT: s_addc_u32 s1, s1, 0 -; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 -; GFX10-FLATSCR-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xbf20e7f4 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v8, 0x3f3d349e -; GFX10-FLATSCR-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v6, 0x3f5f2ee2 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v7, 0x3f523be1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v9, 0xbeae29dc -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v5, 0x3f638e37 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:448 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v8 -; GFX10-FLATSCR-PAL-NEXT: v_and_b32_e32 v0, 0x1fc, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v7 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, v6 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v11, 0xbefcd89f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0xbefcd8a3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v10, v9 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v16, 0xbf3d349e -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v20, 0xbf5f2ee3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, 0xbf523be3 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[5:8], off offset:432 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[1:4], off offset:416 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[9:12], off offset:400 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v12, 0x3eae29dc -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v14, 0x3e319356 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, 0x3efcd89f -; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v39, 0x280, v0 -; GFX10-FLATSCR-PAL-NEXT: v_add_nc_u32_e32 v35, 0x80, v0 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, 0xb702e758 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, 0xb7043519 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, 0xbe31934f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, 0xbe319356 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v17, 0xbf20e7f5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v13, 0x3eae29d8 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v15, 0x3e31934f -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, 0x3efcd89c -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v21, 0xbf638e39 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v22, v20 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v24, 0x3f20e7f5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v25, v16 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v23 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v4, 0x3f20e7f4 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v27, 0x3703c499 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v28, v14 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v29, v12 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v30, v18 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:384 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:368 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:352 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:336 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:320 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v17 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v11 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v9 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v31, 0xbf523be1 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, 0x3f3d349c -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v32, v7 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, v16 -; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v10, v35, off -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v35, v21 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v36, v5 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v37, v20 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v38, v6 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[4:7], off offset:960 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[27:30], off offset:944 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:928 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:912 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v33, 0xbf5f2ee2 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v34, v6 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v23, v18 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v26, v8 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v0, v3 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v1, v27 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v2, v14 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v3, v12 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v18, v11 -; GFX10-FLATSCR-PAL-NEXT: v_mov_b32_e32 v19, v9 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[35:38], off offset:896 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[31:34], off offset:880 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:864 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[0:3], off offset:848 -; GFX10-FLATSCR-PAL-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:832 -; GFX10-FLATSCR-PAL-NEXT: scratch_load_dword v0, v39, off -; GFX10-FLATSCR-PAL-NEXT: s_mov_b32 s2, s5 -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-PAL-NEXT: v_add_f32_e32 v0, v10, v0 -; GFX10-FLATSCR-PAL-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-FLATSCR-PAL-NEXT: ; return to shader part epilog %v1 = extractelement <81 x float> , i32 %idx %v2 = extractelement <81 x float> , i32 %idx %f = fadd float %v1, %v2 @@ -6949,10 +368,3 @@ define amdgpu_gs <{i32, i32, i32, float}> @gs_ir_uses_scratch_offset(i32 inreg, %r2 = insertvalue <{i32, i32, i32, float}> %r1, float %f, 3 ret <{i32, i32, i32, float}> %r2 } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; FLATSCR: {{.*}} -; GCN: {{.*}} -; GFX9_10: {{.*}} -; GFX9_10-MUBUF: {{.*}} -; MUBUF: {{.*}} -; SIVI: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir index 2fca0021f5f99..6fca8187f6e21 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir @@ -235,7 +235,6 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -248,7 +247,6 @@ body: | S_CBRANCH_SCC1 %bb.2, implicit $scc bb.1: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8 S_NOP 0 bb.2: @@ -288,7 +286,7 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8 + ; VMEM-GFX8-NEXT: liveins: $sgpr8 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -303,7 +301,7 @@ body: | S_CBRANCH_SCC1 %bb.2, implicit $scc bb.1: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8 + liveins: $sgpr8 S_NOP 0 bb.2: @@ -345,7 +343,6 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -358,7 +355,6 @@ body: | S_CBRANCH_SCC1 %bb.2, implicit $scc bb.1: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9 S_NOP 0 bb.2: @@ -399,7 +395,7 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9 + ; VMEM-GFX8-NEXT: liveins: $sgpr8_sgpr9 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -414,7 +410,7 @@ body: | S_CBRANCH_SCC1 %bb.2, implicit $scc bb.1: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8_sgpr9 + liveins: $sgpr8_sgpr9 S_NOP 0 bb.2: @@ -444,7 +440,7 @@ body: | ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; VMEM-GFX8-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec + ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1 ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 0, undef $vgpr0 @@ -452,23 +448,22 @@ body: | ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) ; VMEM-GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) - ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 - ; VMEM-GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) + ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1 + ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1 + ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; VMEM-GFX8-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr9, 0, undef $vgpr1 ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) - ; VMEM-GFX8-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr9, 0, undef $vgpr0 - ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) - ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 8200, implicit $exec - ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; VMEM-GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) - ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; VMEM-GFX8-NEXT: $vgpr0 = V_MOV_B32_e32 8200, implicit $exec + ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFEN killed $vgpr1, killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; VMEM-GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) + ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 ; VMEM-GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) + ; VMEM-GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) ; VMEM-GFX8-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -482,7 +477,6 @@ body: | S_CBRANCH_SCC1 %bb.2, implicit $scc bb.1: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9 S_NOP 0 bb.2: @@ -508,7 +502,7 @@ body: | ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; VMEM-GFX8-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec + ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1 ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) @@ -516,23 +510,23 @@ body: | ; VMEM-GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) ; VMEM-GFX8-NEXT: $sgpr8 = V_READLANE_B32 killed $vgpr0, 0 ; VMEM-GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) - ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7 - ; VMEM-GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) + ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 ; VMEM-GFX8-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1 + ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1 + ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) - ; VMEM-GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) - ; VMEM-GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 16392, implicit $exec - ; VMEM-GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) - ; VMEM-GFX8-NEXT: $sgpr9 = V_READLANE_B32 killed $vgpr0, 0 - ; VMEM-GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) - ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; VMEM-GFX8-NEXT: $vgpr0 = V_MOV_B32_e32 16392, implicit $exec + ; VMEM-GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; VMEM-GFX8-NEXT: $sgpr9 = V_READLANE_B32 killed $vgpr1, 0 + ; VMEM-GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) + ; VMEM-GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 ; VMEM-GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) + ; VMEM-GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) ; VMEM-GFX8-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9 + ; VMEM-GFX8-NEXT: liveins: $sgpr8, $sgpr9 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -548,7 +542,7 @@ body: | S_CBRANCH_SCC1 %bb.2, implicit $scc bb.1: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $sgpr8, $sgpr9 + liveins: $sgpr8, $sgpr9 S_NOP 0 bb.2: diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir index 87ccf7a930ef9..076e891b16bbc 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir @@ -1,10 +1,35 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=false -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s # Check that we allocate 2 emergency stack slots if we're spilling # SGPRs to memory and potentially have an offset larger than fits in # the addressing mode of the memory instructions. +# CHECK-LABEL: name: test +# CHECK: stack: +# CHECK-NEXT: - { id: 0, name: '', type: spill-slot, offset: 8, size: 4, alignment: 4, +# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true, +# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +# CHECK-NEXT: - { id: 1, name: '', type: default, offset: 12, size: 4096, alignment: 4, +# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true, +# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +# CHECK-NEXT: - { id: 2, name: '', type: default, offset: 0, size: 4, alignment: 4, +# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true, +# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +# CHECK-NEXT: - { id: 3, name: '', type: default, offset: 4, size: 4, alignment: 4, +# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true, +# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + + +# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) +# CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr10, 0, undef $vgpr1 +# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) +# CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + + +# CHECK: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) +# CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) +# CHECK-NEXT: $sgpr10 = V_READLANE_B32 killed $vgpr1, 0 +# CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) --- name: test tracksRegLiveness: true @@ -22,25 +47,6 @@ machineFunctionInfo: body: | bb.0: liveins: $sgpr30_sgpr31, $sgpr10, $sgpr11 - ; CHECK-LABEL: name: test - ; CHECK: liveins: $sgpr30_sgpr31, $sgpr10, $sgpr11 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc - ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec - ; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr2 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) - ; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 killed $sgpr10, 0, undef $vgpr2 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) - ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr2 - ; CHECK-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1 - ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) - ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) - ; CHECK-NEXT: $sgpr10 = V_READLANE_B32 killed $vgpr1, 0 - ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) - ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 - ; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $scc S_CMP_EQ_U32 0, 0, implicit-def $scc SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir index 8c0211ac25068..c0f70c7ef2acb 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir @@ -253,81 +253,81 @@ body: | ; FLATSCR: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def $scc, implicit $scc ; FLATSCR: renamable $sgpr12 = IMPLICIT_DEF ; FLATSCR: INLINEASM &"", 0 /* attdialect */, implicit-def $vgpr0 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 ; FLATSCR: $sgpr12 = S_MOV_B32 $exec_lo ; FLATSCR: $exec_lo = S_MOV_B32 1 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR: $exec_lo = S_MOV_B32 -1 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: INLINEASM &"", 1 /* sideeffect attdialect */, implicit $vgpr0 ; FLATSCR: $exec_lo = S_MOV_B32 killed $sgpr12 ; FLATSCR: renamable $sgpr12 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 ; FLATSCR: $sgpr12 = S_MOV_B32 $exec_lo ; FLATSCR: $exec_lo = S_MOV_B32 1 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 4, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 4, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; FLATSCR: $exec_lo = S_MOV_B32 killed $sgpr12 ; FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec ; FLATSCR: $exec = S_MOV_B64 3 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 -1 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 ; FLATSCR: renamable $sgpr12_sgpr13 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec ; FLATSCR: $exec = S_MOV_B64 3 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 ; FLATSCR: $sgpr13 = V_READLANE_B32 $vgpr0, 1 ; FLATSCR: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec ; FLATSCR: $exec = S_MOV_B64 7 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 -1 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 @@ -335,14 +335,14 @@ body: | ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15 ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec ; FLATSCR: $exec = S_MOV_B64 15 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 -1 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 @@ -351,14 +351,14 @@ body: | ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec ; FLATSCR: $exec = S_MOV_B64 31 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 -1 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 @@ -370,14 +370,14 @@ body: | ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec ; FLATSCR: $exec = S_MOV_B64 255 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 -1 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 ; FLATSCR: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 @@ -397,14 +397,14 @@ body: | ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; FLATSCR: $sgpr12_sgpr13 = S_MOV_B64 $exec ; FLATSCR: $exec = S_MOV_B64 65535 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 -1 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr12_sgpr13 ; FLATSCR: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5) ; FLATSCR: $exec = S_XOR_B64 $exec, -1, implicit-def $scc ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 @@ -440,9 +440,9 @@ body: | ; FLATSCR: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 ; FLATSCR: $sgpr64_sgpr65 = S_MOV_B64 $exec ; FLATSCR: $exec = S_MOV_B64 4294967295 - ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5) + ; FLATSCR: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 -1 - ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) + ; FLATSCR: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5) ; FLATSCR: $exec = S_MOV_B64 killed $sgpr64_sgpr65 ; GCN64-MUBUF-LABEL: name: check_spill ; GCN64-MUBUF: liveins: $sgpr8, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr11 @@ -455,13 +455,13 @@ body: | ; GCN64-MUBUF-NEXT: $sgpr28 = S_ADD_U32 $sgpr28, $sgpr11, implicit-def $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 ; GCN64-MUBUF-NEXT: $sgpr29 = S_ADDC_U32 $sgpr29, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF - ; GCN64-MUBUF-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr1 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr1 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 @@ -471,33 +471,33 @@ body: | ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF - ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13 - ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF - ; GCN64-MUBUF-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr3 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13 - ; GCN64-MUBUF-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr3 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF - ; GCN64-MUBUF-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr1 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr1 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0 @@ -510,32 +510,32 @@ body: | ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF - ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr2 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr14, 2, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr15, 3, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr16, 4, $vgpr2, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF - ; GCN64-MUBUF-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr1 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr15, 3, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr16, 4, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr17, 5, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr18, 6, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr19, 7, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0 @@ -560,44 +560,44 @@ body: | ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF - ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr1 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr65, 1, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr66, 2, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr67, 3, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr68, 4, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr69, 5, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr70, 6, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr71, 7, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr72, 8, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr73, 9, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr74, 10, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr75, 11, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr76, 12, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr77, 13, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr78, 14, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr79, 15, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr80, 16, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr81, 17, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr82, 18, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr83, 19, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr84, 20, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr85, 21, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr86, 22, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr87, 23, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr88, 24, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr89, 25, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr90, 26, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr91, 27, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr92, 28, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr93, 29, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr94, 30, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr95, 31, $vgpr1, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 @@ -618,13 +618,13 @@ body: | ; GCN32-MUBUF-NEXT: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr11, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GCN32-MUBUF-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF - ; GCN32-MUBUF-NEXT: $sgpr3 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr1 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr1 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr3, implicit killed $vgpr1 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 @@ -634,33 +634,33 @@ body: | ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF - ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr2 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13 - ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr2 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF - ; GCN32-MUBUF-NEXT: $sgpr1 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr3 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13 - ; GCN32-MUBUF-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1, implicit killed $vgpr3 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF - ; GCN32-MUBUF-NEXT: $sgpr3 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr1 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr3, implicit killed $vgpr1 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 15, implicit-def $vgpr0 @@ -673,32 +673,32 @@ body: | ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF - ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr2 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr14, 2, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr15, 3, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN32-MUBUF-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr16, 4, $vgpr2, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr2 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF - ; GCN32-MUBUF-NEXT: $sgpr1 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr1 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr15, 3, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr16, 4, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr17, 5, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr18, 6, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr19, 7, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1, implicit killed $vgpr1 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr0 @@ -723,44 +723,44 @@ body: | ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF - ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr1 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr65, 1, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr66, 2, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr67, 3, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr68, 4, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr69, 5, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr70, 6, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr71, 7, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr72, 8, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr73, 9, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr74, 10, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr75, 11, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr76, 12, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr77, 13, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr78, 14, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr79, 15, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr80, 16, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr81, 17, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr82, 18, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr83, 19, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr84, 20, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr85, 21, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr86, 22, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr87, 23, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr88, 24, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr89, 25, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr90, 26, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr91, 27, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr92, 28, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr93, 29, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr94, 30, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr95, 31, $vgpr1, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr1 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 @@ -777,51 +777,51 @@ body: | ; GCN64-FLATSCR-NEXT: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc ; GCN64-FLATSCR-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF - ; GCN64-FLATSCR-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr1 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr1 + ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 0, undef $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF - ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF - ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13 - ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit killed $sgpr12_sgpr13 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2 + ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr3 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr3, implicit $sgpr12_sgpr13 - ; GCN64-FLATSCR-NEXT: $vgpr3 = V_WRITELANE_B32 $sgpr13, 1, $vgpr3, implicit $sgpr12_sgpr13 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr3 + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF - ; GCN64-FLATSCR-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr1 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr1 + ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF - ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0 ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15 @@ -830,36 +830,36 @@ body: | ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15 ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5) ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF - ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr2 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr13, 1, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr14, 2, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr15, 3, $vgpr2, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-FLATSCR-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr16, 4, $vgpr2, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2 + ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr1 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr13, 1, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr14, 2, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr15, 3, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr16, 4, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr17, 5, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr18, 6, $vgpr1, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr19, 7, $vgpr1, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1 + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr16, 4, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF - ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0 ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 @@ -880,55 +880,55 @@ body: | ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5) ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF - ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr1 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr65, 1, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr66, 2, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr67, 3, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr68, 4, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr69, 5, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr70, 6, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr71, 7, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr72, 8, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr73, 9, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr74, 10, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr75, 11, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr76, 12, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr77, 13, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr78, 14, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr79, 15, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr80, 16, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr81, 17, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr82, 18, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr83, 19, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr84, 20, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr85, 21, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr86, 22, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr87, 23, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr88, 24, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr89, 25, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr90, 26, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr91, 27, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr92, 28, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr93, 29, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr94, 30, $vgpr1, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr95, 31, $vgpr1, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 + ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr67, 3, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr68, 4, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr69, 5, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr70, 6, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr71, 7, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr72, 8, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr73, 9, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr74, 10, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr75, 11, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr76, 12, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr77, 13, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr78, 14, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr79, 15, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr80, 16, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr81, 17, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr82, 18, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr83, 19, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr84, 20, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr85, 21, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr86, 22, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr87, 23, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr88, 24, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr89, 25, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr90, 26, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr91, 27, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr92, 28, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF - ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 0, undef $vgpr0 - ; GCN64-FLATSCR-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, killed $sgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, align 4096, addrspace 5) + ; GCN64-FLATSCR-NEXT: $sgpr9 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, killed $sgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, align 4096, addrspace 5) ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 renamable $sgpr12 = IMPLICIT_DEF SI_SPILL_S32_SAVE killed $sgpr12, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 @@ -1011,30 +1011,30 @@ body: | ; GCN64-MUBUF-NEXT: $sgpr31 = S_MOV_B32 14680064, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 ; GCN64-MUBUF-NEXT: $sgpr28 = S_ADD_U32 $sgpr28, $sgpr11, implicit-def $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 ; GCN64-MUBUF-NEXT: $sgpr29 = S_ADDC_U32 $sgpr29, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31 - ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) - ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr1, 0 - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 - ; GCN64-MUBUF-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13 - ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr2, 1 - ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr2 - ; GCN64-MUBUF-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr3 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) - ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr3, 0, implicit-def $sgpr12_sgpr13_sgpr14 - ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr3, 1 - ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr3, 2 - ; GCN64-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr3 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13 + ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1 + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14 + ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2 + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0 ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -1045,31 +1045,31 @@ body: | ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3 ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 - ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr1 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) - ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr1, 1 - ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr1, 2 - ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr1, 3 - ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr1, 4 - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 - ; GCN64-MUBUF-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr2 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5) - ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr2, 1 - ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr2, 2 - ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr2, 3 - ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr2, 4 - ; GCN64-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr2, 5 - ; GCN64-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr2, 6 - ; GCN64-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr2, 7 - ; GCN64-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr2 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) + ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2 + ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3 + ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4 + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5) + ; GCN64-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2 + ; GCN64-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3 + ; GCN64-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4 + ; GCN64-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5 + ; GCN64-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6 + ; GCN64-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7 + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0 ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -1092,44 +1092,44 @@ body: | ; GCN64-MUBUF-NEXT: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15 ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 - ; GCN64-MUBUF-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr1 - ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5) - ; GCN64-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr1, 1 - ; GCN64-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr1, 2 - ; GCN64-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr1, 3 - ; GCN64-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr1, 4 - ; GCN64-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr1, 5 - ; GCN64-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr1, 6 - ; GCN64-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr1, 7 - ; GCN64-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr1, 8 - ; GCN64-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr1, 9 - ; GCN64-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr1, 10 - ; GCN64-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr1, 11 - ; GCN64-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr1, 12 - ; GCN64-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr1, 13 - ; GCN64-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr1, 14 - ; GCN64-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr1, 15 - ; GCN64-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr1, 16 - ; GCN64-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr1, 17 - ; GCN64-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr1, 18 - ; GCN64-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr1, 19 - ; GCN64-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr1, 20 - ; GCN64-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr1, 21 - ; GCN64-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr1, 22 - ; GCN64-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr1, 23 - ; GCN64-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr1, 24 - ; GCN64-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr1, 25 - ; GCN64-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr1, 26 - ; GCN64-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr1, 27 - ; GCN64-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr1, 28 - ; GCN64-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr1, 29 - ; GCN64-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr1, 30 - ; GCN64-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr1, 31 - ; GCN64-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 + ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0 + ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5) + ; GCN64-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr0, 2 + ; GCN64-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr0, 3 + ; GCN64-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr0, 4 + ; GCN64-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr0, 5 + ; GCN64-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr0, 6 + ; GCN64-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr0, 7 + ; GCN64-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr0, 8 + ; GCN64-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr0, 9 + ; GCN64-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr0, 10 + ; GCN64-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr0, 11 + ; GCN64-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr0, 12 + ; GCN64-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr0, 13 + ; GCN64-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr0, 14 + ; GCN64-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr0, 15 + ; GCN64-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr0, 16 + ; GCN64-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr0, 17 + ; GCN64-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr0, 18 + ; GCN64-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr0, 19 + ; GCN64-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr0, 20 + ; GCN64-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr0, 21 + ; GCN64-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr0, 22 + ; GCN64-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr0, 23 + ; GCN64-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr0, 24 + ; GCN64-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr0, 25 + ; GCN64-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr0, 26 + ; GCN64-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr0, 27 + ; GCN64-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr0, 28 + ; GCN64-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr0, 29 + ; GCN64-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr0, 30 + ; GCN64-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31 + ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -1148,30 +1148,30 @@ body: | ; GCN32-MUBUF-NEXT: $sgpr99 = S_MOV_B32 834756608, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GCN32-MUBUF-NEXT: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr11, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GCN32-MUBUF-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 - ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr1 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) - ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr1, 0 - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr1 - ; GCN32-MUBUF-NEXT: $sgpr1 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr2 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) - ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13 - ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr2, 1 - ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1, implicit killed $vgpr2 - ; GCN32-MUBUF-NEXT: $sgpr3 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr3 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) - ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr3, 0, implicit-def $sgpr12_sgpr13_sgpr14 - ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr3, 1 - ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr3, 2 - ; GCN32-MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr3, implicit killed $vgpr3 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5) + ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13 + ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1 + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14 + ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2 + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 15, implicit-def $vgpr0 ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -1182,31 +1182,31 @@ body: | ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3 ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 - ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr1 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) - ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr1, 1 - ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr1, 2 - ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr1, 3 - ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr1, 4 - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr1 - ; GCN32-MUBUF-NEXT: $sgpr1 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr2 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5) - ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr2, 1 - ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr2, 2 - ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr2, 3 - ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr2, 4 - ; GCN32-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr2, 5 - ; GCN32-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr2, 6 - ; GCN32-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr2, 7 - ; GCN32-MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1, implicit killed $vgpr2 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5) + ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2 + ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3 + ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4 + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5) + ; GCN32-MUBUF-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN32-MUBUF-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN32-MUBUF-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2 + ; GCN32-MUBUF-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3 + ; GCN32-MUBUF-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4 + ; GCN32-MUBUF-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5 + ; GCN32-MUBUF-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6 + ; GCN32-MUBUF-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7 + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr0 ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -1229,44 +1229,44 @@ body: | ; GCN32-MUBUF-NEXT: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15 ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 - ; GCN32-MUBUF-NEXT: $sgpr2 = S_MOV_B32 $exec_lo - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr1 - ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5) - ; GCN32-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN32-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr1, 1 - ; GCN32-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr1, 2 - ; GCN32-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr1, 3 - ; GCN32-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr1, 4 - ; GCN32-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr1, 5 - ; GCN32-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr1, 6 - ; GCN32-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr1, 7 - ; GCN32-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr1, 8 - ; GCN32-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr1, 9 - ; GCN32-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr1, 10 - ; GCN32-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr1, 11 - ; GCN32-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr1, 12 - ; GCN32-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr1, 13 - ; GCN32-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr1, 14 - ; GCN32-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr1, 15 - ; GCN32-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr1, 16 - ; GCN32-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr1, 17 - ; GCN32-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr1, 18 - ; GCN32-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr1, 19 - ; GCN32-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr1, 20 - ; GCN32-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr1, 21 - ; GCN32-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr1, 22 - ; GCN32-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr1, 23 - ; GCN32-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr1, 24 - ; GCN32-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr1, 25 - ; GCN32-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr1, 26 - ; GCN32-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr1, 27 - ; GCN32-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr1, 28 - ; GCN32-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr1, 29 - ; GCN32-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr1, 30 - ; GCN32-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr1, 31 - ; GCN32-MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr2, implicit killed $vgpr1 + ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr0 + ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5) + ; GCN32-MUBUF-NEXT: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN32-MUBUF-NEXT: $sgpr65 = V_READLANE_B32 $vgpr0, 1 + ; GCN32-MUBUF-NEXT: $sgpr66 = V_READLANE_B32 $vgpr0, 2 + ; GCN32-MUBUF-NEXT: $sgpr67 = V_READLANE_B32 $vgpr0, 3 + ; GCN32-MUBUF-NEXT: $sgpr68 = V_READLANE_B32 $vgpr0, 4 + ; GCN32-MUBUF-NEXT: $sgpr69 = V_READLANE_B32 $vgpr0, 5 + ; GCN32-MUBUF-NEXT: $sgpr70 = V_READLANE_B32 $vgpr0, 6 + ; GCN32-MUBUF-NEXT: $sgpr71 = V_READLANE_B32 $vgpr0, 7 + ; GCN32-MUBUF-NEXT: $sgpr72 = V_READLANE_B32 $vgpr0, 8 + ; GCN32-MUBUF-NEXT: $sgpr73 = V_READLANE_B32 $vgpr0, 9 + ; GCN32-MUBUF-NEXT: $sgpr74 = V_READLANE_B32 $vgpr0, 10 + ; GCN32-MUBUF-NEXT: $sgpr75 = V_READLANE_B32 $vgpr0, 11 + ; GCN32-MUBUF-NEXT: $sgpr76 = V_READLANE_B32 $vgpr0, 12 + ; GCN32-MUBUF-NEXT: $sgpr77 = V_READLANE_B32 $vgpr0, 13 + ; GCN32-MUBUF-NEXT: $sgpr78 = V_READLANE_B32 $vgpr0, 14 + ; GCN32-MUBUF-NEXT: $sgpr79 = V_READLANE_B32 $vgpr0, 15 + ; GCN32-MUBUF-NEXT: $sgpr80 = V_READLANE_B32 $vgpr0, 16 + ; GCN32-MUBUF-NEXT: $sgpr81 = V_READLANE_B32 $vgpr0, 17 + ; GCN32-MUBUF-NEXT: $sgpr82 = V_READLANE_B32 $vgpr0, 18 + ; GCN32-MUBUF-NEXT: $sgpr83 = V_READLANE_B32 $vgpr0, 19 + ; GCN32-MUBUF-NEXT: $sgpr84 = V_READLANE_B32 $vgpr0, 20 + ; GCN32-MUBUF-NEXT: $sgpr85 = V_READLANE_B32 $vgpr0, 21 + ; GCN32-MUBUF-NEXT: $sgpr86 = V_READLANE_B32 $vgpr0, 22 + ; GCN32-MUBUF-NEXT: $sgpr87 = V_READLANE_B32 $vgpr0, 23 + ; GCN32-MUBUF-NEXT: $sgpr88 = V_READLANE_B32 $vgpr0, 24 + ; GCN32-MUBUF-NEXT: $sgpr89 = V_READLANE_B32 $vgpr0, 25 + ; GCN32-MUBUF-NEXT: $sgpr90 = V_READLANE_B32 $vgpr0, 26 + ; GCN32-MUBUF-NEXT: $sgpr91 = V_READLANE_B32 $vgpr0, 27 + ; GCN32-MUBUF-NEXT: $sgpr92 = V_READLANE_B32 $vgpr0, 28 + ; GCN32-MUBUF-NEXT: $sgpr93 = V_READLANE_B32 $vgpr0, 29 + ; GCN32-MUBUF-NEXT: $sgpr94 = V_READLANE_B32 $vgpr0, 30 + ; GCN32-MUBUF-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31 + ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0 ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0 ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -1281,31 +1281,31 @@ body: | ; GCN64-FLATSCR-NEXT: $sgpr33 = S_MOV_B32 0 ; GCN64-FLATSCR-NEXT: $flat_scr_lo = S_ADD_U32 $sgpr0, $sgpr11, implicit-def $scc ; GCN64-FLATSCR-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc - ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) - ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr1, 0 - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) - ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13 - ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr2, 1 - ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr2 - ; GCN64-FLATSCR-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr3 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5) - ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr3, 0, implicit-def $sgpr12_sgpr13_sgpr14 - ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr3, 1 - ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr3, 2 - ; GCN64-FLATSCR-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr3 - ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5) + ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 + ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5) + ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13 + ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 killed $vgpr0, 1 + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 + ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5) + ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14 + ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 killed $vgpr0, 2 + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 + ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0 ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5) @@ -1314,33 +1314,33 @@ body: | ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2 ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 killed $vgpr0, 3 ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 - ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr1 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5) - ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 - ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr1, 1 - ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr1, 2 - ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr1, 3 - ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr1, 4 - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 + ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5) + ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 + ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2 + ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3 + ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 killed $vgpr0, 4 + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 + ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.5, addrspace 5) + ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 + ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 2 + ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 3 + ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 $vgpr0, 4 + ; GCN64-FLATSCR-NEXT: $sgpr17 = V_READLANE_B32 $vgpr0, 5 + ; GCN64-FLATSCR-NEXT: $sgpr18 = V_READLANE_B32 $vgpr0, 6 + ; GCN64-FLATSCR-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr0, 7 + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr2 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.5, addrspace 5) - ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 - ; GCN64-FLATSCR-NEXT: $sgpr13 = V_READLANE_B32 $vgpr2, 1 - ; GCN64-FLATSCR-NEXT: $sgpr14 = V_READLANE_B32 $vgpr2, 2 - ; GCN64-FLATSCR-NEXT: $sgpr15 = V_READLANE_B32 $vgpr2, 3 - ; GCN64-FLATSCR-NEXT: $sgpr16 = V_READLANE_B32 $vgpr2, 4 - ; GCN64-FLATSCR-NEXT: $sgpr17 = V_READLANE_B32 $vgpr2, 5 - ; GCN64-FLATSCR-NEXT: $sgpr18 = V_READLANE_B32 $vgpr2, 6 - ; GCN64-FLATSCR-NEXT: $sgpr19 = V_READLANE_B32 killed $vgpr2, 7 - ; GCN64-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr2 - ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0 ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.6, addrspace 5) @@ -1361,53 +1361,53 @@ body: | ; GCN64-FLATSCR-NEXT: $sgpr26 = V_READLANE_B32 $vgpr0, 14 ; GCN64-FLATSCR-NEXT: $sgpr27 = V_READLANE_B32 killed $vgpr0, 15 ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 - ; GCN64-FLATSCR-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr1 - ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.7, addrspace 5) - ; GCN64-FLATSCR-NEXT: $sgpr64 = V_READLANE_B32 $vgpr1, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 - ; GCN64-FLATSCR-NEXT: $sgpr65 = V_READLANE_B32 $vgpr1, 1 - ; GCN64-FLATSCR-NEXT: $sgpr66 = V_READLANE_B32 $vgpr1, 2 - ; GCN64-FLATSCR-NEXT: $sgpr67 = V_READLANE_B32 $vgpr1, 3 - ; GCN64-FLATSCR-NEXT: $sgpr68 = V_READLANE_B32 $vgpr1, 4 - ; GCN64-FLATSCR-NEXT: $sgpr69 = V_READLANE_B32 $vgpr1, 5 - ; GCN64-FLATSCR-NEXT: $sgpr70 = V_READLANE_B32 $vgpr1, 6 - ; GCN64-FLATSCR-NEXT: $sgpr71 = V_READLANE_B32 $vgpr1, 7 - ; GCN64-FLATSCR-NEXT: $sgpr72 = V_READLANE_B32 $vgpr1, 8 - ; GCN64-FLATSCR-NEXT: $sgpr73 = V_READLANE_B32 $vgpr1, 9 - ; GCN64-FLATSCR-NEXT: $sgpr74 = V_READLANE_B32 $vgpr1, 10 - ; GCN64-FLATSCR-NEXT: $sgpr75 = V_READLANE_B32 $vgpr1, 11 - ; GCN64-FLATSCR-NEXT: $sgpr76 = V_READLANE_B32 $vgpr1, 12 - ; GCN64-FLATSCR-NEXT: $sgpr77 = V_READLANE_B32 $vgpr1, 13 - ; GCN64-FLATSCR-NEXT: $sgpr78 = V_READLANE_B32 $vgpr1, 14 - ; GCN64-FLATSCR-NEXT: $sgpr79 = V_READLANE_B32 $vgpr1, 15 - ; GCN64-FLATSCR-NEXT: $sgpr80 = V_READLANE_B32 $vgpr1, 16 - ; GCN64-FLATSCR-NEXT: $sgpr81 = V_READLANE_B32 $vgpr1, 17 - ; GCN64-FLATSCR-NEXT: $sgpr82 = V_READLANE_B32 $vgpr1, 18 - ; GCN64-FLATSCR-NEXT: $sgpr83 = V_READLANE_B32 $vgpr1, 19 - ; GCN64-FLATSCR-NEXT: $sgpr84 = V_READLANE_B32 $vgpr1, 20 - ; GCN64-FLATSCR-NEXT: $sgpr85 = V_READLANE_B32 $vgpr1, 21 - ; GCN64-FLATSCR-NEXT: $sgpr86 = V_READLANE_B32 $vgpr1, 22 - ; GCN64-FLATSCR-NEXT: $sgpr87 = V_READLANE_B32 $vgpr1, 23 - ; GCN64-FLATSCR-NEXT: $sgpr88 = V_READLANE_B32 $vgpr1, 24 - ; GCN64-FLATSCR-NEXT: $sgpr89 = V_READLANE_B32 $vgpr1, 25 - ; GCN64-FLATSCR-NEXT: $sgpr90 = V_READLANE_B32 $vgpr1, 26 - ; GCN64-FLATSCR-NEXT: $sgpr91 = V_READLANE_B32 $vgpr1, 27 - ; GCN64-FLATSCR-NEXT: $sgpr92 = V_READLANE_B32 $vgpr1, 28 - ; GCN64-FLATSCR-NEXT: $sgpr93 = V_READLANE_B32 $vgpr1, 29 - ; GCN64-FLATSCR-NEXT: $sgpr94 = V_READLANE_B32 $vgpr1, 30 - ; GCN64-FLATSCR-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr1, 31 - ; GCN64-FLATSCR-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1 - ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 + ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0 + ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.7, addrspace 5) + ; GCN64-FLATSCR-NEXT: $sgpr64 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 + ; GCN64-FLATSCR-NEXT: $sgpr65 = V_READLANE_B32 $vgpr0, 1 + ; GCN64-FLATSCR-NEXT: $sgpr66 = V_READLANE_B32 $vgpr0, 2 + ; GCN64-FLATSCR-NEXT: $sgpr67 = V_READLANE_B32 $vgpr0, 3 + ; GCN64-FLATSCR-NEXT: $sgpr68 = V_READLANE_B32 $vgpr0, 4 + ; GCN64-FLATSCR-NEXT: $sgpr69 = V_READLANE_B32 $vgpr0, 5 + ; GCN64-FLATSCR-NEXT: $sgpr70 = V_READLANE_B32 $vgpr0, 6 + ; GCN64-FLATSCR-NEXT: $sgpr71 = V_READLANE_B32 $vgpr0, 7 + ; GCN64-FLATSCR-NEXT: $sgpr72 = V_READLANE_B32 $vgpr0, 8 + ; GCN64-FLATSCR-NEXT: $sgpr73 = V_READLANE_B32 $vgpr0, 9 + ; GCN64-FLATSCR-NEXT: $sgpr74 = V_READLANE_B32 $vgpr0, 10 + ; GCN64-FLATSCR-NEXT: $sgpr75 = V_READLANE_B32 $vgpr0, 11 + ; GCN64-FLATSCR-NEXT: $sgpr76 = V_READLANE_B32 $vgpr0, 12 + ; GCN64-FLATSCR-NEXT: $sgpr77 = V_READLANE_B32 $vgpr0, 13 + ; GCN64-FLATSCR-NEXT: $sgpr78 = V_READLANE_B32 $vgpr0, 14 + ; GCN64-FLATSCR-NEXT: $sgpr79 = V_READLANE_B32 $vgpr0, 15 + ; GCN64-FLATSCR-NEXT: $sgpr80 = V_READLANE_B32 $vgpr0, 16 + ; GCN64-FLATSCR-NEXT: $sgpr81 = V_READLANE_B32 $vgpr0, 17 + ; GCN64-FLATSCR-NEXT: $sgpr82 = V_READLANE_B32 $vgpr0, 18 + ; GCN64-FLATSCR-NEXT: $sgpr83 = V_READLANE_B32 $vgpr0, 19 + ; GCN64-FLATSCR-NEXT: $sgpr84 = V_READLANE_B32 $vgpr0, 20 + ; GCN64-FLATSCR-NEXT: $sgpr85 = V_READLANE_B32 $vgpr0, 21 + ; GCN64-FLATSCR-NEXT: $sgpr86 = V_READLANE_B32 $vgpr0, 22 + ; GCN64-FLATSCR-NEXT: $sgpr87 = V_READLANE_B32 $vgpr0, 23 + ; GCN64-FLATSCR-NEXT: $sgpr88 = V_READLANE_B32 $vgpr0, 24 + ; GCN64-FLATSCR-NEXT: $sgpr89 = V_READLANE_B32 $vgpr0, 25 + ; GCN64-FLATSCR-NEXT: $sgpr90 = V_READLANE_B32 $vgpr0, 26 + ; GCN64-FLATSCR-NEXT: $sgpr91 = V_READLANE_B32 $vgpr0, 27 + ; GCN64-FLATSCR-NEXT: $sgpr92 = V_READLANE_B32 $vgpr0, 28 + ; GCN64-FLATSCR-NEXT: $sgpr93 = V_READLANE_B32 $vgpr0, 29 + ; GCN64-FLATSCR-NEXT: $sgpr94 = V_READLANE_B32 $vgpr0, 30 + ; GCN64-FLATSCR-NEXT: $sgpr95 = V_READLANE_B32 killed $vgpr0, 31 + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 + ; GCN64-FLATSCR-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0 ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc - ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, align 4096, addrspace 5) + ; GCN64-FLATSCR-NEXT: $sgpr9 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc + ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, align 4096, addrspace 5) ; GCN64-FLATSCR-NEXT: $sgpr12 = V_READLANE_B32 killed $vgpr0, 0 ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 + ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr0 renamable $sgpr12 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 renamable $sgpr12_sgpr13 = SI_SPILL_S64_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll index 2b7635fede0bc..515253e6a43f2 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll @@ -1,275 +1,22 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx803 -O0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s define void @child_function() #0 { -; GCN-LABEL: child_function: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_setpc_b64 s[30:31] call void asm sideeffect "", "~{vcc}" () #0 ret void } +; GCN-LABEL: {{^}}spill_sgpr_with_no_lower_vgpr_available: +; GCN: buffer_store_dword v255, off, s[0:3], s32 +; GCN: v_writelane_b32 v255, s33, 2 +; GCN: v_writelane_b32 v255, s30, 0 +; GCN: v_writelane_b32 v255, s31, 1 +; GCN: s_swappc_b64 s[30:31], s[4:5] +; GCN: v_readlane_b32 s31, v255, 1 +; GCN: v_readlane_b32 s30, v255, 0 +; GCN: v_readlane_b32 s33, v255, 2 +; GCN: ; NumVgprs: 256 + define void @spill_sgpr_with_no_lower_vgpr_available() #0 { -; GCN-LABEL: spill_sgpr_with_no_lower_vgpr_available: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v255, s33, 2 -; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_add_i32 s32, s32, 0x7400 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v72, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v255, s30, 0 -; GCN-NEXT: v_writelane_b32 v255, s31, 1 -; GCN-NEXT: v_mov_b32_e32 v0, 0 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:444 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_getpc_b64 s[4:5] -; GCN-NEXT: s_add_u32 s4, s4, child_function@gotpcrel32@lo+4 -; GCN-NEXT: s_addc_u32 s5, s5, child_function@gotpcrel32@hi+12 -; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: s_mov_b64 s[10:11], s[2:3] -; GCN-NEXT: s_mov_b64 s[8:9], s[0:1] -; GCN-NEXT: s_mov_b64 s[0:1], s[8:9] -; GCN-NEXT: s_mov_b64 s[2:3], s[10:11] -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v255, 1 -; GCN-NEXT: v_readlane_b32 s30, v255, 0 -; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v250, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v249, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v248, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v239, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v238, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v237, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v236, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v235, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v234, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v233, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v232, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v223, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v222, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v221, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v220, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v219, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v218, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v217, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v216, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v207, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v206, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v205, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v204, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v203, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v202, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v201, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v200, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v191, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v190, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v189, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v188, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v187, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v186, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v185, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v184, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v175, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v174, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v173, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v172, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v171, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v170, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v169, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v168, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v159, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v158, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v157, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v156, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v155, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v154, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v153, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v152, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v143, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v142, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v141, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v140, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v139, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v138, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v137, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v136, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v127, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v124, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v95, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:320 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v93, off, s[0:3], s33 offset:324 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v92, off, s[0:3], s33 offset:328 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v91, off, s[0:3], s33 offset:332 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v90, off, s[0:3], s33 offset:336 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v89, off, s[0:3], s33 offset:340 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v88, off, s[0:3], s33 offset:344 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v79, off, s[0:3], s33 offset:348 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v78, off, s[0:3], s33 offset:352 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v77, off, s[0:3], s33 offset:356 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v76, off, s[0:3], s33 offset:360 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v75, off, s[0:3], s33 offset:364 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v74, off, s[0:3], s33 offset:368 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v73, off, s[0:3], s33 offset:372 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v72, off, s[0:3], s33 offset:376 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:380 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s33 offset:384 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:388 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:392 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:396 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:400 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:404 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:408 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:412 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:416 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:420 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:424 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:428 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload -; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00 -; GCN-NEXT: v_readlane_b32 s33, v255, 2 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v255, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] %alloca = alloca i32, align 4, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -304,262 +51,17 @@ define void @spill_sgpr_with_no_lower_vgpr_available() #0 { ret void } +; GCN-LABEL: {{^}}spill_to_lowest_available_vgpr: +; GCN: buffer_store_dword v254, off, s[0:3], s32 +; GCN: v_writelane_b32 v254, s33, 2 +; GCN: v_writelane_b32 v254, s30, 0 +; GCN: v_writelane_b32 v254, s31, 1 +; GCN: s_swappc_b64 s[30:31], s[4:5] +; GCN: v_readlane_b32 s31, v254, 1 +; GCN: v_readlane_b32 s30, v254, 0 +; GCN: v_readlane_b32 s33, v254, 2 + define void @spill_to_lowest_available_vgpr() #0 { -; GCN-LABEL: spill_to_lowest_available_vgpr: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v254, s33, 2 -; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_add_i32 s32, s32, 0x7400 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v72, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v254, s30, 0 -; GCN-NEXT: v_writelane_b32 v254, s31, 1 -; GCN-NEXT: v_mov_b32_e32 v0, 0 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:440 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_getpc_b64 s[4:5] -; GCN-NEXT: s_add_u32 s4, s4, child_function@gotpcrel32@lo+4 -; GCN-NEXT: s_addc_u32 s5, s5, child_function@gotpcrel32@hi+12 -; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: s_mov_b64 s[10:11], s[2:3] -; GCN-NEXT: s_mov_b64 s[8:9], s[0:1] -; GCN-NEXT: s_mov_b64 s[0:1], s[8:9] -; GCN-NEXT: s_mov_b64 s[2:3], s[10:11] -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: v_readlane_b32 s31, v254, 1 -; GCN-NEXT: v_readlane_b32 s30, v254, 0 -; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v250, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v249, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v248, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v239, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v238, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v237, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v236, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v235, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v234, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v233, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v232, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v223, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v222, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v221, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v220, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v219, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v218, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v217, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v216, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v207, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v206, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v205, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v204, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v203, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v202, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v201, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v200, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v191, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v190, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v189, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v188, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v187, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v186, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v185, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v184, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v175, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v174, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v173, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v172, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v171, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v170, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v169, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v168, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v159, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v158, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v157, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v156, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v155, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v154, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v153, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v152, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v143, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v142, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v141, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v140, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v139, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v138, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v137, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v136, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v127, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v124, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v95, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v93, off, s[0:3], s33 offset:320 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v92, off, s[0:3], s33 offset:324 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v91, off, s[0:3], s33 offset:328 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v90, off, s[0:3], s33 offset:332 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v89, off, s[0:3], s33 offset:336 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v88, off, s[0:3], s33 offset:340 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v79, off, s[0:3], s33 offset:344 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v78, off, s[0:3], s33 offset:348 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v77, off, s[0:3], s33 offset:352 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v76, off, s[0:3], s33 offset:356 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v75, off, s[0:3], s33 offset:360 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v74, off, s[0:3], s33 offset:364 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v73, off, s[0:3], s33 offset:368 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v72, off, s[0:3], s33 offset:372 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:376 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s33 offset:380 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:384 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:388 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:392 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:396 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:400 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:404 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:408 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:412 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:416 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:420 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:424 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:428 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload -; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00 -; GCN-NEXT: v_readlane_b32 s33, v254, 2 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] %alloca = alloca i32, align 4, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -594,254 +96,14 @@ define void @spill_to_lowest_available_vgpr() #0 { ret void } +; GCN-LABEL: {{^}}spill_sgpr_with_sgpr_uses: +; GCN-NOT: buffer_store_dword v255, off, s[0:3], s32 +; GCN: ; def s4 +; GCN: v_writelane_b32 v254, s4, 0 +; GCN: v_readlane_b32 s4, v254, 0 +; GCN: ; use s4 + define void @spill_sgpr_with_sgpr_uses() #0 { -; GCN-LABEL: spill_sgpr_with_sgpr_uses: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v72, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v73, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v74, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v75, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v76, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v77, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v78, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v79, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v88, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v89, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v90, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v91, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v92, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v93, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v94, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v95, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v104, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v105, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v106, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v107, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v108, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v109, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v110, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v111, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v120, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v121, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v122, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v123, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v124, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v125, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v126, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v127, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v136, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v137, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v138, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v139, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v140, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v141, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v142, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v143, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v152, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v153, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v154, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v155, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v156, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v157, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v158, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v159, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v168, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v169, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v170, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v171, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v172, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v173, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v174, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v175, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v184, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v185, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v186, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v187, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v188, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v189, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v190, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v191, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v200, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v201, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v202, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v203, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v204, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v205, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v206, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v207, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v216, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v217, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v218, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v219, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v220, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v221, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v222, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v223, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v232, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v233, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v234, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v235, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v236, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v237, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v238, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v239, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v248, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v249, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v250, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v251, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v252, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-NEXT: v_mov_b32_e32 v0, 0 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:440 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s4 -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v254, s4, 0 -; GCN-NEXT: s_cbranch_scc1 .LBB3_2 -; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s4, v254, 0 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s4 -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: .LBB3_2: ; %ret -; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v252, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v251, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v250, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v249, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v248, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v239, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v238, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v237, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v236, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v235, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v234, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v233, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v232, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v223, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v222, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v221, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v220, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v219, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v218, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v217, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v216, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v207, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v206, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v205, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v204, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v203, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v202, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v201, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v200, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v191, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v190, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v189, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v188, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v187, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v186, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v185, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v184, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v175, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v174, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v173, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v172, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v171, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v170, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v169, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v168, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v159, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v158, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v157, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v156, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v155, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v154, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v153, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v152, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v143, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v142, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v141, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v140, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v139, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v138, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v137, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v136, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v127, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v126, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v125, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v124, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v123, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v122, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v121, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v120, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v111, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v110, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v109, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v108, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v107, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v106, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v105, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v104, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v95, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v94, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v93, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v92, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v91, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v90, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v89, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v88, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v79, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v78, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v77, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v76, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v75, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v74, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v73, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v72, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] %alloca = alloca i32, align 4, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -885,243 +147,12 @@ ret: ret void } +; GCN-LABEL: {{^}}spill_sgpr_with_tail_call +; GCN-NOT: buffer_store_dword v255, off, s[0:3], s32 +; GCN-NOT: v_writelane +; GCN: s_setpc_b64 s[4:5] + define void @spill_sgpr_with_tail_call() #0 { -; GCN-LABEL: spill_sgpr_with_tail_call: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v72, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v73, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v74, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v75, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v76, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v77, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v78, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v79, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v88, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v89, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v90, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v91, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v92, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v93, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v94, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v95, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v104, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v105, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v106, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v107, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v108, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v109, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v110, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v111, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v120, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v121, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v122, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v123, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v124, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v125, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v126, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v127, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v136, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v137, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v138, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v139, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v140, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v141, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v142, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v143, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v152, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v153, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v154, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v155, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v156, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v157, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v158, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v159, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v168, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v169, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v170, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v171, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v172, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v173, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v174, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v175, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v184, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v185, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v186, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v187, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v188, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v189, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v190, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v191, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v200, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v201, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v202, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v203, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v204, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v205, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v206, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v207, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v216, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v217, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v218, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v219, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v220, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v221, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v222, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v223, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v232, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v233, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v234, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v235, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v236, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v237, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v238, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v239, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v248, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v249, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v250, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v251, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v252, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-NEXT: v_mov_b32_e32 v0, 0 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:444 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_getpc_b64 s[4:5] -; GCN-NEXT: s_add_u32 s4, s4, child_function@gotpcrel32@lo+4 -; GCN-NEXT: s_addc_u32 s5, s5, child_function@gotpcrel32@hi+12 -; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v252, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v251, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v250, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v249, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v248, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v239, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v238, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v237, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v236, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v235, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v234, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v233, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v232, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v223, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v222, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v221, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v220, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v219, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v218, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v217, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v216, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v207, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v206, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v205, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v204, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v203, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v202, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v201, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v200, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v191, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v190, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v189, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v188, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v187, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v186, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v185, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v184, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v175, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v174, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v173, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v172, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v171, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v170, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v169, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v168, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v159, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v158, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v157, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v156, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v155, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v154, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v153, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v152, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v143, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v142, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v141, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v140, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v139, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v138, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v137, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v136, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v127, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v126, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v125, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v124, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v123, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v122, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v121, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v120, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v111, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v110, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v109, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v108, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v107, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v106, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v105, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v104, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v95, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v94, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v93, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v92, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v91, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v90, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v89, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v88, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v79, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v78, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v77, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v76, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v75, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v74, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v73, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v72, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] %alloca = alloca i32, align 4, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -1156,281 +187,29 @@ define void @spill_sgpr_with_tail_call() #0 { ret void } +; Special case where all registers are explicitly clobbered in the function and +; we have no VGPR to allocate for SGPR spills. We are forced to spill to memory. + +; GCN-LABEL: {{^}}spill_sgpr_no_free_vgpr: +; GCN: v_writelane_b32 v{{[0-9]+}}, s34, 0 +; GCN: v_writelane_b32 v{{[0-9]+}}, s35, 1 +; GCN: v_writelane_b32 v{{[0-9]+}}, s36, 2 +; GCN: v_writelane_b32 v{{[0-9]+}}, s37, 3 +; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 +; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 +; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 +; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 +; GCN: #ASMEND +; GCN: buffer_load_dword v{{[0-9]+}} +; GCN: buffer_load_dword v{{[0-9]+}} +; GCN: buffer_load_dword v{{[0-9]+}} +; GCN: buffer_load_dword v{{[0-9]+}} +; GCN: v_readlane_b32 s37, v{{[0-9]+}}, 3 +; GCN: v_readlane_b32 s36, v{{[0-9]+}}, 2 +; GCN: v_readlane_b32 s35, v{{[0-9]+}}, 1 +; GCN: v_readlane_b32 s34, v{{[0-9]+}}, 0 + define void @spill_sgpr_no_free_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { -; GCN-LABEL: spill_sgpr_no_free_vgpr: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v72, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v73, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v74, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v75, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v76, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v77, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v78, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v79, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v88, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v89, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v90, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v91, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v92, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v93, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v94, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v95, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v104, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v105, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v106, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v107, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v108, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v109, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v110, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v111, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v120, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v121, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v122, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v123, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v124, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v125, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v126, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v127, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v136, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v137, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v138, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v139, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v140, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v141, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v142, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v143, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v152, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v153, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v154, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v155, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v156, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v157, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v158, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v159, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v168, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v169, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v170, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v171, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v172, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v173, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v174, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v175, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v184, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v185, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v186, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v187, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v188, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v189, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v190, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v191, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v200, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v201, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v202, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v203, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v204, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v205, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v206, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v207, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v216, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v217, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v218, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v219, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v220, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v221, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v222, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v223, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v232, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v233, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v234, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v235, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v236, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v237, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v238, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v239, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v248, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v249, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v250, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v251, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v252, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v4, s34, 0 -; GCN-NEXT: v_writelane_b32 v4, s35, 1 -; GCN-NEXT: v_writelane_b32 v4, s36, 2 -; GCN-NEXT: v_writelane_b32 v4, s37, 3 -; GCN-NEXT: v_mov_b32_e32 v5, v3 -; GCN-NEXT: v_mov_b32_e32 v3, v1 -; GCN-NEXT: ; implicit-def: $sgpr4 -; GCN-NEXT: ; implicit-def: $sgpr4 -; GCN-NEXT: ; kill: def $vgpr3 killed $vgpr3 killed $exec -; GCN-NEXT: ; kill: def $vgpr0 killed $vgpr0 def $vgpr0_vgpr1 killed $exec -; GCN-NEXT: v_mov_b32_e32 v1, v3 -; GCN-NEXT: ; implicit-def: $sgpr4 -; GCN-NEXT: ; implicit-def: $sgpr4 -; GCN-NEXT: ; kill: def $vgpr5 killed $vgpr5 killed $exec -; GCN-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec -; GCN-NEXT: v_mov_b32_e32 v3, v5 -; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5 -; GCN-NEXT: flat_load_dwordx4 v[5:8], v[2:3] -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: flat_store_dwordx4 v[0:1], v[5:8] -; GCN-NEXT: v_readlane_b32 s37, v4, 3 -; GCN-NEXT: v_readlane_b32 s36, v4, 2 -; GCN-NEXT: v_readlane_b32 s35, v4, 1 -; GCN-NEXT: v_readlane_b32 s34, v4, 0 -; GCN-NEXT: buffer_load_dword v255, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v252, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v251, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v250, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v249, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v248, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v239, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v238, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v237, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v236, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v235, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v234, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v233, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v232, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v223, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v222, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v221, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v220, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v219, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v218, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v217, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v216, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v207, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v206, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v205, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v204, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v203, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v202, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v201, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v200, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v191, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v190, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v189, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v188, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v187, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v186, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v185, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v184, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v175, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v174, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v173, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v172, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v171, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v170, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v169, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v168, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v159, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v158, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v157, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v156, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v155, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v154, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v153, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v152, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v143, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v142, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v141, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v140, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v139, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v138, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v137, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v136, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v127, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v126, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v125, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v124, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v123, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v122, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v121, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v120, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v111, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v110, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v109, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v108, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v107, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v106, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v105, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v104, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v95, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v94, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v93, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v92, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v91, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v90, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v89, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v88, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v79, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v78, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v77, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v76, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v75, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v74, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v73, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v72, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 -; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] %a = load <4 x i32>, <4 x i32> addrspace(1)* %in call void asm sideeffect "", "~{v6},~{v7},~{v8},~{v9} @@ -1467,13 +246,11 @@ define void @spill_sgpr_no_free_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> add ret void } +; If IPRA no-CSR optimization is enabled, we will not be able to allocate an +; SGPR for VGPR spills in the parent function since this child function uses all +; VGPRs. + define internal void @child_function_ipra() #0 { -; GCN-LABEL: child_function_ipra: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_setpc_b64 s[30:31] call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9} ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19} @@ -1504,519 +281,22 @@ define internal void @child_function_ipra() #0 { ret void } +; GCN-LABEL: {{^}}spill_sgpr_no_free_vgpr_ipra: +; GCN: v_writelane_b32 v0, s30, 0 +; GCN: buffer_store_dword v0, off +; GCN: v_writelane_b32 v0, s31, 0 +; GCN: buffer_store_dword v0, off +; GCN: swappc +; GCN: buffer_load_dword v0, off +; GCN: v_readlane_b32 s31, v0, 0 +; GCN: buffer_load_dword v0, off +; GCN: v_readlane_b32 s30, v0, 0 define void @spill_sgpr_no_free_vgpr_ipra() #0 { -; GCN-LABEL: spill_sgpr_no_free_vgpr_ipra: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_mov_b32 s6, s33 -; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: s_add_i32 s32, s32, 0x7400 -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 s[14:15], exec -; GCN-NEXT: s_mov_b64 exec, 1 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:456 -; GCN-NEXT: v_writelane_b32 v1, s30, 0 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:448 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:456 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_mov_b64 exec, s[14:15] -; GCN-NEXT: s_mov_b64 s[12:13], exec -; GCN-NEXT: s_mov_b64 exec, 1 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456 -; GCN-NEXT: v_writelane_b32 v0, s31, 0 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:452 ; 4-byte Folded Spill -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:456 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_mov_b64 exec, s[12:13] -; GCN-NEXT: s_getpc_b64 s[4:5] -; GCN-NEXT: s_add_u32 s4, s4, child_function_ipra@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s5, s5, child_function_ipra@rel32@hi+12 -; GCN-NEXT: s_mov_b64 s[10:11], s[2:3] -; GCN-NEXT: s_mov_b64 s[8:9], s[0:1] -; GCN-NEXT: s_mov_b64 s[0:1], s[8:9] -; GCN-NEXT: s_mov_b64 s[2:3], s[10:11] -; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GCN-NEXT: s_mov_b64 s[8:9], exec -; GCN-NEXT: s_mov_b64 exec, 1 -; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:456 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:452 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s31, v1, 0 -; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:456 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_mov_b64 exec, s[8:9] -; GCN-NEXT: s_mov_b64 s[4:5], exec -; GCN-NEXT: s_mov_b64 exec, 1 -; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:456 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:448 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s30, v0, 0 -; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:456 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: buffer_load_dword v255, off, s[0:3], s33 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload -; GCN-NEXT: s_add_i32 s32, s32, 0xffff8c00 -; GCN-NEXT: s_mov_b32 s33, s6 -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] call void @child_function_ipra() ret void } define internal void @child_function_ipra_tail_call() #0 { -; GCN-LABEL: child_function_ipra_tail_call: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s32 offset:436 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:432 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:428 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s32 offset:424 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s32 offset:420 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s32 offset:416 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s32 offset:412 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s32 offset:408 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s32 offset:404 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s32 offset:400 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v60, off, s[0:3], s32 offset:396 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v61, off, s[0:3], s32 offset:392 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v62, off, s[0:3], s32 offset:388 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v63, off, s[0:3], s32 offset:384 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v72, off, s[0:3], s32 offset:380 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v73, off, s[0:3], s32 offset:376 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v74, off, s[0:3], s32 offset:372 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v75, off, s[0:3], s32 offset:368 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v76, off, s[0:3], s32 offset:364 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v77, off, s[0:3], s32 offset:360 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v78, off, s[0:3], s32 offset:356 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v79, off, s[0:3], s32 offset:352 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v88, off, s[0:3], s32 offset:348 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v89, off, s[0:3], s32 offset:344 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v90, off, s[0:3], s32 offset:340 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v91, off, s[0:3], s32 offset:336 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v92, off, s[0:3], s32 offset:332 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v93, off, s[0:3], s32 offset:328 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v94, off, s[0:3], s32 offset:324 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v95, off, s[0:3], s32 offset:320 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v104, off, s[0:3], s32 offset:316 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v105, off, s[0:3], s32 offset:312 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v106, off, s[0:3], s32 offset:308 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v107, off, s[0:3], s32 offset:304 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v108, off, s[0:3], s32 offset:300 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v109, off, s[0:3], s32 offset:296 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v110, off, s[0:3], s32 offset:292 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v111, off, s[0:3], s32 offset:288 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v120, off, s[0:3], s32 offset:284 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v121, off, s[0:3], s32 offset:280 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v122, off, s[0:3], s32 offset:276 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v123, off, s[0:3], s32 offset:272 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v124, off, s[0:3], s32 offset:268 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v125, off, s[0:3], s32 offset:264 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v126, off, s[0:3], s32 offset:260 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v127, off, s[0:3], s32 offset:256 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v136, off, s[0:3], s32 offset:252 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v137, off, s[0:3], s32 offset:248 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v138, off, s[0:3], s32 offset:244 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v139, off, s[0:3], s32 offset:240 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v140, off, s[0:3], s32 offset:236 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v141, off, s[0:3], s32 offset:232 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v142, off, s[0:3], s32 offset:228 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v143, off, s[0:3], s32 offset:224 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v152, off, s[0:3], s32 offset:220 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v153, off, s[0:3], s32 offset:216 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v154, off, s[0:3], s32 offset:212 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v155, off, s[0:3], s32 offset:208 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v156, off, s[0:3], s32 offset:204 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v157, off, s[0:3], s32 offset:200 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v158, off, s[0:3], s32 offset:196 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v159, off, s[0:3], s32 offset:192 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v168, off, s[0:3], s32 offset:188 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v169, off, s[0:3], s32 offset:184 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v170, off, s[0:3], s32 offset:180 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v171, off, s[0:3], s32 offset:176 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v172, off, s[0:3], s32 offset:172 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v173, off, s[0:3], s32 offset:168 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v174, off, s[0:3], s32 offset:164 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v175, off, s[0:3], s32 offset:160 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v184, off, s[0:3], s32 offset:156 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v185, off, s[0:3], s32 offset:152 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v186, off, s[0:3], s32 offset:148 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v187, off, s[0:3], s32 offset:144 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v188, off, s[0:3], s32 offset:140 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v189, off, s[0:3], s32 offset:136 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v190, off, s[0:3], s32 offset:132 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v191, off, s[0:3], s32 offset:128 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v200, off, s[0:3], s32 offset:124 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v201, off, s[0:3], s32 offset:120 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v202, off, s[0:3], s32 offset:116 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v203, off, s[0:3], s32 offset:112 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v204, off, s[0:3], s32 offset:108 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v205, off, s[0:3], s32 offset:104 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v206, off, s[0:3], s32 offset:100 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v207, off, s[0:3], s32 offset:96 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v216, off, s[0:3], s32 offset:92 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v217, off, s[0:3], s32 offset:88 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v218, off, s[0:3], s32 offset:84 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v219, off, s[0:3], s32 offset:80 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v220, off, s[0:3], s32 offset:76 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v221, off, s[0:3], s32 offset:72 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v222, off, s[0:3], s32 offset:68 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v223, off, s[0:3], s32 offset:64 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v232, off, s[0:3], s32 offset:60 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v233, off, s[0:3], s32 offset:56 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v234, off, s[0:3], s32 offset:52 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v235, off, s[0:3], s32 offset:48 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v236, off, s[0:3], s32 offset:44 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v237, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v238, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v239, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v248, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v249, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v250, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v251, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v252, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s32 ; 4-byte Folded Spill -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: buffer_load_dword v255, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v252, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v251, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v250, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v249, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v248, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v239, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v238, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v237, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v236, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v235, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v234, off, s[0:3], s32 offset:52 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v233, off, s[0:3], s32 offset:56 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v232, off, s[0:3], s32 offset:60 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v223, off, s[0:3], s32 offset:64 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v222, off, s[0:3], s32 offset:68 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v221, off, s[0:3], s32 offset:72 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v220, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v219, off, s[0:3], s32 offset:80 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v218, off, s[0:3], s32 offset:84 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v217, off, s[0:3], s32 offset:88 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v216, off, s[0:3], s32 offset:92 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v207, off, s[0:3], s32 offset:96 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v206, off, s[0:3], s32 offset:100 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v205, off, s[0:3], s32 offset:104 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v204, off, s[0:3], s32 offset:108 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v203, off, s[0:3], s32 offset:112 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v202, off, s[0:3], s32 offset:116 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v201, off, s[0:3], s32 offset:120 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v200, off, s[0:3], s32 offset:124 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v191, off, s[0:3], s32 offset:128 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v190, off, s[0:3], s32 offset:132 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v189, off, s[0:3], s32 offset:136 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v188, off, s[0:3], s32 offset:140 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v187, off, s[0:3], s32 offset:144 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v186, off, s[0:3], s32 offset:148 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v185, off, s[0:3], s32 offset:152 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v184, off, s[0:3], s32 offset:156 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v175, off, s[0:3], s32 offset:160 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v174, off, s[0:3], s32 offset:164 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v173, off, s[0:3], s32 offset:168 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v172, off, s[0:3], s32 offset:172 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v171, off, s[0:3], s32 offset:176 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v170, off, s[0:3], s32 offset:180 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v169, off, s[0:3], s32 offset:184 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v168, off, s[0:3], s32 offset:188 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v159, off, s[0:3], s32 offset:192 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v158, off, s[0:3], s32 offset:196 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v157, off, s[0:3], s32 offset:200 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v156, off, s[0:3], s32 offset:204 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v155, off, s[0:3], s32 offset:208 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v154, off, s[0:3], s32 offset:212 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v153, off, s[0:3], s32 offset:216 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v152, off, s[0:3], s32 offset:220 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v143, off, s[0:3], s32 offset:224 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v142, off, s[0:3], s32 offset:228 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v141, off, s[0:3], s32 offset:232 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v140, off, s[0:3], s32 offset:236 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v139, off, s[0:3], s32 offset:240 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v138, off, s[0:3], s32 offset:244 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v137, off, s[0:3], s32 offset:248 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v136, off, s[0:3], s32 offset:252 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v127, off, s[0:3], s32 offset:256 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v126, off, s[0:3], s32 offset:260 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v125, off, s[0:3], s32 offset:264 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v124, off, s[0:3], s32 offset:268 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v123, off, s[0:3], s32 offset:272 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v122, off, s[0:3], s32 offset:276 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v121, off, s[0:3], s32 offset:280 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v120, off, s[0:3], s32 offset:284 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v111, off, s[0:3], s32 offset:288 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v110, off, s[0:3], s32 offset:292 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v109, off, s[0:3], s32 offset:296 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v108, off, s[0:3], s32 offset:300 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v107, off, s[0:3], s32 offset:304 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v106, off, s[0:3], s32 offset:308 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v105, off, s[0:3], s32 offset:312 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v104, off, s[0:3], s32 offset:316 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v95, off, s[0:3], s32 offset:320 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v94, off, s[0:3], s32 offset:324 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v93, off, s[0:3], s32 offset:328 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v92, off, s[0:3], s32 offset:332 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v91, off, s[0:3], s32 offset:336 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v90, off, s[0:3], s32 offset:340 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v89, off, s[0:3], s32 offset:344 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v88, off, s[0:3], s32 offset:348 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v79, off, s[0:3], s32 offset:352 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v78, off, s[0:3], s32 offset:356 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v77, off, s[0:3], s32 offset:360 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v76, off, s[0:3], s32 offset:364 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v75, off, s[0:3], s32 offset:368 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v74, off, s[0:3], s32 offset:372 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v73, off, s[0:3], s32 offset:376 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v72, off, s[0:3], s32 offset:380 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v63, off, s[0:3], s32 offset:384 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v62, off, s[0:3], s32 offset:388 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v61, off, s[0:3], s32 offset:392 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v60, off, s[0:3], s32 offset:396 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v59, off, s[0:3], s32 offset:400 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v58, off, s[0:3], s32 offset:404 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v57, off, s[0:3], s32 offset:408 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v56, off, s[0:3], s32 offset:412 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v47, off, s[0:3], s32 offset:416 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v46, off, s[0:3], s32 offset:420 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v45, off, s[0:3], s32 offset:424 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v44, off, s[0:3], s32 offset:428 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v43, off, s[0:3], s32 offset:432 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v42, off, s[0:3], s32 offset:436 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload -; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9} ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19} @@ -2047,14 +327,14 @@ define internal void @child_function_ipra_tail_call() #0 { ret void } +; GCN-LABEL: {{^}}spill_sgpr_no_free_vgpr_ipra_tail_call: +; GCN-NOT: v_writelane_b32 +; GCN-NOT: buffer_store_dword +; GCN-NOT: swappc +; GCN-NOT: buffer_load_dword v0, off +; GCN-NOT: v_readlane_b32 +; GCN: setpc define void @spill_sgpr_no_free_vgpr_ipra_tail_call() #0 { -; GCN-LABEL: spill_sgpr_no_free_vgpr_ipra_tail_call: -; GCN: ; %bb.0: -; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: s_getpc_b64 s[4:5] -; GCN-NEXT: s_add_u32 s4, s4, child_function_ipra_tail_call@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s5, s5, child_function_ipra_tail_call@rel32@hi+12 -; GCN-NEXT: s_setpc_b64 s[4:5] tail call void @child_function_ipra_tail_call() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll index 34bc7523051ff..8077a0b6adfbd 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll @@ -1,45 +1,12 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck -check-prefix=MUBUF %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 -mattr=+enable-flat-scratch < %s | FileCheck -check-prefix=FLATSCR %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 < %s | FileCheck -check-prefixes=GCN,MUBUF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -stress-regalloc=8 -mattr=+enable-flat-scratch < %s | FileCheck -check-prefixes=GCN,FLATSCR %s ; Test that the VGPR spiller correctly switches to SGPR offsets when the ; instruction offset field would overflow, and that it accounts for memory ; swizzling. +; GCN-LABEL: test_inst_offset_kernel define amdgpu_kernel void @test_inst_offset_kernel() { -; MUBUF-LABEL: test_inst_offset_kernel: -; MUBUF: ; %bb.0: ; %entry -; MUBUF-NEXT: s_add_u32 s0, s0, s7 -; MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4092 ; 4-byte Folded Spill -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4092 ; 4-byte Folded Reload -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_endpgm -; -; FLATSCR-LABEL: test_inst_offset_kernel: -; FLATSCR: ; %bb.0: ; %entry -; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3 -; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_lo offset:8 glc -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_movk_i32 s0, 0xffc -; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: s_movk_i32 s0, 0xffc -; FLATSCR-NEXT: scratch_load_dword v0, off, s0 ; 4-byte Folded Reload -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_dword off, v0, vcc_hi offset:8 -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_endpgm entry: ; Occupy 4092 bytes of scratch, so the offset of the spill of %a just fits in ; the instruction offset field. @@ -47,8 +14,8 @@ entry: %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)* %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1 - - + ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4092 ; 4-byte Folded Spill + ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s{{[0-9]+}} ; 4-byte Folded Spill %a = load volatile i32, i32 addrspace(5)* %aptr ; Force %a to spill. @@ -60,42 +27,8 @@ entry: ret void } +; GCN-LABEL: test_sgpr_offset_kernel define amdgpu_kernel void @test_sgpr_offset_kernel() { -; MUBUF-LABEL: test_sgpr_offset_kernel: -; MUBUF: ; %bb.0: ; %entry -; MUBUF-NEXT: s_add_u32 s0, s0, s7 -; MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_mov_b32 s4, 0x40000 -; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: s_mov_b32 s4, 0x40000 -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_endpgm -; -; FLATSCR-LABEL: test_sgpr_offset_kernel: -; FLATSCR: ; %bb.0: ; %entry -; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3 -; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_lo offset:8 glc -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_movk_i32 s0, 0x1000 -; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: s_movk_i32 s0, 0x1000 -; FLATSCR-NEXT: scratch_load_dword v0, off, s0 ; 4-byte Folded Reload -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_dword off, v0, vcc_hi offset:8 -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_endpgm entry: ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not ; fit in the instruction, and has to live in the SGPR offset. @@ -104,7 +37,12 @@ entry: %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1 ; 0x40000 / 64 = 4096 (for wave64) + ; MUBUF: s_mov_b32 s4, 0x40000 + ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill + ; FLATSCR: s_movk_i32 s2, 0x1000 + ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s2 ; 4-byte Folded Spill %a = load volatile i32, i32 addrspace(5)* %aptr + ; Force %a to spill call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" () @@ -114,50 +52,11 @@ entry: ret void } +; FIXME: If we fail to scavenge an SGPR in a kernel we don't have a stack +; pointer to temporarily update, so we just crash. + +; GCN-LABEL: test_sgpr_offset_function_scavenge_fail_func define void @test_sgpr_offset_function_scavenge_fail_func() #2 { -; MUBUF-LABEL: test_sgpr_offset_function_scavenge_fail_func: -; MUBUF: ; %bb.0: ; %entry -; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1004 -; MUBUF-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen ; 4-byte Folded Spill -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1004 -; MUBUF-NEXT: buffer_load_dword v0, v1, s[0:3], s32 offen ; 4-byte Folded Reload -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: s_setpc_b64 s[30:31] -; -; FLATSCR-LABEL: test_sgpr_offset_function_scavenge_fail_func: -; FLATSCR: ; %bb.0: ; %entry -; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: scratch_load_dword v0, off, s32 offset:8 glc -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_add_i32 s8, s32, 0x1004 -; FLATSCR-NEXT: scratch_store_dword off, v0, s8 ; 4-byte Folded Spill -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: s_add_i32 s8, s32, 0x1004 -; FLATSCR-NEXT: scratch_load_dword v0, off, s8 ; 4-byte Folded Reload -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: s_setpc_b64 s[30:31] entry: ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not ; fit in the instruction, and has to live in the SGPR offset. @@ -178,6 +77,12 @@ entry: ; 0x40000 / 64 = 4096 (for wave64) %a = load volatile i32, i32 addrspace(5)* %aptr + + ; MUBUF: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1004 + ; MUBUF-NEXT: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+:[0-9]+}}], s32 offen ; 4-byte Folded Spill + +; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1004 + ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0.0, i32 %asm1.0, i32 %asm2.0, i32 %asm3.0, i32 %asm4.0, i32 %asm5.0, i32 %asm6.0, i32 %asm7.0, i32 %a) %asm = call { i32, i32, i32, i32, i32, i32, i32, i32 } asm sideeffect "", "=s,=s,=s,=s,=s,=s,=s,=s"() @@ -191,58 +96,18 @@ entry: %asm7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm, 7 call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0 + + ; MUBUF: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1004 + ; MUBUF-NEXT: buffer_load_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+:[0-9]+}}], s32 offen ; 4-byte Folded Reload + ; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s32, 0x1004 + ; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, [[SOFF]] ; 4-byte Folded Reload + ; Force %a to spill with no free SGPRs call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0, i32 %asm1, i32 %asm2, i32 %asm3, i32 %asm4, i32 %asm5, i32 %asm6, i32 %asm7, i32 %a) ret void } define amdgpu_kernel void @test_sgpr_offset_function_scavenge_fail_kernel() #3 { -; MUBUF-LABEL: test_sgpr_offset_function_scavenge_fail_kernel: -; MUBUF: ; %bb.0: ; %entry -; MUBUF-NEXT: s_add_u32 s0, s0, s7 -; MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1004 -; MUBUF-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen ; 4-byte Folded Spill -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1004 -; MUBUF-NEXT: buffer_load_dword v0, v1, s[0:3], 0 offen ; 4-byte Folded Reload -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: s_endpgm -; -; FLATSCR-LABEL: test_sgpr_offset_function_scavenge_fail_kernel: -; FLATSCR: ; %bb.0: ; %entry -; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3 -; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_hi offset:8 glc -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_movk_i32 s8, 0x1004 -; FLATSCR-NEXT: scratch_store_dword off, v0, s8 ; 4-byte Folded Spill -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: s_movk_i32 s8, 0x1004 -; FLATSCR-NEXT: scratch_load_dword v0, off, s8 ; 4-byte Folded Reload -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: s_endpgm entry: ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not ; fit in the instruction, and has to live in the SGPR offset. @@ -263,6 +128,12 @@ entry: ; 0x40000 / 64 = 4096 (for wave64) %a = load volatile i32, i32 addrspace(5)* %aptr + + ; MUBUF: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1004 + ; MUBUF: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+:[0-9]+}}], 0 offen ; 4-byte Folded Spill + + ; FLATSCR: s_movk_i32 [[SOFF:s[0-9]+]], 0x1004 + ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, [[SOFF]] ; 4-byte Folded Spill call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0.0, i32 %asm1.0, i32 %asm2.0, i32 %asm3.0, i32 %asm4.0, i32 %asm5.0, i32 %asm6.0, i32 %asm7.0, i32 %a) %asm = call { i32, i32, i32, i32, i32, i32, i32, i32 } asm sideeffect "", "=s,=s,=s,=s,=s,=s,=s,=s"() @@ -276,57 +147,19 @@ entry: %asm7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32 } %asm, 7 call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0 + + ; MUBUF: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x1004 + ; MUBUF: buffer_load_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+:[0-9]+}}], 0 offen ; 4-byte Folded Reload + ; FLATSCR: s_movk_i32 [[SOFF:s[0-9]+]], 0x1004 + ; FLATSCR: scratch_load_dword v{{[0-9]+}}, off, [[SOFF]] ; 4-byte Folded Reload + ; Force %a to spill with no free SGPRs call void asm sideeffect "", "s,s,s,s,s,s,s,s,v"(i32 %asm0, i32 %asm1, i32 %asm2, i32 %asm3, i32 %asm4, i32 %asm5, i32 %asm6, i32 %asm7, i32 %a) ret void } +; GCN-LABEL: test_sgpr_offset_subregs_kernel define amdgpu_kernel void @test_sgpr_offset_subregs_kernel() { -; MUBUF-LABEL: test_sgpr_offset_subregs_kernel: -; MUBUF: ; %bb.0: ; %entry -; MUBUF-NEXT: s_add_u32 s0, s0, s7 -; MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:12 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:16 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4088 ; 4-byte Folded Spill -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], 0 offset:4092 ; 4-byte Folded Spill -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4088 ; 4-byte Folded Reload -; MUBUF-NEXT: s_nop 0 -; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:4092 ; 4-byte Folded Reload -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ; v[0:1] -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: s_endpgm -; -; FLATSCR-LABEL: test_sgpr_offset_subregs_kernel: -; FLATSCR: ; %bb.0: ; %entry -; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3 -; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, vcc_lo offset:12 glc -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_movk_i32 s0, 0xff8 -; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 ; 8-byte Folded Spill -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_hi offset:8 glc -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_movk_i32 s0, 0xff8 -; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ; v[0:1] -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: s_endpgm entry: ; Occupy 4088 bytes of scratch, so that the spill of the last subreg of %a ; still fits below offset 4096 (4088 + 8 - 4 = 4092), and can be placed in @@ -334,6 +167,11 @@ entry: %alloca = alloca i8, i32 4084, align 4, addrspace(5) %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)* %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)* + + ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4088 ; 4-byte Folded Spill + ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], 0 offset:4092 ; 4-byte Folded Spill + ; FLATSCR: s_movk_i32 [[SOFF:s[0-9]+]], 0xff8 + ; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], [[SOFF]] ; 8-byte Folded Spill %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1 %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr @@ -350,54 +188,8 @@ entry: ret void } +; GCN-LABEL: test_inst_offset_subregs_kernel define amdgpu_kernel void @test_inst_offset_subregs_kernel() { -; MUBUF-LABEL: test_inst_offset_subregs_kernel: -; MUBUF: ; %bb.0: ; %entry -; MUBUF-NEXT: s_add_u32 s0, s0, s7 -; MUBUF-NEXT: s_addc_u32 s1, s1, 0 -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:12 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:16 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_mov_b32 s4, 0x3ff00 -; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Spill -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_mov_b32 s4, 0x3ff00 -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload -; MUBUF-NEXT: s_nop 0 -; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Reload -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ; v[0:1] -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: s_endpgm -; -; FLATSCR-LABEL: test_inst_offset_subregs_kernel: -; FLATSCR: ; %bb.0: ; %entry -; FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s0, s3 -; FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 -; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, vcc_lo offset:12 glc -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_movk_i32 s0, 0xffc -; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s0 ; 8-byte Folded Spill -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: scratch_load_dword v0, off, vcc_hi offset:8 glc -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_movk_i32 s0, 0xffc -; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 ; 8-byte Folded Reload -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ; v[0:1] -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: s_endpgm entry: ; Occupy 4092 bytes of scratch, so that the spill of the last subreg of %a ; does not fit below offset 4096 (4092 + 8 - 4 = 4096), and has to live @@ -407,6 +199,11 @@ entry: %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)* ; 0x3ff00 / 64 = 4092 (for wave64) + ; MUBUF: s_mov_b32 s4, 0x3ff00 + ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill + ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 offset:4 ; 4-byte Folded Spill + ; FLATSCR: s_movk_i32 [[SOFF:s[0-9]+]], 0xffc + ; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], [[SOFF]] ; 8-byte Folded Spill %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1 %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr @@ -423,34 +220,8 @@ entry: ret void } +; GCN-LABEL: test_inst_offset_function define void @test_inst_offset_function() { -; MUBUF-LABEL: test_inst_offset_function: -; MUBUF: ; %bb.0: ; %entry -; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4088 ; 4-byte Folded Spill -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4088 ; 4-byte Folded Reload -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_setpc_b64 s[30:31] -; -; FLATSCR-LABEL: test_inst_offset_function: -; FLATSCR: ; %bb.0: ; %entry -; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR-NEXT: scratch_load_dword v0, off, s32 offset:4 glc -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:4088 ; 4-byte Folded Spill -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: scratch_load_dword v0, off, s32 offset:4088 ; 4-byte Folded Reload -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:4 -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_setpc_b64 s[30:31] entry: ; Occupy enough bytes of scratch, so the offset of the spill of %a ; just fits in the instruction offset field when the emergency stack @@ -460,8 +231,8 @@ entry: %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)* %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1 - - + ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill + ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill %a = load volatile i32, i32 addrspace(5)* %aptr ; Force %a to spill. @@ -473,38 +244,8 @@ entry: ret void } +; GCN-LABEL: test_sgpr_offset_function define void @test_sgpr_offset_function() { -; MUBUF-LABEL: test_sgpr_offset_function: -; MUBUF: ; %bb.0: ; %entry -; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_add_i32 s4, s32, 0x40100 -; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: s_add_i32 s4, s32, 0x40100 -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_setpc_b64 s[30:31] -; -; FLATSCR-LABEL: test_sgpr_offset_function: -; FLATSCR: ; %bb.0: ; %entry -; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR-NEXT: scratch_load_dword v0, off, s32 offset:8 glc -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_add_i32 s0, s32, 0x1004 -; FLATSCR-NEXT: scratch_store_dword off, v0, s0 ; 4-byte Folded Spill -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: s_add_i32 s0, s32, 0x1004 -; FLATSCR-NEXT: scratch_load_dword v0, off, s0 ; 4-byte Folded Reload -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_dword off, v0, s32 offset:8 -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: s_setpc_b64 s[30:31] entry: ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not ; fit in the instruction, and has to live in the SGPR offset. @@ -513,6 +254,10 @@ entry: %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1 ; 0x40000 / 64 = 4096 (for wave64) + ; MUBUF: s_add_i32 s4, s32, 0x40100 + ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill + ; FLATSCR: s_add_i32 s0, s32, 0x1004 + ; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s0 ; 4-byte Folded Spill %a = load volatile i32, i32 addrspace(5)* %aptr ; Force %a to spill @@ -524,57 +269,24 @@ entry: ret void } +; GCN-LABEL: test_sgpr_offset_subregs_function define void @test_sgpr_offset_subregs_function() { -; MUBUF-LABEL: test_sgpr_offset_subregs_function: -; MUBUF: ; %bb.0: ; %entry -; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:12 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4084 ; 4-byte Folded Spill -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4088 ; 4-byte Folded Spill -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4084 ; 4-byte Folded Reload -; MUBUF-NEXT: s_nop 0 -; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4088 ; 4-byte Folded Reload -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ; v[0:1] -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: s_setpc_b64 s[30:31] -; -; FLATSCR-LABEL: test_sgpr_offset_subregs_function: -; FLATSCR: ; %bb.0: ; %entry -; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s32 offset:8 glc -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s32 offset:4084 ; 8-byte Folded Spill -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: scratch_load_dword v0, off, s32 offset:4 glc -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s32 offset:4084 ; 8-byte Folded Reload -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ; v[0:1] -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: s_setpc_b64 s[30:31] entry: ; We want to test the spill of the last subreg of %a is the highest ; valid value for the immediate offset. We enable the emergency ; stack slot for large frames, so it's hard to get the frame layout ; exactly as we want to test it. + ; ; Occupy 4084 bytes of scratch, so that the spill of the last subreg of %a ; still fits below offset 4096 (4084 + 8 - 4 = 4092), and can be placed in ; the instruction offset field. %alloca = alloca i8, i32 4084, align 4, addrspace(5) %bufv1 = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)* %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)* + + ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4084 ; 4-byte Folded Spill + ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offset:4088 ; 4-byte Folded Spill + ; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], s32 offset:4084 ; 8-byte Folded Spill %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1 %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr @@ -591,48 +303,8 @@ entry: ret void } +; GCN-LABEL: test_inst_offset_subregs_function define void @test_inst_offset_subregs_function() { -; MUBUF-LABEL: test_inst_offset_subregs_function: -; MUBUF: ; %bb.0: ; %entry -; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_add_i32 s4, s32, 0x3ff00 -; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: buffer_store_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Spill -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 glc -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_add_i32 s4, s32, 0x3ff00 -; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload -; MUBUF-NEXT: s_nop 0 -; MUBUF-NEXT: buffer_load_dword v1, off, s[0:3], s4 offset:4 ; 4-byte Folded Reload -; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: ;;#ASMSTART -; MUBUF-NEXT: ; v[0:1] -; MUBUF-NEXT: ;;#ASMEND -; MUBUF-NEXT: s_setpc_b64 s[30:31] -; -; FLATSCR-LABEL: test_inst_offset_subregs_function: -; FLATSCR: ; %bb.0: ; %entry -; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s32 offset:12 glc -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s32 offset:4092 ; 8-byte Folded Spill -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: scratch_load_dword v0, off, s32 offset:8 glc -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s32 offset:4092 ; 8-byte Folded Reload -; FLATSCR-NEXT: s_waitcnt vmcnt(0) -; FLATSCR-NEXT: ;;#ASMSTART -; FLATSCR-NEXT: ; v[0:1] -; FLATSCR-NEXT: ;;#ASMEND -; FLATSCR-NEXT: s_setpc_b64 s[30:31] entry: ; Occupy 4088 bytes of scratch, so that the spill of the last subreg of %a ; does not fit below offset 4096 (408 + 4 + 8 - 4 = 4096), and has to live @@ -642,6 +314,10 @@ entry: %bufv2 = bitcast i8 addrspace(5)* %alloca to <2 x i32> addrspace(5)* ; 0x3ff0000 / 64 = 4092 (for wave64) + ; MUBUF: s_add_i32 s4, s32, 0x3ff00 + ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 ; 4-byte Folded Spill + ; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[{{[0-9]+:[0-9]+}}], s4 offset:4 ; 4-byte Folded Spill + ; FLATSCR: scratch_store_dwordx2 off, v[{{[0-9:]+}}], s32 offset:4092 ; 8-byte Folded Spill %aptr = getelementptr <2 x i32>, <2 x i32> addrspace(5)* %bufv2, i32 1 %a = load volatile <2 x i32>, <2 x i32> addrspace(5)* %aptr diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll index bdeb97cede4c3..e0fc1e19b1677 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll +++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -march=amdgcn -mcpu=verde -enable-misched=0 -post-RA-scheduler=0 -amdgpu-spill-sgpr-to-vgpr=0 < %s | FileCheck -check-prefixes=CHECK,GFX6 %s ; RUN: llc -sgpr-regalloc=basic -vgpr-regalloc=basic -march=amdgcn -mcpu=tonga -enable-misched=0 -post-RA-scheduler=0 -amdgpu-spill-sgpr-to-vgpr=0 < %s | FileCheck --check-prefix=CHECK %s ; RUN: llc -march=amdgcn -mattr=-xnack,+enable-flat-scratch -mcpu=gfx900 -enable-misched=0 -post-RA-scheduler=0 -amdgpu-spill-sgpr-to-vgpr=0 < %s | FileCheck -check-prefixes=CHECK,GFX9-FLATSCR,FLATSCR %s @@ -13,10044 +12,17 @@ ; mechanism works even when many spills happen. ; Just test that it compiles successfully. +; CHECK-LABEL: test + +; GFX9-FLATSCR: s_mov_b32 [[SOFF1:s[0-9]+]], 4{{$}} +; GFX9-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SOFF1]] ; 16-byte Folded Spill +; GFX9-FLATSCR: ;;#ASMSTART +; GFX9-FLATSCR: s_movk_i32 [[SOFF2:s[0-9]+]], 0x1{{[0-9a-f]+}}{{$}} +; GFX9-FLATSCR: scratch_load_dwordx4 v[{{[0-9:]+}}], off, [[SOFF2]] ; 16-byte Folded Reload + +; GFX10-FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], off offset:{{[0-9]+}} ; 16-byte Folded Spill +; GFX10-FLATSCR: scratch_load_dwordx4 v[{{[0-9:]+}}], off, off offset:{{[0-9]+}} ; 16-byte Folded Reload define amdgpu_kernel void @test(<1280 x i32> addrspace(1)* %out, <1280 x i32> addrspace(1)* %in) { -; GFX6-LABEL: test: -; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_mov_b32 s44, SCRATCH_RSRC_DWORD0 -; GFX6-NEXT: s_mov_b32 s45, SCRATCH_RSRC_DWORD1 -; GFX6-NEXT: s_mov_b32 s46, -1 -; GFX6-NEXT: s_mov_b32 s47, 0xe8f000 -; GFX6-NEXT: s_add_u32 s44, s44, s3 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX6-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0 -; GFX6-NEXT: v_mbcnt_hi_u32_b32_e32 v0, -1, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v5, 13, v0 -; GFX6-NEXT: s_mov_b32 s18, 0 -; GFX6-NEXT: s_mov_b32 s19, 0xf000 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: v_add_i32_e32 v0, vcc, s2, v5 -; GFX6-NEXT: v_mov_b32_e32 v1, s3 -; GFX6-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; GFX6-NEXT: s_movk_i32 s4, 0x80 -; GFX6-NEXT: s_mov_b32 s5, s18 -; GFX6-NEXT: s_mov_b64 s[6:7], s[18:19] -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:3968 -; GFX6-NEXT: s_addc_u32 s45, s45, 0 -; GFX6-NEXT: s_movk_i32 s8, 0x100 -; GFX6-NEXT: s_mov_b32 s9, s18 -; GFX6-NEXT: s_mov_b64 s[10:11], s[18:19] -; GFX6-NEXT: s_movk_i32 s12, 0x180 -; GFX6-NEXT: s_mov_b32 s13, s18 -; GFX6-NEXT: s_mov_b64 s[14:15], s[18:19] -; GFX6-NEXT: s_movk_i32 s20, 0x200 -; GFX6-NEXT: s_mov_b32 s21, s18 -; GFX6-NEXT: s_mov_b64 s[22:23], s[18:19] -; GFX6-NEXT: s_movk_i32 s24, 0x280 -; GFX6-NEXT: s_mov_b32 s25, s18 -; GFX6-NEXT: s_mov_b64 s[26:27], s[18:19] -; GFX6-NEXT: s_movk_i32 s28, 0x300 -; GFX6-NEXT: s_mov_b32 s29, s18 -; GFX6-NEXT: s_mov_b64 s[30:31], s[18:19] -; GFX6-NEXT: s_movk_i32 s36, 0x380 -; GFX6-NEXT: s_mov_b32 s37, s18 -; GFX6-NEXT: s_mov_b64 s[38:39], s[18:19] -; GFX6-NEXT: s_movk_i32 s40, 0x400 -; GFX6-NEXT: s_mov_b32 s41, s18 -; GFX6-NEXT: s_mov_b64 s[42:43], s[18:19] -; GFX6-NEXT: s_mov_b64 s[16:17], s[2:3] -; GFX6-NEXT: v_mov_b32_e32 v6, 0 -; GFX6-NEXT: s_mov_b32 s2, 0x3fd00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1268 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1272 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1276 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1280 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:3984 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1300 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1304 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1308 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1312 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4000 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1332 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1336 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1340 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1344 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4016 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1364 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1368 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1372 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1376 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4032 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1396 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1400 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1404 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1408 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4048 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1428 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1432 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1436 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1440 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4064 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1460 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1464 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1468 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1472 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:4080 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1492 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1496 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1500 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1504 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:3968 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1556 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1560 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1564 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1568 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:3984 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1588 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1592 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1596 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1600 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4000 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1620 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1624 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1628 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1632 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4016 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1652 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1656 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1660 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1664 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4032 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1684 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1688 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1692 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1696 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4048 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1716 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1720 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1724 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1728 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4064 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1748 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1752 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1756 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1760 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[8:11], 0 addr64 offset:4080 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1780 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1784 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1788 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1792 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:3968 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1860 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1864 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1868 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1872 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:3984 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1892 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1896 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1900 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1904 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4000 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1924 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1928 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1932 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1936 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4016 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1956 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1960 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1964 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:1968 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4032 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:1988 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:1992 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:1996 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2000 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4048 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2020 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2024 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2028 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2032 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4064 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2052 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2056 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2060 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2064 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[12:15], 0 addr64 offset:4080 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2084 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2088 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2092 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2096 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:3968 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2148 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2152 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2156 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2160 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:3984 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2180 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2184 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2188 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2192 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4000 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2212 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2216 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2220 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2224 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4016 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2244 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2248 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2252 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2256 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4032 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2276 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2280 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2284 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2288 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4048 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2308 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2312 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2316 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2320 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4064 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2340 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2344 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2348 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2352 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[20:23], 0 addr64 offset:4080 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2372 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2376 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2380 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2384 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:3968 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2452 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2456 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2460 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2464 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:3984 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2484 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2488 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2492 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2496 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4000 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2516 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2520 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2524 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2528 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4016 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2548 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2552 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2556 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2560 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4032 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2580 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2584 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2588 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2592 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4048 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2612 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2616 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2620 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2624 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4064 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2644 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2648 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2652 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2656 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[24:27], 0 addr64 offset:4080 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2676 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2680 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2684 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2688 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:3968 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2740 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2744 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2748 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2752 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:3984 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2772 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2776 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2780 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2784 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4000 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2804 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2808 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2812 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2816 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4016 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2836 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2840 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2844 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2848 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4032 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2868 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2872 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2876 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2880 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4048 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2900 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2904 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2908 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2912 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4064 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2932 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2936 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2940 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2944 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[28:31], 0 addr64 offset:4080 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:2964 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:2968 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:2972 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:2976 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:3968 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3044 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3048 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3052 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3056 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:3984 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3076 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3080 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3084 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3088 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4000 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3108 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3112 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3116 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3120 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4016 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3140 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3144 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3148 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3152 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4032 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3172 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3176 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3180 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3184 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4048 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3204 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3208 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3212 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3216 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4064 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3236 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3240 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3244 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3248 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[36:39], 0 addr64 offset:4080 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3268 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3272 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3276 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3280 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:3968 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3332 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3336 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3340 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3344 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:3984 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3364 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3368 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3372 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3376 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4000 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3396 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3400 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3404 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3408 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4016 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3428 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3432 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3436 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3440 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4032 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3460 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3464 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3468 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3472 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4048 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3492 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3496 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3500 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3504 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[40:43], 0 addr64 offset:4064 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v7, off, s[44:47], 0 offset:3524 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v8, off, s[44:47], 0 offset:3528 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v9, off, s[44:47], 0 offset:3532 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v10, off, s[44:47], 0 offset:3536 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[40:43], 0 addr64 offset:4080 -; GFX6-NEXT: s_waitcnt expcnt(3) -; GFX6-NEXT: v_add_i32_e32 v7, vcc, s0, v5 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3556 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3560 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3564 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3568 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:16 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:16 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:20 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:24 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:28 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:32 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:32 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:36 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:40 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:44 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:48 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:48 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:52 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:56 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:60 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:64 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:64 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:68 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:72 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:76 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:80 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:80 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:84 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:88 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:92 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:96 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:96 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:100 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:104 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:108 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:112 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:112 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:116 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:120 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:124 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:128 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:128 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:132 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:136 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:140 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:144 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:144 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:148 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:152 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:156 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:160 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:160 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:164 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:168 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:172 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:176 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:176 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:180 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:184 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:188 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:192 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:192 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:196 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:200 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:204 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:208 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:208 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:212 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:216 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:220 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:224 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:224 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:228 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:232 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:236 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:240 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:240 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:244 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:248 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:252 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:256 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:256 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:260 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:264 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:268 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:272 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:272 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:276 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:280 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:284 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:288 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:288 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:292 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:296 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:300 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:304 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:304 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:308 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:312 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:316 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:320 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:320 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:324 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:328 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:332 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:336 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:336 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:340 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:344 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:348 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:352 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:352 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:356 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:360 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:364 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:368 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:368 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:372 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:376 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:380 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:384 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:384 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:388 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:392 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:396 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:400 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:400 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:404 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:408 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:412 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:416 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:416 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:420 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:424 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:428 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:432 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:432 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:436 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:440 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:444 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:448 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:448 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:452 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:456 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:460 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:464 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:464 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:468 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:472 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:476 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:480 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:480 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:484 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:488 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:492 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:496 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:496 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:500 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:504 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:508 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:512 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:512 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:516 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:520 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:524 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:528 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:528 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:532 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:536 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:540 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:544 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:544 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:548 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:552 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:556 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:560 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:560 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:564 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:568 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:572 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:576 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:576 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:580 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:584 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:588 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:592 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:592 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:596 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:600 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:604 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:608 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:608 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:612 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:616 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:620 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:624 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:624 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:628 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:632 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:636 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:640 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:640 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:644 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:648 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:652 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:656 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:656 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:660 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:664 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:668 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:672 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:672 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:676 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:680 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:684 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:688 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:688 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:692 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:696 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:700 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:704 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:704 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:708 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:712 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:716 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:720 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:720 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:724 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:728 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:732 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:736 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:736 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:740 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:744 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:748 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:752 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:752 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:756 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:760 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:764 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:768 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:768 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:772 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:776 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:780 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:784 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:784 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:788 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:792 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:796 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:800 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:800 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:804 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:808 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:812 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:816 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:816 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:820 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:824 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:828 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:832 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:832 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:836 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:840 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:844 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:848 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:848 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:852 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:856 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:860 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:864 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:864 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:868 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:872 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:876 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:880 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:880 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:884 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:888 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:892 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:896 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:896 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:900 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:904 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:908 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:912 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:912 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:916 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:920 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:924 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:928 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:928 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:932 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:936 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:940 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:944 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:944 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:948 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:952 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:956 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:960 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:960 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:964 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:968 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:972 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:976 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:976 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:980 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:984 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:988 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:992 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:992 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:996 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1000 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1004 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1008 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1008 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1012 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1016 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1020 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1024 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1024 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1028 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1032 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1036 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1040 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1040 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1044 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1048 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1052 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1056 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1056 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1060 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1064 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1068 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1072 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1072 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1076 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1080 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1084 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1088 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1088 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1092 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1096 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1100 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1104 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1104 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1108 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1112 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1116 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1120 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1120 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1124 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1128 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1132 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1136 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1136 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1140 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1144 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1148 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1152 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1152 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1156 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1160 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1164 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1168 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1168 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1172 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1176 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1180 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1184 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1184 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1188 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1192 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1196 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1200 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1200 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1204 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1208 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1212 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1216 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1216 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1220 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1224 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1228 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1232 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1232 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1236 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1240 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1244 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1248 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1248 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1252 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1256 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1260 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1264 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1264 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1284 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1288 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1292 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1296 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1280 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1316 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1320 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1324 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1328 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1296 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1348 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1352 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1356 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1360 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1312 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1380 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1384 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1388 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1392 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1328 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1412 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1416 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1420 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1424 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1344 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1444 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1448 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1452 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1456 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1360 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1476 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1480 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1484 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1488 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1376 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1508 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1512 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1516 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1520 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1392 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1524 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1528 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1532 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1536 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1408 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1540 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1544 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1548 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1552 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1424 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1572 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1576 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1580 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1584 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1440 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1604 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1608 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1612 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1616 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1456 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1636 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1640 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1644 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1648 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1472 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1668 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1672 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1676 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1680 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1488 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1700 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1704 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1708 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1712 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1504 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1732 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1736 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1740 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1744 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1520 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1764 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1768 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1772 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1776 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1536 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1796 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1800 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1804 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1808 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1552 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1812 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1816 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1820 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1824 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1568 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1828 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1832 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1836 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1840 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1584 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1844 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1848 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1852 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1856 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1600 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1876 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1880 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1884 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1888 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1616 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1908 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1912 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1916 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1920 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1632 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1940 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1944 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1948 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1952 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1648 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:1972 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:1976 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:1980 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:1984 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1664 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2004 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2008 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2012 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2016 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1680 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2036 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2040 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2044 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2048 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1696 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2068 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2072 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2076 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2080 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1712 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2100 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2104 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2108 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2112 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1728 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2116 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2120 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2124 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2128 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1744 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2132 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2136 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2140 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2144 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1760 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2164 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2168 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2172 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2176 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1776 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2196 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2200 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2204 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2208 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1792 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2228 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2232 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2236 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2240 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1808 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2260 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2264 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2268 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2272 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1824 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2292 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2296 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2300 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2304 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1840 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2324 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2328 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2332 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2336 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1856 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2356 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2360 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2364 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2368 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1872 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2388 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2392 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2396 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2400 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1888 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2404 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2408 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2412 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2416 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1904 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2420 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2424 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2428 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2432 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1920 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2436 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2440 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2444 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2448 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1936 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2468 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2472 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2476 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2480 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1952 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2500 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2504 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2508 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2512 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1968 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2532 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2536 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2540 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2544 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:1984 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2564 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2568 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2572 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2576 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2000 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2596 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2600 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2604 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2608 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2016 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2628 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2632 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2636 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2640 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2032 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2660 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2664 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2668 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2672 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2048 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2692 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2696 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2700 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2704 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2064 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2708 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2712 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2716 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2720 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2080 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2724 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2728 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2732 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2736 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2096 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2756 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2760 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2764 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2768 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2112 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2788 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2792 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2796 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2800 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2128 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2820 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2824 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2828 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2832 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2144 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2852 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2856 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2860 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2864 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2160 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2884 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2888 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2892 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2896 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2176 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2916 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2920 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2924 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2928 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2192 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2948 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2952 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2956 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2960 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2208 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2980 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:2984 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:2988 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:2992 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2224 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:2996 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3000 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3004 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3008 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2240 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3012 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3016 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3020 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3024 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2256 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3028 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3032 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3036 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3040 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2272 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3060 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3064 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3068 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3072 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2288 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3092 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3096 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3100 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3104 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2304 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3124 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3128 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3132 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3136 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2320 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3156 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3160 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3164 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3168 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2336 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3188 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3192 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3196 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3200 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2352 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3220 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3224 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3228 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3232 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2368 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3252 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3256 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3260 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3264 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2384 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3284 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3288 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3292 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3296 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2400 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3300 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3304 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3308 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3312 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2416 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3316 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3320 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3324 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3328 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2432 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3348 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3352 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3356 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3360 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2448 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3380 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3384 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3388 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3392 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2464 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3412 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3416 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3420 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3424 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2480 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3444 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3448 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3452 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3456 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2496 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3476 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3480 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3484 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3488 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2512 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3508 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3512 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3516 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3520 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2528 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3540 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3544 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3548 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3552 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2544 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3572 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3576 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3580 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3584 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2560 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3588 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3592 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3596 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3600 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2576 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3604 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3608 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3612 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3616 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2592 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3620 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3624 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3628 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3632 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2608 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3636 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3640 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3644 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3648 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2624 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3652 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3656 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3660 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3664 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2640 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3668 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3672 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3676 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3680 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2656 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3684 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3688 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3692 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3696 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2672 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3700 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3704 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3708 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3712 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2688 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3716 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3720 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3724 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3728 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2704 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3732 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3736 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3740 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3744 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2720 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3748 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3752 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3756 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3760 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2736 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3764 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3768 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3772 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3776 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2752 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3780 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3784 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3788 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3792 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2768 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3796 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3800 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3804 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3808 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2784 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3812 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3816 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3820 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3824 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2800 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3828 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3832 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3836 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3840 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2816 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3844 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3848 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3852 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3856 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2832 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3860 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3864 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3868 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3872 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2848 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3876 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3880 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3884 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3888 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2864 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3892 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3896 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3900 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3904 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2880 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3908 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3912 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3916 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3920 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2896 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3924 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3928 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3932 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3936 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2912 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3940 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3944 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3948 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3952 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2928 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3956 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3960 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3964 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3968 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2944 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3972 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3976 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3980 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:3984 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2960 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:3988 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:3992 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:3996 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4000 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2976 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4004 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:4008 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:4012 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4016 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:2992 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4020 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:4024 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:4028 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4032 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3008 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4036 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:4040 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:4044 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4048 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3024 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4052 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:4056 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:4060 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4064 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3040 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], 0 offset:4068 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], 0 offset:4072 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], 0 offset:4076 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], 0 offset:4080 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3056 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3072 -; GFX6-NEXT: s_mov_b32 s2, 0x40100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3088 -; GFX6-NEXT: s_mov_b32 s2, 0x40500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3104 -; GFX6-NEXT: s_mov_b32 s2, 0x40900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3120 -; GFX6-NEXT: s_mov_b32 s2, 0x40d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3136 -; GFX6-NEXT: s_mov_b32 s2, 0x41100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3152 -; GFX6-NEXT: s_mov_b32 s2, 0x41500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3168 -; GFX6-NEXT: s_mov_b32 s2, 0x41900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3184 -; GFX6-NEXT: s_mov_b32 s2, 0x41d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3200 -; GFX6-NEXT: s_mov_b32 s2, 0x42100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3216 -; GFX6-NEXT: s_mov_b32 s2, 0x42500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3232 -; GFX6-NEXT: s_mov_b32 s2, 0x42900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3248 -; GFX6-NEXT: s_mov_b32 s2, 0x42d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3264 -; GFX6-NEXT: s_mov_b32 s2, 0x43100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3280 -; GFX6-NEXT: s_mov_b32 s2, 0x43500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3296 -; GFX6-NEXT: s_mov_b32 s2, 0x43900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3312 -; GFX6-NEXT: s_mov_b32 s2, 0x43d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3328 -; GFX6-NEXT: s_mov_b32 s2, 0x44100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3344 -; GFX6-NEXT: s_mov_b32 s2, 0x44500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3360 -; GFX6-NEXT: s_mov_b32 s2, 0x44900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3376 -; GFX6-NEXT: s_mov_b32 s2, 0x44d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3392 -; GFX6-NEXT: s_mov_b32 s2, 0x45100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3408 -; GFX6-NEXT: s_mov_b32 s2, 0x45500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3424 -; GFX6-NEXT: s_mov_b32 s2, 0x45900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3440 -; GFX6-NEXT: s_mov_b32 s2, 0x45d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3456 -; GFX6-NEXT: s_mov_b32 s2, 0x46100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3472 -; GFX6-NEXT: s_mov_b32 s2, 0x46500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3488 -; GFX6-NEXT: s_mov_b32 s2, 0x46900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3504 -; GFX6-NEXT: s_mov_b32 s2, 0x46d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3520 -; GFX6-NEXT: s_mov_b32 s2, 0x47100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3536 -; GFX6-NEXT: s_mov_b32 s2, 0x47500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3552 -; GFX6-NEXT: s_mov_b32 s2, 0x47900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3568 -; GFX6-NEXT: s_mov_b32 s2, 0x47d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3584 -; GFX6-NEXT: s_mov_b32 s2, 0x48100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3600 -; GFX6-NEXT: s_mov_b32 s2, 0x48500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3616 -; GFX6-NEXT: s_mov_b32 s2, 0x48900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3632 -; GFX6-NEXT: s_mov_b32 s2, 0x48d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3648 -; GFX6-NEXT: s_mov_b32 s2, 0x49100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3664 -; GFX6-NEXT: s_mov_b32 s2, 0x49500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3680 -; GFX6-NEXT: s_mov_b32 s2, 0x49900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3696 -; GFX6-NEXT: s_mov_b32 s2, 0x49d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3712 -; GFX6-NEXT: s_mov_b32 s2, 0x4a100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3728 -; GFX6-NEXT: s_mov_b32 s2, 0x4a500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3744 -; GFX6-NEXT: s_mov_b32 s2, 0x4a900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3760 -; GFX6-NEXT: s_mov_b32 s2, 0x4ad00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3776 -; GFX6-NEXT: s_mov_b32 s2, 0x4b100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3792 -; GFX6-NEXT: s_mov_b32 s2, 0x4b500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3808 -; GFX6-NEXT: s_mov_b32 s2, 0x4b900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3824 -; GFX6-NEXT: s_mov_b32 s2, 0x4bd00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3840 -; GFX6-NEXT: s_mov_b32 s2, 0x4c100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3856 -; GFX6-NEXT: s_mov_b32 s2, 0x4c500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3872 -; GFX6-NEXT: s_mov_b32 s2, 0x4c900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3888 -; GFX6-NEXT: s_mov_b32 s2, 0x4cd00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3904 -; GFX6-NEXT: s_mov_b32 s2, 0x4d100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3920 -; GFX6-NEXT: s_mov_b32 s2, 0x4d500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3936 -; GFX6-NEXT: s_mov_b32 s2, 0x4d900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3952 -; GFX6-NEXT: s_mov_b32 s2, 0x4dd00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3968 -; GFX6-NEXT: s_mov_b32 s2, 0x4e100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:3984 -; GFX6-NEXT: s_mov_b32 s2, 0x4e500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4000 -; GFX6-NEXT: s_mov_b32 s2, 0x4e900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4016 -; GFX6-NEXT: s_mov_b32 s2, 0x4ed00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4032 -; GFX6-NEXT: s_mov_b32 s2, 0x4f100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4048 -; GFX6-NEXT: s_mov_b32 s2, 0x4f500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4064 -; GFX6-NEXT: s_mov_b32 s2, 0x4f900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[44:47], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[44:47], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[44:47], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[44:47], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[5:6], s[16:19], 0 addr64 offset:4080 -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3556 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3560 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3564 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3568 ; 4-byte Folded Reload -; GFX6-NEXT: v_mov_b32_e32 v4, s1 -; GFX6-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc -; GFX6-NEXT: s_mov_b64 s[2:3], s[18:19] -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4080 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3524 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3528 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3532 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3536 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4064 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3492 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3496 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3500 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3504 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4048 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3460 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3464 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3468 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3472 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4032 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3428 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3432 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3436 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3440 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4016 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3396 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3400 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3404 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3408 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:4000 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3364 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3368 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3372 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3376 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:3984 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3332 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3336 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3340 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3344 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[40:43], 0 addr64 offset:3968 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3268 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3272 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3276 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3280 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4080 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3236 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3240 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3244 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3248 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4064 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3204 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3208 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3212 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3216 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4048 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3172 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3176 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3180 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3184 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4032 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3140 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3144 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3148 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3152 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4016 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3108 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3112 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3116 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3120 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:4000 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3076 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3080 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3084 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3088 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:3984 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:3044 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:3048 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:3052 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:3056 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:3968 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2964 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2968 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2972 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2976 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4080 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2932 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2936 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2940 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2944 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4064 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2900 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2904 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2908 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2912 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4048 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2868 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2872 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2876 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2880 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4032 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2836 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2840 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2844 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2848 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4016 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2804 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2808 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2812 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2816 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:4000 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2772 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2776 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2780 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2784 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:3984 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2740 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2744 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2748 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2752 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[28:31], 0 addr64 offset:3968 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2676 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2680 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2684 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2688 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4080 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2644 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2648 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2652 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2656 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4064 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2612 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2616 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2620 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2624 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4048 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2580 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2584 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2588 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2592 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4032 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2548 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2552 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2556 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2560 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4016 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2516 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2520 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2524 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2528 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:4000 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2484 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2488 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2492 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2496 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:3984 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2452 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2456 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2460 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2464 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[24:27], 0 addr64 offset:3968 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2372 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2376 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2380 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2384 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4080 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2340 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2344 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2348 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2352 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4064 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2308 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2312 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2316 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2320 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4048 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2276 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2280 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2284 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2288 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4032 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2244 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2248 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2252 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2256 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4016 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2212 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2216 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2220 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2224 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:4000 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2180 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2184 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2188 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2192 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:3984 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2148 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2152 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2156 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2160 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[20:23], 0 addr64 offset:3968 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2084 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2088 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2092 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2096 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4080 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2052 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2056 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2060 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2064 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4064 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:2020 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:2024 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:2028 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2032 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4048 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1988 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1992 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1996 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:2000 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4032 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1956 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1960 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1964 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1968 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4016 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1924 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1928 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1932 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1936 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:4000 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1892 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1896 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1900 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1904 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:3984 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1860 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1864 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1868 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1872 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[12:15], 0 addr64 offset:3968 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1780 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1784 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1788 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1792 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4080 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1748 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1752 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1756 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1760 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4064 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1716 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1720 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1724 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1728 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4048 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1684 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1688 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1692 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1696 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4032 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1652 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1656 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1660 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1664 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4016 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1620 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1624 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1628 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1632 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:4000 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1588 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1592 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1596 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1600 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:3984 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1556 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1560 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1564 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1568 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[8:11], 0 addr64 offset:3968 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1492 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1496 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1500 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1504 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4080 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1460 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1464 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1468 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1472 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4064 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1428 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1432 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1436 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1440 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4048 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1396 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1400 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1404 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1408 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4032 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1364 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1368 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1372 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1376 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4016 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1332 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1336 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1340 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1344 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:4000 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1300 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1304 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1308 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1312 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:3984 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[44:47], 0 offset:1268 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v10, off, s[44:47], 0 offset:1272 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v11, off, s[44:47], 0 offset:1276 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v12, off, s[44:47], 0 offset:1280 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[9:12], v[7:8], s[4:7], 0 addr64 offset:3968 -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4080 -; GFX6-NEXT: s_mov_b32 s4, 0x4f900 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4f500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4064 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4f100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4048 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4ed00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4032 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4e900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4016 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4e500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:4000 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4e100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3984 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4dd00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3968 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4d900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3952 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4d500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3936 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4d100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3920 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4cd00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3904 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4c900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3888 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4c500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3872 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4c100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3856 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4bd00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3840 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4b900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3824 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4b500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3808 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4b100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3792 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4ad00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3776 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4a900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3760 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4a500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3744 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x4a100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3728 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x49d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3712 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x49900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3696 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x49500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3680 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x49100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3664 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x48d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3648 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x48900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3632 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x48500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3616 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x48100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3600 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x47d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3584 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x47900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3568 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x47500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3552 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x47100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3536 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x46d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3520 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x46900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3504 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x46500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3488 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x46100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3472 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x45d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3456 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x45900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3440 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x45500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3424 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x45100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3408 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x44d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3392 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x44900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3376 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x44500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3360 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x44100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3344 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x43d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3328 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x43900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3312 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x43500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3296 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x43100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3280 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x42d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3264 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x42900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3248 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x42500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3232 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x42100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3216 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x41d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3200 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x41900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3184 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x41500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3168 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x41100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3152 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x40d00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3136 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x40900 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3120 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x40500 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3104 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x40100 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3088 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x3fd00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3072 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3056 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:4068 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:4072 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:4076 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4080 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3040 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:4052 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:4056 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:4060 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4064 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3024 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:4036 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:4040 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:4044 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4048 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:3008 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:4020 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:4024 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:4028 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4032 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2992 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:4004 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:4008 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:4012 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4016 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2976 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3988 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3992 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3996 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:4000 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2960 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3972 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3976 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3980 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3984 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2944 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3956 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3960 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3964 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3968 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2928 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3940 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3944 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3948 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3952 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2912 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3924 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3928 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3932 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3936 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2896 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3908 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3912 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3916 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3920 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2880 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3892 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3896 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3900 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3904 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2864 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3876 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3880 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3884 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3888 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2848 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3860 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3864 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3868 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3872 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2832 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3844 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3848 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3852 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3856 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2816 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3828 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3832 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3836 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3840 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2800 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3812 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3816 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3820 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3824 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2784 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3796 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3800 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3804 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3808 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2768 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3780 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3784 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3788 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3792 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2752 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3764 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3768 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3772 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3776 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2736 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3748 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3752 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3756 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3760 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2720 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3732 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3736 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3740 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3744 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2704 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3716 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3720 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3724 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3728 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2688 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3700 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3704 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3708 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3712 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2672 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3684 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3688 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3692 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3696 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2656 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3668 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3672 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3676 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3680 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2640 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3652 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3656 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3660 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3664 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2624 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3636 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3640 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3644 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3648 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2608 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3620 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3624 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3628 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3632 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2592 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3604 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3608 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3612 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3616 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2576 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3588 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3592 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3596 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3600 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2560 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3572 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3576 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3580 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3584 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2544 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3540 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3544 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3548 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3552 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2528 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3508 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3512 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3516 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3520 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2512 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3476 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3480 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3484 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3488 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2496 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3444 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3448 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3452 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3456 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2480 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3412 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3416 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3420 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3424 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2464 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3380 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3384 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3388 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3392 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2448 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3348 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3352 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3356 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3360 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2432 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3316 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3320 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3324 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3328 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2416 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3300 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3304 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3308 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3312 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2400 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3284 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3288 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3292 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3296 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2384 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3252 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3256 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3260 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3264 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2368 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3220 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3224 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3228 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3232 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2352 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3188 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3192 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3196 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3200 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2336 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3156 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3160 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3164 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3168 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2320 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3124 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3128 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3132 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3136 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2304 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3092 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3096 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3100 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3104 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2288 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3060 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3064 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3068 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3072 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2272 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3028 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3032 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3036 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3040 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2256 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:3012 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3016 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3020 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3024 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2240 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2996 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:3000 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:3004 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:3008 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2224 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2980 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2984 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2988 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2992 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2208 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2948 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2952 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2956 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2960 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2192 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2916 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2920 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2924 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2928 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2176 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2884 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2888 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2892 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2896 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2160 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2852 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2856 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2860 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2864 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2144 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2820 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2824 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2828 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2832 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2128 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2788 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2792 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2796 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2800 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2112 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2756 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2760 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2764 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2768 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2096 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2724 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2728 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2732 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2736 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2080 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2708 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2712 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2716 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2720 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2064 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2692 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2696 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2700 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2704 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2048 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2660 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2664 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2668 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2672 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2032 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2628 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2632 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2636 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2640 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2016 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2596 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2600 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2604 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2608 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:2000 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2564 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2568 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2572 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2576 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1984 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2532 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2536 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2540 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2544 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1968 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2500 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2504 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2508 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2512 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1952 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2468 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2472 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2476 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2480 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1936 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2436 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2440 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2444 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2448 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1920 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2420 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2424 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2428 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2432 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1904 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2404 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2408 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2412 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2416 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1888 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2388 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2392 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2396 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2400 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1872 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2356 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2360 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2364 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2368 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1856 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2324 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2328 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2332 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2336 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1840 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2292 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2296 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2300 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2304 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1824 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2260 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2264 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2268 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2272 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1808 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2228 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2232 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2236 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2240 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1792 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2196 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2200 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2204 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2208 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1776 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2164 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2168 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2172 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2176 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1760 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2132 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2136 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2140 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2144 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1744 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2116 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2120 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2124 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2128 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1728 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2100 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2104 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2108 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2112 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1712 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2068 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2072 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2076 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2080 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1696 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2036 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2040 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2044 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2048 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1680 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:2004 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:2008 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:2012 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:2016 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1664 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1972 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1976 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1980 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1984 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1648 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1940 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1944 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1948 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1952 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1632 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1908 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1912 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1916 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1920 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1616 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1876 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1880 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1884 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1888 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1600 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1844 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1848 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1852 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1856 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1584 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1828 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1832 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1836 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1840 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1568 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1812 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1816 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1820 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1824 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1552 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1796 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1800 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1804 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1808 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1536 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1764 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1768 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1772 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1776 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1520 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1732 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1736 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1740 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1744 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1504 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1700 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1704 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1708 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1712 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1488 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1668 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1672 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1676 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1680 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1472 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1636 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1640 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1644 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1648 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1456 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1604 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1608 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1612 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1616 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1440 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1572 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1576 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1580 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1584 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1424 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1540 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1544 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1548 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1552 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1408 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1524 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1528 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1532 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1536 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1392 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1508 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1512 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1516 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1520 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1376 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1476 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1480 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1484 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1488 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1360 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1444 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1448 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1452 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1456 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1344 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1412 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1416 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1420 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1424 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1328 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1380 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1384 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1388 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1392 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1312 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1348 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1352 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1356 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1360 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1296 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1316 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1320 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1324 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1328 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1280 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1284 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1288 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1292 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1296 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1264 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1252 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1256 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1260 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1264 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1248 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1236 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1240 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1244 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1248 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1232 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1220 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1224 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1228 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1232 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1216 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1204 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1208 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1212 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1216 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1200 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1188 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1192 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1196 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1200 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1184 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1172 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1176 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1180 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1184 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1168 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1156 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1160 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1164 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1168 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1152 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1140 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1144 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1148 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1152 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1136 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1124 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1128 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1132 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1136 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1120 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1108 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1112 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1116 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1120 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1104 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1092 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1096 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1100 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1104 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1088 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1076 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1080 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1084 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1088 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1072 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1060 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1064 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1068 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1072 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1056 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1044 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1048 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1052 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1056 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1040 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1028 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1032 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1036 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1040 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1024 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:1012 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1016 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1020 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1024 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:1008 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:996 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:1000 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:1004 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:1008 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:992 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:980 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:984 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:988 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:992 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:976 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:964 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:968 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:972 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:976 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:960 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:948 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:952 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:956 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:960 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:944 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:932 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:936 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:940 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:944 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:928 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:916 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:920 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:924 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:928 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:912 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:900 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:904 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:908 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:912 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:896 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:884 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:888 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:892 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:896 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:880 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:868 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:872 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:876 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:880 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:864 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:852 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:856 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:860 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:864 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:848 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:836 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:840 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:844 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:848 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:832 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:820 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:824 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:828 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:832 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:816 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:804 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:808 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:812 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:816 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:800 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:788 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:792 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:796 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:800 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:784 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:772 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:776 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:780 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:784 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:768 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:756 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:760 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:764 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:768 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:752 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:740 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:744 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:748 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:752 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:736 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:724 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:728 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:732 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:736 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:720 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:708 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:712 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:716 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:720 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:704 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:692 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:696 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:700 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:704 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:688 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:676 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:680 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:684 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:688 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:672 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:660 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:664 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:668 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:672 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:656 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:644 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:648 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:652 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:656 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:640 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:628 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:632 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:636 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:640 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:624 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:612 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:616 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:620 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:624 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:608 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:596 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:600 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:604 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:608 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:592 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:580 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:584 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:588 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:592 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:576 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:564 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:568 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:572 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:576 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:560 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:548 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:552 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:556 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:560 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:544 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:532 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:536 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:540 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:544 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:528 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:516 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:520 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:524 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:528 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:512 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:500 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:504 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:508 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:512 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:496 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:484 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:488 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:492 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:496 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:480 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:468 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:472 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:476 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:480 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:464 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:452 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:456 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:460 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:464 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:448 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:436 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:440 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:444 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:448 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:432 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:420 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:424 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:428 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:432 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:416 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:404 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:408 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:412 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:416 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:400 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:388 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:392 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:396 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:400 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:384 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:372 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:376 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:380 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:384 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:368 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:356 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:360 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:364 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:368 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:352 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:340 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:344 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:348 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:352 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:336 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:324 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:328 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:332 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:336 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:320 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:308 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:312 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:316 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:320 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:304 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:292 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:296 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:300 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:304 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:288 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:276 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:280 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:284 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:288 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:272 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:260 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:264 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:268 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:272 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:256 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:244 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:248 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:252 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:256 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:240 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:228 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:232 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:236 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:240 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:224 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:212 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:216 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:220 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:224 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:208 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:196 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:200 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:204 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:208 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:192 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:180 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:184 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:188 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:192 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:176 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:164 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:168 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:172 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:176 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:160 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:148 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:152 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:156 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:160 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:144 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:132 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:136 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:140 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:144 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:128 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:116 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:120 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:124 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:128 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:112 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:100 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:104 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:108 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:112 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:96 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:84 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:88 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:92 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:96 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:80 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:68 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:72 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:76 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:80 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:64 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:52 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:56 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:60 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:64 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:48 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:36 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:40 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:44 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:48 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:32 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:20 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:24 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:28 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:32 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 offset:16 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v0, off, s[44:47], 0 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[44:47], 0 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[44:47], 0 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[44:47], 0 offset:16 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[5:6], s[0:3], 0 addr64 -; GFX6-NEXT: s_endpgm -; -; GFX9-FLATSCR-LABEL: test: -; GFX9-FLATSCR: ; %bb.0: ; %entry -; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5 -; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-FLATSCR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX9-FLATSCR-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 -; GFX9-FLATSCR-NEXT: v_mbcnt_hi_u32_b32 v0, -1, v0 -; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v5, 13, v0 -; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x80 -; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v2, vcc, s2, v5 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v0, s3 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v0, vcc -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v2 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 -; GFX9-FLATSCR-NEXT: s_mov_b32 s4, 4 -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0x84 -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x104 -; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x184 -; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x204 -; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x284 -; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x304 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x384 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 -; GFX9-FLATSCR-NEXT: s_mov_b32 s4, 20 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 -; GFX9-FLATSCR-NEXT: s_mov_b32 s4, 36 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 -; GFX9-FLATSCR-NEXT: s_mov_b32 s4, 52 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x44 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 -; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x54 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 -; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x64 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 -; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x74 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: s_movk_i32 s4, 0x100 -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v2 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0x94 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xa4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xb4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xc4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xd4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xe4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0xf4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s5 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: s_movk_i32 s5, 0x180 -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s5, v2 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x114 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x124 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x134 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x144 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x154 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x164 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x174 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s6 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: s_movk_i32 s6, 0x200 -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s6, v2 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 -; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x194 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 -; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 -; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 -; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 -; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 -; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x1f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s7 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: s_movk_i32 s7, 0x280 -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s7, v2 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 -; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x214 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 -; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x224 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 -; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x234 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x244 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 -; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x254 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 -; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x264 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 -; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x274 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s8 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: s_movk_i32 s8, 0x300 -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s8, v2 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 -; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x294 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 -; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 -; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 -; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 -; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 -; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x2f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s9 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: s_movk_i32 s9, 0x380 -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s9, v2 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 -; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x314 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 -; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x324 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 -; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x334 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x344 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 -; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x354 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 -; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x364 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4080 -; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x374 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s10 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: s_movk_i32 s10, 0x400 -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s10, v2 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3968 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:3984 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x394 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4000 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4016 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4048 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:4064 -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:4080 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(1) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x3f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(1) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x404 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, s1 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:16 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x414 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:32 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x424 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:48 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x434 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:64 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x444 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:80 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x454 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:96 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x464 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:112 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x474 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:128 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x484 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:144 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x494 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:160 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:176 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:192 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:208 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:224 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:240 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x4f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:256 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x504 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:272 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x514 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:288 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x524 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:304 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x534 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:320 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x544 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:336 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x554 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:352 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x564 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:368 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x574 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:384 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x584 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:400 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x594 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:416 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:432 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:448 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:464 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:480 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:496 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x5f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:512 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x604 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:528 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x614 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:544 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x624 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:560 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x634 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:576 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x644 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:592 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x654 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:608 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x664 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:624 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x674 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:640 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x684 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:656 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x694 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:672 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:688 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:704 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:720 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:736 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:752 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x6f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:768 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x704 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:784 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x714 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:800 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x724 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:816 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x734 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:832 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x744 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:848 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x754 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:864 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x764 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:880 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x774 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:896 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x784 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:912 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x794 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:928 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:944 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:960 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:976 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:992 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1008 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x7f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1024 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x804 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1040 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x814 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1056 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x824 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1072 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x834 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1088 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x844 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1104 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x854 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1120 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x864 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1136 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x874 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1152 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x884 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1168 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x894 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1184 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1200 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1216 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1232 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1248 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1264 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x8f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1280 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x904 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1296 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x914 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1312 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x924 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1328 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x934 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1344 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x944 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1360 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x954 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1376 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x964 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1392 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x974 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1408 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x984 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1424 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x994 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1440 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1456 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1472 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1488 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1504 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1520 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x9f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1536 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa04 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1552 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa14 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1568 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa24 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1584 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa34 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1600 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa44 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1616 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa54 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1632 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa64 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1648 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa74 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1664 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa84 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1680 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xa94 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1696 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xaa4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1712 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xab4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1728 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xac4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1744 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xad4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1760 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xae4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1776 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xaf4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1792 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb04 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1808 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb14 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1824 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb24 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1840 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb34 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1856 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb44 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1872 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb54 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1888 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb64 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1904 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb74 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1920 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb84 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1936 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xb94 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1952 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xba4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1968 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbb4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1984 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbc4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2000 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbd4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2016 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbe4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xbf4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2048 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc04 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2064 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc14 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2080 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc24 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2096 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc34 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2112 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc44 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2128 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc54 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2144 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc64 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2160 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc74 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2176 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc84 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2192 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xc94 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2208 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xca4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2224 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcb4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2240 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcc4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2256 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcd4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2272 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xce4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2288 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xcf4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2304 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd04 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2320 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd14 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2336 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd24 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2352 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd34 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2368 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd44 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2384 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd54 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2400 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd64 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2416 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd74 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2432 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd84 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2448 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xd94 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2464 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xda4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2480 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdb4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2496 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdc4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2512 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdd4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2528 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xde4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2544 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xdf4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2560 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe04 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2576 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe14 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2592 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe24 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2608 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe34 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2624 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe44 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2640 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe54 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2656 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe64 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2672 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe74 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2688 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe84 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2704 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xe94 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2720 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xea4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2736 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xeb4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2752 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xec4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2768 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xed4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2784 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xee4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2800 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xef4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2816 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf04 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2832 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf14 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2848 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf24 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2864 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf34 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2880 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf44 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2896 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf54 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2912 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf64 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2928 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf74 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2944 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf84 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2960 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xf94 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2976 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfa4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2992 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfb4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3008 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfc4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3024 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfd4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3040 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xfe4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3056 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0xff4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3072 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1004 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3088 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1014 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3104 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1024 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3120 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1034 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3136 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1044 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3152 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1054 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3168 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1064 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3184 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1074 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3200 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1084 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3216 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1094 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3232 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3248 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3264 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3280 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3296 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3312 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x10f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3328 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1104 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3344 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1114 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3360 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1124 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3376 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1134 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3392 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1144 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3408 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1154 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3424 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1164 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3440 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1174 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3456 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1184 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3472 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1194 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3488 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3504 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3520 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3536 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3552 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3568 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x11f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3584 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1204 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3600 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1214 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3616 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1224 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3632 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1234 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3648 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1244 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3664 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1254 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3680 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1264 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3696 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1274 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3712 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1284 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3728 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1294 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3744 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3760 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3776 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3792 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3808 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3824 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x12f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3840 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1304 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3856 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1314 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3872 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1324 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3888 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1334 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3904 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1344 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3920 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1354 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3936 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1364 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3952 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1374 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3968 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1384 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:3984 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x1394 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4000 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4016 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4032 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4048 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4064 -; GFX9-FLATSCR-NEXT: s_movk_i32 s11, 0x13e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s11 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:4080 -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x13e4 -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v4, vcc, s0, v5 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4080 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x13d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4064 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x13c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4048 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x13b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4032 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x13a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4016 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1394 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:4000 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1384 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3984 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1374 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3968 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1364 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3952 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1354 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3936 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1344 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3920 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1334 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3904 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1324 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3888 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1314 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3872 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1304 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3856 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x12f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3840 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x12e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3824 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x12d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3808 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x12c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3792 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x12b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3776 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x12a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3760 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1294 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3744 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1284 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3728 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1274 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3712 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1264 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3696 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1254 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3680 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1244 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3664 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1234 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3648 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1224 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3632 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1214 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3616 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1204 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3600 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x11f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3584 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x11e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3568 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x11d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3552 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x11c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3536 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x11b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3520 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x11a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3504 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1194 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3488 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1184 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3472 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1174 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3456 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1164 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3440 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1154 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3424 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1144 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3408 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1134 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3392 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1124 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3376 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1114 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3360 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1104 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3344 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x10f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3328 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x10e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3312 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x10d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3296 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x10c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3280 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x10b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3264 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x10a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3248 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1094 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3232 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1084 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3216 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1074 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3200 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1064 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3184 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1054 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3168 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1044 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3152 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1034 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3136 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1024 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3120 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1014 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3104 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x1004 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3088 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xff4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3072 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xfe4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3056 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xfd4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3040 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xfc4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3024 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xfb4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:3008 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xfa4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2992 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf94 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2976 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf84 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2960 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf74 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2944 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf64 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2928 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf54 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2912 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf44 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2896 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf34 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2880 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf24 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2864 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf14 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2848 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xf04 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2832 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xef4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2816 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xee4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2800 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xed4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2784 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xec4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2768 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xeb4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2752 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xea4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2736 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe94 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2720 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe84 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2704 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe74 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2688 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe64 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2672 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe54 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2656 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe44 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2640 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe34 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2624 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe24 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2608 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe14 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2592 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xe04 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2576 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xdf4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2560 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xde4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2544 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xdd4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2528 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xdc4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2512 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xdb4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2496 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xda4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2480 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd94 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2464 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd84 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2448 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd74 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2432 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd64 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2416 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd54 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2400 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd44 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2384 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd34 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2368 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd24 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2352 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd14 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2336 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xd04 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2320 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xcf4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2304 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xce4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2288 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xcd4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2272 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xcc4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2256 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xcb4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2240 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xca4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2224 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc94 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2208 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc84 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2192 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc74 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2176 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc64 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2160 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc54 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2144 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc44 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2128 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc34 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2112 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc24 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2096 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc14 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2080 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xc04 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2064 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xbf4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2048 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xbe4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2032 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xbd4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2016 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xbc4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2000 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xbb4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1984 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xba4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1968 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb94 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1952 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb84 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1936 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb74 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1920 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb64 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1904 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb54 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1888 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb44 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1872 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb34 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1856 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb24 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1840 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb14 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1824 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xb04 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1808 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xaf4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1792 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xae4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1776 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xad4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1760 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xac4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1744 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xab4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1728 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xaa4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1712 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa94 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1696 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa84 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1680 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa74 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1664 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa64 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1648 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa54 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1632 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa44 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1616 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa34 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1600 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa24 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1584 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa14 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1568 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0xa04 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1552 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x9f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1536 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x9e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1520 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x9d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1504 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x9c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1488 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x9b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1472 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x9a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1456 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x994 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1440 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x984 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1424 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x974 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1408 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x964 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1392 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x954 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1376 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x944 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1360 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x934 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1344 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x924 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1328 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x914 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1312 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x904 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1296 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x8f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1280 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x8e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1264 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x8d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1248 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x8c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1232 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x8b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1216 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x8a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1200 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x894 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1184 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x884 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1168 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x874 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1152 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x864 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1136 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x854 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1120 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x844 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1104 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x834 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1088 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x824 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1072 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x814 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1056 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x804 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1040 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x7f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1024 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x7e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1008 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x7d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:992 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x7c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:976 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x7b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:960 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x7a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:944 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x794 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:928 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x784 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:912 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x774 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:896 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x764 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:880 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x754 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:864 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x744 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:848 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x734 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:832 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x724 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:816 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x714 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:800 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x704 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:784 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x6f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:768 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x6e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:752 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x6d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:736 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x6c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:720 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x6b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:704 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x6a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:688 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x694 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:672 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x684 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:656 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x674 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:640 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x664 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:624 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x654 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:608 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x644 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:592 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x634 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:576 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x624 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:560 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x614 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:544 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x604 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:528 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x5f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:512 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x5e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:496 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x5d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:480 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x5c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:464 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x5b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:448 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x5a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:432 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x594 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:416 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x584 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:400 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x574 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:384 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x564 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:368 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x554 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:352 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x544 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:336 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x534 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:320 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x524 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:304 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x514 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:288 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x504 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:272 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x4f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:256 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x4e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:240 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x4d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:224 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x4c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:208 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x4b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:192 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x4a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:176 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x494 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:160 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x484 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:144 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x474 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:128 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x464 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:112 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x454 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:96 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x444 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:80 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x434 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:64 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x424 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:48 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x414 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:32 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s2, 0x404 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:16 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3f4 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s10, v4 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x3a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x394 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x384 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x374 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s9, v4 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x364 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x354 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x344 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x334 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x324 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x314 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x304 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s8, v4 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x294 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x284 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x274 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s7, v4 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x264 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x254 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x244 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x234 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x224 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x214 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x204 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1f4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s6, v4 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1e4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1d4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1c4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1b4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x1a4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x194 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x184 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x174 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s5, v4 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x164 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x154 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x144 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x134 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x124 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x114 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x104 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xf4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, s4, v4 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xe4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4080 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xd4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4064 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xc4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4048 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xb4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4032 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0xa4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4016 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x94 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:4000 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x84 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3984 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x74 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[7:10], off offset:3968 -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v0, vcc, 0x80, v4 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v6, vcc -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x64 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4080 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x54 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4064 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x44 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4048 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 52 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4032 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 36 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4016 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 20 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:4000 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_mov_b32 s0, 4 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:3984 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[2:5], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:3968 -; GFX9-FLATSCR-NEXT: s_endpgm -; -; GFX10-FLATSCR-LABEL: test: -; GFX10-FLATSCR: ; %bb.0: ; %entry -; GFX10-FLATSCR-NEXT: s_add_u32 s2, s2, s5 -; GFX10-FLATSCR-NEXT: s_addc_u32 s3, s3, 0 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 -; GFX10-FLATSCR-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; GFX10-FLATSCR-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 -; GFX10-FLATSCR-NEXT: v_mbcnt_hi_u32_b32 v0, -1, v0 -; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v5, 13, v0 -; GFX10-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-FLATSCR-NEXT: v_add_co_u32 v4, s4, s2, v5 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e64 v22, null, s3, 0, s4 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x804 -; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x80, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v22, vcc_lo -; GFX10-FLATSCR-NEXT: v_add_co_u32 v2, vcc_lo, 0x100, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v22, vcc_lo -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:20 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:36 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:52 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:68 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:84 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:100 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2032 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:116 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:132 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:1936 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:148 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:1952 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:164 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:1968 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:180 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:1984 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:196 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:2000 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:212 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:2016 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:228 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[2:3], off offset:2032 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], off offset:244 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: v_add_co_u32 v6, vcc_lo, 0x180, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v22, vcc_lo -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:260 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:1936 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:276 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:1952 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:292 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:1968 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:308 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:1984 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:324 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:2000 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:340 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:2016 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:356 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v[6:7], off offset:2032 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], off offset:372 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: v_add_co_u32 v8, vcc_lo, 0x200, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v22, vcc_lo -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:388 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:1936 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:404 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:1952 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:420 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:1968 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:436 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:1984 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:452 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:2000 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:468 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:2016 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:484 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[10:13], v[8:9], off offset:2032 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[10:13], off offset:500 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: v_add_co_u32 v10, vcc_lo, 0x280, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, 0, v22, vcc_lo -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:516 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:1936 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:532 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:1952 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:548 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:1968 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:564 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:1984 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:580 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:2000 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:596 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:2016 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:612 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v[10:11], off offset:2032 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[12:15], off offset:628 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: v_add_co_u32 v12, vcc_lo, 0x300, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, 0, v22, vcc_lo -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:644 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:1936 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:660 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:1952 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:676 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:1968 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:692 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:1984 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:708 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:2000 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:724 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:2016 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:740 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[14:17], v[12:13], off offset:2032 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[14:17], off offset:756 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: v_add_co_u32 v14, vcc_lo, 0x380, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v15, vcc_lo, 0, v22, vcc_lo -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:772 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:1936 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:788 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:1952 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:804 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:1968 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:820 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:1984 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:836 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:2000 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:852 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:2016 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:868 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v[14:15], off offset:2032 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], off offset:884 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: v_add_co_u32 v16, vcc_lo, 0x400, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v17, vcc_lo, 0, v22, vcc_lo -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:900 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:1936 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:916 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:1952 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:932 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:1968 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:948 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:1984 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:964 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:2000 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:980 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:2016 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:996 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[18:21], v[16:17], off offset:2032 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[18:21], off offset:1012 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: v_add_co_u32 v18, vcc_lo, 0x480, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v19, vcc_lo, 0, v22, vcc_lo -; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x500, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1028 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:1936 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1044 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:1952 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1060 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:1968 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1076 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:1984 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1092 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:2000 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1108 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:2016 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1124 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[18:19], off offset:2032 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1140 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1156 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1936 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1172 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1952 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1188 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1968 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1204 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1984 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1220 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2000 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1236 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2016 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1252 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2032 -; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x580, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1268 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1284 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1936 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1300 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1952 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1316 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1968 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1332 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1984 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1348 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2000 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1364 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2016 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1380 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2032 -; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x600, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1396 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1412 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1936 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1428 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1952 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1444 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1968 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1460 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1984 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1476 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2000 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1492 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2016 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1508 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2032 -; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x680, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1524 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1540 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1936 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1556 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1952 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1572 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1968 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1588 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1984 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1604 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2000 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1620 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2016 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1636 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2032 -; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x700, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1652 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1668 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1936 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1684 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1952 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1700 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1968 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1716 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:1984 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1732 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2000 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1748 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2016 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1764 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[23:26], v[20:21], off offset:2032 -; GFX10-FLATSCR-NEXT: v_add_co_u32 v20, vcc_lo, 0x780, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v21, vcc_lo, 0, v22, vcc_lo -; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v0 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[23:26], off offset:1780 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1796 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:1936 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1812 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:1952 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1828 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:1968 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1844 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:1984 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1860 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:2000 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1876 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[22:25], v[20:21], off offset:2016 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[22:25], off offset:1892 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[20:21], off offset:2032 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1908 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1924 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1936 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1940 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1952 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1956 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1968 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1972 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1984 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:1988 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2000 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:2004 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2016 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:2020 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2032 -; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v2 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], off offset:2036 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1936 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x814 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1952 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x824 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1968 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x834 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1984 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x844 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2000 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x854 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: s_clause 0x1 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2016 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x864 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x874 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v6 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v7, vcc_lo -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x884 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1936 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x894 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1952 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8a4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1968 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8b4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:1984 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8c4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2000 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8d4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: s_clause 0x1 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:2016 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8e4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x8f4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v8 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v9, vcc_lo -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x904 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x914 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x924 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x934 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x944 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x954 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: s_clause 0x1 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x964 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x974 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v10 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v11, vcc_lo -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x984 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x994 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9a4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9b4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9c4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9d4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: s_clause 0x1 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9e4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x9f4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v12 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v13, vcc_lo -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa04 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa14 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa24 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa34 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa44 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa54 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: s_clause 0x1 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa64 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa74 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v14 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v15, vcc_lo -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa84 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xa94 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xaa4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xab4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xac4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xad4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: s_clause 0x1 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xae4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xaf4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v16 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v17, vcc_lo -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb04 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb14 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb24 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb34 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb44 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb54 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: s_clause 0x1 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb64 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb74 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x780, v18 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v19, vcc_lo -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb84 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1920 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1936 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xb94 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1952 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xba4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1968 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xbb4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:1984 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xbc4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2000 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xbd4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: s_clause 0x1 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[6:9], v[0:1], off offset:2016 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v[0:1], off offset:2032 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xbe4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(1) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[6:9], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xbf4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc04 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:16 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc14 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:32 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc24 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:48 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc34 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:64 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc44 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:80 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc54 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:96 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc64 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:112 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc74 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:128 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc84 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:144 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xc94 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:160 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xca4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:176 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xcb4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:192 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xcc4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:208 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xcd4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:224 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xce4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:240 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xcf4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:256 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd04 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:272 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd14 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:288 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd24 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:304 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd34 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:320 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd44 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:336 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd54 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:352 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd64 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:368 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd74 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:384 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd84 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:400 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xd94 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:416 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xda4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:432 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xdb4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:448 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xdc4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:464 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xdd4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:480 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xde4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:496 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xdf4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:512 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe04 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:528 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe14 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:544 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe24 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:560 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe34 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:576 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe44 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:592 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe54 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:608 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe64 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:624 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe74 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:640 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe84 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:656 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xe94 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:672 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xea4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:688 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xeb4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:704 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xec4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:720 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xed4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:736 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xee4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:752 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xef4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:768 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf04 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:784 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf14 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:800 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf24 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:816 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf34 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:832 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf44 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:848 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf54 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:864 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf64 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:880 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf74 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:896 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf84 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:912 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xf94 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:928 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xfa4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:944 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xfb4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:960 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xfc4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:976 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xfd4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:992 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xfe4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1008 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0xff4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1024 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1004 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1040 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1014 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1056 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1024 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1072 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1034 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1088 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1044 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1104 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1054 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1120 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1064 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1136 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1074 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1152 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1084 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1168 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1094 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1184 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10a4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1200 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10b4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1216 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10c4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1232 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10d4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1248 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10e4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1264 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x10f4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1280 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1104 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1296 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1114 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1312 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1124 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1328 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1134 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1344 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1144 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1360 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1154 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1376 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1164 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1392 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1174 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1408 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1184 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1424 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1194 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1440 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11a4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1456 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11b4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1472 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11c4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1488 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11d4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1504 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11e4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1520 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x11f4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1536 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1204 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1552 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1214 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1568 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1224 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1584 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1234 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1600 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1244 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1616 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1254 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1632 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1264 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1648 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1274 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1664 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1284 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1680 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1294 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1696 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x12a4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1712 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x12b4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1728 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x12c4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1744 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x12d4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1760 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x12e4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1776 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x12f4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1792 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1304 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1808 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1314 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1824 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1324 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1840 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1334 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1856 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1344 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1872 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1354 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1888 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1364 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1904 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1374 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1920 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1384 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1936 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x1394 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1952 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x13a4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1968 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x13b4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:1984 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x13c4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2000 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x13d4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2016 -; GFX10-FLATSCR-NEXT: s_movk_i32 s4, 0x13e4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s4 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v5, s[2:3] offset:2032 -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: v_add_co_u32 v4, s2, s0, v5 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e64 v6, null, s1, 0, s2 -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x13e4 -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x13d4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x13c4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x13b4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x13a4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1394 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1384 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1374 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1364 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1904 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1354 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1888 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1344 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1872 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1334 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1856 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1324 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1840 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1314 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1824 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1304 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1808 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12f4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1792 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12e4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1776 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12d4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1760 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12c4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1744 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12b4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1728 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x12a4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1712 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1294 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1696 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1284 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1680 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1274 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1664 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1264 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1648 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1254 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1632 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1244 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1616 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1234 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1600 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1224 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1584 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1214 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1568 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1204 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1552 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11f4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1536 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11e4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1520 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11d4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1504 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11c4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1488 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11b4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1472 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x11a4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1456 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1194 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1440 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1184 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1424 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1174 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1408 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1164 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1392 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1154 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1376 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1144 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1360 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1134 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1344 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1124 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1328 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1114 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1312 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1104 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1296 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10f4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1280 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10e4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1264 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10d4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1248 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10c4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1232 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10b4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1216 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x10a4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1200 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1094 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1184 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1084 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1168 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1074 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1152 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1064 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1136 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1054 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1120 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1044 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1104 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1034 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1088 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1024 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1072 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1014 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1056 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0x1004 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1040 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xff4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1024 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xfe4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:1008 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xfd4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:992 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xfc4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:976 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xfb4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:960 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xfa4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:944 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf94 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:928 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf84 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:912 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf74 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:896 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf64 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:880 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf54 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:864 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf44 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:848 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf34 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:832 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf24 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:816 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf14 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:800 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xf04 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:784 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xef4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:768 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xee4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:752 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xed4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:736 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xec4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:720 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xeb4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:704 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xea4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:688 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe94 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:672 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe84 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:656 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe74 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:640 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe64 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:624 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe54 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:608 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe44 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:592 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe34 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:576 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe24 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:560 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe14 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:544 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xe04 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:528 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xdf4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:512 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xde4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:496 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xdd4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:480 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xdc4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:464 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xdb4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:448 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xda4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:432 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd94 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:416 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd84 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:400 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd74 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:384 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd64 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:368 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd54 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:352 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd44 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:336 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd34 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:320 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd24 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:304 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd14 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:288 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xd04 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:272 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xcf4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:256 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xce4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:240 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xcd4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:224 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xcc4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:208 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xcb4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:192 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xca4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:176 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc94 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:160 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc84 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:144 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc74 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:128 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc64 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:112 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc54 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:96 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc44 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:80 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc34 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:64 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc24 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:48 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc14 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:32 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s2, 0xc04 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] offset:16 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s2 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xbf4 -; GFX10-FLATSCR-NEXT: v_add_co_u32 v0, vcc_lo, 0x480, v4 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v6, vcc_lo -; GFX10-FLATSCR-NEXT: v_add_co_u32 v2, vcc_lo, 0x780, v0 -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xbe4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v1, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xbd4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xbc4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xbb4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xba4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb94 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb84 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[7:10], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb74 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[7:10], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: v_add_co_u32 v2, vcc_lo, 0x400, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v6, vcc_lo -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb64 -; GFX10-FLATSCR-NEXT: v_add_co_u32 v7, vcc_lo, 0x780, v2 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v3, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb54 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb44 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb34 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb24 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb14 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xb04 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[9:12], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xaf4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[9:12], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: v_add_co_u32 v7, vcc_lo, 0x380, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v8, vcc_lo, 0, v6, vcc_lo -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xae4 -; GFX10-FLATSCR-NEXT: v_add_co_u32 v9, vcc_lo, 0x780, v7 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, 0, v8, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xad4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xac4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xab4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xaa4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa94 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa84 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[11:14], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa74 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[11:14], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: v_add_co_u32 v9, vcc_lo, 0x300, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, 0, v6, vcc_lo -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa64 -; GFX10-FLATSCR-NEXT: v_add_co_u32 v11, vcc_lo, 0x780, v9 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, 0, v10, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa54 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa44 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa34 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa24 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa14 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0xa04 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[13:16], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9f4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[13:16], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: v_add_co_u32 v11, vcc_lo, 0x280, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v12, vcc_lo, 0, v6, vcc_lo -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9e4 -; GFX10-FLATSCR-NEXT: v_add_co_u32 v13, vcc_lo, 0x780, v11 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v14, vcc_lo, 0, v12, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9d4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9c4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9b4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x9a4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x994 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x984 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[15:18], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x974 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[15:18], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: v_add_co_u32 v13, vcc_lo, 0x200, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v14, vcc_lo, 0, v6, vcc_lo -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x964 -; GFX10-FLATSCR-NEXT: v_add_co_u32 v15, vcc_lo, 0x780, v13 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, 0, v14, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x954 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x944 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x934 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x924 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x914 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x904 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[17:20], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8f4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[17:20], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: v_add_co_u32 v15, vcc_lo, 0x180, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v16, vcc_lo, 0, v6, vcc_lo -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8e4 -; GFX10-FLATSCR-NEXT: v_add_co_u32 v17, vcc_lo, 0x780, v15 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v18, vcc_lo, 0, v16, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8d4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8c4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8b4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x8a4 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x894 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x884 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[19:22], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x874 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[19:22], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: v_add_co_u32 v17, vcc_lo, 0x100, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v18, vcc_lo, 0, v6, vcc_lo -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x864 -; GFX10-FLATSCR-NEXT: v_add_co_u32 v19, vcc_lo, 0x780, v17 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, 0, v18, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x854 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x844 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x834 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x824 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x814 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x804 -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[21:24], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:2036 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: v_add_co_u32 v19, vcc_lo, 0x80, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v20, vcc_lo, 0, v6, vcc_lo -; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x780, v19 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v20, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:2020 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:2004 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1988 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1972 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1956 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1940 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1924 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1908 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x780, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1892 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1876 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1860 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1844 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1828 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1812 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1796 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1780 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x700, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1764 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1748 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1732 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1716 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1700 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1684 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1668 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1652 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x680, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1636 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1620 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1604 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1588 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1572 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1556 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1540 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1524 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x600, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1508 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1492 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1476 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1460 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1444 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1428 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1412 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1396 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: v_add_co_u32 v21, vcc_lo, 0x580, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v22, vcc_lo, 0, v6, vcc_lo -; GFX10-FLATSCR-NEXT: v_add_co_u32 v4, vcc_lo, 0x500, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v6, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1380 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1364 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1348 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1332 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1316 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1300 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[23:26], off, off offset:1284 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[21:22], v[23:26], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1268 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1252 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1236 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1220 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1204 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1188 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1172 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1156 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[21:24], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1140 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1124 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1108 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1092 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1076 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1060 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1044 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1028 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[0:1], v[21:24], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:1012 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:996 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:980 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:964 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:948 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:932 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:916 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[21:24], off, off offset:900 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[2:3], v[21:24], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:884 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:868 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:852 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:836 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:820 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:804 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:788 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:772 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[7:8], v[0:3], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:756 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:740 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:724 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:708 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:692 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:676 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:660 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:644 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[9:10], v[0:3], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:628 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:612 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:596 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:580 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:564 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:548 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:532 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:516 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[11:12], v[0:3], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:500 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:484 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:468 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:452 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:436 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:420 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:404 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:388 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[13:14], v[0:3], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:372 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:356 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:340 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:324 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:308 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:292 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:276 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:260 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[15:16], v[0:3], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:244 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:228 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:212 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:196 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:180 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:164 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:148 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:132 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[17:18], v[0:3], off offset:1920 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:116 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:2032 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:100 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:2016 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:84 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:2000 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:68 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:1984 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:52 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:1968 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:36 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:1952 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:20 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:1936 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, off offset:4 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[19:20], v[0:3], off offset:1920 -; GFX10-FLATSCR-NEXT: s_endpgm entry: %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) @@ -10073,1028 +45,79 @@ entry: ret void } +; CHECK-LABEL: test_limited_sgpr +; GFX6: %bb.1: +; GFX6: s_mov_b64 exec, 0xff +; GFX6: buffer_store_dword [[SPILL_REG_0:v[0-9]+]] +; GFX6-COUNT-8: v_writelane_b32 [[SPILL_REG_0]] +; GFX6: v_mov_b32_e32 [[OFFSET_REG0:v[0-9]+]], 0x[[OFFSET0:[0-9a-f]+]] +; GFX6: buffer_store_dword [[SPILL_REG_0]], [[OFFSET_REG0]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen +; GFX6: buffer_load_dword [[SPILL_REG_0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GFX6: s_mov_b64 exec, s + + +; GFX6: s_mov_b64 exec, 0xff +; GFX6: v_mov_b32_e32 [[RELOAD_OFFSET_REG0:v[0-9]+]], 0x[[RELOAD_OFFSET0:[0-9a-f]+]] +; GFX6: buffer_store_dword [[RELOAD_REG_0:v[0-9]+]], off, +; GFX6: buffer_load_dword [[RELOAD_REG_0]], [[RELOAD_OFFSET_REG0]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen +; GFX6-COUNT-8: v_readlane_b32 s{{[0-9]+}}, [[RELOAD_REG_0]] +; GFX6: buffer_load_dword [[RELOAD_REG_0]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GFX6: s_mov_b64 exec, + + +; GFX6: s_mov_b64 exec, 0xff +; GFX6: buffer_store_dword [[SPILL_REG_1:v[0-9]+]] +; GFX6-COUNT-8: v_writelane_b32 [[SPILL_REG_1]] +; GFX6: v_mov_b32_e32 [[OFFSET_REG1:v[0-9]+]], 0x[[OFFSET1:[0-9a-f]+]] +; GFX6: buffer_store_dword [[SPILL_REG_1]], [[OFFSET_REG1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen +; GFX6: buffer_load_dword [[SPILL_REG_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GFX6: s_mov_b64 exec, s + + +; GFX6: s_mov_b64 exec, 0xff +; GFX6: v_mov_b32_e32 [[RELOAD_OFFSET_REG1:v[0-9]+]], 0x[[RELOAD_OFFSET1:[0-9a-f]+]] +; GFX6: buffer_store_dword [[RELOAD_REG_1:v[0-9]+]], off, +; GFX6: buffer_load_dword [[RELOAD_REG_1]], [[RELOAD_OFFSET_REG1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen +; GFX6-COUNT-8: v_readlane_b32 s{{[0-9]+}}, [[RELOAD_REG_1]] +; GFX6: buffer_load_dword [[RELOAD_REG_1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GFX6: s_mov_b64 exec, + + +; GFX6: s_mov_b64 exec, 0xff +; GFX6: buffer_store_dword [[SPILL_REG_2:v[0-9]+]] +; GFX6-COUNT-8: v_writelane_b32 [[SPILL_REG_2]] +; GFX6: v_mov_b32_e32 [[OFFSET_REG2:v[0-9]+]], 0x[[OFFSET2:[0-9a-f]+]] +; GFX6: buffer_store_dword [[SPILL_REG_2]], [[OFFSET_REG2]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen +; GFX6: buffer_load_dword [[SPILL_REG_2]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GFX6: s_mov_b64 exec, s + + +; GFX6: s_mov_b64 exec, 0xff +; GFX6: buffer_store_dword [[SPILL_REG_3:v[0-9]+]] +; GFX6-COUNT-8: v_writelane_b32 [[SPILL_REG_3]] +; GFX6: v_mov_b32_e32 [[OFFSET_REG3:v[0-9]+]], 0x[[OFFSET3:[0-9a-f]+]] +; GFX6: buffer_store_dword [[SPILL_REG_3]], [[OFFSET_REG3]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen +; GFX6: buffer_load_dword [[SPILL_REG_3]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GFX6: s_mov_b64 exec, s + + +; GFX6: s_mov_b64 exec, 0xff +; GFX6: buffer_store_dword [[SPILL_REG_4:v[0-9]+]] +; GFX6-COUNT-4: v_writelane_b32 [[SPILL_REG_4]] +; GFX6: v_mov_b32_e32 [[OFFSET_REG4:v[0-9]+]], 0x[[OFFSET4:[0-9a-f]+]] +; GFX6: buffer_store_dword [[SPILL_REG_4]], [[OFFSET_REG4]], s{{\[[0-9]+:[0-9]+\]}}, 0 offen +; GFX6: buffer_load_dword [[SPILL_REG_4]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GFX6: s_mov_b64 exec, s + +; GFX6: NumSgprs: 48 +; GFX6: ScratchSize: 8608 + +; FLATSCR: s_movk_i32 [[SOFF1:s[0-9]+]], 0x +; GFX9-FLATSCR: s_waitcnt vmcnt(0) +; FLATSCR: scratch_store_dwordx4 off, v[{{[0-9:]+}}], [[SOFF1]] ; 16-byte Folded Spill +; FLATSCR: s_movk_i32 [[SOFF2:s[0-9]+]], 0x +; FLATSCR: scratch_load_dwordx4 v[{{[0-9:]+}}], off, [[SOFF2]] ; 16-byte Folded Reload define amdgpu_kernel void @test_limited_sgpr(<64 x i32> addrspace(1)* %out, <64 x i32> addrspace(1)* %in) #0 { -; GFX6-LABEL: test_limited_sgpr: -; GFX6: ; %bb.0: ; %entry -; GFX6-NEXT: s_mov_b32 s40, SCRATCH_RSRC_DWORD0 -; GFX6-NEXT: s_mov_b32 s41, SCRATCH_RSRC_DWORD1 -; GFX6-NEXT: s_mov_b32 s42, -1 -; GFX6-NEXT: s_mov_b32 s43, 0xe8f000 -; GFX6-NEXT: s_add_u32 s40, s40, s3 -; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 -; GFX6-NEXT: v_mbcnt_lo_u32_b32_e64 v0, -1, 0 -; GFX6-NEXT: v_mbcnt_hi_u32_b32_e32 v5, -1, v0 -; GFX6-NEXT: v_mov_b32_e32 v6, 0 -; GFX6-NEXT: s_mov_b32 s38, 0 -; GFX6-NEXT: s_mov_b32 s39, 0xf000 -; GFX6-NEXT: s_waitcnt lgkmcnt(0) -; GFX6-NEXT: s_mov_b64 s[36:37], s[2:3] -; GFX6-NEXT: v_lshlrev_b32_e32 v7, 8, v5 -; GFX6-NEXT: v_mov_b32_e32 v8, v6 -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:240 -; GFX6-NEXT: s_addc_u32 s41, s41, 0 -; GFX6-NEXT: s_mov_b32 s2, 0x83400 -; GFX6-NEXT: s_mov_b64 s[44:45], exec -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:224 -; GFX6-NEXT: s_mov_b32 s2, 0x83000 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:208 -; GFX6-NEXT: s_mov_b32 s2, 0x82c00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:192 -; GFX6-NEXT: s_mov_b32 s2, 0x82800 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:176 -; GFX6-NEXT: s_mov_b32 s2, 0x82400 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:160 -; GFX6-NEXT: s_mov_b32 s2, 0x82000 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:144 -; GFX6-NEXT: s_mov_b32 s2, 0x81c00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:128 -; GFX6-NEXT: s_mov_b32 s2, 0x81800 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:112 -; GFX6-NEXT: s_mov_b32 s2, 0x81400 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:96 -; GFX6-NEXT: s_mov_b32 s2, 0x81000 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:80 -; GFX6-NEXT: s_mov_b32 s2, 0x80c00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 offset:64 -; GFX6-NEXT: s_mov_b32 s2, 0x80400 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dwordx4 v[0:3], v[7:8], s[36:39], 0 addr64 -; GFX6-NEXT: buffer_load_dwordx4 v[9:12], v[7:8], s[36:39], 0 addr64 offset:16 -; GFX6-NEXT: s_mov_b32 s2, 0x80800 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v10, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v11, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v12, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_load_dwordx4 v[13:16], v[7:8], s[36:39], 0 addr64 offset:32 -; GFX6-NEXT: buffer_load_dwordx4 v[17:20], v[7:8], s[36:39], 0 addr64 offset:48 -; GFX6-NEXT: v_lshlrev_b32_e32 v4, 13, v0 -; GFX6-NEXT: v_add_i32_e32 v4, vcc, 16, v4 -; GFX6-NEXT: v_mov_b32_e32 v7, 1 -; GFX6-NEXT: buffer_store_dword v7, v4, s[40:43], 0 offen -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ; def s[4:11] -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: s_mov_b64 exec, 0xff -; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_writelane_b32 v4, s4, 0 -; GFX6-NEXT: v_writelane_b32 v4, s5, 1 -; GFX6-NEXT: v_writelane_b32 v4, s6, 2 -; GFX6-NEXT: v_writelane_b32 v4, s7, 3 -; GFX6-NEXT: v_writelane_b32 v4, s8, 4 -; GFX6-NEXT: v_writelane_b32 v4, s9, 5 -; GFX6-NEXT: v_writelane_b32 v4, s10, 6 -; GFX6-NEXT: v_writelane_b32 v4, s11, 7 -; GFX6-NEXT: s_mov_b32 s2, 0x83800 -; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[44:45] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ; def s[8:15] -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ; def s[16:23] -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ; def s[24:31] -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ; def s[4:7] -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ; def s[2:3] -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ; def s[36:37] -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ; def s33 -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: s_and_saveexec_b64 s[34:35], vcc -; GFX6-NEXT: s_cbranch_execz .LBB1_2 -; GFX6-NEXT: ; %bb.1: ; %bb0 -; GFX6-NEXT: s_mov_b64 s[44:45], exec -; GFX6-NEXT: s_mov_b64 exec, 0xff -; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_writelane_b32 v9, s8, 0 -; GFX6-NEXT: v_writelane_b32 v9, s9, 1 -; GFX6-NEXT: v_writelane_b32 v9, s10, 2 -; GFX6-NEXT: v_writelane_b32 v9, s11, 3 -; GFX6-NEXT: v_writelane_b32 v9, s12, 4 -; GFX6-NEXT: v_writelane_b32 v9, s13, 5 -; GFX6-NEXT: v_writelane_b32 v9, s14, 6 -; GFX6-NEXT: v_writelane_b32 v9, s15, 7 -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2100 -; GFX6-NEXT: buffer_store_dword v9, v4, s[40:43], 0 offen ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[44:45] -; GFX6-NEXT: s_mov_b64 s[44:45], exec -; GFX6-NEXT: s_mov_b64 exec, 0xff -; GFX6-NEXT: v_mov_b32_e32 v4, 0x20e0 -; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_readlane_b32 s8, v8, 0 -; GFX6-NEXT: v_readlane_b32 s9, v8, 1 -; GFX6-NEXT: v_readlane_b32 s10, v8, 2 -; GFX6-NEXT: v_readlane_b32 s11, v8, 3 -; GFX6-NEXT: v_readlane_b32 s12, v8, 4 -; GFX6-NEXT: v_readlane_b32 s13, v8, 5 -; GFX6-NEXT: v_readlane_b32 s14, v8, 6 -; GFX6-NEXT: v_readlane_b32 s15, v8, 7 -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[44:45] -; GFX6-NEXT: s_mov_b64 s[44:45], exec -; GFX6-NEXT: s_mov_b64 exec, 0xff -; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_writelane_b32 v7, s16, 0 -; GFX6-NEXT: v_writelane_b32 v7, s17, 1 -; GFX6-NEXT: v_writelane_b32 v7, s18, 2 -; GFX6-NEXT: v_writelane_b32 v7, s19, 3 -; GFX6-NEXT: v_writelane_b32 v7, s20, 4 -; GFX6-NEXT: v_writelane_b32 v7, s21, 5 -; GFX6-NEXT: v_writelane_b32 v7, s22, 6 -; GFX6-NEXT: v_writelane_b32 v7, s23, 7 -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2120 -; GFX6-NEXT: buffer_store_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[44:45] -; GFX6-NEXT: s_mov_b64 s[44:45], exec -; GFX6-NEXT: s_mov_b64 exec, 0xff -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2100 -; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, v4, s[40:43], 0 offen ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_readlane_b32 s16, v9, 0 -; GFX6-NEXT: v_readlane_b32 s17, v9, 1 -; GFX6-NEXT: v_readlane_b32 s18, v9, 2 -; GFX6-NEXT: v_readlane_b32 s19, v9, 3 -; GFX6-NEXT: v_readlane_b32 s20, v9, 4 -; GFX6-NEXT: v_readlane_b32 s21, v9, 5 -; GFX6-NEXT: v_readlane_b32 s22, v9, 6 -; GFX6-NEXT: v_readlane_b32 s23, v9, 7 -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[44:45] -; GFX6-NEXT: s_mov_b64 s[44:45], exec -; GFX6-NEXT: s_mov_b64 exec, 0xff -; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_writelane_b32 v8, s24, 0 -; GFX6-NEXT: v_writelane_b32 v8, s25, 1 -; GFX6-NEXT: v_writelane_b32 v8, s26, 2 -; GFX6-NEXT: v_writelane_b32 v8, s27, 3 -; GFX6-NEXT: v_writelane_b32 v8, s28, 4 -; GFX6-NEXT: v_writelane_b32 v8, s29, 5 -; GFX6-NEXT: v_writelane_b32 v8, s30, 6 -; GFX6-NEXT: v_writelane_b32 v8, s31, 7 -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2140 -; GFX6-NEXT: buffer_store_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[44:45] -; GFX6-NEXT: s_mov_b64 s[44:45], exec -; GFX6-NEXT: s_mov_b64 exec, 0xff -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2120 -; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_readlane_b32 s24, v7, 0 -; GFX6-NEXT: v_readlane_b32 s25, v7, 1 -; GFX6-NEXT: v_readlane_b32 s26, v7, 2 -; GFX6-NEXT: v_readlane_b32 s27, v7, 3 -; GFX6-NEXT: v_readlane_b32 s28, v7, 4 -; GFX6-NEXT: v_readlane_b32 s29, v7, 5 -; GFX6-NEXT: v_readlane_b32 s30, v7, 6 -; GFX6-NEXT: v_readlane_b32 s31, v7, 7 -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[44:45] -; GFX6-NEXT: s_mov_b64 s[44:45], exec -; GFX6-NEXT: s_mov_b64 exec, 15 -; GFX6-NEXT: buffer_store_dword v10, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_writelane_b32 v10, s0, 0 -; GFX6-NEXT: v_writelane_b32 v10, s1, 1 -; GFX6-NEXT: v_writelane_b32 v10, s2, 2 -; GFX6-NEXT: v_writelane_b32 v10, s3, 3 -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2160 -; GFX6-NEXT: buffer_store_dword v10, v4, s[40:43], 0 offen ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[44:45] -; GFX6-NEXT: s_mov_b64 s[44:45], exec -; GFX6-NEXT: s_mov_b64 exec, 15 -; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_writelane_b32 v8, s4, 0 -; GFX6-NEXT: v_writelane_b32 v8, s5, 1 -; GFX6-NEXT: v_writelane_b32 v8, s6, 2 -; GFX6-NEXT: v_writelane_b32 v8, s7, 3 -; GFX6-NEXT: s_mov_b32 s0, 0x85c00 -; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], s0 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[44:45] -; GFX6-NEXT: s_mov_b64 s[0:1], exec -; GFX6-NEXT: s_mov_b64 exec, 3 -; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_writelane_b32 v9, s2, 0 -; GFX6-NEXT: v_writelane_b32 v9, s3, 1 -; GFX6-NEXT: s_mov_b32 s4, 0x86600 -; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], s4 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[0:1] -; GFX6-NEXT: s_mov_b64 s[44:45], exec -; GFX6-NEXT: s_mov_b64 exec, 0xff -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2140 -; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_readlane_b32 s0, v7, 0 -; GFX6-NEXT: v_readlane_b32 s1, v7, 1 -; GFX6-NEXT: v_readlane_b32 s2, v7, 2 -; GFX6-NEXT: v_readlane_b32 s3, v7, 3 -; GFX6-NEXT: v_readlane_b32 s4, v7, 4 -; GFX6-NEXT: v_readlane_b32 s5, v7, 5 -; GFX6-NEXT: v_readlane_b32 s6, v7, 6 -; GFX6-NEXT: v_readlane_b32 s7, v7, 7 -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[44:45] -; GFX6-NEXT: s_mov_b64 s[44:45], exec -; GFX6-NEXT: s_mov_b64 exec, 15 -; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_writelane_b32 v8, s36, 0 -; GFX6-NEXT: v_writelane_b32 v8, s37, 1 -; GFX6-NEXT: v_writelane_b32 v8, s38, 2 -; GFX6-NEXT: v_writelane_b32 v8, s39, 3 -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2180 -; GFX6-NEXT: buffer_store_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[44:45] -; GFX6-NEXT: s_mov_b64 s[38:39], exec -; GFX6-NEXT: s_mov_b64 exec, 3 -; GFX6-NEXT: buffer_store_dword v10, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_writelane_b32 v10, s36, 0 -; GFX6-NEXT: v_writelane_b32 v10, s37, 1 -; GFX6-NEXT: s_mov_b32 s44, 0x86400 -; GFX6-NEXT: buffer_store_dword v10, off, s[40:43], s44 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v10, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[38:39] -; GFX6-NEXT: s_mov_b64 s[44:45], exec -; GFX6-NEXT: s_mov_b64 exec, 15 -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2170 -; GFX6-NEXT: buffer_store_dword v9, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v9, v4, s[40:43], 0 offen ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_readlane_b32 s36, v9, 0 -; GFX6-NEXT: v_readlane_b32 s37, v9, 1 -; GFX6-NEXT: v_readlane_b32 s38, v9, 2 -; GFX6-NEXT: v_readlane_b32 s39, v9, 3 -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[44:45] -; GFX6-NEXT: s_not_b64 exec, exec -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2190 -; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Reload -; GFX6-NEXT: s_not_b64 exec, exec -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2190 -; GFX6-NEXT: buffer_load_dword v7, v4, s[40:43], 0 offen ; 4-byte Folded Reload -; GFX6-NEXT: s_not_b64 exec, exec -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_readlane_b32 s44, v7, 0 -; GFX6-NEXT: v_readlane_b32 s45, v7, 1 -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_not_b64 exec, exec -; GFX6-NEXT: s_mov_b64 vcc, s[34:35] -; GFX6-NEXT: s_not_b64 exec, exec -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2198 -; GFX6-NEXT: buffer_store_dword v8, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Reload -; GFX6-NEXT: s_not_b64 exec, exec -; GFX6-NEXT: v_mov_b32_e32 v4, 0x2198 -; GFX6-NEXT: buffer_load_dword v8, v4, s[40:43], 0 offen ; 4-byte Folded Reload -; GFX6-NEXT: s_not_b64 exec, exec -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_readlane_b32 s34, v8, 0 -; GFX6-NEXT: v_readlane_b32 s35, v8, 1 -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_not_b64 exec, exec -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ; use s[8:15],s[16:23],s[24:31],s[0:7],s[36:39],s[34:35],s[44:45] -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: s_mov_b64 s[34:35], vcc -; GFX6-NEXT: s_mov_b64 s[8:9], exec -; GFX6-NEXT: s_mov_b64 exec, 15 -; GFX6-NEXT: s_mov_b32 s0, 0x86000 -; GFX6-NEXT: buffer_store_dword v4, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], s0 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_readlane_b32 s36, v4, 0 -; GFX6-NEXT: v_readlane_b32 s37, v4, 1 -; GFX6-NEXT: v_readlane_b32 s38, v4, 2 -; GFX6-NEXT: v_readlane_b32 s39, v4, 3 -; GFX6-NEXT: buffer_load_dword v4, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[8:9] -; GFX6-NEXT: s_mov_b64 s[4:5], exec -; GFX6-NEXT: s_mov_b64 exec, 15 -; GFX6-NEXT: s_mov_b32 s6, 0x85800 -; GFX6-NEXT: buffer_store_dword v7, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s6 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_readlane_b32 s0, v7, 0 -; GFX6-NEXT: v_readlane_b32 s1, v7, 1 -; GFX6-NEXT: v_readlane_b32 s2, v7, 2 -; GFX6-NEXT: v_readlane_b32 s3, v7, 3 -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_mov_b64 exec, s[4:5] -; GFX6-NEXT: s_mov_b32 s2, 0x83800 -; GFX6-NEXT: buffer_store_dword v0, off, s[40:43], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_mov_b32 s2, 0x84000 -; GFX6-NEXT: buffer_store_dword v13, off, s[40:43], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v14, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v15, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v16, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_mov_b32 s2, 0x84800 -; GFX6-NEXT: buffer_store_dword v17, off, s[40:43], s2 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dword v18, off, s[40:43], s2 offset:4 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v19, off, s[40:43], s2 offset:8 ; 4-byte Folded Spill -; GFX6-NEXT: buffer_store_dword v20, off, s[40:43], s2 offset:12 ; 4-byte Folded Spill -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: s_mov_b32 s2, 0x84800 -; GFX6-NEXT: buffer_load_dword v17, off, s[40:43], s2 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v18, off, s[40:43], s2 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v19, off, s[40:43], s2 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v20, off, s[40:43], s2 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s2, 0x84000 -; GFX6-NEXT: buffer_load_dword v13, off, s[40:43], s2 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v14, off, s[40:43], s2 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v15, off, s[40:43], s2 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v16, off, s[40:43], s2 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s2, 0x83800 -; GFX6-NEXT: buffer_load_dword v0, off, s[40:43], s2 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v1, off, s[40:43], s2 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v2, off, s[40:43], s2 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v3, off, s[40:43], s2 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: ;;#ASMSTART -; GFX6-NEXT: ;;#ASMEND -; GFX6-NEXT: .LBB1_2: ; %ret -; GFX6-NEXT: s_or_b64 exec, exec, s[34:35] -; GFX6-NEXT: s_mov_b32 s4, 0x83400 -; GFX6-NEXT: v_lshl_b64 v[4:5], v[5:6], 8 -; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b64 s[2:3], s[38:39] -; GFX6-NEXT: s_mov_b32 s4, 0x83000 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:240 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x82c00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:224 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x82800 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:208 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x82400 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:192 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x82000 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:176 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x81c00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:160 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x81800 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:144 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x81400 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:128 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x81000 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:112 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x80c00 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:96 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x80400 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:80 -; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_mov_b32 s4, 0x80800 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:64 -; GFX6-NEXT: buffer_store_dwordx4 v[17:20], v[4:5], s[0:3], 0 addr64 offset:48 -; GFX6-NEXT: buffer_store_dwordx4 v[13:16], v[4:5], s[0:3], 0 addr64 offset:32 -; GFX6-NEXT: s_waitcnt expcnt(2) -; GFX6-NEXT: buffer_load_dword v6, off, s[40:43], s4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v7, off, s[40:43], s4 offset:4 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v8, off, s[40:43], s4 offset:8 ; 4-byte Folded Reload -; GFX6-NEXT: buffer_load_dword v9, off, s[40:43], s4 offset:12 ; 4-byte Folded Reload -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[0:3], 0 addr64 offset:16 -; GFX6-NEXT: buffer_store_dwordx4 v[0:3], v[4:5], s[0:3], 0 addr64 -; GFX6-NEXT: s_endpgm -; -; GFX9-FLATSCR-LABEL: test_limited_sgpr: -; GFX9-FLATSCR: ; %bb.0: ; %entry -; GFX9-FLATSCR-NEXT: s_load_dwordx4 s[36:39], s[0:1], 0x24 -; GFX9-FLATSCR-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 -; GFX9-FLATSCR-NEXT: v_mbcnt_hi_u32_b32 v5, -1, v0 -; GFX9-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 8, v5 -; GFX9-FLATSCR-NEXT: s_add_u32 flat_scratch_lo, s2, s5 -; GFX9-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:240 -; GFX9-FLATSCR-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20b0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, 0 -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v7, 1 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v0, s[38:39] offset:224 -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:208 -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20a0 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v0, s[38:39] offset:192 -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:176 -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2090 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v0, s[38:39] offset:160 -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:144 -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2080 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:128 -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20c0 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:112 -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2060 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:96 -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2050 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:80 -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2040 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:64 -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2030 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:48 -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2020 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:32 -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2070 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[1:4], v0, s[38:39] offset:16 -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2010 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[1:4], s0 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v0, s[38:39] -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v4, 16 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: v_lshl_add_u32 v4, v0, 13, v4 -; GFX9-FLATSCR-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-FLATSCR-NEXT: scratch_store_dword v4, v7, off -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ; def s[0:7] -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ; def s[8:15] -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ; def s[16:23] -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ; def s[24:31] -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ; def s[40:43] -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ; def s[38:39] -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ; def s[44:45] -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ; def s33 -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: s_and_saveexec_b64 s[34:35], vcc -; GFX9-FLATSCR-NEXT: s_cbranch_execz .LBB1_2 -; GFX9-FLATSCR-NEXT: ; %bb.1: ; %bb0 -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ; use s[0:7],s[8:15],s[16:23],s[24:31],s[40:43],s[38:39],s[44:45] -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20d0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[0:3], s0 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20e0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[16:19], s0 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20f0 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[20:23], s0 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2100 -; GFX9-FLATSCR-NEXT: scratch_store_dwordx4 off, v[8:11], s0 ; 16-byte Folded Spill -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2100 -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[8:11], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20f0 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[20:23], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20e0 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[16:19], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20d0 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: ;;#ASMSTART -; GFX9-FLATSCR-NEXT: ;;#ASMEND -; GFX9-FLATSCR-NEXT: .LBB1_2: ; %ret -; GFX9-FLATSCR-NEXT: s_or_b64 exec, exec, s[34:35] -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20b0 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[12:15], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: v_lshlrev_b64 v[4:5], 8, v[5:6] -; GFX9-FLATSCR-NEXT: v_mov_b32_e32 v6, s37 -; GFX9-FLATSCR-NEXT: v_add_co_u32_e32 v4, vcc, s36, v4 -; GFX9-FLATSCR-NEXT: v_addc_co_u32_e32 v5, vcc, v6, v5, vcc -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20a0 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[12:15], off offset:240 -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[8:11], off offset:224 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2090 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:208 -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[20:23], off offset:192 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[20:23], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2080 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[20:23], off offset:176 -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[16:19], off offset:160 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[16:19], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x20c0 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2060 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[12:15], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2050 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(2) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[16:19], off offset:144 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(2) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:128 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2040 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(3) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[12:15], off offset:112 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(1) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:96 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2030 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:80 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2020 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:64 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2070 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:48 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_movk_i32 s0, 0x2010 -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:32 -; GFX9-FLATSCR-NEXT: scratch_load_dwordx4 v[6:9], off, s0 ; 16-byte Folded Reload -; GFX9-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[6:9], off offset:16 -; GFX9-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[0:3], off -; GFX9-FLATSCR-NEXT: s_endpgm -; -; GFX10-FLATSCR-LABEL: test_limited_sgpr: -; GFX10-FLATSCR: ; %bb.0: ; %entry -; GFX10-FLATSCR-NEXT: s_add_u32 s2, s2, s5 -; GFX10-FLATSCR-NEXT: s_addc_u32 s3, s3, 0 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 -; GFX10-FLATSCR-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 -; GFX10-FLATSCR-NEXT: s_load_dwordx4 s[36:39], s[0:1], 0x24 -; GFX10-FLATSCR-NEXT: v_mbcnt_lo_u32_b32 v0, -1, 0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v6, 0 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v7, 1 -; GFX10-FLATSCR-NEXT: s_mov_b32 s33, exec_lo -; GFX10-FLATSCR-NEXT: v_mbcnt_hi_u32_b32 v5, -1, v0 -; GFX10-FLATSCR-NEXT: v_lshlrev_b32_e32 v0, 8, v5 -; GFX10-FLATSCR-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-FLATSCR-NEXT: s_clause 0xf -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[64:67], v0, s[38:39] offset:240 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[60:63], v0, s[38:39] offset:224 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[56:59], v0, s[38:39] offset:208 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[52:55], v0, s[38:39] offset:192 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[48:51], v0, s[38:39] offset:176 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[44:47], v0, s[38:39] offset:160 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[40:43], v0, s[38:39] offset:144 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[36:39], v0, s[38:39] offset:128 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[32:35], v0, s[38:39] offset:112 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[28:31], v0, s[38:39] offset:96 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[24:27], v0, s[38:39] offset:80 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[20:23], v0, s[38:39] offset:64 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[16:19], v0, s[38:39] offset:48 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[12:15], v0, s[38:39] offset:32 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[8:11], v0, s[38:39] offset:16 -; GFX10-FLATSCR-NEXT: global_load_dwordx4 v[0:3], v0, s[38:39] -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: v_lshl_add_u32 v4, v0, 13, 16 -; GFX10-FLATSCR-NEXT: scratch_store_dword v4, v7, off -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ; def s[0:7] -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ; def s[8:15] -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ; def s[16:23] -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ; def s[24:31] -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ; def s[40:43] -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ; def s[34:35] -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ; def s[38:39] -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ; def s44 -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: v_cmpx_eq_u32_e32 0, v0 -; GFX10-FLATSCR-NEXT: s_cbranch_execz .LBB1_2 -; GFX10-FLATSCR-NEXT: ; %bb.1: ; %bb0 -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ; use s[0:7],s[8:15],s[16:23],s[24:31],s[40:43],s[34:35],s[38:39] -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x2010 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v88, v59 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v92, v63 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v87, v58 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v86, v57 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v85, v56 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v91, v62 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v90, v61 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v89, v60 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v60, v35 -; GFX10-FLATSCR-NEXT: scratch_store_dwordx4 off, v[64:67], s0 ; 16-byte Folded Spill -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v68, v39 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v59, v34 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v58, v33 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v57, v32 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v67, v38 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v66, v37 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v65, v36 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v11 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v72, v43 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v76, v47 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v80, v51 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v84, v55 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v8 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v71, v42 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v70, v41 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v69, v40 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v40, v15 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v75, v46 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v74, v45 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v73, v44 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v44, v19 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v79, v50 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v78, v49 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v77, v48 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v48, v23 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v83, v54 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v82, v53 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v81, v52 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v52, v27 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v56, v31 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v10 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v9 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v12 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v41, v16 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v45, v20 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v49, v24 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v53, v28 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v39, v14 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v13 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v43, v18 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v42, v17 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v47, v22 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v46, v21 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v51, v26 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v50, v25 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v55, v30 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v54, v29 -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v8, v33 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v28, v53 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v24, v49 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v20, v45 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v16, v41 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v12, v37 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v9, v34 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v10, v35 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v11, v36 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v32, v57 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v29, v54 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v30, v55 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v31, v56 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v25, v50 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v26, v51 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v27, v52 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v21, v46 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v22, v47 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v23, v48 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v17, v42 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v18, v43 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v19, v44 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v13, v38 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v14, v39 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v15, v40 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v33, v58 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v34, v59 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v35, v60 -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: s_movk_i32 s0, 0x2010 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v36, v65 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v37, v66 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v38, v67 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v39, v68 -; GFX10-FLATSCR-NEXT: scratch_load_dwordx4 v[64:67], off, s0 ; 16-byte Folded Reload -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v60, v89 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v56, v85 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v52, v81 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v48, v77 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v44, v73 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v40, v69 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v61, v90 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v62, v91 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v63, v92 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v57, v86 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v58, v87 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v59, v88 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v53, v82 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v54, v83 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v55, v84 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v49, v78 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v50, v79 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v51, v80 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v45, v74 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v46, v75 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v47, v76 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v41, v70 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v42, v71 -; GFX10-FLATSCR-NEXT: v_mov_b32_e32 v43, v72 -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: ;;#ASMSTART -; GFX10-FLATSCR-NEXT: ;;#ASMEND -; GFX10-FLATSCR-NEXT: .LBB1_2: ; %ret -; GFX10-FLATSCR-NEXT: s_or_b32 exec_lo, exec_lo, s33 -; GFX10-FLATSCR-NEXT: v_lshlrev_b64 v[4:5], 8, v[5:6] -; GFX10-FLATSCR-NEXT: v_add_co_u32 v4, vcc_lo, s36, v4 -; GFX10-FLATSCR-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, s37, v5, vcc_lo -; GFX10-FLATSCR-NEXT: s_waitcnt vmcnt(0) -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[64:67], off offset:240 -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[60:63], off offset:224 -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[56:59], off offset:208 -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[52:55], off offset:192 -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[48:51], off offset:176 -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[44:47], off offset:160 -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[40:43], off offset:144 -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[36:39], off offset:128 -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[32:35], off offset:112 -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[28:31], off offset:96 -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[24:27], off offset:80 -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[20:23], off offset:64 -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[16:19], off offset:48 -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[12:15], off offset:32 -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[8:11], off offset:16 -; GFX10-FLATSCR-NEXT: global_store_dwordx4 v[4:5], v[0:3], off -; GFX10-FLATSCR-NEXT: s_endpgm entry: %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) %tid = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) @@ -11150,6 +173,3 @@ declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1 attributes #0 = { "amdgpu-waves-per-eu"="10,10" } attributes #1 = { nounwind readnone } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK: {{.*}} -; FLATSCR: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir index 3914c3b2fe073..69442032f37ec 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir @@ -48,23 +48,23 @@ body: | ; GFX9-NEXT: $sgpr12 = S_ADD_U32 $sgpr12, $sgpr9, implicit-def $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9-NEXT: $sgpr13 = S_ADDC_U32 $sgpr13, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15 ; GFX9-NEXT: $vcc = IMPLICIT_DEF - ; GFX9-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GFX9-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr2, implicit $vcc - ; GFX9-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr2, implicit $vcc - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2 + ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GFX9-NEXT: $vcc = IMPLICIT_DEF - ; GFX9-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr1 - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GFX9-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr1, implicit $vcc - ; GFX9-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr1, implicit killed $vcc - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GFX9-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1 + ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX9-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -84,23 +84,23 @@ body: | ; GFX10-NEXT: $sgpr96 = S_ADD_U32 $sgpr96, $sgpr9, implicit-def $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GFX10-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99 ; GFX10-NEXT: $vcc = IMPLICIT_DEF - ; GFX10-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2 - ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GFX10-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr2, implicit $vcc - ; GFX10-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr2, implicit $vcc - ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GFX10-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2 + ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc + ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GFX10-NEXT: $vcc = IMPLICIT_DEF - ; GFX10-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr1 - ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GFX10-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr1, implicit $vcc - ; GFX10-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr1, implicit killed $vcc - ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) - ; GFX10-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1 + ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX10-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc + ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) @@ -114,23 +114,23 @@ body: | ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: $sgpr33 = S_MOV_B32 0 ; GFX11-NEXT: $vcc = IMPLICIT_DEF - ; GFX11-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec - ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr2 - ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GFX11-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr2, implicit $vcc - ; GFX11-NEXT: $vgpr2 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr2, implicit $vcc - ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) - ; GFX11-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr2 + ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit $vcc + ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GFX11-NEXT: $vcc = IMPLICIT_DEF - ; GFX11-NEXT: $sgpr2_sgpr3 = S_MOV_B64 $exec - ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr1 - ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) - ; GFX11-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr1, implicit $vcc - ; GFX11-NEXT: $vgpr1 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr1, implicit killed $vcc - ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) - ; GFX11-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr2_sgpr3, implicit killed $vgpr1 + ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) + ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_lo, 0, undef $vgpr0, implicit $vcc + ; GFX11-NEXT: $vgpr0 = V_WRITELANE_B32 $vcc_hi, 1, $vgpr0, implicit killed $vcc + ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5) + ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0 ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %fixed-stack.0, align 16, addrspace 5) diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir index 2c4d9f0094470..2dbfd58a3db62 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir +++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir @@ -412,7 +412,6 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.1: ; MUBUF-NEXT: successors: %bb.2(0x80000000) - ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_NOP 0 ; MUBUF-NEXT: {{ $}} @@ -433,7 +432,6 @@ body: | ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: bb.1: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_NOP 0 ; GFX9-FLATSCR-NEXT: {{ $}} @@ -453,7 +451,6 @@ body: | ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: bb.1: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_NOP 0 ; GFX10-FLATSCR-NEXT: {{ $}} @@ -473,7 +470,6 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -487,7 +483,6 @@ body: | S_CBRANCH_SCC1 %bb.2, implicit $scc bb.1: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 S_NOP 0 bb.2: @@ -522,7 +517,6 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.1: ; MUBUF-NEXT: successors: %bb.2(0x80000000) - ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_NOP 0 ; MUBUF-NEXT: {{ $}} @@ -543,7 +537,6 @@ body: | ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: bb.1: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_NOP 0 ; GFX9-FLATSCR-NEXT: {{ $}} @@ -563,7 +556,6 @@ body: | ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: bb.1: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_NOP 0 ; GFX10-FLATSCR-NEXT: {{ $}} @@ -584,7 +576,6 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -598,7 +589,6 @@ body: | S_CBRANCH_SCC1 %bb.2, implicit $scc bb.1: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 S_NOP 0 bb.2: @@ -634,7 +624,6 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.1: ; MUBUF-NEXT: successors: %bb.2(0x80000000) - ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_NOP 0 ; MUBUF-NEXT: {{ $}} @@ -655,7 +644,6 @@ body: | ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: bb.1: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_NOP 0 ; GFX9-FLATSCR-NEXT: {{ $}} @@ -675,7 +663,6 @@ body: | ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: bb.1: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_NOP 0 ; GFX10-FLATSCR-NEXT: {{ $}} @@ -697,7 +684,6 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -711,7 +697,6 @@ body: | S_CBRANCH_SCC1 %bb.2, implicit $scc bb.1: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 S_NOP 0 bb.2: @@ -745,7 +730,6 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.1: ; MUBUF-NEXT: successors: %bb.2(0x80000000) - ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_NOP 0 ; MUBUF-NEXT: {{ $}} @@ -766,7 +750,6 @@ body: | ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: bb.1: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_NOP 0 ; GFX9-FLATSCR-NEXT: {{ $}} @@ -786,7 +769,6 @@ body: | ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: bb.1: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_NOP 0 ; GFX10-FLATSCR-NEXT: {{ $}} @@ -806,7 +788,6 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -820,7 +801,6 @@ body: | S_CBRANCH_SCC1 %bb.2, implicit $scc bb.1: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 S_NOP 0 bb.2: @@ -855,7 +835,6 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.1: ; MUBUF-NEXT: successors: %bb.2(0x80000000) - ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_NOP 0 ; MUBUF-NEXT: {{ $}} @@ -876,7 +855,6 @@ body: | ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: bb.1: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_NOP 0 ; GFX9-FLATSCR-NEXT: {{ $}} @@ -896,7 +874,6 @@ body: | ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: bb.1: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_NOP 0 ; GFX10-FLATSCR-NEXT: {{ $}} @@ -917,7 +894,6 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -931,7 +907,6 @@ body: | S_CBRANCH_SCC1 %bb.2, implicit $scc bb.1: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 S_NOP 0 bb.2: @@ -967,7 +942,6 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.1: ; MUBUF-NEXT: successors: %bb.2(0x80000000) - ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_NOP 0 ; MUBUF-NEXT: {{ $}} @@ -988,7 +962,6 @@ body: | ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: bb.1: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_NOP 0 ; GFX9-FLATSCR-NEXT: {{ $}} @@ -1008,7 +981,6 @@ body: | ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: bb.1: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_NOP 0 ; GFX10-FLATSCR-NEXT: {{ $}} @@ -1030,7 +1002,6 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -1044,7 +1015,6 @@ body: | S_CBRANCH_SCC1 %bb.2, implicit $scc bb.1: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 S_NOP 0 bb.2: @@ -1174,7 +1144,6 @@ body: | ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: bb.1: ; MUBUF-NEXT: successors: %bb.2(0x80000000) - ; MUBUF-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; MUBUF-NEXT: {{ $}} ; MUBUF-NEXT: S_NOP 0 ; MUBUF-NEXT: {{ $}} @@ -1195,7 +1164,6 @@ body: | ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: bb.1: ; GFX9-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX9-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX9-FLATSCR-NEXT: {{ $}} ; GFX9-FLATSCR-NEXT: S_NOP 0 ; GFX9-FLATSCR-NEXT: {{ $}} @@ -1216,7 +1184,6 @@ body: | ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: bb.1: ; GFX10-FLATSCR-NEXT: successors: %bb.2(0x80000000) - ; GFX10-FLATSCR-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; GFX10-FLATSCR-NEXT: {{ $}} ; GFX10-FLATSCR-NEXT: S_NOP 0 ; GFX10-FLATSCR-NEXT: {{ $}} @@ -1236,7 +1203,6 @@ body: | ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: bb.1: ; VMEM-GFX8-NEXT: successors: %bb.2(0x80000000) - ; VMEM-GFX8-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 ; VMEM-GFX8-NEXT: {{ $}} ; VMEM-GFX8-NEXT: S_NOP 0 ; VMEM-GFX8-NEXT: {{ $}} @@ -1251,7 +1217,6 @@ body: | S_CBRANCH_SCC1 %bb.2, implicit $scc bb.1: - liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255 S_NOP 0 bb.2: