diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index aa5ea77f17291..cd4340c761958 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -12,8 +12,10 @@ #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetMachine.h" @@ -619,6 +621,34 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, assert(MFI->isEntryFunction()); + // Debug location must be unknown since the first debug location is used to + // determine the end of the prologue. + DebugLoc DL; + MachineBasicBlock::iterator I = MBB.begin(); + + if (MF.needsFrameMoves()) { + // On entry the SP/FP are not set up, so we need to define the CFA in terms + // of a literal location expression. + static const char CFAEncodedInstUserOpsArr[] = { + dwarf::DW_CFA_def_cfa_expression, + 4, // length + static_cast(dwarf::DW_OP_lit0), + static_cast(dwarf::DW_OP_lit0 + + dwarf::DW_ASPACE_LLVM_AMDGPU_private_wave), + static_cast(dwarf::DW_OP_LLVM_user), + static_cast(dwarf::DW_OP_LLVM_form_aspace_address)}; + static StringRef CFAEncodedInstUserOps = + StringRef(CFAEncodedInstUserOpsArr, sizeof(CFAEncodedInstUserOpsArr)); + buildCFI(MBB, I, DL, + MCCFIInstruction::createEscape(nullptr, CFAEncodedInstUserOps, + SMLoc(), + "CFA is 0 in private_wave aspace")); + // Unwinding halts when the return address (PC) is undefined. + buildCFI(MBB, I, DL, + MCCFIInstruction::createUndefined( + nullptr, TRI->getDwarfRegNum(AMDGPU::PC_REG, false))); + } + Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg( AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); @@ -655,11 +685,6 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF, } } - // Debug location must be unknown since the first debug location is used to - // determine the end of the prologue. - DebugLoc DL; - MachineBasicBlock::iterator I = MBB.begin(); - // We found the SRSRC first because it needs four registers and has an // alignment requirement. If the SRSRC that we found is clobbering with // the scratch wave offset, which may be in a fixed SGPR or a free SGPR @@ -2212,3 +2237,15 @@ bool SIFrameLowering::requiresStackPointerReference( // references the SP, like variable sized stack objects. return frameTriviallyRequiresSP(MFI); } + +MachineInstr *SIFrameLowering::buildCFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, + const MCCFIInstruction &CFIInst, + MachineInstr::MIFlag flag) const { + MachineFunction &MF = *MBB.getParent(); + const SIInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + return BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(MF.addFrameInst(CFIInst)) + .setMIFlag(flag); +} diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h index a72772987262e..0b691d8f15a48 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h @@ -104,6 +104,12 @@ class SIFrameLowering final : public AMDGPUFrameLowering { public: bool requiresStackPointerReference(const MachineFunction &MF) const; + /// Create a CFI index for CFIInst and build a MachineInstr around it. + MachineInstr * + buildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, const MCCFIInstruction &CFIInst, + MachineInstr::MIFlag flag = MachineInstr::FrameSetup) const; + // Returns true if the function may need to reserve space on the stack for the // CWSR trap handler. bool mayReserveScratchForCWSR(const MachineFunction &MF) const; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll index 37b5422be7e2f..ba95171ba5005 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/memory-legalizer-atomic-fence.ll @@ -13,18 +13,24 @@ define amdgpu_kernel void @system_one_as_acquire() #0 { ; GFX6-LABEL: name: system_one_as_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_one_as_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_one_as_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -33,6 +39,8 @@ define amdgpu_kernel void @system_one_as_acquire() #0 { ; ; GFX10CU-LABEL: name: system_one_as_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -41,6 +49,8 @@ define amdgpu_kernel void @system_one_as_acquire() #0 { ; ; GFX11WGP-LABEL: name: system_one_as_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -49,6 +59,8 @@ define amdgpu_kernel void @system_one_as_acquire() #0 { ; ; GFX11CU-LABEL: name: system_one_as_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -62,34 +74,46 @@ entry: define amdgpu_kernel void @system_one_as_release() #0 { ; GFX6-LABEL: name: system_one_as_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_one_as_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_one_as_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: system_one_as_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: system_one_as_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: system_one_as_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -101,18 +125,24 @@ entry: define amdgpu_kernel void @system_one_as_acq_rel() #0 { ; GFX6-LABEL: name: system_one_as_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_one_as_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_one_as_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -121,6 +151,8 @@ define amdgpu_kernel void @system_one_as_acq_rel() #0 { ; ; GFX10CU-LABEL: name: system_one_as_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -129,6 +161,8 @@ define amdgpu_kernel void @system_one_as_acq_rel() #0 { ; ; GFX11WGP-LABEL: name: system_one_as_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -137,6 +171,8 @@ define amdgpu_kernel void @system_one_as_acq_rel() #0 { ; ; GFX11CU-LABEL: name: system_one_as_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -150,18 +186,24 @@ entry: define amdgpu_kernel void @system_one_as_seq_cst() #0 { ; GFX6-LABEL: name: system_one_as_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_one_as_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_one_as_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -170,6 +212,8 @@ define amdgpu_kernel void @system_one_as_seq_cst() #0 { ; ; GFX10CU-LABEL: name: system_one_as_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -178,6 +222,8 @@ define amdgpu_kernel void @system_one_as_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: system_one_as_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -186,6 +232,8 @@ define amdgpu_kernel void @system_one_as_seq_cst() #0 { ; ; GFX11CU-LABEL: name: system_one_as_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -199,26 +247,38 @@ entry: define amdgpu_kernel void @singlethread_one_as_acquire() #0 { ; GFX6-LABEL: name: singlethread_one_as_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_one_as_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_one_as_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_one_as_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_one_as_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_one_as_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread-one-as") acquire @@ -228,26 +288,38 @@ entry: define amdgpu_kernel void @singlethread_one_as_release() #0 { ; GFX6-LABEL: name: singlethread_one_as_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_one_as_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_one_as_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_one_as_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_one_as_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_one_as_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread-one-as") release @@ -257,26 +329,38 @@ entry: define amdgpu_kernel void @singlethread_one_as_acq_rel() #0 { ; GFX6-LABEL: name: singlethread_one_as_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_one_as_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_one_as_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_one_as_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_one_as_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_one_as_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread-one-as") acq_rel @@ -286,26 +370,38 @@ entry: define amdgpu_kernel void @singlethread_one_as_seq_cst() #0 { ; GFX6-LABEL: name: singlethread_one_as_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_one_as_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_one_as_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_one_as_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_one_as_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_one_as_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread-one-as") seq_cst @@ -315,18 +411,24 @@ entry: define amdgpu_kernel void @agent_one_as_acquire() #0 { ; GFX6-LABEL: name: agent_one_as_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_one_as_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_one_as_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -335,6 +437,8 @@ define amdgpu_kernel void @agent_one_as_acquire() #0 { ; ; GFX10CU-LABEL: name: agent_one_as_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -343,6 +447,8 @@ define amdgpu_kernel void @agent_one_as_acquire() #0 { ; ; GFX11WGP-LABEL: name: agent_one_as_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -351,6 +457,8 @@ define amdgpu_kernel void @agent_one_as_acquire() #0 { ; ; GFX11CU-LABEL: name: agent_one_as_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -364,34 +472,46 @@ entry: define amdgpu_kernel void @agent_one_as_release() #0 { ; GFX6-LABEL: name: agent_one_as_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_one_as_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_one_as_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: agent_one_as_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: agent_one_as_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: agent_one_as_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -403,18 +523,24 @@ entry: define amdgpu_kernel void @agent_one_as_acq_rel() #0 { ; GFX6-LABEL: name: agent_one_as_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_one_as_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_one_as_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -423,6 +549,8 @@ define amdgpu_kernel void @agent_one_as_acq_rel() #0 { ; ; GFX10CU-LABEL: name: agent_one_as_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -431,6 +559,8 @@ define amdgpu_kernel void @agent_one_as_acq_rel() #0 { ; ; GFX11WGP-LABEL: name: agent_one_as_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -439,6 +569,8 @@ define amdgpu_kernel void @agent_one_as_acq_rel() #0 { ; ; GFX11CU-LABEL: name: agent_one_as_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -452,18 +584,24 @@ entry: define amdgpu_kernel void @agent_one_as_seq_cst() #0 { ; GFX6-LABEL: name: agent_one_as_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 3952 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_one_as_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 3952 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_one_as_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -472,6 +610,8 @@ define amdgpu_kernel void @agent_one_as_seq_cst() #0 { ; ; GFX10CU-LABEL: name: agent_one_as_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -480,6 +620,8 @@ define amdgpu_kernel void @agent_one_as_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: agent_one_as_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -488,6 +630,8 @@ define amdgpu_kernel void @agent_one_as_seq_cst() #0 { ; ; GFX11CU-LABEL: name: agent_one_as_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -501,14 +645,20 @@ entry: define amdgpu_kernel void @workgroup_one_as_acquire() #0 { ; GFX6-LABEL: name: workgroup_one_as_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_one_as_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_one_as_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -516,10 +666,14 @@ define amdgpu_kernel void @workgroup_one_as_acquire() #0 { ; ; GFX10CU-LABEL: name: workgroup_one_as_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_one_as_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -527,6 +681,8 @@ define amdgpu_kernel void @workgroup_one_as_acquire() #0 { ; ; GFX11CU-LABEL: name: workgroup_one_as_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("workgroup-one-as") acquire @@ -536,14 +692,20 @@ entry: define amdgpu_kernel void @workgroup_one_as_release() #0 { ; GFX6-LABEL: name: workgroup_one_as_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_one_as_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_one_as_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -551,6 +713,8 @@ define amdgpu_kernel void @workgroup_one_as_release() #0 { ; ; GFX10CU-LABEL: name: workgroup_one_as_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -558,12 +722,16 @@ define amdgpu_kernel void @workgroup_one_as_release() #0 { ; ; GFX11WGP-LABEL: name: workgroup_one_as_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: workgroup_one_as_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -575,14 +743,20 @@ entry: define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 { ; GFX6-LABEL: name: workgroup_one_as_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_one_as_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_one_as_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -591,6 +765,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 { ; ; GFX10CU-LABEL: name: workgroup_one_as_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -598,6 +774,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 { ; ; GFX11WGP-LABEL: name: workgroup_one_as_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -605,6 +783,8 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 { ; ; GFX11CU-LABEL: name: workgroup_one_as_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -616,14 +796,20 @@ entry: define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 { ; GFX6-LABEL: name: workgroup_one_as_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_one_as_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_one_as_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 16240 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -632,6 +818,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 { ; ; GFX10CU-LABEL: name: workgroup_one_as_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 16240 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -639,6 +827,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: workgroup_one_as_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 1015 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -646,6 +836,8 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 { ; ; GFX11CU-LABEL: name: workgroup_one_as_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 1015 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -657,26 +849,38 @@ entry: define amdgpu_kernel void @wavefront_one_as_acquire() #0 { ; GFX6-LABEL: name: wavefront_one_as_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_one_as_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_one_as_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_one_as_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_one_as_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_one_as_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront-one-as") acquire @@ -686,26 +890,38 @@ entry: define amdgpu_kernel void @wavefront_one_as_release() #0 { ; GFX6-LABEL: name: wavefront_one_as_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_one_as_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_one_as_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_one_as_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_one_as_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_one_as_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront-one-as") release @@ -715,26 +931,38 @@ entry: define amdgpu_kernel void @wavefront_one_as_acq_rel() #0 { ; GFX6-LABEL: name: wavefront_one_as_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_one_as_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_one_as_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_one_as_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_one_as_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_one_as_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront-one-as") acq_rel @@ -744,26 +972,38 @@ entry: define amdgpu_kernel void @wavefront_one_as_seq_cst() #0 { ; GFX6-LABEL: name: wavefront_one_as_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_one_as_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_one_as_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_one_as_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_one_as_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_one_as_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront-one-as") seq_cst @@ -773,18 +1013,24 @@ entry: define amdgpu_kernel void @system_acquire() #0 { ; GFX6-LABEL: name: system_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -793,6 +1039,8 @@ define amdgpu_kernel void @system_acquire() #0 { ; ; GFX10CU-LABEL: name: system_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -801,6 +1049,8 @@ define amdgpu_kernel void @system_acquire() #0 { ; ; GFX11WGP-LABEL: name: system_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -809,6 +1059,8 @@ define amdgpu_kernel void @system_acquire() #0 { ; ; GFX11CU-LABEL: name: system_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -822,34 +1074,46 @@ entry: define amdgpu_kernel void @system_release() #0 { ; GFX6-LABEL: name: system_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: system_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: system_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: system_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -861,18 +1125,24 @@ entry: define amdgpu_kernel void @system_acq_rel() #0 { ; GFX6-LABEL: name: system_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -881,6 +1151,8 @@ define amdgpu_kernel void @system_acq_rel() #0 { ; ; GFX10CU-LABEL: name: system_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -889,6 +1161,8 @@ define amdgpu_kernel void @system_acq_rel() #0 { ; ; GFX11WGP-LABEL: name: system_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -897,6 +1171,8 @@ define amdgpu_kernel void @system_acq_rel() #0 { ; ; GFX11CU-LABEL: name: system_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -910,18 +1186,24 @@ entry: define amdgpu_kernel void @system_seq_cst() #0 { ; GFX6-LABEL: name: system_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: system_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: system_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -930,6 +1212,8 @@ define amdgpu_kernel void @system_seq_cst() #0 { ; ; GFX10CU-LABEL: name: system_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -938,6 +1222,8 @@ define amdgpu_kernel void @system_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: system_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -946,6 +1232,8 @@ define amdgpu_kernel void @system_seq_cst() #0 { ; ; GFX11CU-LABEL: name: system_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -959,26 +1247,38 @@ entry: define amdgpu_kernel void @singlethread_acquire() #0 { ; GFX6-LABEL: name: singlethread_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread") acquire @@ -988,26 +1288,38 @@ entry: define amdgpu_kernel void @singlethread_release() #0 { ; GFX6-LABEL: name: singlethread_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread") release @@ -1017,26 +1329,38 @@ entry: define amdgpu_kernel void @singlethread_acq_rel() #0 { ; GFX6-LABEL: name: singlethread_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread") acq_rel @@ -1046,26 +1370,38 @@ entry: define amdgpu_kernel void @singlethread_seq_cst() #0 { ; GFX6-LABEL: name: singlethread_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: singlethread_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: singlethread_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: singlethread_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: singlethread_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: singlethread_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("singlethread") seq_cst @@ -1075,18 +1411,24 @@ entry: define amdgpu_kernel void @agent_acquire() #0 { ; GFX6-LABEL: name: agent_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1095,6 +1437,8 @@ define amdgpu_kernel void @agent_acquire() #0 { ; ; GFX10CU-LABEL: name: agent_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1103,6 +1447,8 @@ define amdgpu_kernel void @agent_acquire() #0 { ; ; GFX11WGP-LABEL: name: agent_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1111,6 +1457,8 @@ define amdgpu_kernel void @agent_acquire() #0 { ; ; GFX11CU-LABEL: name: agent_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1124,34 +1472,46 @@ entry: define amdgpu_kernel void @agent_release() #0 { ; GFX6-LABEL: name: agent_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: agent_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: agent_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: agent_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -1163,18 +1523,24 @@ entry: define amdgpu_kernel void @agent_acq_rel() #0 { ; GFX6-LABEL: name: agent_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1183,6 +1549,8 @@ define amdgpu_kernel void @agent_acq_rel() #0 { ; ; GFX10CU-LABEL: name: agent_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1191,6 +1559,8 @@ define amdgpu_kernel void @agent_acq_rel() #0 { ; ; GFX11WGP-LABEL: name: agent_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1199,6 +1569,8 @@ define amdgpu_kernel void @agent_acq_rel() #0 { ; ; GFX11CU-LABEL: name: agent_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1212,18 +1584,24 @@ entry: define amdgpu_kernel void @agent_seq_cst() #0 { ; GFX6-LABEL: name: agent_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 112 ; GFX6-NEXT: BUFFER_WBINVL1 implicit $exec ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: agent_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 112 ; GFX8-NEXT: BUFFER_WBINVL1_VOL implicit $exec ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: agent_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1232,6 +1610,8 @@ define amdgpu_kernel void @agent_seq_cst() #0 { ; ; GFX10CU-LABEL: name: agent_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1240,6 +1620,8 @@ define amdgpu_kernel void @agent_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: agent_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL1_INV implicit $exec @@ -1248,6 +1630,8 @@ define amdgpu_kernel void @agent_seq_cst() #0 { ; ; GFX11CU-LABEL: name: agent_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: BUFFER_GL1_INV implicit $exec @@ -1261,16 +1645,22 @@ entry: define amdgpu_kernel void @workgroup_acquire() #0 { ; GFX6-LABEL: name: workgroup_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 127 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 127 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX10WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -1278,11 +1668,15 @@ define amdgpu_kernel void @workgroup_acquire() #0 { ; ; GFX10CU-LABEL: name: workgroup_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 49279 ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: workgroup_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -1290,6 +1684,8 @@ define amdgpu_kernel void @workgroup_acquire() #0 { ; ; GFX11CU-LABEL: name: workgroup_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 64519 ; GFX11CU-NEXT: S_ENDPGM 0 entry: @@ -1300,16 +1696,22 @@ entry: define amdgpu_kernel void @workgroup_release() #0 { ; GFX6-LABEL: name: workgroup_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 127 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 127 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1317,6 +1719,8 @@ define amdgpu_kernel void @workgroup_release() #0 { ; ; GFX10CU-LABEL: name: workgroup_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1324,12 +1728,16 @@ define amdgpu_kernel void @workgroup_release() #0 { ; ; GFX11WGP-LABEL: name: workgroup_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: workgroup_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -1341,16 +1749,22 @@ entry: define amdgpu_kernel void @workgroup_acq_rel() #0 { ; GFX6-LABEL: name: workgroup_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 127 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 127 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1359,6 +1773,8 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 { ; ; GFX10CU-LABEL: name: workgroup_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1366,6 +1782,8 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 { ; ; GFX11WGP-LABEL: name: workgroup_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -1373,6 +1791,8 @@ define amdgpu_kernel void @workgroup_acq_rel() #0 { ; ; GFX11CU-LABEL: name: workgroup_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -1384,16 +1804,22 @@ entry: define amdgpu_kernel void @workgroup_seq_cst() #0 { ; GFX6-LABEL: name: workgroup_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_WAITCNT_soft 127 ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: workgroup_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_WAITCNT_soft 127 ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: workgroup_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_WAITCNT_soft 112 ; GFX10WGP-NEXT: S_WAITCNT_lds_direct ; GFX10WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1402,6 +1828,8 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 { ; ; GFX10CU-LABEL: name: workgroup_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_WAITCNT_soft 112 ; GFX10CU-NEXT: S_WAITCNT_lds_direct ; GFX10CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 @@ -1409,6 +1837,8 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 { ; ; GFX11WGP-LABEL: name: workgroup_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_WAITCNT_soft 7 ; GFX11WGP-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11WGP-NEXT: BUFFER_GL0_INV implicit $exec @@ -1416,6 +1846,8 @@ define amdgpu_kernel void @workgroup_seq_cst() #0 { ; ; GFX11CU-LABEL: name: workgroup_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_WAITCNT_soft 7 ; GFX11CU-NEXT: S_WAITCNT_VSCNT_soft undef $sgpr_null, 0 ; GFX11CU-NEXT: S_ENDPGM 0 @@ -1427,26 +1859,38 @@ entry: define amdgpu_kernel void @wavefront_acquire() #0 { ; GFX6-LABEL: name: wavefront_acquire ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_acquire ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_acquire ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_acquire ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_acquire ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_acquire ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront") acquire @@ -1456,26 +1900,38 @@ entry: define amdgpu_kernel void @wavefront_release() #0 { ; GFX6-LABEL: name: wavefront_release ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_release ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_release ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_release ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_release ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_release ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront") release @@ -1485,26 +1941,38 @@ entry: define amdgpu_kernel void @wavefront_acq_rel() #0 { ; GFX6-LABEL: name: wavefront_acq_rel ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_acq_rel ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_acq_rel ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_acq_rel ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_acq_rel ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_acq_rel ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront") acq_rel @@ -1514,30 +1982,42 @@ entry: define amdgpu_kernel void @wavefront_seq_cst() #0 { ; GFX6-LABEL: name: wavefront_seq_cst ; GFX6: bb.0.entry: + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX6-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX6-NEXT: S_ENDPGM 0 ; ; GFX8-LABEL: name: wavefront_seq_cst ; GFX8: bb.0.entry: + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX8-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX8-NEXT: S_ENDPGM 0 ; ; GFX10WGP-LABEL: name: wavefront_seq_cst ; GFX10WGP: bb.0.entry: + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10WGP-NEXT: S_ENDPGM 0 ; ; GFX10CU-LABEL: name: wavefront_seq_cst ; GFX10CU: bb.0.entry: + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX10CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX10CU-NEXT: S_ENDPGM 0 ; ; GFX11WGP-LABEL: name: wavefront_seq_cst ; GFX11WGP: bb.0.entry: + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11WGP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11WGP-NEXT: S_ENDPGM 0 ; ; GFX11CU-LABEL: name: wavefront_seq_cst ; GFX11CU: bb.0.entry: + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX11CU-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX11CU-NEXT: S_ENDPGM 0 entry: fence syncscope("wavefront") seq_cst ret void } -attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } +attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir b/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir index dfe4b8a33f396..02856a31d2fb7 100644 --- a/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir +++ b/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir @@ -21,6 +21,8 @@ body: | ; GFX908-PEI-LABEL: name: agpr-spill-to-vgpr-machine-cp ; GFX908-PEI: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GFX908-PEI-NEXT: {{ $}} + ; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX908-PEI-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec ; GFX908-PEI-NEXT: renamable $agpr2 = COPY renamable $vgpr1, implicit $exec ; GFX908-PEI-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 @@ -31,6 +33,8 @@ body: | ; GFX908-PEI-MACHINECP-LABEL: name: agpr-spill-to-vgpr-machine-cp ; GFX908-PEI-MACHINECP: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33 ; GFX908-PEI-MACHINECP-NEXT: {{ $}} + ; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX908-PEI-MACHINECP-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec ; GFX908-PEI-MACHINECP-NEXT: renamable $agpr2 = COPY renamable $vgpr1, implicit $exec ; GFX908-PEI-MACHINECP-NEXT: $vgpr33 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 @@ -63,6 +67,8 @@ body: | ; GFX908-PEI-LABEL: name: agpr-spill-to-vgpr-to-stack-machine-cp ; GFX908-PEI: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-NEXT: {{ $}} + ; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX908-PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX908-PEI-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec @@ -79,6 +85,8 @@ body: | ; GFX908-PEI-MACHINECP-LABEL: name: agpr-spill-to-vgpr-to-stack-machine-cp ; GFX908-PEI-MACHINECP: liveins: $vgpr0, $vgpr1, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-MACHINECP-NEXT: {{ $}} + ; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX908-PEI-MACHINECP-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX908-PEI-MACHINECP-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-MACHINECP-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX908-PEI-MACHINECP-NEXT: renamable $agpr0 = COPY renamable $vgpr0, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll index d89b39348ad9a..863177ae3d6b5 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll @@ -7,6 +7,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr17, $sgpr12_sgpr13 ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; GFX90A-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; GFX90A-NEXT: $sgpr32 = S_MOV_B32 0 ; GFX90A-NEXT: $flat_scr_lo = S_ADD_U32 $sgpr12, $sgpr17, implicit-def $scc ; GFX90A-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr13, 0, implicit-def dead $scc, implicit $scc diff --git a/llvm/test/CodeGen/AMDGPU/dbg-info-inline-at.ll b/llvm/test/CodeGen/AMDGPU/dbg-info-inline-at.ll index ed609f85918f9..20077fa5d96a7 100644 --- a/llvm/test/CodeGen/AMDGPU/dbg-info-inline-at.ll +++ b/llvm/test/CodeGen/AMDGPU/dbg-info-inline-at.ll @@ -8,6 +8,8 @@ define amdgpu_kernel void @_Z3fooPiiii(ptr addrspace(1) nocapture noundef writeo ; CHECK-NEXT: .cfi_sections .debug_frame ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: .cfi_escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 ; CFA is 0 in private_wave aspace +; CHECK-NEXT: .cfi_undefined 16 ; CHECK-NEXT: .file 1 "." "a.h" ; CHECK-NEXT: .loc 1 5 12 prologue_end ; ./a.h:5:12 @[ a.hip:12:8 ] ; CHECK-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x8 diff --git a/llvm/test/CodeGen/AMDGPU/debug-frame.ll b/llvm/test/CodeGen/AMDGPU/debug-frame.ll new file mode 100644 index 0000000000000..40ff6ccf0cb0f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/debug-frame.ll @@ -0,0 +1,1405 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK,GFX900 %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-spill-vgpr-to-agpr=0 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK,GFX90A-V2A-DIS %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -amdgpu-spill-vgpr-to-agpr=1 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK,GFX90A-V2A-EN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK,WAVE32 %s + +define protected amdgpu_kernel void @kern1() #0 { +; CHECK-LABEL: kern1: +; CHECK: .Lfunc_begin0: +; CHECK-NEXT: .cfi_sections .debug_frame +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: .cfi_escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 ; CFA is 0 in private_wave aspace +; CHECK-NEXT: .cfi_undefined 16 +; CHECK-NEXT: s_endpgm +entry: + ret void +} + +define hidden void @func_no_clobber() #0 { +; CHECK-LABEL: func_no_clobber: +; CHECK: .Lfunc_begin1: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: ; %bb.0: ; %entry +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_setpc_b64 s[30:31] +entry: + ret void +} + +define void @callee_need_to_spill_fp_to_memory() #1 { +; GFX900-LABEL: callee_need_to_spill_fp_to_memory: +; GFX900: .Lfunc_begin2: +; GFX900-NEXT: .cfi_startproc +; GFX900-NEXT: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s40, s33 +; GFX900-NEXT: s_mov_b32 s33, s32 +; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber nonpreserved SGPRs +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber all VGPRs +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: buffer_load_dword v255, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload +; GFX900-NEXT: s_addk_i32 s32, 0x7100 +; GFX900-NEXT: s_mov_b32 s32, s33 +; GFX900-NEXT: s_mov_b32 s33, s40 +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-DIS-LABEL: callee_need_to_spill_fp_to_memory: +; GFX90A-V2A-DIS: .Lfunc_begin2: +; GFX90A-V2A-DIS-NEXT: .cfi_startproc +; GFX90A-V2A-DIS-NEXT: ; %bb.0: +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s40, s33 +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, s32 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber nonpreserved SGPRs +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber all VGPRs +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v255, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: s_addk_i32 s32, 0x7100 +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s32, s33 +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, s40 +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-EN-LABEL: callee_need_to_spill_fp_to_memory: +; GFX90A-V2A-EN: .Lfunc_begin2: +; GFX90A-V2A-EN-NEXT: .cfi_startproc +; GFX90A-V2A-EN-NEXT: ; %bb.0: +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-EN-NEXT: s_mov_b32 s40, s33 +; GFX90A-V2A-EN-NEXT: s_mov_b32 s33, s32 +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a2, v42 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a3, v43 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a4, v44 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a5, v45 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a6, v46 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a7, v47 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a8, v56 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a9, v57 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a10, v58 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a11, v59 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a12, v60 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a13, v61 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a14, v62 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a15, v63 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a16, v72 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a17, v73 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a18, v74 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a19, v75 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a20, v76 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a21, v77 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a22, v78 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a23, v79 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a24, v88 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a25, v89 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a26, v90 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a27, v91 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a28, v92 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a29, v93 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a30, v94 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a31, v95 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber nonpreserved SGPRs +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber all VGPRs +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: buffer_load_dword v255, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: s_addk_i32 s32, 0x5100 +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v95, a31 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v94, a30 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v93, a29 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v92, a28 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v91, a27 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v90, a26 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v89, a25 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v88, a24 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v79, a23 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v78, a22 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v77, a21 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v76, a20 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v75, a19 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v74, a18 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v73, a17 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v72, a16 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v63, a15 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v62, a14 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v61, a13 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v60, a12 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v59, a11 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v58, a10 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v57, a9 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v56, a8 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v47, a7 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v46, a6 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v45, a5 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v44, a4 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v43, a3 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v42, a2 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: s_mov_b32 s32, s33 +; GFX90A-V2A-EN-NEXT: s_mov_b32 s33, s40 +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-EN-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: callee_need_to_spill_fp_to_memory: +; WAVE32: .Lfunc_begin2: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: s_mov_b32 s40, s33 +; WAVE32-NEXT: s_mov_b32 s33, s32 +; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:444 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:440 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:436 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:432 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:428 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:424 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:420 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:416 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:412 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:408 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:404 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:400 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v60, off, s[0:3], s33 offset:396 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v61, off, s[0:3], s33 offset:392 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v62, off, s[0:3], s33 offset:388 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v63, off, s[0:3], s33 offset:384 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v72, off, s[0:3], s33 offset:380 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v73, off, s[0:3], s33 offset:376 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v74, off, s[0:3], s33 offset:372 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v75, off, s[0:3], s33 offset:368 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v76, off, s[0:3], s33 offset:364 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v77, off, s[0:3], s33 offset:360 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v78, off, s[0:3], s33 offset:356 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v79, off, s[0:3], s33 offset:352 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v88, off, s[0:3], s33 offset:348 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v89, off, s[0:3], s33 offset:344 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v90, off, s[0:3], s33 offset:340 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v91, off, s[0:3], s33 offset:336 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v92, off, s[0:3], s33 offset:332 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v93, off, s[0:3], s33 offset:328 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v94, off, s[0:3], s33 offset:324 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v95, off, s[0:3], s33 offset:320 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v104, off, s[0:3], s33 offset:316 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v105, off, s[0:3], s33 offset:312 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v106, off, s[0:3], s33 offset:308 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v107, off, s[0:3], s33 offset:304 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v108, off, s[0:3], s33 offset:300 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v109, off, s[0:3], s33 offset:296 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v110, off, s[0:3], s33 offset:292 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v111, off, s[0:3], s33 offset:288 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v120, off, s[0:3], s33 offset:284 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v121, off, s[0:3], s33 offset:280 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v122, off, s[0:3], s33 offset:276 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v123, off, s[0:3], s33 offset:272 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v124, off, s[0:3], s33 offset:268 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v125, off, s[0:3], s33 offset:264 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v126, off, s[0:3], s33 offset:260 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v127, off, s[0:3], s33 offset:256 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v136, off, s[0:3], s33 offset:252 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v137, off, s[0:3], s33 offset:248 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v138, off, s[0:3], s33 offset:244 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v139, off, s[0:3], s33 offset:240 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v140, off, s[0:3], s33 offset:236 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v141, off, s[0:3], s33 offset:232 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v142, off, s[0:3], s33 offset:228 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v143, off, s[0:3], s33 offset:224 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v152, off, s[0:3], s33 offset:220 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v153, off, s[0:3], s33 offset:216 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v154, off, s[0:3], s33 offset:212 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v155, off, s[0:3], s33 offset:208 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v156, off, s[0:3], s33 offset:204 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v157, off, s[0:3], s33 offset:200 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v158, off, s[0:3], s33 offset:196 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v159, off, s[0:3], s33 offset:192 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v168, off, s[0:3], s33 offset:188 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v169, off, s[0:3], s33 offset:184 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v170, off, s[0:3], s33 offset:180 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v171, off, s[0:3], s33 offset:176 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v172, off, s[0:3], s33 offset:172 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v173, off, s[0:3], s33 offset:168 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v174, off, s[0:3], s33 offset:164 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v175, off, s[0:3], s33 offset:160 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v184, off, s[0:3], s33 offset:156 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v185, off, s[0:3], s33 offset:152 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v186, off, s[0:3], s33 offset:148 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v187, off, s[0:3], s33 offset:144 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v188, off, s[0:3], s33 offset:140 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v189, off, s[0:3], s33 offset:136 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v190, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v191, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v200, off, s[0:3], s33 offset:124 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v201, off, s[0:3], s33 offset:120 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v202, off, s[0:3], s33 offset:116 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v203, off, s[0:3], s33 offset:112 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v204, off, s[0:3], s33 offset:108 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v205, off, s[0:3], s33 offset:104 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v206, off, s[0:3], s33 offset:100 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v207, off, s[0:3], s33 offset:96 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v216, off, s[0:3], s33 offset:92 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v217, off, s[0:3], s33 offset:88 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v218, off, s[0:3], s33 offset:84 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v219, off, s[0:3], s33 offset:80 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v220, off, s[0:3], s33 offset:76 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v221, off, s[0:3], s33 offset:72 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v222, off, s[0:3], s33 offset:68 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v223, off, s[0:3], s33 offset:64 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v232, off, s[0:3], s33 offset:60 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v233, off, s[0:3], s33 offset:56 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v234, off, s[0:3], s33 offset:52 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v235, off, s[0:3], s33 offset:48 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v236, off, s[0:3], s33 offset:44 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v237, off, s[0:3], s33 offset:40 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v238, off, s[0:3], s33 offset:36 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v239, off, s[0:3], s33 offset:32 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v248, off, s[0:3], s33 offset:28 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v249, off, s[0:3], s33 offset:24 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v250, off, s[0:3], s33 offset:20 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v251, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v252, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v253, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v254, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v255, off, s[0:3], s33 ; 4-byte Folded Spill +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber nonpreserved SGPRs +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber all VGPRs +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: s_clause 0x3e +; WAVE32-NEXT: buffer_load_dword v255, off, s[0:3], s33 +; WAVE32-NEXT: buffer_load_dword v254, off, s[0:3], s33 offset:4 +; WAVE32-NEXT: buffer_load_dword v253, off, s[0:3], s33 offset:8 +; WAVE32-NEXT: buffer_load_dword v252, off, s[0:3], s33 offset:12 +; WAVE32-NEXT: buffer_load_dword v251, off, s[0:3], s33 offset:16 +; WAVE32-NEXT: buffer_load_dword v250, off, s[0:3], s33 offset:20 +; WAVE32-NEXT: buffer_load_dword v249, off, s[0:3], s33 offset:24 +; WAVE32-NEXT: buffer_load_dword v248, off, s[0:3], s33 offset:28 +; WAVE32-NEXT: buffer_load_dword v239, off, s[0:3], s33 offset:32 +; WAVE32-NEXT: buffer_load_dword v238, off, s[0:3], s33 offset:36 +; WAVE32-NEXT: buffer_load_dword v237, off, s[0:3], s33 offset:40 +; WAVE32-NEXT: buffer_load_dword v236, off, s[0:3], s33 offset:44 +; WAVE32-NEXT: buffer_load_dword v235, off, s[0:3], s33 offset:48 +; WAVE32-NEXT: buffer_load_dword v234, off, s[0:3], s33 offset:52 +; WAVE32-NEXT: buffer_load_dword v233, off, s[0:3], s33 offset:56 +; WAVE32-NEXT: buffer_load_dword v232, off, s[0:3], s33 offset:60 +; WAVE32-NEXT: buffer_load_dword v223, off, s[0:3], s33 offset:64 +; WAVE32-NEXT: buffer_load_dword v222, off, s[0:3], s33 offset:68 +; WAVE32-NEXT: buffer_load_dword v221, off, s[0:3], s33 offset:72 +; WAVE32-NEXT: buffer_load_dword v220, off, s[0:3], s33 offset:76 +; WAVE32-NEXT: buffer_load_dword v219, off, s[0:3], s33 offset:80 +; WAVE32-NEXT: buffer_load_dword v218, off, s[0:3], s33 offset:84 +; WAVE32-NEXT: buffer_load_dword v217, off, s[0:3], s33 offset:88 +; WAVE32-NEXT: buffer_load_dword v216, off, s[0:3], s33 offset:92 +; WAVE32-NEXT: buffer_load_dword v207, off, s[0:3], s33 offset:96 +; WAVE32-NEXT: buffer_load_dword v206, off, s[0:3], s33 offset:100 +; WAVE32-NEXT: buffer_load_dword v205, off, s[0:3], s33 offset:104 +; WAVE32-NEXT: buffer_load_dword v204, off, s[0:3], s33 offset:108 +; WAVE32-NEXT: buffer_load_dword v203, off, s[0:3], s33 offset:112 +; WAVE32-NEXT: buffer_load_dword v202, off, s[0:3], s33 offset:116 +; WAVE32-NEXT: buffer_load_dword v201, off, s[0:3], s33 offset:120 +; WAVE32-NEXT: buffer_load_dword v200, off, s[0:3], s33 offset:124 +; WAVE32-NEXT: buffer_load_dword v191, off, s[0:3], s33 offset:128 +; WAVE32-NEXT: buffer_load_dword v190, off, s[0:3], s33 offset:132 +; WAVE32-NEXT: buffer_load_dword v189, off, s[0:3], s33 offset:136 +; WAVE32-NEXT: buffer_load_dword v188, off, s[0:3], s33 offset:140 +; WAVE32-NEXT: buffer_load_dword v187, off, s[0:3], s33 offset:144 +; WAVE32-NEXT: buffer_load_dword v186, off, s[0:3], s33 offset:148 +; WAVE32-NEXT: buffer_load_dword v185, off, s[0:3], s33 offset:152 +; WAVE32-NEXT: buffer_load_dword v184, off, s[0:3], s33 offset:156 +; WAVE32-NEXT: buffer_load_dword v175, off, s[0:3], s33 offset:160 +; WAVE32-NEXT: buffer_load_dword v174, off, s[0:3], s33 offset:164 +; WAVE32-NEXT: buffer_load_dword v173, off, s[0:3], s33 offset:168 +; WAVE32-NEXT: buffer_load_dword v172, off, s[0:3], s33 offset:172 +; WAVE32-NEXT: buffer_load_dword v171, off, s[0:3], s33 offset:176 +; WAVE32-NEXT: buffer_load_dword v170, off, s[0:3], s33 offset:180 +; WAVE32-NEXT: buffer_load_dword v169, off, s[0:3], s33 offset:184 +; WAVE32-NEXT: buffer_load_dword v168, off, s[0:3], s33 offset:188 +; WAVE32-NEXT: buffer_load_dword v159, off, s[0:3], s33 offset:192 +; WAVE32-NEXT: buffer_load_dword v158, off, s[0:3], s33 offset:196 +; WAVE32-NEXT: buffer_load_dword v157, off, s[0:3], s33 offset:200 +; WAVE32-NEXT: buffer_load_dword v156, off, s[0:3], s33 offset:204 +; WAVE32-NEXT: buffer_load_dword v155, off, s[0:3], s33 offset:208 +; WAVE32-NEXT: buffer_load_dword v154, off, s[0:3], s33 offset:212 +; WAVE32-NEXT: buffer_load_dword v153, off, s[0:3], s33 offset:216 +; WAVE32-NEXT: buffer_load_dword v152, off, s[0:3], s33 offset:220 +; WAVE32-NEXT: buffer_load_dword v143, off, s[0:3], s33 offset:224 +; WAVE32-NEXT: buffer_load_dword v142, off, s[0:3], s33 offset:228 +; WAVE32-NEXT: buffer_load_dword v141, off, s[0:3], s33 offset:232 +; WAVE32-NEXT: buffer_load_dword v140, off, s[0:3], s33 offset:236 +; WAVE32-NEXT: buffer_load_dword v139, off, s[0:3], s33 offset:240 +; WAVE32-NEXT: buffer_load_dword v138, off, s[0:3], s33 offset:244 +; WAVE32-NEXT: buffer_load_dword v137, off, s[0:3], s33 offset:248 +; WAVE32-NEXT: s_clause 0x30 +; WAVE32-NEXT: buffer_load_dword v136, off, s[0:3], s33 offset:252 +; WAVE32-NEXT: buffer_load_dword v127, off, s[0:3], s33 offset:256 +; WAVE32-NEXT: buffer_load_dword v126, off, s[0:3], s33 offset:260 +; WAVE32-NEXT: buffer_load_dword v125, off, s[0:3], s33 offset:264 +; WAVE32-NEXT: buffer_load_dword v124, off, s[0:3], s33 offset:268 +; WAVE32-NEXT: buffer_load_dword v123, off, s[0:3], s33 offset:272 +; WAVE32-NEXT: buffer_load_dword v122, off, s[0:3], s33 offset:276 +; WAVE32-NEXT: buffer_load_dword v121, off, s[0:3], s33 offset:280 +; WAVE32-NEXT: buffer_load_dword v120, off, s[0:3], s33 offset:284 +; WAVE32-NEXT: buffer_load_dword v111, off, s[0:3], s33 offset:288 +; WAVE32-NEXT: buffer_load_dword v110, off, s[0:3], s33 offset:292 +; WAVE32-NEXT: buffer_load_dword v109, off, s[0:3], s33 offset:296 +; WAVE32-NEXT: buffer_load_dword v108, off, s[0:3], s33 offset:300 +; WAVE32-NEXT: buffer_load_dword v107, off, s[0:3], s33 offset:304 +; WAVE32-NEXT: buffer_load_dword v106, off, s[0:3], s33 offset:308 +; WAVE32-NEXT: buffer_load_dword v105, off, s[0:3], s33 offset:312 +; WAVE32-NEXT: buffer_load_dword v104, off, s[0:3], s33 offset:316 +; WAVE32-NEXT: buffer_load_dword v95, off, s[0:3], s33 offset:320 +; WAVE32-NEXT: buffer_load_dword v94, off, s[0:3], s33 offset:324 +; WAVE32-NEXT: buffer_load_dword v93, off, s[0:3], s33 offset:328 +; WAVE32-NEXT: buffer_load_dword v92, off, s[0:3], s33 offset:332 +; WAVE32-NEXT: buffer_load_dword v91, off, s[0:3], s33 offset:336 +; WAVE32-NEXT: buffer_load_dword v90, off, s[0:3], s33 offset:340 +; WAVE32-NEXT: buffer_load_dword v89, off, s[0:3], s33 offset:344 +; WAVE32-NEXT: buffer_load_dword v88, off, s[0:3], s33 offset:348 +; WAVE32-NEXT: buffer_load_dword v79, off, s[0:3], s33 offset:352 +; WAVE32-NEXT: buffer_load_dword v78, off, s[0:3], s33 offset:356 +; WAVE32-NEXT: buffer_load_dword v77, off, s[0:3], s33 offset:360 +; WAVE32-NEXT: buffer_load_dword v76, off, s[0:3], s33 offset:364 +; WAVE32-NEXT: buffer_load_dword v75, off, s[0:3], s33 offset:368 +; WAVE32-NEXT: buffer_load_dword v74, off, s[0:3], s33 offset:372 +; WAVE32-NEXT: buffer_load_dword v73, off, s[0:3], s33 offset:376 +; WAVE32-NEXT: buffer_load_dword v72, off, s[0:3], s33 offset:380 +; WAVE32-NEXT: buffer_load_dword v63, off, s[0:3], s33 offset:384 +; WAVE32-NEXT: buffer_load_dword v62, off, s[0:3], s33 offset:388 +; WAVE32-NEXT: buffer_load_dword v61, off, s[0:3], s33 offset:392 +; WAVE32-NEXT: buffer_load_dword v60, off, s[0:3], s33 offset:396 +; WAVE32-NEXT: buffer_load_dword v59, off, s[0:3], s33 offset:400 +; WAVE32-NEXT: buffer_load_dword v58, off, s[0:3], s33 offset:404 +; WAVE32-NEXT: buffer_load_dword v57, off, s[0:3], s33 offset:408 +; WAVE32-NEXT: buffer_load_dword v56, off, s[0:3], s33 offset:412 +; WAVE32-NEXT: buffer_load_dword v47, off, s[0:3], s33 offset:416 +; WAVE32-NEXT: buffer_load_dword v46, off, s[0:3], s33 offset:420 +; WAVE32-NEXT: buffer_load_dword v45, off, s[0:3], s33 offset:424 +; WAVE32-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:428 +; WAVE32-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:432 +; WAVE32-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:436 +; WAVE32-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:440 +; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:444 +; WAVE32-NEXT: s_addk_i32 s32, 0x3880 +; WAVE32-NEXT: s_mov_b32 s32, s33 +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 s33, s40 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_setpc_b64 s[30:31] + call void asm sideeffect "; clobber nonpreserved SGPRs", + "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9} + ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19} + ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29} + ,~{vcc}"() + + call void asm sideeffect "; clobber all VGPRs", + "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9} + ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19} + ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29} + ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39} + ,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49} + ,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59} + ,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69} + ,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79} + ,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89} + ,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99} + ,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109} + ,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119} + ,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129} + ,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139} + ,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149} + ,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159} + ,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169} + ,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179} + ,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189} + ,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199} + ,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209} + ,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219} + ,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229} + ,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239} + ,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249} + ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}"() + ret void +} + +declare hidden void @ex() #0 + +define hidden void @func_call_clobber() #0 { +; GFX900-LABEL: func_call_clobber: +; GFX900: .Lfunc_begin3: +; GFX900-NEXT: .cfi_startproc +; GFX900-NEXT: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: s_mov_b32 s16, s33 +; GFX900-NEXT: s_mov_b32 s33, s32 +; GFX900-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX900-NEXT: s_mov_b64 exec, s[18:19] +; GFX900-NEXT: v_writelane_b32 v40, s16, 2 +; GFX900-NEXT: s_addk_i32 s32, 0x400 +; GFX900-NEXT: v_writelane_b32 v40, s30, 0 +; GFX900-NEXT: s_getpc_b64 s[16:17] +; GFX900-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX900-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX900-NEXT: v_writelane_b32 v40, s31, 1 +; GFX900-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX900-NEXT: v_readlane_b32 s31, v40, 1 +; GFX900-NEXT: v_readlane_b32 s30, v40, 0 +; GFX900-NEXT: s_mov_b32 s32, s33 +; GFX900-NEXT: v_readlane_b32 s4, v40, 2 +; GFX900-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX900-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX900-NEXT: s_mov_b64 exec, s[6:7] +; GFX900-NEXT: s_mov_b32 s33, s4 +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-DIS-LABEL: func_call_clobber: +; GFX90A-V2A-DIS: .Lfunc_begin3: +; GFX90A-V2A-DIS-NEXT: .cfi_startproc +; GFX90A-V2A-DIS-NEXT: ; %bb.0: ; %entry +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s16, s33 +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, s32 +; GFX90A-V2A-DIS-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: s_mov_b64 exec, s[18:19] +; GFX90A-V2A-DIS-NEXT: v_writelane_b32 v40, s16, 2 +; GFX90A-V2A-DIS-NEXT: s_addk_i32 s32, 0x400 +; GFX90A-V2A-DIS-NEXT: v_writelane_b32 v40, s30, 0 +; GFX90A-V2A-DIS-NEXT: s_getpc_b64 s[16:17] +; GFX90A-V2A-DIS-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX90A-V2A-DIS-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX90A-V2A-DIS-NEXT: v_writelane_b32 v40, s31, 1 +; GFX90A-V2A-DIS-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX90A-V2A-DIS-NEXT: v_readlane_b32 s31, v40, 1 +; GFX90A-V2A-DIS-NEXT: v_readlane_b32 s30, v40, 0 +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s32, s33 +; GFX90A-V2A-DIS-NEXT: v_readlane_b32 s4, v40, 2 +; GFX90A-V2A-DIS-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: s_mov_b64 exec, s[6:7] +; GFX90A-V2A-DIS-NEXT: s_mov_b32 s33, s4 +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-EN-LABEL: func_call_clobber: +; GFX90A-V2A-EN: .Lfunc_begin3: +; GFX90A-V2A-EN-NEXT: .cfi_startproc +; GFX90A-V2A-EN-NEXT: ; %bb.0: ; %entry +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-EN-NEXT: s_mov_b32 s16, s33 +; GFX90A-V2A-EN-NEXT: s_mov_b32 s33, s32 +; GFX90A-V2A-EN-NEXT: s_or_saveexec_b64 s[18:19], -1 +; GFX90A-V2A-EN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX90A-V2A-EN-NEXT: s_mov_b64 exec, s[18:19] +; GFX90A-V2A-EN-NEXT: v_writelane_b32 v40, s16, 2 +; GFX90A-V2A-EN-NEXT: s_addk_i32 s32, 0x400 +; GFX90A-V2A-EN-NEXT: v_writelane_b32 v40, s30, 0 +; GFX90A-V2A-EN-NEXT: s_getpc_b64 s[16:17] +; GFX90A-V2A-EN-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; GFX90A-V2A-EN-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; GFX90A-V2A-EN-NEXT: v_writelane_b32 v40, s31, 1 +; GFX90A-V2A-EN-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GFX90A-V2A-EN-NEXT: v_readlane_b32 s31, v40, 1 +; GFX90A-V2A-EN-NEXT: v_readlane_b32 s30, v40, 0 +; GFX90A-V2A-EN-NEXT: s_mov_b32 s32, s33 +; GFX90A-V2A-EN-NEXT: v_readlane_b32 s4, v40, 2 +; GFX90A-V2A-EN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX90A-V2A-EN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; GFX90A-V2A-EN-NEXT: s_mov_b64 exec, s[6:7] +; GFX90A-V2A-EN-NEXT: s_mov_b32 s33, s4 +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-EN-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: func_call_clobber: +; WAVE32: .Lfunc_begin3: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: ; %entry +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: s_mov_b32 s16, s33 +; WAVE32-NEXT: s_mov_b32 s33, s32 +; WAVE32-NEXT: s_or_saveexec_b32 s17, -1 +; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, s17 +; WAVE32-NEXT: v_writelane_b32 v40, s16, 2 +; WAVE32-NEXT: s_addk_i32 s32, 0x200 +; WAVE32-NEXT: s_getpc_b64 s[16:17] +; WAVE32-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 +; WAVE32-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 +; WAVE32-NEXT: v_writelane_b32 v40, s30, 0 +; WAVE32-NEXT: v_writelane_b32 v40, s31, 1 +; WAVE32-NEXT: s_swappc_b64 s[30:31], s[16:17] +; WAVE32-NEXT: v_readlane_b32 s31, v40, 1 +; WAVE32-NEXT: v_readlane_b32 s30, v40, 0 +; WAVE32-NEXT: s_mov_b32 s32, s33 +; WAVE32-NEXT: v_readlane_b32 s4, v40, 2 +; WAVE32-NEXT: s_or_saveexec_b32 s5, -1 +; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload +; WAVE32-NEXT: s_waitcnt_depctr 0xffe3 +; WAVE32-NEXT: s_mov_b32 exec_lo, s5 +; WAVE32-NEXT: s_mov_b32 s33, s4 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_setpc_b64 s[30:31] +entry: + call void @ex() #0 + ret void +} + +define hidden void @func_spill_vgpr_to_vmem() #0 { +; GFX900-LABEL: func_spill_vgpr_to_vmem: +; GFX900: .Lfunc_begin4: +; GFX900-NEXT: .cfi_startproc +; GFX900-NEXT: ; %bb.0: ; %entry +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: buffer_load_dword v41, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-DIS-LABEL: func_spill_vgpr_to_vmem: +; GFX90A-V2A-DIS: .Lfunc_begin4: +; GFX90A-V2A-DIS-NEXT: .cfi_startproc +; GFX90A-V2A-DIS-NEXT: ; %bb.0: ; %entry +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword a32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword a33, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: buffer_load_dword a33, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword a32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-EN-LABEL: func_spill_vgpr_to_vmem: +; GFX90A-V2A-EN: .Lfunc_begin4: +; GFX90A-V2A-EN-NEXT: .cfi_startproc +; GFX90A-V2A-EN-NEXT: ; %bb.0: ; %entry +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v0, a32 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v1, a33 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a33, v1 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a32, v0 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: func_spill_vgpr_to_vmem: +; WAVE32: .Lfunc_begin4: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: ; %entry +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: s_clause 0x1 +; WAVE32-NEXT: buffer_load_dword v41, off, s[0:3], s32 +; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_setpc_b64 s[30:31] +entry: + call void asm sideeffect "; clobber", "~{v40}"() #0 + call void asm sideeffect "; clobber", "~{v41}"() #0 + call void asm sideeffect "; clobber", "~{a32}"() #0 + call void asm sideeffect "; clobber", "~{a33}"() #0 + ret void +} + +define hidden void @func_spill_vgpr_to_agpr() #2 { +; GFX900-LABEL: func_spill_vgpr_to_agpr: +; GFX900: .Lfunc_begin5: +; GFX900-NEXT: .cfi_startproc +; GFX900-NEXT: ; %bb.0: +; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX900-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX900-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: ;;#ASMSTART +; GFX900-NEXT: ; clobber +; GFX900-NEXT: ;;#ASMEND +; GFX900-NEXT: buffer_load_dword v41, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX900-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX900-NEXT: s_waitcnt vmcnt(0) +; GFX900-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-DIS-LABEL: func_spill_vgpr_to_agpr: +; GFX90A-V2A-DIS: .Lfunc_begin5: +; GFX90A-V2A-DIS-NEXT: .cfi_startproc +; GFX90A-V2A-DIS-NEXT: ; %bb.0: +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword a32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: buffer_store_dword a33, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: ;;#ASMSTART +; GFX90A-V2A-DIS-NEXT: ; clobber +; GFX90A-V2A-DIS-NEXT: ;;#ASMEND +; GFX90A-V2A-DIS-NEXT: buffer_load_dword a33, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword a32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; GFX90A-V2A-DIS-NEXT: s_waitcnt vmcnt(0) +; GFX90A-V2A-DIS-NEXT: s_setpc_b64 s[30:31] +; +; GFX90A-V2A-EN-LABEL: func_spill_vgpr_to_agpr: +; GFX90A-V2A-EN: .Lfunc_begin5: +; GFX90A-V2A-EN-NEXT: .cfi_startproc +; GFX90A-V2A-EN-NEXT: ; %bb.0: +; GFX90A-V2A-EN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a0, v40 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a1, v41 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v0, a32 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v1, a33 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: ;;#ASMSTART +; GFX90A-V2A-EN-NEXT: ; clobber +; GFX90A-V2A-EN-NEXT: ;;#ASMEND +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a33, v1 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_write_b32 a32, v0 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v41, a1 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: v_accvgpr_read_b32 v40, a0 ; Reload Reuse +; GFX90A-V2A-EN-NEXT: s_setpc_b64 s[30:31] +; +; WAVE32-LABEL: func_spill_vgpr_to_agpr: +; WAVE32: .Lfunc_begin5: +; WAVE32-NEXT: .cfi_startproc +; WAVE32-NEXT: ; %bb.0: +; WAVE32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; WAVE32-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; WAVE32-NEXT: buffer_store_dword v41, off, s[0:3], s32 ; 4-byte Folded Spill +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: ;;#ASMSTART +; WAVE32-NEXT: ; clobber +; WAVE32-NEXT: ;;#ASMEND +; WAVE32-NEXT: s_clause 0x1 +; WAVE32-NEXT: buffer_load_dword v41, off, s[0:3], s32 +; WAVE32-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 +; WAVE32-NEXT: s_waitcnt vmcnt(0) +; WAVE32-NEXT: s_setpc_b64 s[30:31] + call void asm sideeffect "; clobber", "~{v40}"() + call void asm sideeffect "; clobber", "~{v41}"() + call void asm sideeffect "; clobber", "~{a32}"() + call void asm sideeffect "; clobber", "~{a33}"() + ret void +} + + +; NOTE: Number of VGPRs available to kernel, and in turn number of corresponding CFIs generated, +; is dependent on waves/WG size. Since the intent here is to check whether we generate the correct +; CFIs, doing it for any one set of details is sufficient which also makes the test insensitive to +; changes in those details. +attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="128,128" } +attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="128,128" "frame-pointer"="all" } +attributes #2 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug) +!1 = !DIFile(filename: "filename", directory: "directory") +!2 = !{i32 7, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir index 49a91e6f6f33b..bd5dc4b2af318 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-i32.mir @@ -9,6 +9,48 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s +--- | + define void @s_add_i32__inline_imm__fi_offset0() #0 { unreachable } + define void @s_add_i32__fi_offset0__inline_imm() #0 { unreachable } + define void @s_add_i32__inline_imm___fi_offset_inline_imm() #0 { unreachable } + define void @s_add_i32__literal__fi_offset0() #0 { unreachable } + define void @s_add_i32__fi_offset0__literal() #0 { unreachable } + define void @s_add_i32__literal__fi_offset96() #0 { unreachable } + define void @s_add_i32____fi_offset96__literal() #0 { unreachable } + define void @s_add_i32__sgpr__fi_offset0() #0 { unreachable } + define void @s_add_i32__fi_offset0__sgpr() #0 { unreachable } + define void @s_add_i32__sgpr__fi_literal_offset() #0 { unreachable } + define void @s_add_i32__fi_literal_offset__sgpr() #0 { unreachable } + define void @s_add_i32__kernel__literal__fi_offset96__offset_literal() #0 { unreachable } + define void @s_add_i32__kernel__literal__fi_offset96__offset_literal_live_scc() #0 { unreachable } + define void @s_add_i32__kernel__fi_offset96__offset_literal__literal() #0 { unreachable } + define void @s_add_i32__kernel__sgpr__fi_literal_offset() #0 { unreachable } + define void @s_add_i32__kernel__fi_literal_offset__sgpr() #0 { unreachable } + define void @s_add_i32__kernel__sgpr__fi_offset0__live_scc() #0 { unreachable } + define void @s_add_i32__sgpr__fi_offset0__live_scc() #0 { unreachable } + define void @s_add_i32__kernel__sgpr__fi_literal_offset__live_scc() #0 { unreachable } + define void @s_add_i32__sgpr__fi_literal_offset__live_scc() #0 { unreachable } + define void @s_add_i32__inlineimm__fi_offset_32__total_offset_inlineimm() #0 { unreachable } + define void @s_add_i32__fi_offset_32__inlineimm__total_offset_inlineimm() #0 { unreachable } + define void @s_add_i32__kernel_inlineimm__fi_offset_32__total_offset_inlineimm() #0 { unreachable } + define void @s_add_i32__kernel_fi_offset_32__inlineimm__total_offset_inlineimm() #0 { unreachable } + define void @s_add_i32__0__fi_offset0() #0 { unreachable } + define void @s_add_i32__fi_offset0__0() #0 { unreachable } + define void @s_add_i32__same_sgpr__fi_offset0() #0 { unreachable } + define void @s_add_i32__different_sgpr__fi_offset0() #0 { unreachable } + define void @s_add_i32__different_sgpr__fi_offset0_live_after() #0 { unreachable } + define void @s_add_i32__identity_sgpr__fi_offset0__kernel() #0 { unreachable } + define void @s_add_i32__fi_offset0__identity_sgpr__kernel() #0 { unreachable } + define void @s_add_i32__identity_sgpr__fi_offset32__kernel() #0 { unreachable } + define void @s_add_i32__fi_offset32__identity_sgpr__kernel() #0 { unreachable } + define void @s_add_i32__identity_sgpr__fi_offset0() #0 { unreachable } + define void @s_add_i32__fi_offset32__identity_sgpr() #0 { unreachable } + define void @s_add_i32_use_dst_reg_as_temp_regression() #0 { unreachable } + define void @s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_zero() #0 { unreachable } + define void @s_add_i32_frame_index_elimination_no_sgpr_dead_scc_offset_non_zero() #0 { unreachable } + attributes #0 = { nounwind } +... +--- --- name: s_add_i32__inline_imm__fi_offset0 tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir index af61bd70f16b6..716e99e977d32 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-add-u32.mir @@ -9,6 +9,13 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s +--- | + define void @s_add_u32__inline_imm__fi_offset0() #0 { unreachable } + define void @s_add_u32__kernel__literal__fi_offset96__offset_literal() #0 { unreachable } + define void @s_add_u32__kernel__literal__fi_offset96__offset_literal_live_scc() #0 { unreachable } + attributes #0 = { nounwind } +... +--- --- name: s_add_u32__inline_imm__fi_offset0 tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir index 348743644ce4f..5e1f153486549 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32-wave32.mir @@ -4,6 +4,26 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+wavefrontsize32 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=FLATSCRW32 %s +--- | + define void @v_add_co_u32_e64__inline_imm__fi_offset0() #0 { unreachable } + define void @v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc() #0 { unreachable } + define void @v_add_co_u32_e64__inline_imm__fi_offset0__clamp() #0 { unreachable } + define void @v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc_clamp() #0 { unreachable } + define void @v_add_co_u32_e64__fi_literal_offset__sgpr() #0 { unreachable } + define void @v_add_co_u32_e64__fi_literal_offset__sgpr_clamp() #0 { unreachable } + define void @v_add_co_u32_e64__fi_literal_offset__vgpr() #0 { unreachable } + define void @v_add_co_u32_e64__fi_literal_offset__vgpr__clamp() #0 { unreachable } + define void @v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc() #0 { unreachable } + define void @v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp() #0 { unreachable } + define void @v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required() #0 { unreachable } + define void @v_add_co_u32_e64__kernel_fi_offset0__other_vgpr_live_after() #0 { unreachable } + define void @v_add_co_u32_e64__kernel__other_vgpr_live_after__fi_offset0() #0 { unreachable } + define void @v_add_co_u32_e64__identity_vgpr__fi_offset0__kernel() #0 { unreachable } + define void @v_add_co_u32_e64__fi_offset0__identity_vgpr__kernel() #0 { unreachable } + define void @v_add_co_u32_e64__identity_vgpr__fi_offset32__kernel() #0 { unreachable } + attributes #0 = { nounwind } +... +--- --- name: v_add_co_u32_e64__inline_imm__fi_offset0 tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir index ade7b4266e9e6..7867e09efc3ef 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir @@ -10,6 +10,63 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=FLATSCRW64,GFX11 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefixes=FLATSCRW64,GFX12 %s +--- | + define void @v_add_co_u32_e32__inline_imm__fi_offset0() #0 { unreachable } + define void @v_add_co_u32_e32__inline_imm__fi_offset0_live_vcc() #0 { unreachable } + define void @v_add_co_u32_e32__inline_imm___fi_offset_inline_imm() #0 { unreachable } + define void @v_add_co_u32_e32__inline_imm___fi_offset_inline_imm_live_vcc() #0 { unreachable } + define void @v_add_co_u32_e32__literal__fi_offset0() #0 { unreachable } + define void @v_add_co_u32_e32__literal__fi_offset0_live_vcc() #0 { unreachable } + define void @v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm() #0 { unreachable } + define void @v_add_co_u32_e32__literal__fi_offset0__offset_inlineimm_live_vcc() #0 { unreachable } + define void @v_add_co_u32_e32__vgpr__fi_offset0() #0 { unreachable } + define void @v_add_co_u32_e32__fi_offset0__vgpr() #0 { unreachable } + define void @v_add_co_u32_e32__vgpr__fi_literal_offset() #0 { unreachable } + define void @v_add_co_u32_e32__fi_literal_offset__vgpr() #0 { unreachable } + define void @v_add_co_u32_e32__sgpr__fi_literal_offset() #0 { unreachable } + define void @v_add_co_u32_e64__inline_imm__fi_offset0() #0 { unreachable } + define void @v_add_co_u32_e64__inline_imm__fi_offset0__clamp() #0 { unreachable } + define void @v_add_co_u32_e64__inline_imm__fi_offset0__live_vcc_clamp() #0 { unreachable } + define void @v_add_co_u32_e64__fi_literal_offset__sgpr() #0 { unreachable } + define void @v_add_co_u32_e64__fi_literal_offset__sgpr_clamp() #0 { unreachable } + define void @v_add_co_u32_e64__fi_literal_offset__vgpr() #0 { unreachable } + define void @v_add_co_u32_e64__fi_literal_offset__vgpr__clamp() #0 { unreachable } + define void @v_add_co_u32_e64__fi_literal_offset__vgpr__live_vcc() #0 { unreachable } + define void @v_add_co_u32_e32__inline_imm__fi_offset0__kernel() #0 { unreachable } + define void @v_add_co_u32_e32__inline_imm__fi_offset0__kernel__live_vcc() #0 { unreachable } + define void @v_add_co_u32_e32__inline_imm__fi_offset_literal__kernel() #0 { unreachable } + define void @v_add_co_u32_e64__inline_imm__fi_offset_literal__kernel__clamp() #0 { unreachable } + define void @v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required() #0 { unreachable } + define void @v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required() #0 { unreachable } + define void @v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required() #0 { unreachable } + define void @v_add_co_u32_e32__kernel_fi_offset0__other_vgpr_live_after() #0 { unreachable } + define void @v_add_co_u32_e64__kernel_fi_offset0__other_vgpr_live_after() #0 { unreachable } + define void @v_add_co_u32_e64__kernel__other_vgpr_live_after__fi_offset0() #0 { unreachable } + define void @v_add_co_u32_e32__identity_vgpr__fi_offset0__kernel() #0 { unreachable } + define void @v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel() #0 { unreachable } + define void @v_add_co_u32_e64__identity_vgpr__fi_offset0__kernel() #0 { unreachable } + define void @v_add_co_u32_e64__fi_offset0__identity_vgpr__kernel() #0 { unreachable } + define void @v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel_kill() #0 { unreachable } + define void @v_add_co_u32_e32__fi_offset0__identity_vgpr__kernel_live_vcc() #0 { unreachable } + define void @v_add_co_u32_e32__identity_vgpr__fi_offset32__kernel() #0 { unreachable } + define void @v_add_co_u32_e32__identity_vgpr__fi_offset72__kernel() #0 { unreachable } + define void @v_add_co_u32_e32__fi_offset72__identity_vgpr__kernel() #0 { unreachable } + define void @v_add_co_u32_e32__fi_offset32__identity_vgpr__kernel() #0 { unreachable } + define void @v_add_co_u32_e64__identity_vgpr__fi_offset32__kernel() #0 { unreachable } + define void @v_add_co_u32_e64__fi_sgpr_kernel() #0 { unreachable } + define void @v_add_co_u32_e64__fi_sgpr_func() #0 { unreachable } + define void @v_add_co_u32_e64__fi_inc_same_vgpr_kernel() #0 { unreachable } + define void @v_add_co_u32_e64__fi_inc_same_vgpr_func() #0 { unreachable } + define void @v_add_co_u32_e64__fi_sgpr_kernel_live_co() #0 { unreachable } + define void @v_add_co_u32_e64_fi_sgpr_clobbered_register() #0 { unreachable } + define void @v_add_co_u32_e64_sgpr_fi_clobbered_register() #0 { unreachable } + define void @v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc() #0 { unreachable } + define void @v_add_co_u32_e64_imm_fi_vop3_literal_error_non_vcc_live() #0 { unreachable } + define void @v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc() #0 { unreachable } + define void @v_add_co_u32_e64_imm_fi_vop3_literal_error_vcc_live() #0 { unreachable } + attributes #0 = { nounwind } +... +--- --- name: v_add_co_u32_e32__inline_imm__fi_offset0 tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir index 6a4671058dc0e..b1ebb298ac5c7 100644 --- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir +++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir @@ -6,6 +6,49 @@ # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs -run-pass=prologepilog %s -o - | FileCheck -check-prefix=FLATSCRW32 %s +--- | + define void @v_add_u32_e32__inline_imm__fi_offset0() #0 { unreachable } + define void @v_add_u32_e32__inline_imm___fi_offset_inline_imm() #0 { unreachable } + define void @v_add_u32_e32__literal__fi_offset0() #0 { unreachable } + define void @v_add_u32_e32__literal__fi_offset0__offset_inlineimm() #0 { unreachable } + define void @v_add_u32_e32__vgpr__fi_offset0() #0 { unreachable } + define void @v_add_u32_e32__fi_offset0__vgpr() #0 { unreachable } + define void @v_add_u32_e32__vgpr__fi_literal_offset() #0 { unreachable } + define void @v_add_u32_e32__fi_literal_offset__vgpr() #0 { unreachable } + define void @v_add_u32_e32__sgpr__fi_literal_offset() #0 { unreachable } + define void @v_add_u32_e64__inline_imm__fi_offset0() #0 { unreachable } + define void @v_add_u32_e64__fi_literal_offset__sgpr() #0 { unreachable } + define void @v_add_u32_e64__vgpr__fi_literal_offset() #0 { unreachable } + define void @v_add_u32_e64__vgpr__fi_literal_offset__clamp() #0 { unreachable } + define void @v_add_u32_e64__fi_literal_offset__vgpr__clamp() #0 { unreachable } + define void @v_add_u32_e64__fi_literal_offset__vgpr__clamp__kernel() #0 { unreachable } + define void @v_add_u32_e32__inline_imm__fi_offset0__kernel() #0 { unreachable } + define void @v_add_u32_e64__inline_imm__fi_offset0__kernel() #0 { unreachable } + define void @v_add_u32_e32__inline_imm__fi_literal__kernel() #0 { unreachable } + define void @v_add_u32_e64__inline_imm__fi_literal__kernel() #0 { unreachable } + define void @v_add_u32_e64__fi_literal__inline_imm__kernel() #0 { unreachable } + define void @v_add_u32_e64__inline_imm__fi_literal__kernel__clamp() #0 { unreachable } + define void @killed_reg_regression() #0 { unreachable } + define void @v_add_u32_e32__kernel_fi_offset0__other_vgpr_live_after() #0 { unreachable } + define void @v_add_u32_e32__kernel_other_vgpr_live_after__fi_offset0() #0 { unreachable } + define void @v_add_u32_e32__kernel_fi_offset0__sgpr_live_after() #0 { unreachable } + define void @v_add_u32_e64__kernel_fi_offset0__other_vgpr_live_after() #0 { unreachable } + define void @v_add_u32_e32__kernel_fi_offset72__sgpr_live_after() #0 { unreachable } + define void @v_add_u32_e64__kernel_fi_offset72__sgpr_live_after() #0 { unreachable } + define void @v_add_u32_e32__identity_vgpr__fi_offset0__kernel() #0 { unreachable } + define void @v_add_u32_e32__fi_offset0__identity_vgpr__kernel() #0 { unreachable } + define void @v_add_u32_e64__identity_vgpr__fi_offset0__kernel() #0 { unreachable } + define void @v_add_u32_e64__fi_offset0__identity_vgpr__kernel() #0 { unreachable } + define void @v_add_u32_e32__fi_offset0__identity_vgpr__kernel_kill() #0 { unreachable } + define void @v_add_u32_e32__identity_vgpr__fi_offset32__kernel() #0 { unreachable } + define void @v_add_u32_e32__identity_vgpr__fi_offset72__kernel() #0 { unreachable } + define void @v_add_u32_e32__fi_offset72__identity_vgpr__kernel() #0 { unreachable } + define void @v_add_u32_e32__fi_offset32__identity_vgpr__kernel() #0 { unreachable } + define void @v_add_u32_e64__identity_vgpr__fi_offset32__kernel() #0 { unreachable } + define void @v_add_u32_e64_imm_fi_vop3_literal_error() #0 { unreachable } + attributes #0 = { nounwind } +... +--- --- name: v_add_u32_e32__inline_imm__fi_offset0 tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/entry-function-cfi.mir b/llvm/test/CodeGen/AMDGPU/entry-function-cfi.mir new file mode 100644 index 0000000000000..dd2503502211f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/entry-function-cfi.mir @@ -0,0 +1,34 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=prologepilog -o - %s | FileCheck %s + +--- | + + define protected amdgpu_kernel void @kern1() { + entry: + ret void + } +... +--- +name: kern1 +alignment: 1 +tracksRegLiveness: true +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + isEntryFunction: true + scratchRSrcReg: '$sgpr100_sgpr101_sgpr102_sgpr103' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + workGroupIDX: { reg: '$sgpr0' } + privateSegmentWaveByteOffset: { reg: '$sgpr1' } + workItemIDX: { reg: '$vgpr0' } +body: | + bb.0: + ; CHECK-LABEL: name: kern1 + ; CHECK: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg + ; CHECK-NEXT: S_ENDPGM 0 + S_ENDPGM 0 + +... diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination-tied-operand.mir b/llvm/test/CodeGen/AMDGPU/frame-index-elimination-tied-operand.mir index 17ec6f5b37241..b4a9e3a16136a 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination-tied-operand.mir +++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination-tied-operand.mir @@ -1,7 +1,15 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=prologepilog -o - %s | FileCheck -check-prefix=GFX11 %s + +--- | + define void @tied_operand_test() #0 { + entry: + unreachable + } + attributes #0 = { nounwind } ... --- +--- name: tied_operand_test tracksRegLiveness: true stack: diff --git a/llvm/test/CodeGen/AMDGPU/inflate-av-remat-imm.mir b/llvm/test/CodeGen/AMDGPU/inflate-av-remat-imm.mir index 4d8fb8db624f8..7cdb7c4ff1a9c 100644 --- a/llvm/test/CodeGen/AMDGPU/inflate-av-remat-imm.mir +++ b/llvm/test/CodeGen/AMDGPU/inflate-av-remat-imm.mir @@ -4,6 +4,13 @@ # Compare results of using V_MOV_B32 vs. AV_MOV_B32_IMM_PSEUDO during # allocation. +--- | + define void @av_mov_b32_split() #0 { unreachable } + define void @v_mov_b32_split() #0 { unreachable } + define void @av_mov_b64_split() #0 { unreachable } + attributes #0 = { nounwind } +... +--- --- name: av_mov_b32_split tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir b/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir index 786ce40203836..ce1e8efb06435 100644 --- a/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir +++ b/llvm/test/CodeGen/AMDGPU/issue98474-assigned-physreg-interference.mir @@ -1,6 +1,11 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 # RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -start-before=greedy,2 -stop-after=tailduplication -verify-machineinstrs -o - %s | FileCheck %s +--- | + define void @undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2_assigned_physreg_interference() #0 { unreachable } + attributes #0 = { nounwind } +... +--- --- name: undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2_assigned_physreg_interference tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir index 86b6c5982b4cb..eeb0c56fbb8e7 100644 --- a/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir +++ b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir @@ -6,6 +6,16 @@ # the physical subregister. Without this, a verifier error would # appear after tail duplication. +--- | + define void @undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1() #0 { unreachable } + define void @undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub1_sub2() #0 { unreachable } + define void @undef_subreg_def_live_out_tailduplicate_vreg96_undef_sub0_sub2() #0 { unreachable } + define void @undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_undef_use_in_def_block() #0 { unreachable } + define void @undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1_no_phi_use() #0 { unreachable } + define void @assigned_physreg_subregister_interference() #0 { unreachable } + attributes #0 = { nounwind } +... +--- --- name: undef_subreg_def_live_out_tailduplicate_vreg64_undef_sub1 tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir b/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir index 7a913cf50ea2b..94ba958e0daf0 100644 --- a/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir +++ b/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir @@ -9,7 +9,7 @@ ret void } - attributes #0 = { "frame-pointer"="all"} + attributes #0 = { "frame-pointer"="all" nounwind } ... --- name: kernel_vgpr32_spill diff --git a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll index 4d23fb116cd03..294d8bbbeba63 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll @@ -22,6 +22,8 @@ entry: ; GCN-LABEL: {{^}}only_undef_dbg_value: ; NOOPT: ;DEBUG_VALUE: test_debug_value:globalptr_arg <- undef +; NOOPT-NEXT: .cfi_escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 ; CFA is 0 in private_wave aspace +; NOOPT-NEXT: .cfi_undefined 16 ; NOOPT-NEXT: s_endpgm ; OPT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir index aa4428f3da4eb..7882a725f2de1 100644 --- a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir +++ b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir @@ -6,6 +6,11 @@ # the current iterator in the scavenger, which was not yet set if the # spill was the first instruction in the block. +--- | + define void @scavenge_register_position() #0 { unreachable } + attributes #0 = { nounwind } +... +--- --- name: scavenge_register_position tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir b/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir index e4cbae66d47fa..fc56fbaf4152c 100644 --- a/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir +++ b/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir @@ -12,7 +12,7 @@ ret void } - attributes #0 = { "amdgpu-waves-per-eu"="7,7" } + attributes #0 = { "amdgpu-waves-per-eu"="7,7" nounwind } ... --- diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir index 520717391b596..2f6c628d290ea 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir @@ -59,6 +59,8 @@ body: | ; PEI: bb.0: ; PEI-NEXT: successors: %bb.1(0x80000000) ; PEI-NEXT: {{ $}} + ; PEI-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 + ; PEI-NEXT: frame-setup CFI_INSTRUCTION undefined $pc_reg ; PEI-NEXT: renamable $sgpr10 = IMPLICIT_DEF ; PEI-NEXT: $vgpr0 = IMPLICIT_DEF ; PEI-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, killed $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir index ba2e80fdc04c8..0369c970ff7d4 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir @@ -15,7 +15,7 @@ ret void } - attributes #0 = { "frame-pointer"="all" } + attributes #0 = { "frame-pointer"="all" nounwind } ... --- name: check_spill diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir index 639bf6a6d550c..2fd89dd1cd2fa 100644 --- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir +++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir @@ -8,7 +8,7 @@ ret void } - attributes #0 = { "frame-pointer"="all" } + attributes #0 = { "frame-pointer"="all" nounwind } ... --- name: check_vcc