diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 07d50df317b77..e4d70abd15f71 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -316,9 +316,6 @@ extern char &SIMemoryLegalizerID; void initializeSIModeRegisterPass(PassRegistry&); extern char &SIModeRegisterID; -void initializeAMDGPUReleaseVGPRsPass(PassRegistry &); -extern char &AMDGPUReleaseVGPRsID; - void initializeAMDGPUInsertDelayAluPass(PassRegistry &); extern char &AMDGPUInsertDelayAluID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp deleted file mode 100644 index b7521540c0205..0000000000000 --- a/llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp +++ /dev/null @@ -1,156 +0,0 @@ -//===- AMDGPUReleaseVGPRs.cpp - Automatically release vgprs on GFX11+ -----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// Insert S_SENDMSG instructions to release vgprs on GFX11+. 
-// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "AMDGPUSubtarget.h" -#include "GCNSubtarget.h" -#include "MCTargetDesc/AMDGPUMCTargetDesc.h" -#include "SIDefines.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineOperand.h" -#include <optional> -using namespace llvm; - -#define DEBUG_TYPE "release-vgprs" - -namespace { - -class AMDGPUReleaseVGPRs : public MachineFunctionPass { -public: - static char ID; - - AMDGPUReleaseVGPRs() : MachineFunctionPass(ID) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - // Track if the last instruction referencing a vgpr in a MBB is a VMEM - // store. Because this pass is late in the pipeline, it is expected that the - // last vgpr use will likely be one of vmem store, ds, exp. - // Loads and others vgpr operations would have been - // deleted by this point, except for complex control flow involving loops. - // This is why we are just testing the type of instructions rather - // than the operands. - class LastVGPRUseIsVMEMStore { - BitVector BlockVMEMStore; - - static std::optional<bool> - lastVGPRUseIsStore(const MachineBasicBlock &MBB) { - for (auto &MI : reverse(MBB.instrs())) { - // If it's a VMEM store, a VGPR will be used, return true. - if ((SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI)) && - MI.mayStore()) - return true; - - // If it's referencing a VGPR but is not a VMEM store, return false.
- if (SIInstrInfo::isDS(MI) || SIInstrInfo::isEXP(MI) || - SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI) || - SIInstrInfo::isVALU(MI)) - return false; - } - // Wait until the values are propagated from the predecessors - return std::nullopt; - } - - public: - LastVGPRUseIsVMEMStore(const MachineFunction &MF) - : BlockVMEMStore(MF.getNumBlockIDs()) { - - df_iterator_default_set<const MachineBasicBlock *> Visited; - SmallVector<const MachineBasicBlock *> EndWithVMEMStoreBlocks; - - for (const auto &MBB : MF) { - auto LastUseIsStore = lastVGPRUseIsStore(MBB); - if (!LastUseIsStore.has_value()) - continue; - - if (*LastUseIsStore) { - EndWithVMEMStoreBlocks.push_back(&MBB); - } else { - Visited.insert(&MBB); - } - } - - for (const auto *MBB : EndWithVMEMStoreBlocks) { - for (const auto *Succ : depth_first_ext(MBB, Visited)) { - BlockVMEMStore[Succ->getNumber()] = true; - } - } - } - - // Return true if the last instruction referencing a vgpr in this MBB - // is a VMEM store, otherwise return false. - bool isLastVGPRUseVMEMStore(const MachineBasicBlock &MBB) const { - return BlockVMEMStore[MBB.getNumber()]; - } - }; - - static bool - runOnMachineBasicBlock(MachineBasicBlock &MBB, const SIInstrInfo *SII, - const LastVGPRUseIsVMEMStore &BlockVMEMStore) { - - bool Changed = false; - - for (auto &MI : MBB.terminators()) { - // Look for S_ENDPGM instructions - if (MI.getOpcode() == AMDGPU::S_ENDPGM || - MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) { - // If the last instruction using a VGPR in the block is a VMEM store, - // release VGPRs.
The VGPRs release will be placed just before ending - // the program - if (BlockVMEMStore.isLastVGPRUseVMEMStore(MBB)) { - BuildMI(MBB, MI, DebugLoc(), SII->get(AMDGPU::S_SENDMSG)) - .addImm(AMDGPU::SendMsg::ID_DEALLOC_VGPRS_GFX11Plus); - Changed = true; - } - } - } - - return Changed; - } - - bool runOnMachineFunction(MachineFunction &MF) override { - Function &F = MF.getFunction(); - if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) - return false; - - // This pass only runs on GFX11+ - const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); - if (ST.getGeneration() < AMDGPUSubtarget::GFX11) - return false; - - LLVM_DEBUG(dbgs() << "AMDGPUReleaseVGPRs running on " << MF.getName() - << "\n"); - - const SIInstrInfo *SII = ST.getInstrInfo(); - LastVGPRUseIsVMEMStore BlockVMEMStore(MF); - - bool Changed = false; - for (auto &MBB : MF) { - Changed |= runOnMachineBasicBlock(MBB, SII, BlockVMEMStore); - } - - return Changed; - } -}; - -} // namespace - -char AMDGPUReleaseVGPRs::ID = 0; - -char &llvm::AMDGPUReleaseVGPRsID = AMDGPUReleaseVGPRs::ID; - -INITIALIZE_PASS(AMDGPUReleaseVGPRs, DEBUG_TYPE, "Release VGPRs", false, false) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index c3f7d753e1979..704f2e0b19bb9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -405,7 +405,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeAMDGPURewriteUndefForPHIPass(*PR); initializeAMDGPUUnifyMetadataPass(*PR); initializeSIAnnotateControlFlowPass(*PR); - initializeAMDGPUReleaseVGPRsPass(*PR); initializeAMDGPUInsertDelayAluPass(*PR); initializeSIInsertHardClausesPass(*PR); initializeSIInsertWaitcntsPass(*PR); @@ -1431,9 +1430,6 @@ void GCNPassConfig::addPreEmitPass() { // cases.
addPass(&PostRAHazardRecognizerID); - if (getOptLevel() > CodeGenOpt::Less) - addPass(&AMDGPUReleaseVGPRsID); - if (isPassEnabled(EnableInsertDelayAlu, CodeGenOpt::Less)) addPass(&AMDGPUInsertDelayAluID); diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index 8df156f24dcb6..ae88d934ad52b 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -88,7 +88,6 @@ add_llvm_target(AMDGPUCodeGen AMDGPURegBankCombiner.cpp AMDGPURegBankSelect.cpp AMDGPURegisterBankInfo.cpp - AMDGPUReleaseVGPRs.cpp AMDGPURemoveIncompatibleFunctions.cpp AMDGPUResourceUsageAnalysis.cpp AMDGPURewriteOutArguments.cpp diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 6f0be07a3e23e..05247d3ff1603 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -386,6 +386,10 @@ class SIInsertWaitcnts : public MachineFunctionPass { bool ForceEmitZeroWaitcnts; bool ForceEmitWaitcnt[NUM_INST_CNTS]; + // S_ENDPGM instructions before which we should insert a DEALLOC_VGPRS + // message. + DenseSet<MachineInstr *> ReleaseVGPRInsts; + public: static char ID; @@ -1032,6 +1036,15 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, (MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) { Wait = Wait.combined(allZeroWaitcnt()); } + // Identify S_ENDPGM instructions which may have to wait for outstanding VMEM + // stores. In this case it can be useful to send a message to explicitly + // release all VGPRs before the stores have completed. + else if (MI.getOpcode() == AMDGPU::S_ENDPGM || + MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) { + if (ST->getGeneration() >= AMDGPUSubtarget::GFX11 && + ScoreBrackets.getScoreRange(VS_CNT) != 0) + ReleaseVGPRInsts.insert(&MI); + } // Resolve vm waits before gs-done.
else if ((MI.getOpcode() == AMDGPU::S_SENDMSG || MI.getOpcode() == AMDGPU::S_SENDMSGHALT) && @@ -1930,5 +1943,14 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { } } + // Insert DEALLOC_VGPR messages before previously identified S_ENDPGM + // instructions. + for (MachineInstr *MI : ReleaseVGPRInsts) { + BuildMI(*MI->getParent(), MI, DebugLoc(), TII->get(AMDGPU::S_SENDMSG)) + .addImm(AMDGPU::SendMsg::ID_DEALLOC_VGPRS_GFX11Plus); + Modified = true; + } + ReleaseVGPRInsts.clear(); + return Modified; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll index f4a3234c73ee6..2f79135fe610d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll @@ -159,7 +159,6 @@ define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset_nortn(ptr addrspa ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s2 ; GFX11-NEXT: global_atomic_csub_u32 v0, v1, v0, s[0:1] glc -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024 %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %gep, i32 %data) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll index d95f8eb5d7a45..00f6b7ac9342e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll @@ -3,8 +3,8 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX900 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a -o - %s | 
FileCheck -check-prefix=GFX90A %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10PLUS %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - %s | FileCheck -check-prefix=GFX10PLUS %s define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i32 %s) { ; GFX6-LABEL: atomic_swap_i32_1d: @@ -1059,32 +1059,18 @@ define amdgpu_ps void @atomic_cmpswap_i32_1d_no_return(<8 x i32> inreg %rsrc, i3 ; GFX90A-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc ; GFX90A-NEXT: s_endpgm ; -; GFX10-LABEL: atomic_cmpswap_i32_1d_no_return: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc -; GFX10-NEXT: s_endpgm -; -; GFX11-LABEL: atomic_cmpswap_i32_1d_no_return: -; GFX11: ; %bb.0: ; %main_body -; GFX11-NEXT: s_mov_b32 s0, s2 -; GFX11-NEXT: s_mov_b32 s1, s3 -; GFX11-NEXT: s_mov_b32 s2, s4 -; GFX11-NEXT: s_mov_b32 s3, s5 -; GFX11-NEXT: s_mov_b32 s4, s6 -; GFX11-NEXT: s_mov_b32 s5, s7 -; GFX11-NEXT: s_mov_b32 s6, s8 -; GFX11-NEXT: s_mov_b32 s7, s9 -; GFX11-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX11-NEXT: s_endpgm +; GFX10PLUS-LABEL: atomic_cmpswap_i32_1d_no_return: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: s_mov_b32 s0, s2 +; GFX10PLUS-NEXT: s_mov_b32 s1, s3 +; GFX10PLUS-NEXT: s_mov_b32 s2, s4 +; 
GFX10PLUS-NEXT: s_mov_b32 s3, s5 +; GFX10PLUS-NEXT: s_mov_b32 s4, s6 +; GFX10PLUS-NEXT: s_mov_b32 s5, s7 +; GFX10PLUS-NEXT: s_mov_b32 s6, s8 +; GFX10PLUS-NEXT: s_mov_b32 s7, s9 +; GFX10PLUS-NEXT: image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc +; GFX10PLUS-NEXT: s_endpgm main_body: %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) ret void @@ -2760,32 +2746,18 @@ define amdgpu_ps void @atomic_cmpswap_i64_1d_no_return(<8 x i32> inreg %rsrc, i6 ; GFX90A-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc ; GFX90A-NEXT: s_endpgm ; -; GFX10-LABEL: atomic_cmpswap_i64_1d_no_return: -; GFX10: ; %bb.0: ; %main_body -; GFX10-NEXT: s_mov_b32 s0, s2 -; GFX10-NEXT: s_mov_b32 s1, s3 -; GFX10-NEXT: s_mov_b32 s2, s4 -; GFX10-NEXT: s_mov_b32 s3, s5 -; GFX10-NEXT: s_mov_b32 s4, s6 -; GFX10-NEXT: s_mov_b32 s5, s7 -; GFX10-NEXT: s_mov_b32 s6, s8 -; GFX10-NEXT: s_mov_b32 s7, s9 -; GFX10-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc -; GFX10-NEXT: s_endpgm -; -; GFX11-LABEL: atomic_cmpswap_i64_1d_no_return: -; GFX11: ; %bb.0: ; %main_body -; GFX11-NEXT: s_mov_b32 s0, s2 -; GFX11-NEXT: s_mov_b32 s1, s3 -; GFX11-NEXT: s_mov_b32 s2, s4 -; GFX11-NEXT: s_mov_b32 s3, s5 -; GFX11-NEXT: s_mov_b32 s4, s6 -; GFX11-NEXT: s_mov_b32 s5, s7 -; GFX11-NEXT: s_mov_b32 s6, s8 -; GFX11-NEXT: s_mov_b32 s7, s9 -; GFX11-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) -; GFX11-NEXT: s_endpgm +; GFX10PLUS-LABEL: atomic_cmpswap_i64_1d_no_return: +; GFX10PLUS: ; %bb.0: ; %main_body +; GFX10PLUS-NEXT: s_mov_b32 s0, s2 +; GFX10PLUS-NEXT: s_mov_b32 s1, s3 +; GFX10PLUS-NEXT: s_mov_b32 s2, s4 +; GFX10PLUS-NEXT: s_mov_b32 s3, s5 +; GFX10PLUS-NEXT: s_mov_b32 s4, s6 +; GFX10PLUS-NEXT: s_mov_b32 s5, s7 +; GFX10PLUS-NEXT: s_mov_b32 s6, s8 +; GFX10PLUS-NEXT: s_mov_b32 s7, s9 +; 
GFX10PLUS-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc +; GFX10PLUS-NEXT: s_endpgm main_body: %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll index ded7081033f0e..acf69e7e7b2ad 100644 --- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll @@ -4382,7 +4382,6 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; GFX11-NEXT: s_waitcnt vmcnt(7) ; GFX11-NEXT: scratch_store_b32 off, v31, s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v32i32: @@ -4552,7 +4551,6 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX11-NEXT: s_waitcnt vmcnt(7) ; GFX11-NEXT: scratch_store_b32 off, v32, s4 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_v32i32_i32: @@ -4926,7 +4924,6 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: scratch_store_b64 off, v[0:1], s32 ; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3] -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; ; HSA-LABEL: test_call_external_void_func_byval_struct_i8_i32: diff --git a/llvm/test/CodeGen/AMDGPU/cc-update.ll b/llvm/test/CodeGen/AMDGPU/cc-update.ll index a3e22a3bc7db6..fdb8bec944cb3 100644 --- a/llvm/test/CodeGen/AMDGPU/cc-update.ll +++ b/llvm/test/CodeGen/AMDGPU/cc-update.ll @@ -238,7 +238,6 @@ define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 { ; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 ; GFX1100-NEXT: s_addc_u32 s17, s17, 
ex@rel32@hi+12 ; GFX1100-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-NEXT: s_endpgm entry: @@ -517,7 +516,6 @@ define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_add ; GFX1100-NEXT: s_add_u32 s16, s16, ex@rel32@lo+4 ; GFX1100-NEXT: s_addc_u32 s17, s17, ex@rel32@hi+12 ; GFX1100-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-NEXT: s_endpgm entry: %x = alloca i32, align 4, addrspace(5) diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index fe4d0224b038b..4d9ed0a784bfe 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -1026,7 +1026,6 @@ ; GCN-O2-NEXT: SI Final Branch Preparation ; GCN-O2-NEXT: SI peephole optimizations ; GCN-O2-NEXT: Post RA hazard recognizer -; GCN-O2-NEXT: Release VGPRs ; GCN-O2-NEXT: AMDGPU Insert Delay ALU ; GCN-O2-NEXT: Branch relaxation pass ; GCN-O2-NEXT: Register Usage Information Collector Pass @@ -1350,7 +1349,6 @@ ; GCN-O3-NEXT: SI Final Branch Preparation ; GCN-O3-NEXT: SI peephole optimizations ; GCN-O3-NEXT: Post RA hazard recognizer -; GCN-O3-NEXT: Release VGPRs ; GCN-O3-NEXT: AMDGPU Insert Delay ALU ; GCN-O3-NEXT: Branch relaxation pass ; GCN-O3-NEXT: Register Usage Information Collector Pass diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll index cbf0ca740c76b..4b16e9b2ce846 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll @@ -591,6 +591,7 @@ define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 ; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: exp mrt0 v0, v0, v0, v0 done +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm main_body: %tmp = shl i32 
%index, 4 diff --git a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir b/llvm/test/CodeGen/AMDGPU/release-vgprs.mir deleted file mode 100644 index 7adb2e4cac416..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/release-vgprs.mir +++ /dev/null @@ -1,411 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py - -# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=release-vgprs -verify-machineinstrs -o - %s | FileCheck %s - ---- | - define amdgpu_ps void @tbuffer_store1() { ret void } - define amdgpu_ps void @tbuffer_store2() { ret void } - define amdgpu_ps void @flat_store() { ret void } - define amdgpu_ps void @global_store() { ret void } - define amdgpu_ps void @buffer_store_format() { ret void } - define amdgpu_ps void @ds_write_b32() { ret void } - define amdgpu_ps void @global_store_dword() { ret void } - define amdgpu_ps void @multiple_basic_blocks1() { ret void } - define amdgpu_ps void @multiple_basic_blocks2() { ret void } - define amdgpu_ps void @multiple_basic_blocks3() { ret void } - define amdgpu_ps void @recursive_loop() { ret void } - define amdgpu_ps void @recursive_loop_vmem() { ret void } - define amdgpu_ps void @image_store() { ret void } - define amdgpu_ps void @scratch_store() { ret void } - define amdgpu_ps void @buffer_atomic() { ret void } - define amdgpu_ps void @flat_atomic() { ret void } - define amdgpu_ps void @global_atomic() { ret void } - define amdgpu_ps void @image_atomic() { ret void } -... 
- ---- -name: tbuffer_store1 -body: | - bb.0: - ; CHECK-LABEL: name: tbuffer_store1 - ; CHECK: TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec - ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 - ; CHECK-NEXT: S_ENDPGM 0 - TBUFFER_STORE_FORMAT_XYZW_OFFSET_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 42, 117, 0, 0, implicit $exec - S_ENDPGM 0 -... - ---- -name: tbuffer_store2 -body: | - bb.0: - ; CHECK-LABEL: name: tbuffer_store2 - ; CHECK: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) - ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 - ; CHECK-NEXT: S_ENDPGM 0 - TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec :: (dereferenceable store (s128), align 1, addrspace 7) - S_ENDPGM 0 -... - ---- -name: flat_store -body: | - bb.0: - ; CHECK-LABEL: name: flat_store - ; CHECK: FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, implicit $exec, implicit $flat_scr - ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 - ; CHECK-NEXT: S_ENDPGM 0 - FLAT_STORE_DWORDX4 $vgpr49_vgpr50, $vgpr26_vgpr27_vgpr28_vgpr29, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM 0 -... 
- ---- -name: global_store -body: | - bb.0: - ; CHECK-LABEL: name: global_store - ; CHECK: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec - ; CHECK-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0 - ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 - ; CHECK-NEXT: S_ENDPGM 0 - GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr1, 0, 4, implicit $exec - S_WAITCNT_VSCNT undef $sgpr_null, 0 - S_ENDPGM 0 -... - ---- -name: buffer_store_format -body: | - bb.0: - ; CHECK-LABEL: name: buffer_store_format - ; CHECK: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec - ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 - ; CHECK-NEXT: S_ENDPGM 0 - BUFFER_STORE_FORMAT_D16_X_OFFEN_exact killed renamable $vgpr0, killed renamable $vgpr1, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, killed renamable $sgpr4, 0, 0, 0, implicit $exec - S_ENDPGM 0 -... - ---- -name: ds_write_b32 -body: | - bb.0: - ; CHECK-LABEL: name: ds_write_b32 - ; CHECK: renamable $vgpr0 = IMPLICIT_DEF - ; CHECK-NEXT: renamable $vgpr1 = IMPLICIT_DEF - ; CHECK-NEXT: DS_WRITE_B32_gfx9 killed renamable $vgpr0, killed renamable $vgpr1, 12, 0, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0 - renamable $vgpr0 = IMPLICIT_DEF - renamable $vgpr1 = IMPLICIT_DEF - DS_WRITE_B32_gfx9 killed renamable $vgpr0, killed renamable $vgpr1, 12, 0, implicit $exec - S_ENDPGM 0 - -... 
---- -name: global_store_dword -body: | - bb.0: - liveins: $vgpr0, $sgpr0_sgpr1 - - ; CHECK-LABEL: name: global_store_dword - ; CHECK: liveins: $vgpr0, $sgpr0_sgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec - ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec - ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 - ; CHECK-NEXT: S_ENDPGM 0 - renamable $vgpr0 = V_MAD_I32_I24_e64 killed $vgpr1, killed $vgpr0, killed $sgpr2, 0, implicit $exec - GLOBAL_STORE_DWORD_SADDR killed renamable $vgpr2, killed renamable $vgpr0, killed renamable $sgpr0_sgpr1, 0, 0, implicit $exec - S_ENDPGM 0 -... - ---- -name: multiple_basic_blocks1 -body: | - ; CHECK-LABEL: name: multiple_basic_blocks1 - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec - ; CHECK-NEXT: S_BRANCH %bb.1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec - ; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc - ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc - ; CHECK-NEXT: S_BRANCH %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: S_ENDPGM 0 - bb.0: - successors: %bb.1 - - renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec - S_BRANCH %bb.1 - - bb.1: - successors: %bb.1, %bb.2 - - $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec - S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc - S_CBRANCH_SCC1 %bb.1, 
implicit killed $scc - S_BRANCH %bb.2 - - bb.2: - S_ENDPGM 0 - -... - - -# One block has a VMEM store as the last instruction, we should release the VGPRS -... ---- -name: multiple_basic_blocks2 -body: | - ; CHECK-LABEL: name: multiple_basic_blocks2 - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec - ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec - ; CHECK-NEXT: S_BRANCH %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec - ; CHECK-NEXT: S_BRANCH %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 - ; CHECK-NEXT: S_ENDPGM 0 - bb.0: - successors: %bb.2 - - TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec - $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec - S_BRANCH %bb.2 - - bb.1: - successors: %bb.2 - - $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec - TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec - S_BRANCH %bb.2 - - bb.2: - S_ENDPGM 0 -... 
- - -# One parent block has a VMEM store, release VGPRs ---- -name: multiple_basic_blocks3 -body: | - ; CHECK-LABEL: name: multiple_basic_blocks3 - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec - ; CHECK-NEXT: S_BRANCH %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec - ; CHECK-NEXT: S_BRANCH %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: successors: %bb.4(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_BRANCH %bb.4 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.3: - ; CHECK-NEXT: successors: %bb.4(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec - ; CHECK-NEXT: S_BRANCH %bb.4 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 - ; CHECK-NEXT: S_ENDPGM 0 - bb.0: - successors: %bb.2 - - $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec - TBUFFER_STORE_FORMAT_X_OFFSET_exact killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 125, 0, 0, implicit $exec - S_BRANCH %bb.2 - - bb.1: - successors: %bb.2 - - $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec - S_BRANCH %bb.2 - - bb.2: - successors: %bb.4 - - S_BRANCH %bb.4 - - bb.3: - successors: %bb.4 - - $vgpr1 = V_ADD_U32_e32 renamable $vgpr0, renamable $vgpr2, implicit $exec - S_BRANCH %bb.4 - - bb.4: - S_ENDPGM 0 -... 
- ---- -name: recursive_loop -body: | - ; CHECK-LABEL: name: recursive_loop - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec - ; CHECK-NEXT: S_BRANCH %bb.1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc - ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc - ; CHECK-NEXT: S_BRANCH %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: S_ENDPGM 0 - bb.0: - successors: %bb.1 - - renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec - S_BRANCH %bb.1 - - bb.1: - successors: %bb.1, %bb.2 - - S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc - S_CBRANCH_SCC1 %bb.1, implicit killed $scc - S_BRANCH %bb.2 - - bb.2: - S_ENDPGM 0 -... 
- ---- -name: recursive_loop_vmem -body: | - ; CHECK-LABEL: name: recursive_loop_vmem - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec - ; CHECK-NEXT: S_BRANCH %bb.1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec - ; CHECK-NEXT: S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc - ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc - ; CHECK-NEXT: S_BRANCH %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 - ; CHECK-NEXT: S_ENDPGM 0 - bb.0: - successors: %bb.1 - - renamable $vgpr0 = BUFFER_LOAD_FORMAT_X_IDXEN killed renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec - S_BRANCH %bb.1 - - bb.1: - successors: %bb.1, %bb.2 - - TBUFFER_STORE_FORMAT_XYZW_OFFEN_exact killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 115, 0, 0, implicit $exec - S_CMP_LG_U32 killed renamable $sgpr3, renamable $sgpr4, implicit-def $scc - S_CBRANCH_SCC1 %bb.1, implicit killed $scc - S_BRANCH %bb.2 - - bb.2: - S_ENDPGM 0 -... 
- ---- -name: image_store -body: | - bb.0: - ; CHECK-LABEL: name: image_store - ; CHECK: IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7) - ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 - ; CHECK-NEXT: S_ENDPGM 0 - IMAGE_STORE_V2_V1_gfx11 killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 12, 0, 1, 0, 0, -1, 0, 0, 0, implicit $exec :: (dereferenceable store (<2 x s32>), addrspace 7) - S_ENDPGM 0 -... - ---- -name: scratch_store -body: | - bb.0: - ; CHECK-LABEL: name: scratch_store - ; CHECK: renamable $sgpr0 = S_AND_B32 killed renamable $sgpr0, -16, implicit-def dead $scc - ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $sgpr0, 0, 0, implicit $exec, implicit $flat_scr - ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 - ; CHECK-NEXT: S_ENDPGM 0 - renamable $sgpr0 = S_AND_B32 killed renamable $sgpr0, -16, implicit-def dead $scc - SCRATCH_STORE_DWORD_SADDR killed renamable $vgpr0, killed renamable $sgpr0, 0, 0, implicit $exec, implicit $flat_scr - S_ENDPGM 0 -... - ---- -name: buffer_atomic -body: | - bb.0: - ; CHECK-LABEL: name: buffer_atomic - ; CHECK: BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) - ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 - ; CHECK-NEXT: S_ENDPGM 0 - BUFFER_ATOMIC_ADD_F32_OFFEN killed renamable $vgpr0, killed renamable $vgpr2, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 7) - S_ENDPGM 0 -... 
- ---- -name: flat_atomic -body: | - bb.0: - ; CHECK-LABEL: name: flat_atomic - ; CHECK: renamable $vgpr0_vgpr1 = FLAT_ATOMIC_DEC_X2_RTN killed renamable $vgpr0_vgpr1, killed renamable $vgpr2_vgpr3, 40, 1, implicit $exec, implicit $flat_scr - ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 - ; CHECK-NEXT: S_ENDPGM 0 - renamable $vgpr0_vgpr1 = FLAT_ATOMIC_DEC_X2_RTN killed renamable $vgpr0_vgpr1, killed renamable $vgpr2_vgpr3, 40, 1, implicit $exec, implicit $flat_scr - S_ENDPGM 0 -... - - ---- -name: global_atomic -body: | - bb.0: - ; CHECK-LABEL: name: global_atomic - ; CHECK: renamable $vgpr0_vgpr1 = GLOBAL_ATOMIC_INC_X2_SADDR_RTN killed renamable $vgpr0, killed renamable $vgpr1_vgpr2, killed renamable $sgpr0_sgpr1, 40, 1, implicit $exec - ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 - ; CHECK-NEXT: S_ENDPGM 0 - renamable $vgpr0_vgpr1 = GLOBAL_ATOMIC_INC_X2_SADDR_RTN killed renamable $vgpr0, killed renamable $vgpr1_vgpr2, killed renamable $sgpr0_sgpr1, 40, 1, implicit $exec - S_ENDPGM 0 -... - ---- -name: image_atomic -body: | - bb.0: - ; CHECK-LABEL: name: image_atomic - ; CHECK: renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) - ; CHECK-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 - ; CHECK-NEXT: S_ENDPGM 0 - renamable $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, killed renamable $vgpr4, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) - S_ENDPGM 0 -... 
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir index 5f04e8cff880e..97dc6b60c0b4a 100644 --- a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir @@ -31,6 +31,7 @@ body: | ; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr ; GFX11-NEXT: S_WAITCNT 7 ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX11-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; GFX11-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec S_WAITCNT_VSCNT undef $sgpr_null, 0 @@ -69,6 +70,7 @@ body: | ; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr ; GFX11-NEXT: S_WAITCNT 7 ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX11-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; GFX11-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec S_WAITCNT_VSCNT undef $sgpr_null, 1 @@ -109,6 +111,7 @@ body: | ; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr ; GFX11-NEXT: S_WAITCNT 7 ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX11-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; GFX11-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec S_WAITCNT 112 @@ -148,6 +151,7 @@ body: | ; GFX11-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr ; GFX11-NEXT: S_WAITCNT 7 ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX11-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; GFX11-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec S_WAITCNT_VSCNT undef $sgpr_null, 0 @@ -190,6 +194,7 @@ body: | ; GFX11-NEXT: 
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr ; GFX11-NEXT: S_WAITCNT 7 ; GFX11-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr + ; GFX11-NEXT: S_SENDMSG 3, implicit $exec, implicit $m0 ; GFX11-NEXT: S_ENDPGM 0 GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec S_WAITCNT 0