diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index bb4bf742fb861..a0cd1785c0130 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -13,6 +13,10 @@ include "AMDGPU.td" include "AMDGPUCombine.td" +def gi_ignore : + GIComplexOperandMatcher, + GIComplexPatternEquiv; + def sd_vsrc0 : ComplexPattern; def gi_vsrc0 : GIComplexOperandMatcher, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 2192a72bb27b7..bdf4cd3693b2a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -4312,6 +4312,8 @@ bool AMDGPUDAGToDAGISel::SelectBITOP3(SDValue In, SDValue &Src0, SDValue &Src1, return true; } +bool AMDGPUDAGToDAGISel::SelectIgnore(SDValue In) const { return true; } + SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const { if (In.isUndef()) return CurDAG->getUNDEF(MVT::i32); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index 4fa0d3f72e1c7..906548742f77f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -305,6 +305,8 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { void SelectWAVE_ADDRESS(SDNode *N); void SelectSTACKRESTORE(SDNode *N); + bool SelectIgnore(SDValue In) const; + protected: // Include the pieces autogenerated from the target description. #include "AMDGPUGenDAGISel.inc" diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 12915c7344426..7f08a4eef97c9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -4266,6 +4266,11 @@ Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded( return Src; } +InstructionSelector::ComplexRendererFns +AMDGPUInstructionSelector::selectIgnore(MachineOperand &Root) const { + return {{}}; +} + /// /// This will select either an SGPR or VGPR operand and will save us from /// having to write an extra tablegen pattern. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index c760fe7ef99dd..5a575a9c66a8a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -166,6 +166,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector { MachineOperand Root, MachineInstr *InsertPt, bool ForceVGPR = false) const; + InstructionSelector::ComplexRendererFns + selectIgnore(MachineOperand &Root) const; + InstructionSelector::ComplexRendererFns selectVCSRC(MachineOperand &Root) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 31a2d55e1baad..c2252afdbb064 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1006,9 +1006,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { Opcode == AMDGPU::S_BARRIER_INIT_M0 || Opcode == AMDGPU::S_BARRIER_INIT_IMM || Opcode == AMDGPU::S_BARRIER_JOIN_IMM || - Opcode == AMDGPU::S_BARRIER_LEAVE || - Opcode == AMDGPU::S_BARRIER_LEAVE_IMM || - Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER; + Opcode == AMDGPU::S_BARRIER_LEAVE || Opcode == AMDGPU::DS_GWS_INIT || + Opcode == AMDGPU::DS_GWS_BARRIER; } static bool isF16PseudoScalarTrans(unsigned Opcode) { diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 18a53931a6390..e46bd45aed506 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1710,6 +1710,8 @@ def VOP3PMadMixBF16Mods : ComplexPattern; def VINTERPModsHi : ComplexPattern; +def Ignore : ComplexPattern; + //===----------------------------------------------------------------------===// // SI assembler operands //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 296ce5a46287c..6a39187e48cc8 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1616,7 +1616,8 @@ def S_BARRIER_WAIT : SOPP_Pseudo <"s_barrier_wait", (ins i16imm:$simm16), "$simm let isConvergent = 1; } -def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", (ins)> { + def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", + (ins), "", [(int_amdgcn_s_barrier_leave (Ignore))] > { let SchedRW = [WriteBarrier]; let simm16 = 0; let fixed_imm = 1; @@ -1624,9 +1625,6 @@ def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", (ins)> { let Defs = [SCC]; } -def S_BARRIER_LEAVE_IMM : SOPP_Pseudo <"s_barrier_leave", - (ins i16imm:$simm16), "$simm16", [(int_amdgcn_s_barrier_leave timm:$simm16)]>; - def S_WAKEUP : SOPP_Pseudo <"s_wakeup", (ins) > { let SubtargetPredicate = isGFX8Plus; let simm16 = 0; diff --git a/llvm/test/CodeGen/AMDGPU/s-barrier.ll b/llvm/test/CodeGen/AMDGPU/s-barrier.ll index 8a9beb73a6baa..4c7cef9cc1a0f 100644 --- a/llvm/test/CodeGen/AMDGPU/s-barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/s-barrier.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-SDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s @bar = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison @bar2 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison @@ -102,6 +102,7 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in) ; GFX12-SDAG-NEXT: s_mov_b32 m0, 2 ; GFX12-SDAG-NEXT: s_barrier_signal_isfirst -1 ; GFX12-SDAG-NEXT: s_barrier_wait 1 +; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX12-SDAG-NEXT: s_barrier_leave ; GFX12-SDAG-NEXT: s_get_barrier_state s3, m0 ; GFX12-SDAG-NEXT: s_mov_b32 m0, s2 @@ -155,10 +156,11 @@ define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in) ; GFX12-GISEL-NEXT: s_barrier_signal -1 ; GFX12-GISEL-NEXT: s_barrier_join m0 ; GFX12-GISEL-NEXT: s_barrier_signal_isfirst -1 -; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX12-GISEL-NEXT: s_add_co_u32 s8, s12, 48 ; GFX12-GISEL-NEXT: s_barrier_wait 1 +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12-GISEL-NEXT: s_barrier_leave +; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX12-GISEL-NEXT: s_add_co_u32 s8, s12, 48 ; GFX12-GISEL-NEXT: s_get_barrier_state s0, 2 ; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX12-GISEL-NEXT: s_get_barrier_state s0, m0 @@ -256,6 +258,25 @@ define amdgpu_kernel void @kernel2(ptr addrspace(1) %out, ptr addrspace(3) %in) ret void } +define amdgpu_ps void @test_barrier_leave_write_to_scc(i32 inreg %val, ptr addrspace(1) %out) { +; GFX12-LABEL: test_barrier_leave_write_to_scc: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_barrier_leave +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_cmp_lg_u32 s0, 0 +; GFX12-NEXT: s_movk_i32 s0, 0x7b +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_cselect_b32 s0, s0, 0x1c8 +; GFX12-NEXT: v_mov_b32_e32 v2, s0 +; GFX12-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-NEXT: s_endpgm + call void @llvm.amdgcn.s.barrier.leave(i16 1) + %cmp = icmp ne i32 %val, 0 + %ret = select i1 %cmp, i32 123, i32 456 + store i32 %ret, ptr addrspace(1) %out + ret void +} + declare void @llvm.amdgcn.s.barrier() #1 declare void @llvm.amdgcn.s.barrier.wait(i16) #1 declare void @llvm.amdgcn.s.barrier.signal(i32) #1