diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 0b774b724d0c0..5c001a4dd6247 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2482,6 +2482,11 @@ class AMDGPUGlobalLoadLDS :
     "", [SDNPMemOperand]>;
 def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
 
+// Use read/write of inaccessible memory to model the fact that this reads a
+// volatile value.
+def int_amdgcn_pops_exiting_wave_id :
+  DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>;
+
 //===----------------------------------------------------------------------===//
 // GFX10 Intrinsics
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index c11c7a57e0596..e35957338da7b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2526,6 +2526,19 @@ void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
 }
 
+/// Select llvm.amdgcn.pops.exiting.wave.id as an S_MOV_B32 that copies the
+/// SRC_POPS_EXITING_WAVE_ID register into the node's i32 result.
+void AMDGPUDAGToDAGISel::SelectPOPSExitingWaveID(SDNode *N) {
+  // TODO: Select this with a tablegen pattern. This is tricky because the
+  // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
+  // mayLoad/mayStore and tablegen complains about the mismatch.
+  SDValue Reg = CurDAG->getRegister(AMDGPU::SRC_POPS_EXITING_WAVE_ID, MVT::i32);
+  // Forward the incoming chain (operand 0 of the chained intrinsic node) so
+  // the read of this volatile register stays ordered after earlier chained nodes.
+  SDValue Chain = N->getOperand(0);
+  CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, N->getVTList(), {Reg, Chain});
+}
+
 static unsigned gwsIntrinToOpcode(unsigned IntrID) {
   switch (IntrID) {
   case Intrinsic::amdgcn_ds_gws_init:
@@ -2682,6 +2695,9 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
   case Intrinsic::amdgcn_ds_bvh_stack_rtn:
     SelectDSBvhStackIntrinsic(N);
     return;
+  case Intrinsic::amdgcn_pops_exiting_wave_id:
+    SelectPOPSExitingWaveID(N);
+    return;
   }
 
   SelectCode(N);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index f987b747c0e21..53d25b4cf4ca8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -274,6 +274,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   void SelectFP_EXTEND(SDNode *N);
   void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
   void SelectDSBvhStackIntrinsic(SDNode *N);
+  void SelectPOPSExitingWaveID(SDNode *N);
   void SelectDS_GWS(SDNode *N, unsigned IntrID);
   void SelectInterpP1F16(SDNode *N);
   void SelectINTRINSIC_W_CHAIN(SDNode *N);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b48a09489653a..99109b23a1591 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2079,6 +2079,24 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
   return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
 }
 
+/// Select G_INTRINSIC_W_SIDE_EFFECTS(llvm.amdgcn.pops.exiting.wave.id) as an
+/// S_MOV_B32 from SRC_POPS_EXITING_WAVE_ID into the intrinsic's result.
+bool AMDGPUInstructionSelector::selectPOPSExitingWaveID(
+    MachineInstr &MI) const {
+  Register Dst = MI.getOperand(0).getReg();
+  const DebugLoc &DL = MI.getDebugLoc();
+  MachineBasicBlock *MBB = MI.getParent();
+
+  // TODO: Select this with a tablegen pattern. This is tricky because the
+  // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
+  // mayLoad/mayStore and tablegen complains about the mismatch.
+  // The special register is the source of the move, so add it as a use.
+  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst)
+                 .addReg(AMDGPU::SRC_POPS_EXITING_WAVE_ID);
+  MI.eraseFromParent();
+  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+}
+
 bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
     MachineInstr &I) const {
   unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
@@ -2129,6 +2147,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
     return selectSBarrierSignalIsfirst(I, IntrinsicID);
   case Intrinsic::amdgcn_s_barrier_leave:
     return selectSBarrierLeave(I);
+  case Intrinsic::amdgcn_pops_exiting_wave_id:
+    return selectPOPSExitingWaveID(I);
   }
   return selectImpl(I, *CoverageInfo);
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index f561d5d29efc4..48f3b18118014 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -125,6 +125,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
   bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
   bool selectSBarrier(MachineInstr &MI) const;
   bool selectDSBvhStackIntrinsic(MachineInstr &MI) const;
+  bool selectPOPSExitingWaveID(MachineInstr &MI) const;
 
   bool selectImageIntrinsic(MachineInstr &MI,
                             const AMDGPU::ImageDimIntrinsicInfo *Intr) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 56345d14a331c..dbb42a60f71fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -5132,6 +5132,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
       break;
     }
+    case Intrinsic::amdgcn_pops_exiting_wave_id:
+      return getDefaultMappingSOP(MI);
     default:
       return getInvalidInstructionMapping();
     }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
new file mode 100644
index 0000000000000..4927c2ffcdf30
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=SDAG
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=GFX9-GISEL
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=SDAG
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=GFX10-GISEL
+
+define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
+; SDAG-LABEL: test:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s2, src_pops_exiting_wave_id
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    v_mov_b32_e32 v1, s2
+; SDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: test:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_mov_b32 s2, src_pops_exiting_wave_id
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX10-GISEL-LABEL: test:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_mov_b32 s2, src_pops_exiting_wave_id
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX10-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX10-GISEL-NEXT:    s_endpgm
+  %id = call i32 @llvm.amdgcn.pops.exiting.wave.id()
+  store i32 %id, ptr addrspace(1) %ptr
+  ret void
+}