Skip to content

Commit

Permalink
[AMDGPU] New intrinsic llvm.amdgcn.pops.exiting.wave.id (#89612)
Browse files Browse the repository at this point in the history
This provides access to the special scalar source value
SRC_POPS_EXITING_WAVE_ID on GFX9 and GFX10.
  • Loading branch information
jayfoad committed May 22, 2024
1 parent 0a62a99 commit 990bed6
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 0 deletions.
5 changes: 5 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -2482,6 +2482,11 @@ class AMDGPUGlobalLoadLDS :
"", [SDNPMemOperand]>;
def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;

// Use read/write of inaccessible memory to model the fact that this reads a
// volatile value.
def int_amdgcn_pops_exiting_wave_id :
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>;

//===----------------------------------------------------------------------===//
// GFX10 Intrinsics
//===----------------------------------------------------------------------===//
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2526,6 +2526,14 @@ void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}

void AMDGPUDAGToDAGISel::SelectPOPSExitingWaveID(SDNode *N) {
// TODO: Select this with a tablegen pattern. This is tricky because the
// intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
// mayLoad/mayStore and tablegen complains about the mismatch.
SDValue Reg = CurDAG->getRegister(AMDGPU::SRC_POPS_EXITING_WAVE_ID, MVT::i32);
CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, N->getVTList(), Reg);
}

static unsigned gwsIntrinToOpcode(unsigned IntrID) {
switch (IntrID) {
case Intrinsic::amdgcn_ds_gws_init:
Expand Down Expand Up @@ -2682,6 +2690,9 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
SelectDSBvhStackIntrinsic(N);
return;
case Intrinsic::amdgcn_pops_exiting_wave_id:
SelectPOPSExitingWaveID(N);
return;
}

SelectCode(N);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
void SelectFP_EXTEND(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDSBvhStackIntrinsic(SDNode *N);
void SelectPOPSExitingWaveID(SDNode *N);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
void SelectInterpP1F16(SDNode *N);
void SelectINTRINSIC_W_CHAIN(SDNode *N);
Expand Down
17 changes: 17 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2079,6 +2079,21 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectPOPSExitingWaveID(
MachineInstr &MI) const {
Register Dst = MI.getOperand(0).getReg();
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock *MBB = MI.getParent();

// TODO: Select this with a tablegen pattern. This is tricky because the
// intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
// mayLoad/mayStore and tablegen complains about the mismatch.
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst)
.addDef(AMDGPU::SRC_POPS_EXITING_WAVE_ID);
MI.eraseFromParent();
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}

bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const {
unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
Expand Down Expand Up @@ -2129,6 +2144,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
return selectSBarrierSignalIsfirst(I, IntrinsicID);
case Intrinsic::amdgcn_s_barrier_leave:
return selectSBarrierLeave(I);
case Intrinsic::amdgcn_pops_exiting_wave_id:
return selectPOPSExitingWaveID(I);
}
return selectImpl(I, *CoverageInfo);
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
bool selectSBarrier(MachineInstr &MI) const;
bool selectDSBvhStackIntrinsic(MachineInstr &MI) const;
bool selectPOPSExitingWaveID(MachineInstr &MI) const;

bool selectImageIntrinsic(MachineInstr &MI,
const AMDGPU::ImageDimIntrinsicInfo *Intr) const;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5132,6 +5132,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
break;
}
case Intrinsic::amdgcn_pops_exiting_wave_id:
return getDefaultMappingSOP(MI);
default:
return getInvalidInstructionMapping();
}
Expand Down
34 changes: 34 additions & 0 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=SDAG
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=GFX9-GISEL
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=SDAG
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=GFX10-GISEL

define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
; SDAG-LABEL: test:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
; SDAG-NEXT: v_mov_b32_e32 v0, 0
; SDAG-NEXT: v_mov_b32_e32 v1, s2
; SDAG-NEXT: global_store_dword v0, v1, s[0:1]
; SDAG-NEXT: s_endpgm
;
; GFX9-GISEL-LABEL: test:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX10-GISEL-LABEL: test:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
%id = call i32 @llvm.amdgcn.pops.exiting.wave.id()
store i32 %id, ptr addrspace(1) %ptr
ret void
}

0 comments on commit 990bed6

Please sign in to comment.