Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,16 @@ def int_amdgcn_s_wait_loadcnt : AMDGPUWaitIntrinsic;
def int_amdgcn_s_wait_samplecnt : AMDGPUWaitIntrinsic;
def int_amdgcn_s_wait_storecnt : AMDGPUWaitIntrinsic;

// Request the hardware to allocate the given number of VGPRs. The actual number
// of allocated VGPRs may be rounded up to match hardware block boundaries.
// It is the responsibility of the calling code to ensure it does not allocate
// below the VGPR requirements of the current shader.
//
// The intrinsic takes a single i32 operand and produces an i1 result. It is
// declared as not accessing memory, but is still marked as having side effects
// and as convergent; the returned value is annotated as never undef.
def int_amdgcn_s_alloc_vgpr :
DefaultAttrsIntrinsic<
[llvm_i1_ty], // Returns true if the allocation succeeded, false otherwise.
[llvm_i32_ty], // The number of VGPRs to allocate.
[NoUndef<RetIndex>, IntrNoMem, IntrHasSideEffects, IntrConvergent]>;

def int_amdgcn_div_scale : DefaultAttrsIntrinsic<
// 1st parameter: Numerator
// 2nd parameter: Denominator
Expand Down
16 changes: 16 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2331,6 +2331,22 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
return selectDSBvhStackIntrinsic(I);
case Intrinsic::amdgcn_s_alloc_vgpr: {
  // S_ALLOC_VGPR has no explicit destination; its outcome is only visible
  // through the implicitly defined SCC. Emit the instruction and then COPY SCC
  // into the virtual register that holds the intrinsic's result.
  const DebugLoc &Loc = I.getDebugLoc();
  MachineBasicBlock &Block = *I.getParent();
  Register Result = I.getOperand(0).getReg();

  // Operand 2 of the generic instruction is forwarded as the source operand.
  MachineInstr *Alloc =
      BuildMI(Block, &I, Loc, TII.get(AMDGPU::S_ALLOC_VGPR))
          .add(I.getOperand(2));
  (void)BuildMI(Block, &I, Loc, TII.get(AMDGPU::COPY), Result)
      .addReg(AMDGPU::SCC);

  // Both replacement instructions were inserted before I, so it can go now.
  I.eraseFromParent();

  // Constrain the new instruction first; only if that succeeds constrain the
  // result register to SReg_32.
  if (!constrainSelectedInstRegOperands(*Alloc, TII, TRI, RBI))
    return false;
  if (!RBI.constrainGenericRegister(Result, AMDGPU::SReg_32RegClass, *MRI))
    return false;
  return true;
}
case Intrinsic::amdgcn_s_barrier_init:
case Intrinsic::amdgcn_s_barrier_signal_var:
return selectNamedBarrierInit(I, IntrinsicID);
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5359,6 +5359,10 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[6] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32);
OpdsMapping[8] = getSGPROpMapping(MI.getOperand(8).getReg(), MRI, *TRI);
break;
case Intrinsic::amdgcn_s_alloc_vgpr:
// Operand 0 (the 1-bit result) and operand 2 (the 32-bit input) are both
// mapped to the SGPR register bank.
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 1);
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32);
break;
case Intrinsic::amdgcn_s_sendmsg:
case Intrinsic::amdgcn_s_sendmsghalt: {
// This must be an SGPR, but accept a VGPR.
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,7 @@ def : AlwaysUniform<int_amdgcn_cluster_workgroup_max_flat_id>;
def : AlwaysUniform<int_amdgcn_workgroup_id_x>;
def : AlwaysUniform<int_amdgcn_workgroup_id_y>;
def : AlwaysUniform<int_amdgcn_workgroup_id_z>;
def : AlwaysUniform<int_amdgcn_s_alloc_vgpr>;
def : AlwaysUniform<int_amdgcn_s_getpc>;
def : AlwaysUniform<int_amdgcn_s_getreg>;
def : AlwaysUniform<int_amdgcn_s_memrealtime>;
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/AMDGPU/SOPInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -433,8 +433,10 @@ let SubtargetPredicate = isGFX11Plus in {
} // End SubtargetPredicate = isGFX11Plus

let SubtargetPredicate = isGFX12Plus in {
let hasSideEffects = 1, Defs = [SCC] in {
def S_ALLOC_VGPR : SOP1_0_32 <"s_alloc_vgpr">;
let hasSideEffects = 1, isConvergent = 1, Defs = [SCC] in {
def S_ALLOC_VGPR : SOP1_0_32 <"s_alloc_vgpr",
[(set SCC, (int_amdgcn_s_alloc_vgpr SSrc_b32:$src0))]
>;
}
} // End SubtargetPredicate = isGFX12Plus

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,15 @@ define void @cluster_workgroup_max_flat_id(ptr addrspace(1) inreg %out) {
ret void
}

; Calls llvm.amdgcn.s.alloc.vgpr with a uniform (inreg) argument and stores a
; value derived from its result; the analysis is expected to report every value
; in the function as uniform.
; CHECK-LABEL: for function 's_alloc_vgpr':
; CHECK: ALL VALUES UNIFORM
define void @s_alloc_vgpr(i32 inreg %n, ptr addrspace(1) inreg %out) {
%scc = call i1 @llvm.amdgcn.s.alloc.vgpr(i32 %n)
%sel = select i1 %scc, i32 1, i32 0
store i32 %sel, ptr addrspace(1) %out
ret void
}

; CHECK-LABEL: for function 's_memtime':
; CHECK: ALL VALUES UNIFORM
define void @s_memtime(ptr addrspace(1) inreg %out) {
Expand Down
59 changes: 59 additions & 0 deletions llvm/test/CodeGen/AMDGPU/intrinsic-amdgcn-s-alloc-vgpr.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1250 < %s | FileCheck %s --check-prefix=GISEL
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1250 < %s | FileCheck %s --check-prefix=DAGISEL

declare i1 @llvm.amdgcn.s.alloc.vgpr(i32)

; The VGPR count is the immediate 45. The i1 result of the intrinsic is turned
; into an i32 (1 or 0) and stored to %out so that it stays live.
define amdgpu_cs void @test_alloc_vreg_const(ptr addrspace(1) %out) #0 {
; GISEL-LABEL: test_alloc_vreg_const:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_alloc_vgpr 45
; GISEL-NEXT: s_cselect_b32 s0, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_and_b32 s0, s0, 1
; GISEL-NEXT: v_mov_b32_e32 v2, s0
; GISEL-NEXT: global_store_b32 v[0:1], v2, off
; GISEL-NEXT: s_endpgm
;
; DAGISEL-LABEL: test_alloc_vreg_const:
; DAGISEL: ; %bb.0: ; %entry
; DAGISEL-NEXT: s_alloc_vgpr 45
; DAGISEL-NEXT: s_cselect_b32 s0, -1, 0
; DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; DAGISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
; DAGISEL-NEXT: global_store_b32 v[0:1], v2, off
; DAGISEL-NEXT: s_endpgm
entry:
%scc = call i1 @llvm.amdgcn.s.alloc.vgpr(i32 45)
%sel = select i1 %scc, i32 1, i32 0
store i32 %sel, ptr addrspace(1) %out
ret void
}

; The VGPR count comes from the inreg argument %n instead of an immediate. The
; i1 result of the intrinsic is turned into an i32 (1 or 0) and stored to %out.
define amdgpu_cs void @test_alloc_vreg_var(i32 inreg %n, ptr addrspace(1) %out) #0 {
; GISEL-LABEL: test_alloc_vreg_var:
; GISEL: ; %bb.0: ; %entry
; GISEL-NEXT: s_alloc_vgpr s0
; GISEL-NEXT: s_cselect_b32 s0, 1, 0
; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT: s_and_b32 s0, s0, 1
; GISEL-NEXT: v_mov_b32_e32 v2, s0
; GISEL-NEXT: global_store_b32 v[0:1], v2, off
; GISEL-NEXT: s_endpgm
;
; DAGISEL-LABEL: test_alloc_vreg_var:
; DAGISEL: ; %bb.0: ; %entry
; DAGISEL-NEXT: s_alloc_vgpr s0
; DAGISEL-NEXT: s_cselect_b32 s0, -1, 0
; DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; DAGISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
; DAGISEL-NEXT: global_store_b32 v[0:1], v2, off
; DAGISEL-NEXT: s_endpgm
entry:
%scc = call i1 @llvm.amdgcn.s.alloc.vgpr(i32 %n)
%sel = select i1 %scc, i32 1, i32 0
store i32 %sel, ptr addrspace(1) %out
ret void
}

; NOTE(review): "amdgpu-dynamic-vgpr-block-sze" looks like a misspelling of
; "amdgpu-dynamic-vgpr-block-size"; as written the attribute is presumably not
; recognized. Confirm the intended key, and regenerate the assertions in this
; file if correcting it changes the generated code.
attributes #0 = { "amdgpu-dynamic-vgpr-block-sze" = "16" }