Skip to content

Commit

Permalink
AMDGPU/GlobalISel: Handle stacksave/stackrestore
Browse files Browse the repository at this point in the history
  • Loading branch information
arsenm committed Aug 11, 2023
1 parent 9a53f5f commit 1030483
Show file tree
Hide file tree
Showing 8 changed files with 154 additions and 11 deletions.
43 changes: 36 additions & 7 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB,
InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
}

// If \p Def is a G_AMDGPU_WAVE_ADDRESS, return the register it reads as its
// source operand (the wave level SGPR base address). For any other opcode an
// invalid, default-constructed Register is returned.
static Register getWaveAddress(const MachineInstr *Def) {
  if (Def->getOpcode() != AMDGPU::G_AMDGPU_WAVE_ADDRESS)
    return Register();
  return Def->getOperand(1).getReg();
}

bool AMDGPUInstructionSelector::isVCC(Register Reg,
const MachineRegisterInfo &MRI) const {
// The verifier is oblivious to s1 being a valid value for wavesize registers.
Expand Down Expand Up @@ -3365,6 +3372,33 @@ bool AMDGPUInstructionSelector::selectWaveAddress(MachineInstr &MI) const {
return true;
}

// Select G_STACKRESTORE by emitting a COPY of the restored value into the
// stack pointer register and erasing the generic instruction.
//
// Returns false when the source register cannot be constrained to SReg_32,
// and true once the replacement instructions have been emitted.
bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
  Register RestoreSrc = MI.getOperand(0).getReg();
  if (!RBI.constrainGenericRegister(RestoreSrc, AMDGPU::SReg_32RegClass, *MRI))
    return false;

  MachineBasicBlock &BB = *MI.getParent();
  const DebugLoc &Loc = MI.getDebugLoc();

  // If the source is defined by G_AMDGPU_WAVE_ADDRESS, reuse that
  // instruction's input register directly instead of emitting a shift.
  Register NewSP = getWaveAddress(MRI->getVRegDef(RestoreSrc));
  if (!NewSP) {
    // Otherwise shift the source right by log2(wavefront size) into a fresh
    // SReg_32 virtual register.
    // NOTE(review): the path above uses the unshifted G_AMDGPU_WAVE_ADDRESS
    // input, so confirm a right shift is the intended conversion here.
    NewSP = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
    BuildMI(BB, MI, Loc, TII.get(AMDGPU::S_LSHR_B32), NewSP)
        .addReg(RestoreSrc)
        .addImm(Subtarget->getWavefrontSizeLog2())
        .setOperandDead(3); // Dead scc
  }

  Register StackPtr =
      Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();
  BuildMI(BB, &MI, Loc, TII.get(AMDGPU::COPY), StackPtr).addReg(NewSP);

  MI.eraseFromParent();
  return true;
}

bool AMDGPUInstructionSelector::select(MachineInstr &I) {
if (I.isPHI())
return selectPHI(I);
Expand Down Expand Up @@ -3503,6 +3537,8 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return true;
case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
return selectWaveAddress(I);
case AMDGPU::G_STACKRESTORE:
return selectStackRestore(I);
default:
return selectImpl(I, *CoverageInfo);
}
Expand Down Expand Up @@ -4364,13 +4400,6 @@ bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
}

// If \p Def is a G_AMDGPU_WAVE_ADDRESS, return the register it reads as its
// source operand (the wave level SGPR base address). For any other opcode an
// invalid, default-constructed Register is returned.
static Register getWaveAddress(const MachineInstr *Def) {
  if (Def->getOpcode() != AMDGPU::G_AMDGPU_WAVE_ADDRESS)
    return Register();
  return Def->getOperand(1).getReg();
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectMUBUFScratchOffset(
MachineOperand &Root) const {
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectBVHIntrinsic(MachineInstr &I) const;
bool selectSMFMACIntrin(MachineInstr &I) const;
bool selectWaveAddress(MachineInstr &I) const;
bool selectStackRestore(MachineInstr &MI) const;

std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root,
bool IsCanonicalizing = true,
Expand Down
17 changes: 17 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder(G_DYN_STACKALLOC)
.legalFor({{PrivatePtr, S32}});

getActionDefinitionsBuilder(G_STACKSAVE)
.customFor({PrivatePtr});
getActionDefinitionsBuilder(G_STACKRESTORE)
.legalFor({PrivatePtr});

getActionDefinitionsBuilder(G_GLOBAL_VALUE)
.customIf(typeIsNot(0, PrivatePtr));

Expand Down Expand Up @@ -2037,6 +2042,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeCTLZ_CTTZ(MI, MRI, B);
case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
return legalizeFPTruncRound(MI, B);
case TargetOpcode::G_STACKSAVE:
return legalizeStackSave(MI, B);
default:
return false;
}
Expand Down Expand Up @@ -6516,6 +6523,16 @@ bool AMDGPULegalizerInfo::legalizeFPTruncRound(MachineInstr &MI,
return true;
}

// Custom legalization for G_STACKSAVE: build a G_AMDGPU_WAVE_ADDRESS that
// reads the stack pointer register and defines the original result register,
// then erase the G_STACKSAVE. Always returns true.
bool AMDGPULegalizerInfo::legalizeStackSave(MachineInstr &MI,
                                            MachineIRBuilder &B) const {
  Register Result = MI.getOperand(0).getReg();
  Register SP = ST.getTargetLowering()->getStackPointerRegisterToSaveRestore();

  B.buildInstr(AMDGPU::G_AMDGPU_WAVE_ADDRESS, {Result}, {SP});

  MI.eraseFromParent();
  return true;
}

bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const {
MachineIRBuilder &B = Helper.MIRBuilder;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
bool legalizeBVHIntrinsic(MachineInstr &MI, MachineIRBuilder &B) const;

bool legalizeFPTruncRound(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeStackSave(MachineInstr &MI, MachineIRBuilder &B) const;

bool legalizeImageIntrinsic(
MachineInstr &MI, MachineIRBuilder &B,
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3212,6 +3212,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case AMDGPU::G_DYN_STACKALLOC:
applyMappingDynStackAlloc(B, OpdMapper, MI);
return;
case AMDGPU::G_STACKRESTORE: {
applyDefaultMapping(OpdMapper);
constrainOpWithReadfirstlane(B, MI, 0);
return;
}
case AMDGPU::G_SBFX:
applyMappingBFE(B, OpdMapper, /*Signed*/ true);
return;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# RUN: not --crash llc -march=amdgcn -mcpu=gfx1030 -run-pass=instruction-select -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s

# ERR: LLVM ERROR: cannot select: G_STACKRESTORE %{{[0-9]+}}:vgpr(p5) (in function: stackrestore_waveaddress_vgpr)

# G_STACKRESTORE whose source is on the VGPR bank: instruction selection is
# expected to fail with the "cannot select" error matched by the ERR check.
---
name: stackrestore_waveaddress_vgpr
legalized: true
regBankSelected: true
body: |
bb.0:
%0:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
G_STACKRESTORE %0
...
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# RUN: llc -march=amdgcn -mcpu=gfx1030 -run-pass=instruction-select -o - %s | FileCheck -check-prefix=GFX10-WAVE32 %s
# RUN: llc -march=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 -run-pass=instruction-select -o - %s | FileCheck -check-prefix=GFX10-WAVE64 %s

# The restored value comes straight from G_AMDGPU_WAVE_ADDRESS $sgpr32, so the
# checks expect a plain COPY into $sgpr32 with no shift for both wave sizes.
---
name: stackrestore_waveaddress_sgpr
legalized: true
regBankSelected: true
body: |
bb.0:
; GFX10-WAVE32-LABEL: name: stackrestore_waveaddress_sgpr
; GFX10-WAVE32: $sgpr32 = COPY $sgpr32
;
; GFX10-WAVE64-LABEL: name: stackrestore_waveaddress_sgpr
; GFX10-WAVE64: $sgpr32 = COPY $sgpr32
%0:sgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
G_STACKRESTORE %0
...

# Test that a direct copy of the stack pointer register ($sgpr32) is not special-cased: it is shifted like any other SGPR source.
---
name: stackrestore_direct_sp_sgpr
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $sgpr10
; GFX10-WAVE32-LABEL: name: stackrestore_direct_sp_sgpr
; GFX10-WAVE32: liveins: $sgpr10
; GFX10-WAVE32-NEXT: {{ $}}
; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr32
; GFX10-WAVE32-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 5, implicit-def dead $scc
; GFX10-WAVE32-NEXT: $sgpr32 = COPY [[S_LSHR_B32_]]
;
; GFX10-WAVE64-LABEL: name: stackrestore_direct_sp_sgpr
; GFX10-WAVE64: liveins: $sgpr10
; GFX10-WAVE64-NEXT: {{ $}}
; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr32
; GFX10-WAVE64-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 6, implicit-def dead $scc
; GFX10-WAVE64-NEXT: $sgpr32 = COPY [[S_LSHR_B32_]]
%0:sgpr(p5) = COPY $sgpr32
G_STACKRESTORE %0
...

# The restored value is a copy of an arbitrary SGPR ($sgpr10), so the checks
# expect an S_LSHR_B32 by 5 (wave32) or 6 (wave64) before the COPY into $sgpr32.
---
name: stackrestore_any_sgpr
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $sgpr10
; GFX10-WAVE32-LABEL: name: stackrestore_any_sgpr
; GFX10-WAVE32: liveins: $sgpr10
; GFX10-WAVE32-NEXT: {{ $}}
; GFX10-WAVE32-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr10
; GFX10-WAVE32-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 5, implicit-def dead $scc
; GFX10-WAVE32-NEXT: $sgpr32 = COPY [[S_LSHR_B32_]]
;
; GFX10-WAVE64-LABEL: name: stackrestore_any_sgpr
; GFX10-WAVE64: liveins: $sgpr10
; GFX10-WAVE64-NEXT: {{ $}}
; GFX10-WAVE64-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr10
; GFX10-WAVE64-NEXT: [[S_LSHR_B32_:%[0-9]+]]:sreg_32 = S_LSHR_B32 [[COPY]], 6, implicit-def dead $scc
; GFX10-WAVE64-NEXT: $sgpr32 = COPY [[S_LSHR_B32_]]
%0:sgpr(p5) = COPY $sgpr10
G_STACKRESTORE %0
...

13 changes: 9 additions & 4 deletions llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.invalid.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
; RUN: split-file %s %t
; RUN: not --crash llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -filetype=null %t/stacksave-error.ll 2>&1 | FileCheck -check-prefix=ERR-SAVE %s
; RUN: not --crash llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -filetype=null %t/stackrestore-error.ll 2>&1 | FileCheck -check-prefix=ERR-RESTORE %s
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -filetype=null %t/stacksave-error.ll 2>&1 | FileCheck -check-prefix=ERR-SAVE-SDAG %s
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -filetype=null %t/stackrestore-error.ll 2>&1 | FileCheck -check-prefix=ERR-RESTORE-SDAG %s

; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -filetype=null %t/stacksave-error.ll 2>&1 | FileCheck -check-prefix=ERR-SAVE-GISEL %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -filetype=null %t/stackrestore-error.ll 2>&1 | FileCheck -check-prefix=ERR-RESTORE-GISEL %s

; Test that an error is produced if stacksave/stackrestore are used
; with the wrong (default) address space.
Expand All @@ -9,7 +12,8 @@

declare ptr @llvm.stacksave.p0()

; ERR-SAVE: LLVM ERROR: Cannot select: {{.+}}: i64,ch = stacksave
; ERR-SAVE-SDAG: LLVM ERROR: Cannot select: {{.+}}: i64,ch = stacksave
; ERR-SAVE-GISEL: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(p0) = G_STACKSAVE (in function: func_store_stacksave)
define void @func_store_stacksave() {
%stacksave = call ptr @llvm.stacksave.p0()
call void asm sideeffect "; use $0", "s"(ptr %stacksave)
Expand All @@ -20,7 +24,8 @@ define void @func_store_stacksave() {

declare void @llvm.stackrestore.p0(ptr)

; ERR-RESTORE: LLVM ERROR: Cannot select: {{.+}}: ch = stackrestore {{.+}}, {{.+}}
; ERR-RESTORE-SDAG: LLVM ERROR: Cannot select: {{.+}}: ch = stackrestore {{.+}}, {{.+}}
; ERR-RESTORE-GISEL: LLVM ERROR: unable to legalize instruction: G_STACKRESTORE %{{[0-9]+}}:_(p0) (in function: func_stacksave_sgpr)
define amdgpu_gfx void @func_stacksave_sgpr(ptr inreg %stack) {
call void @llvm.stackrestore.p0(ptr %stack)
ret void
Expand Down

0 comments on commit 1030483

Please sign in to comment.