Revert "[AMDGPU] Move call clobbered return address registers s[30:31…
Browse files Browse the repository at this point in the history
…] to callee saved range"

This reverts commit 9075009.

 Failed amdgpu runtime buildbot # 3514
ronlieb committed Dec 22, 2021
1 parent ea22fdd commit 09b5329
Showing 97 changed files with 6,961 additions and 5,827 deletions.
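For context, the substance of this revert: non-shader returns go back to being lowered as explicit S_SETPC_B64_return / S_SETPC_B64_return_gfx instructions that carry the return address in a call-clobbered SGPR pair (CCR_SGPR_64 / Gfx_CCR_SGPR_64), instead of the SI_RETURN pseudo added by the reverted commit. Below is a minimal standalone C++ sketch of that opcode-selection logic; the enum types and the main() driver are illustrative stand-ins, and only the opcode names and the IsShader / AMDGPU_Gfx branching mirror AMDGPUCallLowering::lowerReturn in the diff that follows.

#include <cstdio>

// Illustrative stand-ins for LLVM's AMDGPU::* opcodes and CallingConv::* IDs;
// only the branching structure mirrors AMDGPUCallLowering::lowerReturn below.
enum class ReturnOpc { SI_RETURN_TO_EPILOG, S_SETPC_B64_return, S_SETPC_B64_return_gfx };
enum class CallConv { C, AMDGPU_Gfx };

// Shaders return to the epilog; the AMDGPU_Gfx calling convention selects the
// Gfx-specific return (return address allocated from Gfx_CCR_SGPR_64); all
// other callable functions use S_SETPC_B64_return (allocated from CCR_SGPR_64).
static ReturnOpc selectReturnOpc(bool IsShader, CallConv CC) {
  if (IsShader)
    return ReturnOpc::SI_RETURN_TO_EPILOG;
  if (CC == CallConv::AMDGPU_Gfx)
    return ReturnOpc::S_SETPC_B64_return_gfx;
  return ReturnOpc::S_SETPC_B64_return;
}

int main() {
  // A non-shader function with the default calling convention selects
  // S_SETPC_B64_return (prints 1 with the enum ordering above).
  std::printf("%d\n", static_cast<int>(selectReturnOpc(false, CallConv::C)));
  return 0;
}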
38 changes: 36 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -337,6 +337,7 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
FunctionLoweringInfo &FLI) const {

MachineFunction &MF = B.getMF();
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
MFI->setIfReturnsVoid(!Val);

@@ -352,15 +353,40 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
return true;
}

unsigned ReturnOpc =
IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN;
auto const &ST = MF.getSubtarget<GCNSubtarget>();

unsigned ReturnOpc = 0;
if (IsShader)
ReturnOpc = AMDGPU::SI_RETURN_TO_EPILOG;
else if (CC == CallingConv::AMDGPU_Gfx)
ReturnOpc = AMDGPU::S_SETPC_B64_return_gfx;
else
ReturnOpc = AMDGPU::S_SETPC_B64_return;

auto Ret = B.buildInstrNoInsert(ReturnOpc);
Register ReturnAddrVReg;
if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
Ret.addUse(ReturnAddrVReg);
} else if (ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) {
ReturnAddrVReg =
MRI.createVirtualRegister(&AMDGPU::Gfx_CCR_SGPR_64RegClass);
Ret.addUse(ReturnAddrVReg);
}

if (!FLI.CanLowerReturn)
insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
else if (!lowerReturnVal(B, Val, VRegs, Ret))
return false;

if (ReturnOpc == AMDGPU::S_SETPC_B64_return ||
ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) {
const SIRegisterInfo *TRI = ST.getRegisterInfo();
Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
&AMDGPU::SGPR_64RegClass);
B.buildCopy(ReturnAddrVReg, LiveInReturn);
}

// TODO: Handle CalleeSavedRegsViaCopy.

B.insertInstr(Ret);
@@ -575,6 +601,14 @@ bool AMDGPUCallLowering::lowerFormalArguments(
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());

if (!IsEntryFunc) {
Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
&AMDGPU::SGPR_64RegClass);
MBB.addLiveIn(ReturnAddrReg);
B.buildCopy(LiveInReturn, ReturnAddrReg);
}

if (Info->hasImplicitBufferPtr()) {
Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
8 changes: 2 additions & 6 deletions llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -152,10 +152,6 @@ def CSR_AMDGPU_AGPRs_32_255 : CalleeSavedRegs<
(sequence "AGPR%u", 32, 255)
>;

def CSR_AMDGPU_SGPRs_30_31 : CalleeSavedRegs<
(sequence "SGPR%u", 30, 31)
>;

def CSR_AMDGPU_SGPRs_32_105 : CalleeSavedRegs<
(sequence "SGPR%u", 32, 105)
>;
@@ -186,15 +182,15 @@ def CSR_AMDGPU_AllAllocatableSRegs : CalleeSavedRegs<
>;

def CSR_AMDGPU_HighRegs : CalleeSavedRegs<
(add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs_30_31, CSR_AMDGPU_SGPRs_32_105)
(add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs_32_105)
>;

def CSR_AMDGPU_HighRegs_With_AGPRs : CalleeSavedRegs<
(add CSR_AMDGPU_HighRegs, CSR_AMDGPU_AGPRs_32_255)
>;

def CSR_AMDGPU_SI_Gfx : CalleeSavedRegs<
(add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs_4_29, CSR_AMDGPU_SGPRs_30_31, CSR_AMDGPU_SI_Gfx_SGPRs_64_105)
(add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs_4_29, CSR_AMDGPU_SI_Gfx_SGPRs_64_105)
>;

def CSR_AMDGPU_SI_Gfx_With_AGPRs : CalleeSavedRegs<
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4398,6 +4398,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(TC_RETURN)
NODE_NAME_CASE(TRAP)
NODE_NAME_CASE(RET_FLAG)
NODE_NAME_CASE(RET_GFX_FLAG)
NODE_NAME_CASE(RETURN_TO_EPILOG)
NODE_NAME_CASE(ENDPGM)
NODE_NAME_CASE(DWORDADDR)
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -367,6 +367,9 @@ enum NodeType : unsigned {
// Return with values from a non-entry function.
RET_FLAG,

// Return with values from a non-entry function (AMDGPU_Gfx CC).
RET_GFX_FLAG,

DWORDADDR,
FRACT,

6 changes: 5 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -355,7 +355,11 @@ def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone,
def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;

def AMDGPUret_gfx_flag : SDNode<"AMDGPUISD::RET_GFX_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;

3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -120,7 +120,8 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
// FIXME: Should be able to handle this with emitPseudoExpansionLowering. We
// need to select it to the subtarget specific version, and there's no way to
// do that with a single pseudo source operation.
if (Opcode == AMDGPU::S_SETPC_B64_return)
if (Opcode == AMDGPU::S_SETPC_B64_return ||
Opcode == AMDGPU::S_SETPC_B64_return_gfx)
Opcode = AMDGPU::S_SETPC_B64;
else if (Opcode == AMDGPU::SI_CALL) {
// SI_CALL is just S_SWAPPC_B64 with an additional operand to track the
46 changes: 44 additions & 2 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2618,6 +2618,24 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SmallVector<SDValue, 48> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)

// Add return address for callable functions.
if (!Info->isEntryFunction()) {
const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
SDValue ReturnAddrReg = CreateLiveInRegister(
DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64);

SDValue ReturnAddrVirtualReg =
DAG.getRegister(MF.getRegInfo().createVirtualRegister(
CallConv != CallingConv::AMDGPU_Gfx
? &AMDGPU::CCR_SGPR_64RegClass
: &AMDGPU::Gfx_CCR_SGPR_64RegClass),
MVT::i64);
Chain =
DAG.getCopyToReg(Chain, DL, ReturnAddrVirtualReg, ReturnAddrReg, Flag);
Flag = Chain.getValue(1);
RetOps.push_back(ReturnAddrVirtualReg);
}

// Copy the result values into the output registers.
for (unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.size(); I != E;
++I, ++RealRVLocIdx) {
@@ -2674,8 +2692,15 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetOps.push_back(Flag);

unsigned Opc = AMDGPUISD::ENDPGM;
if (!IsWaveEnd)
Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG;
if (!IsWaveEnd) {
if (IsShader)
Opc = AMDGPUISD::RETURN_TO_EPILOG;
else if (CallConv == CallingConv::AMDGPU_Gfx)
Opc = AMDGPUISD::RET_GFX_FLAG;
else
Opc = AMDGPUISD::RET_FLAG;
}

return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}

@@ -3243,6 +3268,21 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}


SDValue PhysReturnAddrReg;
if (IsTailCall) {
// Since the return is being combined with the call, we need to pass on the
// return address.

const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
SDValue ReturnAddrReg = CreateLiveInRegister(
DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64);

PhysReturnAddrReg = DAG.getRegister(TRI->getReturnAddressReg(MF),
MVT::i64);
Chain = DAG.getCopyToReg(Chain, DL, PhysReturnAddrReg, ReturnAddrReg, InFlag);
InFlag = Chain.getValue(1);
}

// We don't usually want to end the call-sequence here because we would tidy
// the frame up *after* the call, however in the ABI-changing tail-call case
// we've carefully laid out the parameters so that when sp is reset they'll be
@@ -3272,6 +3312,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
// this information must travel along with the operation for eventual
// consumption by emitEpilogue.
Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));

Ops.push_back(PhysReturnAddrReg);
}

// Add argument registers to the end of the list so that they are known live
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -955,8 +955,8 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
// NOTE: this could be improved with knowledge of all call sites or
// with knowledge of the called routines.
if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
MI.getOpcode() == AMDGPU::SI_RETURN ||
MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
MI.getOpcode() == AMDGPU::S_SETPC_B64_return_gfx ||
(MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt()));
}
17 changes: 0 additions & 17 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2071,23 +2071,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
break;
}
case AMDGPU::SI_RETURN: {
const MachineFunction *MF = MBB.getParent();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
// Hiding the return address use with SI_RETURN may lead to extra kills in
// the function and missing live-ins. We are fine in practice because callee
// saved register handling ensures the register value is restored before
// RET, but we need the undef flag here to appease the MachineVerifier
// liveness checks.
MachineInstrBuilder MIB =
BuildMI(MBB, MI, DL, get(AMDGPU::S_SETPC_B64_return))
.addReg(TRI->getReturnAddressReg(*MF), RegState::Undef);

MIB.copyImplicitOps(MI);
MI.eraseFromParent();
break;
}
}
return true;
}
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -463,7 +463,7 @@ def SI_RETURN_TO_EPILOG : SPseudoInstSI <

// Return for returning function calls.
def SI_RETURN : SPseudoInstSI <
(outs), (ins), [(AMDGPUret_flag)],
(outs), (ins), [],
"; return"> {
let isTerminator = 1;
let isBarrier = 1;
13 changes: 5 additions & 8 deletions llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -79,8 +79,6 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *RI = ST.getRegisterInfo();

MachineBasicBlock::iterator I = SaveBlock.begin();
if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
@@ -91,8 +89,8 @@
MCRegister Reg = CS.getReg();

MachineInstrSpan MIS(I, &SaveBlock);
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
const TargetRegisterClass *RC =
TRI->getMinimalPhysRegClass(Reg, MVT::i32);

// If this value was already livein, we probably have a direct use of the
// incoming register value, so don't kill at the spill point. This happens
@@ -121,8 +119,7 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *RI = ST.getRegisterInfo();

// Restore all registers immediately before the return and any
// terminators that precede it.
MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
@@ -131,8 +128,8 @@
if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
for (const CalleeSavedInfo &CI : reverse(CSI)) {
unsigned Reg = CI.getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
const TargetRegisterClass *RC =
TRI->getMinimalPhysRegClass(Reg, MVT::i32);

TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
assert(I != RestoreBlock.begin() &&
17 changes: 17 additions & 0 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -701,6 +701,23 @@ def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16],
let HasSGPR = 1;
}

// CCR (call clobbered registers) SGPR 64-bit registers
def CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
(add (trunc SGPR_64, 16))> {
let CopyCost = SGPR_64.CopyCost;
let AllocationPriority = SGPR_64.AllocationPriority;
let HasSGPR = 1;
}

// Call clobbered 64-bit SGPRs for AMDGPU_Gfx CC
def Gfx_CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
(add (trunc (shl SGPR_64, 15), 1), // s[30:31]
(trunc (shl SGPR_64, 18), 14))> { // s[36:37]-s[62:63]
let CopyCost = SGPR_64.CopyCost;
let AllocationPriority = SGPR_64.AllocationPriority;
let HasSGPR = 1;
}

def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
(add TTMP_64Regs)> {
let isAllocatable = 0;
7 changes: 4 additions & 3 deletions llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -152,8 +152,8 @@ class SOP1_64_0 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
}

// 64-bit input, no output
class SOP1_1 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
opName, (outs), (ins SReg_64:$src0), "$src0", pattern> {
class SOP1_1 <string opName, RegisterClass rc = SReg_64, list<dag> pattern=[]> : SOP1_Pseudo <
opName, (outs), (ins rc:$src0), "$src0", pattern> {
let has_sdst = 0;
}

@@ -300,7 +300,8 @@ def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">;

let isReturn = 1 in {
// Define variant marked as return rather than branch.
def S_SETPC_B64_return : SOP1_1<"">;
def S_SETPC_B64_return : SOP1_1<"", CCR_SGPR_64, [(AMDGPUret_flag i64:$src0)]>;
def S_SETPC_B64_return_gfx : SOP1_1<"", Gfx_CCR_SGPR_64, [(AMDGPUret_gfx_flag i64:$src0)]>;
}
} // End isTerminator = 1, isBarrier = 1

