Skip to content

Commit

Permalink
Delay outgoing register assignments to last.
Browse files Browse the repository at this point in the history
The delayed stack protector feature which is currently used for SDAG (and thus
allows for more commonly generating tail calls) depends on being able to extract
the tail call into a separate return block. To do this it also has to extract
the vreg->physreg copies that set up the call's arguments, since if it doesn't
then the call inst ends up using undefined physregs in it's new spliced block.

SelectionDAG implementations can do this because they delay emitting register
copies until  *after* the stack arguments are set up. GISel however just
processes and emits the arguments in IR order, so stack arguments always end up
last, and thus this breaks the code that looks for any register arg copies that
precede the call instruction.

This patch adds a thunk argument to the assignValueToReg() and custom assignment
hooks. For outgoing arguments, register assignments use this return param to
return a thunk that does the actual generating of the copies. We collect these
until all the outgoing stack assignments have been done and then execute them,
so that the copies (and perhaps some artifacts like G_SEXTs) are placed after
any stores.

Differential Revision: https://reviews.llvm.org/D110610
  • Loading branch information
aemerson committed Oct 4, 2021
1 parent 01d696e commit 8bde5e5
Show file tree
Hide file tree
Showing 30 changed files with 465 additions and 414 deletions.
11 changes: 7 additions & 4 deletions llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
Expand Up @@ -262,7 +262,7 @@ class CallLowering {
/// handle the appropriate COPY (either to or from) and mark any
/// relevant uses/defines as needed.
virtual void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) = 0;
CCValAssign VA) = 0;

/// The specified value has been assigned to a stack
/// location. Load or store it there, with appropriate extension
Expand All @@ -282,11 +282,14 @@ class CallLowering {
}

/// Handle custom values, which may be passed into one or more of \p VAs.
/// \p If the handler wants the assignments to be delayed until after
/// mem loc assignments, then it sets \p Thunk to the thunk to do the
/// assignment.
/// \return The number of \p VAs that have been assigned after the first
/// one, and which should therefore be skipped from further
/// processing.
virtual unsigned assignCustomValue(ArgInfo &Arg,
ArrayRef<CCValAssign> VAs) {
virtual unsigned assignCustomValue(ArgInfo &Arg, ArrayRef<CCValAssign> VAs,
std::function<void()> *Thunk = nullptr) {
// This is not a pure virtual method because not all targets need to worry
// about custom values.
llvm_unreachable("Custom values not supported");
Expand Down Expand Up @@ -318,7 +321,7 @@ class CallLowering {

/// Provides a default implementation for argument handling.
void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override;
CCValAssign VA) override;
};

/// Base class for ValueHandlers used for arguments passed to a function call,
Expand Down
34 changes: 30 additions & 4 deletions llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
Expand Up @@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
Expand Down Expand Up @@ -617,14 +618,31 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,

const unsigned NumArgs = Args.size();

// Stores thunks for outgoing register assignments. This is used so we delay
// generating register copies until mem loc assignments are done. We do this
// so that if the target is using the delayed stack protector feature, we can
// find the split point of the block accurately. E.g. if we have:
// G_STORE %val, %memloc
// $x0 = COPY %foo
// $x1 = COPY %bar
// CALL func
// ... then the split point for the block will correctly be at, and including,
// the copy to $x0. If instead the G_STORE instruction immediately precedes
// the CALL, then we'd prematurely choose the CALL as the split point, thus
// generating a split block with a CALL that uses undefined physregs.
SmallVector<std::function<void()>> DelayedOutgoingRegAssignments;

for (unsigned i = 0, j = 0; i != NumArgs; ++i, ++j) {
assert(j < ArgLocs.size() && "Skipped too many arg locs");
CCValAssign &VA = ArgLocs[j];
assert(VA.getValNo() == i && "Location doesn't correspond to current arg");

if (VA.needsCustom()) {
unsigned NumArgRegs =
Handler.assignCustomValue(Args[i], makeArrayRef(ArgLocs).slice(j));
std::function<void()> Thunk;
unsigned NumArgRegs = Handler.assignCustomValue(
Args[i], makeArrayRef(ArgLocs).slice(j), &Thunk);
if (Thunk)
DelayedOutgoingRegAssignments.emplace_back(Thunk);
if (!NumArgRegs)
return false;
j += NumArgRegs;
Expand Down Expand Up @@ -743,7 +761,13 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
continue;
}

Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
if (Handler.isIncomingArgumentHandler())
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
else {
DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
});
}
}

// Now that all pieces have been assigned, re-pack the register typed values
Expand All @@ -757,6 +781,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,

j += NumParts - 1;
}
for (auto &Fn : DelayedOutgoingRegAssignments)
Fn();

return true;
}
Expand Down Expand Up @@ -1157,7 +1183,7 @@ static bool isCopyCompatibleType(LLT SrcTy, LLT DstTy) {

void CallLowering::IncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
CCValAssign &VA) {
CCValAssign VA) {
const MVT LocVT = VA.getLocVT();
const LLT LocTy(LocVT);
const LLT RegTy = MRI.getType(ValVReg);
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
Expand Up @@ -156,7 +156,7 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
}

void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
CCValAssign VA) override {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
Expand Down Expand Up @@ -281,7 +281,7 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
}

void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
CCValAssign VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
Expand Up @@ -60,7 +60,7 @@ struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}

void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
CCValAssign VA) override {
Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);

// If this is a scalar return, insert a readfirstlane just in case the value
Expand Down Expand Up @@ -103,7 +103,7 @@ struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
}

void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
CCValAssign VA) override {
markPhysRegUsed(PhysReg);

if (VA.getLocVT().getSizeInBits() < 32) {
Expand Down Expand Up @@ -203,7 +203,7 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
}

void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
CCValAssign VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegisterMin32(*this, ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
Expand Down
19 changes: 14 additions & 5 deletions llvm/lib/Target/ARM/ARMCallLowering.cpp
Expand Up @@ -45,6 +45,7 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <utility>

using namespace llvm;
Expand Down Expand Up @@ -109,7 +110,7 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}

void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
CCValAssign VA) override {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");

Expand All @@ -130,7 +131,8 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}

unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
ArrayRef<CCValAssign> VAs) override {
ArrayRef<CCValAssign> VAs,
std::function<void()> *Thunk) override {
assert(Arg.Regs.size() == 1 && "Can't handle multple regs yet");

CCValAssign VA = VAs[0];
Expand Down Expand Up @@ -158,9 +160,15 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
if (!IsLittle)
std::swap(NewRegs[0], NewRegs[1]);

if (Thunk) {
*Thunk = [=]() {
assignValueToReg(NewRegs[0], VA.getLocReg(), VA);
assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA);
};
return 1;
}
assignValueToReg(NewRegs[0], VA.getLocReg(), VA);
assignValueToReg(NewRegs[1], NextVA.getLocReg(), NextVA);

return 1;
}

Expand Down Expand Up @@ -273,7 +281,7 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
}

void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
CCValAssign VA) override {
assert(VA.isRegLoc() && "Value shouldn't be assigned to reg");
assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");

Expand All @@ -298,7 +306,8 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
}

unsigned assignCustomValue(ARMCallLowering::ArgInfo &Arg,
ArrayRef<CCValAssign> VAs) override {
ArrayRef<CCValAssign> VAs,
std::function<void()> *Thunk) override {
assert(Arg.Regs.size() == 1 && "Can't handle multple regs yet");

CCValAssign VA = VAs[0];
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/M68k/GlSel/M68kCallLowering.cpp
Expand Up @@ -33,7 +33,7 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
: OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB) {}

void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
CCValAssign VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
Expand Down Expand Up @@ -110,7 +110,7 @@ bool M68kCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,

void M68kIncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
CCValAssign &VA) {
CCValAssign VA) {
MIRBuilder.getMRI()->addLiveIn(PhysReg);
MIRBuilder.getMBB().addLiveIn(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/M68k/GlSel/M68kCallLowering.h
Expand Up @@ -52,7 +52,7 @@ struct M68kIncomingValueHandler : public CallLowering::IncomingValueHandler {

private:
void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override;
CCValAssign VA) override;

void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override;
Expand Down
29 changes: 21 additions & 8 deletions llvm/lib/Target/Mips/MipsCallLowering.cpp
Expand Up @@ -92,7 +92,7 @@ class MipsIncomingValueHandler : public CallLowering::IncomingValueHandler {

private:
void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override;
CCValAssign VA) override;

Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
Expand All @@ -101,7 +101,8 @@ class MipsIncomingValueHandler : public CallLowering::IncomingValueHandler {
MachinePointerInfo &MPO, CCValAssign &VA) override;

unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
ArrayRef<CCValAssign> VAs) override;
ArrayRef<CCValAssign> VAs,
std::function<void()> *Thunk = nullptr) override;

virtual void markPhysRegUsed(unsigned PhysReg) {
MIRBuilder.getMRI()->addLiveIn(PhysReg);
Expand All @@ -127,7 +128,7 @@ class CallReturnHandler : public MipsIncomingValueHandler {

void MipsIncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
CCValAssign &VA) {
CCValAssign VA) {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
Expand Down Expand Up @@ -163,7 +164,8 @@ void MipsIncomingValueHandler::assignValueToAddress(Register ValVReg,
/// dependent on other arguments.
unsigned
MipsIncomingValueHandler::assignCustomValue(CallLowering::ArgInfo &Arg,
ArrayRef<CCValAssign> VAs) {
ArrayRef<CCValAssign> VAs,
std::function<void()> *Thunk) {
const CCValAssign &VALo = VAs[0];
const CCValAssign &VAHi = VAs[1];

Expand Down Expand Up @@ -197,7 +199,7 @@ class MipsOutgoingValueHandler : public CallLowering::OutgoingValueHandler {

private:
void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override;
CCValAssign VA) override;

Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
Expand All @@ -206,15 +208,16 @@ class MipsOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override;
unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
ArrayRef<CCValAssign> VAs) override;
ArrayRef<CCValAssign> VAs,
std::function<void()> *Thunk) override;

MachineInstrBuilder &MIB;
};
} // end anonymous namespace

void MipsOutgoingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
CCValAssign &VA) {
CCValAssign VA) {
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
MIB.addUse(PhysReg, RegState::Implicit);
Expand Down Expand Up @@ -253,7 +256,8 @@ void MipsOutgoingValueHandler::assignValueToAddress(Register ValVReg,

unsigned
MipsOutgoingValueHandler::assignCustomValue(CallLowering::ArgInfo &Arg,
ArrayRef<CCValAssign> VAs) {
ArrayRef<CCValAssign> VAs,
std::function<void()> *Thunk) {
const CCValAssign &VALo = VAs[0];
const CCValAssign &VAHi = VAs[1];

Expand All @@ -271,6 +275,15 @@ MipsOutgoingValueHandler::assignCustomValue(CallLowering::ArgInfo &Arg,
if (!STI.isLittle())
std::swap(Lo, Hi);

// If we can return a thunk, just include the register copies. The unmerge can
// be emitted earlier.
if (Thunk) {
*Thunk = [=]() {
MIRBuilder.buildCopy(VALo.getLocReg(), Lo);
MIRBuilder.buildCopy(VAHi.getLocReg(), Hi);
};
return 2;
}
MIRBuilder.buildCopy(VALo.getLocReg(), Lo);
MIRBuilder.buildCopy(VAHi.getLocReg(), Hi);
return 2;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/PowerPC/GISel/PPCCallLowering.cpp
Expand Up @@ -80,7 +80,7 @@ bool PPCCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,

void PPCIncomingValueHandler::assignValueToReg(Register ValVReg,
Register PhysReg,
CCValAssign &VA) {
CCValAssign VA) {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/PowerPC/GISel/PPCCallLowering.h
Expand Up @@ -46,7 +46,7 @@ class PPCIncomingValueHandler : public CallLowering::IncomingValueHandler {

private:
void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override;
CCValAssign VA) override;

void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
MachinePointerInfo &MPO, CCValAssign &VA) override;
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/X86/X86CallLowering.cpp
Expand Up @@ -105,7 +105,7 @@ struct X86OutgoingValueHandler : public CallLowering::OutgoingValueHandler {
}

void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
CCValAssign VA) override {
MIB.addUse(PhysReg, RegState::Implicit);
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
Expand Down Expand Up @@ -195,7 +195,7 @@ struct X86IncomingValueHandler : public CallLowering::IncomingValueHandler {
}

void assignValueToReg(Register ValVReg, Register PhysReg,
CCValAssign &VA) override {
CCValAssign VA) override {
markPhysRegUsed(PhysReg);
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}
Expand Down

0 comments on commit 8bde5e5

Please sign in to comment.