99 changes: 44 additions & 55 deletions llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -491,17 +491,19 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
B.buildUnmerge(UnmergeResults, UnmergeSrc);
}

bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
SmallVectorImpl<ArgInfo> &Args,
ValueHandler &Handler,
CallingConv::ID CallConv, bool IsVarArg,
Register ThisReturnReg) const {
/// Convenience wrapper that runs both phases of argument lowering for
/// \p Args in one call.
///
/// A fresh CCState is built over a function-local ArgLocs vector using
/// \p CallConv, \p IsVarArg and the current function's LLVMContext. The
/// locations are first computed by determineAssignments with \p Assigner and
/// are then passed, together with \p Handler, \p MIRBuilder and
/// \p ThisReturnReg, to handleAssignments.
///
/// \returns false if determineAssignments fails; otherwise whatever
/// handleAssignments returns.
bool CallLowering::determineAndHandleAssignments(
ValueHandler &Handler, ValueAssigner &Assigner,
SmallVectorImpl<ArgInfo> &Args, MachineIRBuilder &MIRBuilder,
CallingConv::ID CallConv, bool IsVarArg, Register ThisReturnReg) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
// Receives one CCValAssign per assigned value; owned by this call only.
SmallVector<CCValAssign, 16> ArgLocs;

CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, F.getContext());
return handleAssignments(CCInfo, ArgLocs, MIRBuilder, Args, Handler,
// Phase 1: choose locations. Nothing is handed to the handler on failure.
if (!determineAssignments(Assigner, Args, CCInfo))
return false;

// Phase 2: hand the chosen locations to the handler.
return handleAssignments(Handler, Args, CCInfo, ArgLocs, MIRBuilder,
ThisReturnReg);
}

Expand All @@ -513,33 +515,27 @@ static unsigned extendOpFromFlags(llvm::ISD::ArgFlagsTy Flags) {
return TargetOpcode::G_ANYEXT;
}

bool CallLowering::handleAssignments(CCState &CCInfo,
SmallVectorImpl<CCValAssign> &ArgLocs,
MachineIRBuilder &MIRBuilder,
SmallVectorImpl<ArgInfo> &Args,
ValueHandler &Handler,
Register ThisReturnReg) const {
MachineFunction &MF = MIRBuilder.getMF();
MachineRegisterInfo &MRI = MF.getRegInfo();
const Function &F = MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();
bool CallLowering::determineAssignments(ValueAssigner &Assigner,
SmallVectorImpl<ArgInfo> &Args,
CCState &CCInfo) const {
LLVMContext &Ctx = CCInfo.getContext();
const CallingConv::ID CallConv = CCInfo.getCallingConv();

unsigned NumArgs = Args.size();
for (unsigned i = 0; i != NumArgs; ++i) {
EVT CurVT = EVT::getEVT(Args[i].Ty);

MVT NewVT = TLI->getRegisterTypeForCallingConv(
F.getContext(), CCInfo.getCallingConv(), CurVT);
MVT NewVT = TLI->getRegisterTypeForCallingConv(Ctx, CallConv, CurVT);

// If we need to split the type over multiple regs, check it's a scenario
// we currently support.
unsigned NumParts = TLI->getNumRegistersForCallingConv(
F.getContext(), CCInfo.getCallingConv(), CurVT);
unsigned NumParts =
TLI->getNumRegistersForCallingConv(Ctx, CallConv, CurVT);

if (NumParts == 1) {
// Try to use the register type if we couldn't assign the VT.
if (Handler.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i],
Args[i].Flags[0], CCInfo))
if (Assigner.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i],
Args[i].Flags[0], CCInfo))
return false;
continue;
}
Expand Down Expand Up @@ -568,7 +564,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
Flags.setSplitEnd();
}

if (!Handler.isIncomingArgumentHandler()) {
if (!Assigner.isIncomingArgumentHandler()) {
// TODO: Also check if there is a valid extension that preserves the
// bits. However currently this call lowering doesn't support non-exact
// split parts, so that can't be tested.
Expand All @@ -579,14 +575,30 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
}

Args[i].Flags.push_back(Flags);
if (Handler.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i],
Args[i].Flags[Part], CCInfo)) {
if (Assigner.assignArg(i, CurVT, NewVT, NewVT, CCValAssign::Full, Args[i],
Args[i].Flags[Part], CCInfo)) {
// Still couldn't assign this smaller part type for some reason.
return false;
}
}
}

return true;
}

bool CallLowering::handleAssignments(ValueHandler &Handler,
SmallVectorImpl<ArgInfo> &Args,
CCState &CCInfo,
SmallVectorImpl<CCValAssign> &ArgLocs,
MachineIRBuilder &MIRBuilder,
Register ThisReturnReg) const {
MachineFunction &MF = MIRBuilder.getMF();
MachineRegisterInfo &MRI = MF.getRegInfo();
const Function &F = MF.getFunction();
const DataLayout &DL = F.getParent()->getDataLayout();

const unsigned NumArgs = Args.size();

for (unsigned i = 0, j = 0; i != NumArgs; ++i, ++j) {
assert(j < ArgLocs.size() && "Skipped too many arg locs");
CCValAssign &VA = ArgLocs[j];
Expand Down Expand Up @@ -652,7 +664,7 @@ bool CallLowering::handleAssignments(CCState &CCInfo,

// TODO: The memory size may be larger than the value we need to
// store. We may need to adjust the offset for big endian targets.
uint64_t MemSize = Handler.getStackValueStoreSize(VA);
uint64_t MemSize = Handler.getStackValueStoreSize(DL, VA);

MachinePointerInfo MPO;
Register StackAddr =
Expand Down Expand Up @@ -880,23 +892,6 @@ bool CallLowering::checkReturnTypeForCallConv(MachineFunction &MF) const {
return canLowerReturn(MF, CallConv, SplitArgs, F.isVarArg());
}

/// Runs a calling-convention assignment function over every entry of
/// \p Args, accumulating the results in \p CCState.
///
/// Arguments with IsFixed set are assigned with \p AssignFnFixed, all others
/// with \p AssignFnVarArg. The value type is derived directly from the IR type
/// of each argument, and only its first flag set (Flags[0]) is consulted.
///
/// \returns true if every argument was assigned; false as soon as one
/// assignment function returns true (which this code treats as failure).
bool CallLowering::analyzeArgInfo(CCState &CCState,
                                  SmallVectorImpl<ArgInfo> &Args,
                                  CCAssignFn &AssignFnFixed,
                                  CCAssignFn &AssignFnVarArg) const {
  for (unsigned i = 0, e = Args.size(); i < e; ++i) {
    MVT VT = MVT::getVT(Args[i].Ty);
    CCAssignFn &Fn = Args[i].IsFixed ? AssignFnFixed : AssignFnVarArg;
    if (Fn(i, VT, VT, CCValAssign::Full, Args[i].Flags[0], CCState)) {
      // Bail out on anything we can't handle.
      LLVM_DEBUG(dbgs() << "Cannot analyze " << EVT(VT).getEVTString()
                        << " (arg number = " << i << ")\n");
      return false;
    }
  }
  return true;
}

bool CallLowering::parametersInCSRMatch(
const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
const SmallVectorImpl<CCValAssign> &OutLocs,
Expand Down Expand Up @@ -952,10 +947,8 @@ bool CallLowering::parametersInCSRMatch(
bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
MachineFunction &MF,
SmallVectorImpl<ArgInfo> &InArgs,
CCAssignFn &CalleeAssignFnFixed,
CCAssignFn &CalleeAssignFnVarArg,
CCAssignFn &CallerAssignFnFixed,
CCAssignFn &CallerAssignFnVarArg) const {
ValueAssigner &CalleeAssigner,
ValueAssigner &CallerAssigner) const {
const Function &F = MF.getFunction();
CallingConv::ID CalleeCC = Info.CallConv;
CallingConv::ID CallerCC = F.getCallingConv();
Expand All @@ -965,14 +958,12 @@ bool CallLowering::resultsCompatible(CallLoweringInfo &Info,

SmallVector<CCValAssign, 16> ArgLocs1;
CCState CCInfo1(CalleeCC, false, MF, ArgLocs1, F.getContext());
if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFnFixed,
CalleeAssignFnVarArg))
if (!determineAssignments(CalleeAssigner, InArgs, CCInfo1))
return false;

SmallVector<CCValAssign, 16> ArgLocs2;
CCState CCInfo2(CallerCC, false, MF, ArgLocs2, F.getContext());
if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFnFixed,
CalleeAssignFnVarArg))
if (!determineAssignments(CallerAssigner, InArgs, CCInfo2))
return false;

// We need the argument locations to match up exactly. If there's more in
Expand Down Expand Up @@ -1008,13 +999,11 @@ bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
}

/// Returns the store size to use for the value described by \p VA.
///
/// For every value type other than MVT::iPTR this is simply the store size of
/// the value type. MVT::iPTR has no size of its own here, so the pointer size
/// is queried from the DataLayout instead.
uint64_t CallLowering::ValueHandler::getStackValueStoreSize(
const CCValAssign &VA) const {
const DataLayout &DL, const CCValAssign &VA) const {
const EVT ValVT = VA.getValVT();
// Common case: a concrete value type knows its own store size.
if (ValVT != MVT::iPTR)
return ValVT.getStoreSize();

const DataLayout &DL = MIRBuilder.getDataLayout();

/// FIXME: We need to get the correct pointer address space.
// getPointerSize() is called without an address-space argument here.
return DL.getPointerSize();
}
Expand Down Expand Up @@ -1082,7 +1071,7 @@ Register CallLowering::ValueHandler::extendRegister(Register ValReg,
llvm_unreachable("unable to extend register");
}

// Intentionally empty out-of-line definitions of anchor().
// NOTE(review): presumably the LLVM "virtual method anchor" idiom, which pins
// a class's vtable to this translation unit. Confirm that anchor() is
// declared virtual in the header.
void CallLowering::ValueHandler::anchor() {}
void CallLowering::ValueAssigner::anchor() {}

Register CallLowering::IncomingValueHandler::buildExtensionHint(CCValAssign &VA,
Register SrcReg,
Expand Down
224 changes: 131 additions & 93 deletions llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp

Large diffs are not rendered by default.

58 changes: 32 additions & 26 deletions llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ static Register extendRegisterMin32(CallLowering::ValueHandler &Handler,

struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
AMDGPUOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
MachineInstrBuilder MIB, CCAssignFn *AssignFn)
: OutgoingValueHandler(B, MRI, AssignFn), MIB(MIB) {}
MachineInstrBuilder MIB)
: OutgoingValueHandler(B, MRI), MIB(MIB) {}

MachineInstrBuilder MIB;

Expand Down Expand Up @@ -83,9 +83,8 @@ struct AMDGPUOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
uint64_t StackUsed = 0;

AMDGPUIncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
CCAssignFn *AssignFn)
: IncomingValueHandler(B, MRI, AssignFn) {}
AMDGPUIncomingArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
: IncomingValueHandler(B, MRI) {}

Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
Expand Down Expand Up @@ -145,9 +144,8 @@ struct AMDGPUIncomingArgHandler : public CallLowering::IncomingValueHandler {
};

struct FormalArgHandler : public AMDGPUIncomingArgHandler {
FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
CCAssignFn *AssignFn)
: AMDGPUIncomingArgHandler(B, MRI, AssignFn) {}
FormalArgHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
: AMDGPUIncomingArgHandler(B, MRI) {}

void markPhysRegUsed(unsigned PhysReg) override {
MIRBuilder.getMBB().addLiveIn(PhysReg);
Expand All @@ -156,8 +154,8 @@ struct FormalArgHandler : public AMDGPUIncomingArgHandler {

struct CallReturnHandler : public AMDGPUIncomingArgHandler {
CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
MachineInstrBuilder MIB, CCAssignFn *AssignFn)
: AMDGPUIncomingArgHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
MachineInstrBuilder MIB)
: AMDGPUIncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}

void markPhysRegUsed(unsigned PhysReg) override {
MIB.addDef(PhysReg, RegState::Implicit);
Expand All @@ -167,8 +165,6 @@ struct CallReturnHandler : public AMDGPUIncomingArgHandler {
};

struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {
CCAssignFn *AssignFnVarArg;

/// For tail calls, the byte offset of the call's argument area from the
/// callee's. Unused elsewhere.
int FPDiff;
Expand All @@ -180,11 +176,9 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUOutgoingValueHandler {

AMDGPUOutgoingArgHandler(MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI, MachineInstrBuilder MIB,
CCAssignFn *AssignFn, CCAssignFn *AssignFnVarArg,
bool IsTailCall = false, int FPDiff = 0)
: AMDGPUOutgoingValueHandler(MIRBuilder, MRI, MIB, AssignFn),
AssignFnVarArg(AssignFnVarArg), FPDiff(FPDiff), IsTailCall(IsTailCall) {
}
: AMDGPUOutgoingValueHandler(MIRBuilder, MRI, MIB), FPDiff(FPDiff),
IsTailCall(IsTailCall) {}

Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
Expand Down Expand Up @@ -339,8 +333,11 @@ bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
}

CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret, AssignFn);
return handleAssignments(B, SplitRetInfos, RetHandler, CC, F.isVarArg());

OutgoingValueAssigner Assigner(AssignFn);
AMDGPUOutgoingValueHandler RetHandler(B, *MRI, Ret);
return determineAndHandleAssignments(RetHandler, Assigner, SplitRetInfos, B,
CC, F.isVarArg());
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
Expand Down Expand Up @@ -709,8 +706,12 @@ bool AMDGPUCallLowering::lowerFormalArguments(
TLI.allocateSpecialInputVGPRsFixed(CCInfo, MF, *TRI, *Info);
}

FormalArgHandler Handler(B, MRI, AssignFn);
if (!handleAssignments(CCInfo, ArgLocs, B, SplitArgs, Handler))
IncomingValueAssigner Assigner(AssignFn);
if (!determineAssignments(Assigner, SplitArgs, CCInfo))
return false;

FormalArgHandler Handler(B, MRI);
if (!handleAssignments(Handler, SplitArgs, CCInfo, ArgLocs, B))
return false;

if (!IsEntryFunc && !AMDGPUTargetMachine::EnableFixedFunctionABI) {
Expand Down Expand Up @@ -999,9 +1000,13 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,

// Do the actual argument marshalling.
SmallVector<Register, 8> PhysRegs;
AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
AssignFnVarArg, false);
if (!handleAssignments(CCInfo, ArgLocs, MIRBuilder, OutArgs, Handler))

OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg);
if (!determineAssignments(Assigner, OutArgs, CCInfo))
return false;

AMDGPUOutgoingArgHandler Handler(MIRBuilder, MRI, MIB, false);
if (!handleAssignments(Handler, OutArgs, CCInfo, ArgLocs, MIRBuilder))
return false;

const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Expand Down Expand Up @@ -1045,9 +1050,10 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv,
Info.IsVarArg);
CallReturnHandler Handler(MIRBuilder, MRI, MIB, RetAssignFn);
if (!handleAssignments(MIRBuilder, InArgs, Handler, Info.CallConv,
Info.IsVarArg))
OutgoingValueAssigner Assigner(RetAssignFn);
CallReturnHandler Handler(MIRBuilder, MRI, MIB);
if (!determineAndHandleAssignments(Handler, Assigner, InArgs, MIRBuilder,
Info.CallConv, Info.IsVarArg))
return false;
}

Expand Down
70 changes: 31 additions & 39 deletions llvm/lib/Target/ARM/ARMCallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,8 @@ namespace {
/// function return values and call parameters).
struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
ARMOutgoingValueHandler(MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI, MachineInstrBuilder &MIB,
CCAssignFn *AssignFn)
: OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
MachineRegisterInfo &MRI, MachineInstrBuilder &MIB)
: OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB) {}

Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
Expand Down Expand Up @@ -169,20 +168,7 @@ struct ARMOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
return 1;
}

bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
CCState &State) override {
if (AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State))
return true;

StackSize =
std::max(StackSize, static_cast<uint64_t>(State.getNextStackOffset()));
return false;
}

MachineInstrBuilder MIB;
uint64_t StackSize = 0;
};

} // end anonymous namespace
Expand Down Expand Up @@ -213,10 +199,11 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
CCAssignFn *AssignFn =
TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());

ARMOutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret,
AssignFn);
return handleAssignments(MIRBuilder, SplitRetInfos, RetHandler,
F.getCallingConv(), F.isVarArg());
OutgoingValueAssigner RetAssigner(AssignFn);
ARMOutgoingValueHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret);
return determineAndHandleAssignments(RetHandler, RetAssigner, SplitRetInfos,
MIRBuilder, F.getCallingConv(),
F.isVarArg());
}

bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
Expand All @@ -241,8 +228,8 @@ namespace {
/// formal arguments and call return values).
struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
ARMIncomingValueHandler(MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI, CCAssignFn AssignFn)
: IncomingValueHandler(MIRBuilder, MRI, AssignFn) {}
MachineRegisterInfo &MRI)
: IncomingValueHandler(MIRBuilder, MRI) {}

Register getStackAddress(uint64_t Size, int64_t Offset,
MachinePointerInfo &MPO,
Expand Down Expand Up @@ -360,9 +347,8 @@ struct ARMIncomingValueHandler : public CallLowering::IncomingValueHandler {
};

struct FormalArgHandler : public ARMIncomingValueHandler {
FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
CCAssignFn AssignFn)
: ARMIncomingValueHandler(MIRBuilder, MRI, AssignFn) {}
FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
: ARMIncomingValueHandler(MIRBuilder, MRI) {}

void markPhysRegUsed(unsigned PhysReg) override {
MIRBuilder.getMRI()->addLiveIn(PhysReg);
Expand Down Expand Up @@ -403,8 +389,8 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
CCAssignFn *AssignFn =
TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg());

FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(),
AssignFn);
OutgoingValueAssigner ArgAssigner(AssignFn);
FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo());

SmallVector<ArgInfo, 8> SplitArgInfos;
unsigned Idx = 0;
Expand All @@ -420,8 +406,9 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
if (!MBB.empty())
MIRBuilder.setInstr(*MBB.begin());

if (!handleAssignments(MIRBuilder, SplitArgInfos, ArgHandler,
F.getCallingConv(), F.isVarArg()))
if (!determineAndHandleAssignments(ArgHandler, ArgAssigner, SplitArgInfos,
MIRBuilder, F.getCallingConv(),
F.isVarArg()))
return false;

// Move back to the end of the basic block.
Expand All @@ -433,8 +420,8 @@ namespace {

struct CallReturnHandler : public ARMIncomingValueHandler {
CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
MachineInstrBuilder MIB, CCAssignFn *AssignFn)
: ARMIncomingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
MachineInstrBuilder MIB)
: ARMIncomingValueHandler(MIRBuilder, MRI), MIB(MIB) {}

void markPhysRegUsed(unsigned PhysReg) override {
MIB.addDef(PhysReg, RegState::Implicit);
Expand Down Expand Up @@ -513,9 +500,10 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &
}

auto ArgAssignFn = TLI.CCAssignFnForCall(Info.CallConv, Info.IsVarArg);
ARMOutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB, ArgAssignFn);
if (!handleAssignments(MIRBuilder, ArgInfos, ArgHandler, Info.CallConv,
Info.IsVarArg))
OutgoingValueAssigner ArgAssigner(ArgAssignFn);
ARMOutgoingValueHandler ArgHandler(MIRBuilder, MRI, MIB);
if (!determineAndHandleAssignments(ArgHandler, ArgAssigner, ArgInfos,
MIRBuilder, Info.CallConv, Info.IsVarArg))
return false;

// Now we can add the actual call instruction to the correct basic block.
Expand All @@ -528,18 +516,22 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &
ArgInfos.clear();
splitToValueTypes(Info.OrigRet, ArgInfos, DL, Info.CallConv);
auto RetAssignFn = TLI.CCAssignFnForReturn(Info.CallConv, Info.IsVarArg);
CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn);
if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler, Info.CallConv,
Info.IsVarArg))
OutgoingValueAssigner Assigner(RetAssignFn);
CallReturnHandler RetHandler(MIRBuilder, MRI, MIB);
if (!determineAndHandleAssignments(RetHandler, Assigner, ArgInfos,
MIRBuilder, Info.CallConv,
Info.IsVarArg))
return false;
}

// We now know the size of the stack - update the ADJCALLSTACKDOWN
// accordingly.
CallSeqStart.addImm(ArgHandler.StackSize).addImm(0).add(predOps(ARMCC::AL));
CallSeqStart.addImm(ArgAssigner.StackOffset)
.addImm(0)
.add(predOps(ARMCC::AL));

MIRBuilder.buildInstr(ARM::ADJCALLSTACKUP)
.addImm(ArgHandler.StackSize)
.addImm(ArgAssigner.StackOffset)
.addImm(0)
.add(predOps(ARMCC::AL));

Expand Down
103 changes: 57 additions & 46 deletions llvm/lib/Target/X86/X86CallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,39 @@ X86CallLowering::X86CallLowering(const X86TargetLowering &TLI)

namespace {

struct X86OutgoingValueAssigner : public CallLowering::OutgoingValueAssigner {
private:
uint64_t StackSize = 0;
unsigned NumXMMRegs = 0;

public:
uint64_t getStackSize() { return StackSize; }
unsigned getNumXmmRegs() { return NumXMMRegs; }

X86OutgoingValueAssigner(CCAssignFn *AssignFn_)
: CallLowering::OutgoingValueAssigner(AssignFn_) {}

bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
CCState &State) override {
bool Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
StackSize = State.getNextStackOffset();

static const MCPhysReg XMMArgRegs[] = {X86::XMM0, X86::XMM1, X86::XMM2,
X86::XMM3, X86::XMM4, X86::XMM5,
X86::XMM6, X86::XMM7};
if (!Info.IsFixed)
NumXMMRegs = State.getFirstUnallocated(XMMArgRegs);

return Res;
}
};

struct X86OutgoingValueHandler : public CallLowering::OutgoingValueHandler {
X86OutgoingValueHandler(MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI, MachineInstrBuilder &MIB,
CCAssignFn *AssignFn)
: OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
MachineRegisterInfo &MRI, MachineInstrBuilder &MIB)
: OutgoingValueHandler(MIRBuilder, MRI), MIB(MIB),
DL(MIRBuilder.getMF().getDataLayout()),
STI(MIRBuilder.getMF().getSubtarget<X86Subtarget>()) {}

Expand Down Expand Up @@ -94,31 +122,10 @@ struct X86OutgoingValueHandler : public CallLowering::OutgoingValueHandler {
MIRBuilder.buildStore(ExtReg, Addr, *MMO);
}

bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
CCState &State) override {
bool Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
StackSize = State.getNextStackOffset();

static const MCPhysReg XMMArgRegs[] = {X86::XMM0, X86::XMM1, X86::XMM2,
X86::XMM3, X86::XMM4, X86::XMM5,
X86::XMM6, X86::XMM7};
if (!Info.IsFixed)
NumXMMRegs = State.getFirstUnallocated(XMMArgRegs);

return Res;
}

uint64_t getStackSize() { return StackSize; }
uint64_t getNumXmmRegs() { return NumXMMRegs; }

protected:
MachineInstrBuilder &MIB;
uint64_t StackSize = 0;
const DataLayout &DL;
const X86Subtarget &STI;
unsigned NumXMMRegs = 0;
};

} // end anonymous namespace
Expand All @@ -142,9 +149,11 @@ bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
SmallVector<ArgInfo, 4> SplitRetInfos;
splitToValueTypes(OrigRetInfo, SplitRetInfos, DL, F.getCallingConv());

X86OutgoingValueHandler Handler(MIRBuilder, MRI, MIB, RetCC_X86);
if (!handleAssignments(MIRBuilder, SplitRetInfos, Handler, F.getCallingConv(),
F.isVarArg()))
X86OutgoingValueAssigner Assigner(RetCC_X86);
X86OutgoingValueHandler Handler(MIRBuilder, MRI, MIB);
if (!determineAndHandleAssignments(Handler, Assigner, SplitRetInfos,
MIRBuilder, F.getCallingConv(),
F.isVarArg()))
return false;
}

Expand All @@ -156,8 +165,8 @@ namespace {

struct X86IncomingValueHandler : public CallLowering::IncomingValueHandler {
X86IncomingValueHandler(MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI, CCAssignFn *AssignFn)
: IncomingValueHandler(MIRBuilder, MRI, AssignFn),
MachineRegisterInfo &MRI)
: IncomingValueHandler(MIRBuilder, MRI),
DL(MIRBuilder.getMF().getDataLayout()) {}

Register getStackAddress(uint64_t Size, int64_t Offset,
Expand Down Expand Up @@ -202,9 +211,8 @@ struct X86IncomingValueHandler : public CallLowering::IncomingValueHandler {
};

struct FormalArgHandler : public X86IncomingValueHandler {
FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
CCAssignFn *AssignFn)
: X86IncomingValueHandler(MIRBuilder, MRI, AssignFn) {}
FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
: X86IncomingValueHandler(MIRBuilder, MRI) {}

void markPhysRegUsed(unsigned PhysReg) override {
MIRBuilder.getMRI()->addLiveIn(PhysReg);
Expand All @@ -214,8 +222,8 @@ struct FormalArgHandler : public X86IncomingValueHandler {

struct CallReturnHandler : public X86IncomingValueHandler {
CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
CCAssignFn *AssignFn, MachineInstrBuilder &MIB)
: X86IncomingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
MachineInstrBuilder &MIB)
: X86IncomingValueHandler(MIRBuilder, MRI), MIB(MIB) {}

void markPhysRegUsed(unsigned PhysReg) override {
MIB.addDef(PhysReg, RegState::Implicit);
Expand Down Expand Up @@ -264,9 +272,10 @@ bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
if (!MBB.empty())
MIRBuilder.setInstr(*MBB.begin());

FormalArgHandler Handler(MIRBuilder, MRI, CC_X86);
if (!handleAssignments(MIRBuilder, SplitArgs, Handler, F.getCallingConv(),
F.isVarArg()))
X86OutgoingValueAssigner Assigner(CC_X86);
FormalArgHandler Handler(MIRBuilder, MRI);
if (!determineAndHandleAssignments(Handler, Assigner, SplitArgs, MIRBuilder,
F.getCallingConv(), F.isVarArg()))
return false;

// Move back to the end of the basic block.
Expand Down Expand Up @@ -317,9 +326,10 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
splitToValueTypes(OrigArg, SplitArgs, DL, Info.CallConv);
}
// Do the actual argument marshalling.
X86OutgoingValueHandler Handler(MIRBuilder, MRI, MIB, CC_X86);
if (!handleAssignments(MIRBuilder, SplitArgs, Handler, Info.CallConv,
Info.IsVarArg))
X86OutgoingValueAssigner Assigner(CC_X86);
X86OutgoingValueHandler Handler(MIRBuilder, MRI, MIB);
if (!determineAndHandleAssignments(Handler, Assigner, SplitArgs, MIRBuilder,
Info.CallConv, Info.IsVarArg))
return false;

bool IsFixed = Info.OrigArgs.empty() ? true : Info.OrigArgs.back().IsFixed;
Expand All @@ -334,7 +344,7 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,

MIRBuilder.buildInstr(X86::MOV8ri)
.addDef(X86::AL)
.addImm(Handler.getNumXmmRegs());
.addImm(Assigner.getNumXmmRegs());
MIB.addUse(X86::AL, RegState::Implicit);
}

Expand Down Expand Up @@ -363,22 +373,23 @@ bool X86CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,

splitToValueTypes(Info.OrigRet, SplitArgs, DL, Info.CallConv);

CallReturnHandler Handler(MIRBuilder, MRI, RetCC_X86, MIB);
if (!handleAssignments(MIRBuilder, SplitArgs, Handler, Info.CallConv,
Info.IsVarArg))
X86OutgoingValueAssigner Assigner(RetCC_X86);
CallReturnHandler Handler(MIRBuilder, MRI, MIB);
if (!determineAndHandleAssignments(Handler, Assigner, SplitArgs, MIRBuilder,
Info.CallConv, Info.IsVarArg))
return false;

if (!NewRegs.empty())
MIRBuilder.buildMerge(Info.OrigRet.Regs[0], NewRegs);
}

CallSeqStart.addImm(Handler.getStackSize())
CallSeqStart.addImm(Assigner.getStackSize())
.addImm(0 /* see getFrameTotalSize */)
.addImm(0 /* see getFrameAdjustment */);

unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
MIRBuilder.buildInstr(AdjStackUp)
.addImm(Handler.getStackSize())
.addImm(Assigner.getStackSize())
.addImm(0 /* NumBytesForCalleeToPop */);

return true;
Expand Down
28 changes: 28 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -449,3 +449,31 @@ define void @foo(i32*) {
musttail call void @must_callee(i8* null)
ret void
}

; Verify we emit a tail call with a type that requires splitting into
; multiple registers.
declare void @outgoing_v16f16(<16 x half>)
define void @test_tail_call_outgoing_v16f16(<16 x half> %arg) {
; DARWIN-LABEL: name: test_tail_call_outgoing_v16f16
; DARWIN: bb.1 (%ir-block.0):
; DARWIN: liveins: $q0, $q1
; DARWIN: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
; DARWIN: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
; DARWIN: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<8 x s16>), [[COPY1]](<8 x s16>)
; DARWIN: [[UV:%[0-9]+]]:_(<8 x s16>), [[UV1:%[0-9]+]]:_(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>)
; DARWIN: $q0 = COPY [[UV]](<8 x s16>)
; DARWIN: $q1 = COPY [[UV1]](<8 x s16>)
; DARWIN: TCRETURNdi @outgoing_v16f16, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1
; WINDOWS-LABEL: name: test_tail_call_outgoing_v16f16
; WINDOWS: bb.1 (%ir-block.0):
; WINDOWS: liveins: $q0, $q1
; WINDOWS: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0
; WINDOWS: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1
; WINDOWS: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<8 x s16>), [[COPY1]](<8 x s16>)
; WINDOWS: [[UV:%[0-9]+]]:_(<8 x s16>), [[UV1:%[0-9]+]]:_(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>)
; WINDOWS: $q0 = COPY [[UV]](<8 x s16>)
; WINDOWS: $q1 = COPY [[UV1]](<8 x s16>)
; WINDOWS: TCRETURNdi @outgoing_v16f16, 0, csr_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1
tail call void @outgoing_v16f16(<16 x half> %arg)
ret void
}