[GlobalISel] Accept multiple vregs in lowerFormalArgs

Change the interface of CallLowering::lowerFormalArguments to accept
several virtual registers for each formal argument, instead of just one.
This is a follow-up to D46018.

CallLowering::lowerReturn was similarly refactored in D49660. lowerCall
will be refactored in the same way in follow-up patches.

With this change, we forward the virtual registers generated for
aggregates to CallLowering. The target can therefore decide for itself
whether it wants to handle them as separate pieces or as one big
register. We also copy the pack/unpackRegs helpers to CallLowering to
facilitate this.
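
To make the new contract concrete, here is a rough sketch (not part of
the patch) of how a target override could consume the per-argument
register lists. MyTargetCallLowering and lowerSingleReg are hypothetical
names; only the signature and the packRegs/unpackRegs helpers come from
this patch:

  bool MyTargetCallLowering::lowerFormalArguments(
      MachineIRBuilder &MIRBuilder, const Function &F,
      ArrayRef<ArrayRef<Register>> VRegs) const {
    const DataLayout &DL = MIRBuilder.getMF().getDataLayout();
    unsigned Idx = 0;
    for (const Argument &Arg : F.args()) {
      if (DL.getTypeStoreSize(Arg.getType()) == 0)
        continue; // Zero-sized types get no vregs (see the IRTranslator hunk).
      ArrayRef<Register> ArgRegs = VRegs[Idx++];
      if (ArgRegs.size() == 1) {
        // Scalar or unsplit argument: use the single register directly.
        if (!lowerSingleReg(MIRBuilder, Arg, ArgRegs[0]))
          return false;
        continue;
      }
      // Aggregate: lower into one wide register, then split it back into the
      // per-member registers that the IRTranslator expects to be defined.
      Register Packed = MIRBuilder.getMRI()->createGenericVirtualRegister(
          getLLTForType(*Arg.getType(), DL));
      if (!lowerSingleReg(MIRBuilder, Arg, Packed))
        return false;
      unpackRegs(ArgRegs, Packed, Arg.getType(), MIRBuilder);
    }
    return true;
  }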

ARM and AArch64 have been updated to use the passed-in virtual registers
directly, which means we no longer need to generate so many
merge/extract instructions.

AArch64 seems to have had a bug when lowering e.g. [1 x i8*], which was
put into an s64 instead of a p0. Added a test case which illustrates the
problem more clearly (it crashes without this patch) and fixed the
existing test case to expect p0.
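
The gist of that fix, as a hypothetical check rather than code from the
patch (MRI and VRegs stand for the translator's MachineRegisterInfo and
the per-argument register lists):

  // computeValueLLTs yields a single base type for [1 x i8*], and on a
  // 64-bit target that register should be a pointer (p0), not a scalar (s64).
  ArrayRef<Register> ArgRegs = VRegs[0]; // registers for the first argument
  assert(ArgRegs.size() == 1 && "[1 x i8*] has exactly one base type");
  LLT RegTy = MRI.getType(ArgRegs[0]);
  assert(RegTy.isPointer() && RegTy.getAddressSpace() == 0 &&
         "expected p0 for [1 x i8*], not s64");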

AMDGPU has been updated to unpack into the virtual registers for
kernels. I think the other code paths fall back for aggregates, so this
should be NFC.

Mips doesn't support aggregates yet, so it's also NFC.

x86 seems to have code for dealing with aggregates, but I couldn't find
the tests for it, so I just added a fallback to DAGISel if we get more
than one virtual register for an argument.
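
A sketch of what that fallback could look like (the X86 file is not
among the hunks shown below, so this is an assumption about its shape,
not the actual change):

  // Hypothetical: bail out whenever an argument was split into more than one
  // virtual register, so the whole function falls back to SelectionDAG.
  bool X86CallLowering::lowerFormalArguments(
      MachineIRBuilder &MIRBuilder, const Function &F,
      ArrayRef<ArrayRef<Register>> VRegs) const {
    for (ArrayRef<Register> ArgRegs : VRegs)
      if (ArgRegs.size() > 1)
        return false; // Returning false triggers the DAGISel fallback.
    // ... the pre-existing single-register lowering would continue here ...
    return true;
  }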

Differential Revision: https://reviews.llvm.org/D63549

llvm-svn: 364510
rovka committed Jun 27, 2019
1 parent 69ce1c1 commit c3dbe23
Showing 17 changed files with 214 additions and 155 deletions.
34 changes: 26 additions & 8 deletions llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -52,8 +52,8 @@ class CallLowering {
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy{}, bool IsFixed = true)
: Regs(Regs.begin(), Regs.end()), Ty(Ty), Flags(Flags),
IsFixed(IsFixed) {
assert(Regs.size() == 1 && "Can't handle multiple regs yet");
assert((Ty->isVoidTy() == (Regs[0] == 0)) &&
// FIXME: We should have just one way of saying "no register".
assert((Ty->isVoidTy() == (Regs.empty() || Regs[0] == 0)) &&
"only void types should have no register");
}
};
@@ -139,6 +139,24 @@ class CallLowering {
void setArgFlags(ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL,
const FuncInfoTy &FuncInfo) const;

/// Generate instructions for packing \p SrcRegs into one big register
/// corresponding to the aggregate type \p PackedTy.
///
/// \param SrcRegs should contain one virtual register for each base type in
/// \p PackedTy, as returned by computeValueLLTs.
///
/// \return The packed register.
Register packRegs(ArrayRef<Register> SrcRegs, Type *PackedTy,
MachineIRBuilder &MIRBuilder) const;

/// Generate instructions for unpacking \p SrcReg into the \p DstRegs
/// corresponding to the aggregate type \p PackedTy.
///
/// \param DstRegs should contain one virtual register for each base type in
/// \p PackedTy, as returned by computeValueLLTs.
void unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg, Type *PackedTy,
MachineIRBuilder &MIRBuilder) const;

/// Invoke Handler::assignArg on each of the given \p Args and then use
/// \p Callback to move them to the assigned locations.
///
@@ -182,19 +200,19 @@ class CallLowering {
return false;
}


/// This hook must be implemented to lower the incoming (formal)
/// arguments, described by \p Args, for GlobalISel. Each argument
/// must end up in the related virtual register described by VRegs.
/// In other words, the first argument should end up in VRegs[0],
/// the second in VRegs[1], and so on.
/// arguments, described by \p VRegs, for GlobalISel. Each argument
/// must end up in the related virtual registers described by \p VRegs.
/// In other words, the first argument should end up in \c VRegs[0],
/// the second in \c VRegs[1], and so on. For each argument, there will be one
/// register for each non-aggregate type, as returned by \c computeValueLLTs.
/// \p MIRBuilder is set to the proper insertion for the argument
/// lowering.
///
/// \return True if the lowering succeeded, false otherwise.
virtual bool lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<Register> VRegs) const {
ArrayRef<ArrayRef<Register>> VRegs) const {
return false;
}

42 changes: 42 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -115,6 +116,47 @@ CallLowering::setArgFlags<CallInst>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const DataLayout &DL,
const CallInst &FuncInfo) const;

Register CallLowering::packRegs(ArrayRef<Register> SrcRegs, Type *PackedTy,
MachineIRBuilder &MIRBuilder) const {
assert(SrcRegs.size() > 1 && "Nothing to pack");

const DataLayout &DL = MIRBuilder.getMF().getDataLayout();
MachineRegisterInfo *MRI = MIRBuilder.getMRI();

LLT PackedLLT = getLLTForType(*PackedTy, DL);

SmallVector<LLT, 8> LLTs;
SmallVector<uint64_t, 8> Offsets;
computeValueLLTs(DL, *PackedTy, LLTs, &Offsets);
assert(LLTs.size() == SrcRegs.size() && "Regs / types mismatch");

Register Dst = MRI->createGenericVirtualRegister(PackedLLT);
MIRBuilder.buildUndef(Dst);
for (unsigned i = 0; i < SrcRegs.size(); ++i) {
Register NewDst = MRI->createGenericVirtualRegister(PackedLLT);
MIRBuilder.buildInsert(NewDst, Dst, SrcRegs[i], Offsets[i]);
Dst = NewDst;
}

return Dst;
}

void CallLowering::unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg,
Type *PackedTy,
MachineIRBuilder &MIRBuilder) const {
assert(DstRegs.size() > 1 && "Nothing to unpack");

const DataLayout &DL = MIRBuilder.getMF().getDataLayout();

SmallVector<LLT, 8> LLTs;
SmallVector<uint64_t, 8> Offsets;
computeValueLLTs(DL, *PackedTy, LLTs, &Offsets);
assert(LLTs.size() == DstRegs.size() && "Regs / types mismatch");

for (unsigned i = 0; i < DstRegs.size(); ++i)
MIRBuilder.buildExtract(DstRegs[i], SrcReg, Offsets[i]);
}

bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
ArrayRef<ArgInfo> Args,
ValueHandler &Handler) const {
27 changes: 7 additions & 20 deletions llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2274,16 +2274,17 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
EntryBB->addSuccessor(&getMBB(F.front()));

// Lower the actual args into this basic block.
SmallVector<Register, 8> VRegArgs;
SmallVector<ArrayRef<Register>, 8> VRegArgs;
for (const Argument &Arg: F.args()) {
if (DL->getTypeStoreSize(Arg.getType()) == 0)
continue; // Don't handle zero sized types.
VRegArgs.push_back(
MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL)));
ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
VRegArgs.push_back(VRegs);

if (Arg.hasSwiftErrorAttr())
SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(),
VRegArgs.back());
if (Arg.hasSwiftErrorAttr()) {
assert(VRegs.size() == 1 && "Too many vregs for Swift error");
SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(), VRegs[0]);
}
}

// We don't currently support translating swifterror or swiftself functions.
@@ -2306,20 +2307,6 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
return false;
}

auto ArgIt = F.arg_begin();
for (auto &VArg : VRegArgs) {
// If the argument is an unsplit scalar then don't use unpackRegs to avoid
// creating redundant copies.
if (!valueIsSplit(*ArgIt, VMap.getOffsets(*ArgIt))) {
auto &VRegs = *VMap.getVRegs(cast<Value>(*ArgIt));
assert(VRegs.empty() && "VRegs already populated?");
VRegs.push_back(VArg);
} else {
unpackRegs(*ArgIt, VArg, *EntryBuilder.get());
}
ArgIt++;
}

// Need to visit defs before uses when translating instructions.
GISelObserverWrapper WrapperObserver;
if (EnableCSE && CSEInfo)
43 changes: 24 additions & 19 deletions llvm/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -203,7 +203,6 @@ void AArch64CallLowering::splitToValueTypes(
SmallVector<EVT, 4> SplitVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
assert(OrigArg.Regs.size() == 1 && "Can't handle multple regs yet");

if (SplitVTs.size() == 1) {
// No splitting to do, but we want to replace the original type (e.g. [1 x
@@ -213,6 +212,24 @@
return;
}

if (OrigArg.Regs.size() > 1) {
// Create one ArgInfo for each virtual register in the original ArgInfo.
assert(OrigArg.Regs.size() == SplitVTs.size() && "Regs / types mismatch");

bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
OrigArg.Ty, CallConv, false);
for (unsigned i = 0, e = SplitVTs.size(); i < e; ++i) {
Type *SplitTy = SplitVTs[i].getTypeForEVT(Ctx);
SplitArgs.emplace_back(OrigArg.Regs[i], SplitTy, OrigArg.Flags,
OrigArg.IsFixed);
if (NeedsRegBlock)
SplitArgs.back().Flags.setInConsecutiveRegs();
}

SplitArgs.back().Flags.setInConsecutiveRegsLast();
return;
}

unsigned FirstRegIdx = SplitArgs.size();
bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
OrigArg.Ty, CallConv, false);
@@ -351,9 +368,9 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
return Success;
}

bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<Register> VRegs) const {
bool AArch64CallLowering::lowerFormalArguments(
MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const {
MachineFunction &MF = MIRBuilder.getMF();
MachineBasicBlock &MBB = MIRBuilder.getMBB();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -364,26 +381,14 @@ bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
for (auto &Arg : F.args()) {
if (DL.getTypeStoreSize(Arg.getType()) == 0)
continue;

ArgInfo OrigArg{VRegs[i], Arg.getType()};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, F);
bool Split = false;
LLT Ty = MRI.getType(VRegs[i]);
Register Dst = VRegs[i];

splitToValueTypes(OrigArg, SplitArgs, DL, MRI, F.getCallingConv(),
[&](unsigned Reg, uint64_t Offset) {
if (!Split) {
Split = true;
Dst = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildUndef(Dst);
}
unsigned Tmp = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildInsert(Tmp, Dst, Reg, Offset);
Dst = Tmp;
[&](Register Reg, uint64_t Offset) {
llvm_unreachable("Args should already be split");
});

if (Dst != VRegs[i])
MIRBuilder.buildCopy(VRegs[i], Dst);
++i;
}

2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64CallLowering.h
@@ -38,7 +38,7 @@ class AArch64CallLowering: public CallLowering {
Register SwiftErrorVReg) const override;

bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<Register> VRegs) const override;
ArrayRef<ArrayRef<Register>> VRegs) const override;

bool lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv,
const MachineOperand &Callee, const ArgInfo &OrigRet,
28 changes: 19 additions & 9 deletions llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -193,9 +193,9 @@ static void allocateSystemSGPRs(CCState &CCInfo,
}
}

bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<Register> VRegs) const {
bool AMDGPUCallLowering::lowerFormalArguments(
MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs) const {
// AMDGPU_GS and AMDGP_HS are not supported yet.
if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
F.getCallingConv() == CallingConv::AMDGPU_HS)
@@ -275,9 +275,16 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

ArrayRef<Register> OrigArgRegs = VRegs[i];
Register ArgReg =
OrigArgRegs.size() == 1
? OrigArgRegs[0]
: MRI.createGenericVirtualRegister(getLLTForType(*ArgTy, DL));
unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, ArgReg);
if (OrigArgRegs.size() > 1)
unpackRegs(OrigArgRegs, ArgReg, ArgTy, MIRBuilder);
++i;
}

@@ -295,7 +302,8 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,

// We can only hanlde simple value types at the moment.
ISD::ArgFlagsTy Flags;
ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
assert(VRegs[i].size() == 1 && "Can't lower into more than one register");
ArgInfo OrigArg{VRegs[i][0], CurOrigArg->getType()};
setArgFlags(OrigArg, i + 1, DL, F);
Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));

@@ -348,10 +356,12 @@ bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
if (Skipped.test(OrigArgIdx))
continue;
CCValAssign &VA = ArgLocs[i++];
MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
assert(VRegs[OrigArgIdx].size() == 1 &&
"Can't lower into more than 1 reg");
CCValAssign &VA = ArgLocs[i++];
MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx][0]);
MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
MIRBuilder.buildCopy(VRegs[OrigArgIdx][0], VA.getLocReg());
}

allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -35,7 +35,7 @@ class AMDGPUCallLowering: public CallLowering {
bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
ArrayRef<Register> VRegs) const override;
bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<Register> VRegs) const override;
ArrayRef<ArrayRef<Register>> VRegs) const override;
static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
};
