[RISCV] Support fixed-length vectors in the calling convention
This patch adds fixed-length vector support to the calling convention
when RVV is used to lower fixed-length vectors. The scheme follows the
regular vector calling convention for the argument/return registers, but
uses scalable vector container types as the LocVTs, and converts to/from
the fixed-length vector value types as required.
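
As a rough model of that mapping, here is a minimal self-contained sketch. It assumes RVV's 64-bit register block granularity (RVVBitsPerBlock) and a simplified type representation; the in-tree logic lives in RISCVTargetLowering::getContainerForFixedLengthVector, and mask (i1) vectors, which are handled separately, are ignored here.

// Minimal sketch, not the in-tree implementation: map a fixed-length
// vector type to the smallest scalable container that can hold it at the
// minimum supported VLEN.
#include <algorithm>
#include <cstdio>

struct SimpleVT {
  unsigned EltBits;  // element size in bits
  unsigned NumElts;  // for scalable types, multiplied by vscale at runtime
  bool Scalable;
};

SimpleVT getContainerForFixedLengthVector(SimpleVT VT, unsigned MinVLen) {
  const unsigned RVVBitsPerBlock = 64; // one LMUL unit is 64 * vscale bits
  // Smallest LMUL whose register group holds the whole fixed-length value.
  unsigned FixedBits = VT.EltBits * VT.NumElts;
  unsigned LMul = std::max(1u, (FixedBits + MinVLen - 1) / MinVLen);
  return {VT.EltBits, LMul * RVVBitsPerBlock / VT.EltBits, /*Scalable=*/true};
}

int main() {
  // v4i32 (128 bits) -> nxv2i32 with MinVLen=128; v16i32 -> nxv8i32.
  SimpleVT C = getContainerForFixedLengthVector({32, 4, false}, 128);
  std::printf("nxv%ui%u\n", C.NumElts, C.EltBits); // prints nxv2i32
}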

Fixed-length vector types may be split when the combination of minimum
VLEN and the maximum allowable LMUL is not large enough to fully contain
the vector. In this case the behaviour differs between fixed-length
vectors passed as parameters and as return values:
1. For return values, vectors must be passed entirely via registers or
via the stack.
2. For parameters, unlike scalar values, split vectors continue to be
passed by value, and are split across multiple registers until no
registers remain. Vector parameters may therefore be found partly in
registers and partly on the stack (see the worked example after this
list).
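
To make the splitting condition concrete, here is a worked example under assumed parameters (a minimum VLEN of 128 bits and a maximum LMUL of 8, neither taken from this patch): one register group then holds at most 128 * 8 = 1024 bits, so a 2048-bit v64i32 splits into two parts.

// Sketch of the size arithmetic only, under the assumptions above.
#include <cstdio>

unsigned numParts(unsigned FixedBits, unsigned MinVLen, unsigned MaxLMul) {
  unsigned MaxGroupBits = MinVLen * MaxLMul; // largest single register group
  return (FixedBits + MaxGroupBits - 1) / MaxGroupBits;
}

int main() {
  // v64i32 is 64 * 32 = 2048 bits: two 1024-bit parts. As a parameter both
  // parts may land in register groups; as a return value the vector must
  // instead be returned entirely in registers or entirely via the stack.
  std::printf("%u\n", numParts(64 * 32, 128, 8)); // prints 2
}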

As with scalable vectors, the first fixed-length mask vector is passed
via v0. Split fixed-length mask vectors are passed first via v0 and then
via the next available vector registers: v8, v9, etc.
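
That assignment order can be sketched as follows. This is a simplified model of CC_RISCV's vector path, not the actual implementation; in particular it ignores the LMUL-aligned register grouping the real allocator respects.

// First mask vector -> v0; everything else, including later pieces of a
// split mask vector, -> the next free argument register among v8..v23.
#include <optional>
#include <string>

struct VectorArgState {
  bool MaskAssigned = false;
  unsigned NextReg = 8; // vector argument registers are v8..v23
};

std::optional<std::string> allocateVectorReg(VectorArgState &S, bool IsMask) {
  if (IsMask && !S.MaskAssigned) {
    S.MaskAssigned = true;
    return "v0";
  }
  if (S.NextReg <= 23)
    return "v" + std::to_string(S.NextReg++);
  return std::nullopt; // no registers left: indirect / via the stack
}

int main() {
  VectorArgState S;
  // A split mask vector: first piece lands in v0, the second in v8.
  auto R0 = allocateVectorReg(S, /*IsMask=*/true); // "v0"
  auto R1 = allocateVectorReg(S, /*IsMask=*/true); // "v8"
  return !(R0 && R1);
}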

The handling of vector return values uses all available argument
registers (v8-v23), which does not adhere to the calling convention
we're supposedly implementing, but since this issue affects both
fixed-length and scalable-vector values, it was left as-is.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D97954
frasercrmck committed Mar 15, 2021
1 parent 5d48b45 commit 0c5b789
Showing 2 changed files with 1,217 additions and 23 deletions.
70 changes: 47 additions & 23 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -145,7 +145,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
auto addRegClassForFixedVectors = [this](MVT VT) {
unsigned LMul = Subtarget.getLMULForFixedLengthVector(VT);
const TargetRegisterClass *RC;
if (LMul == 1)
if (LMul == 1 || VT.getVectorElementType() == MVT::i1)
RC = &RISCV::VRRegClass;
else if (LMul == 2)
RC = &RISCV::VRM2RegClass;
@@ -4939,8 +4939,8 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;

// Any return value split in to more than two values can't be returned
// directly.
if (IsRet && ValNo > 1)
// directly. Vectors are returned via the available vector registers.
if (!LocVT.isVector() && IsRet && ValNo > 1)
return true;

// UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
@@ -5031,9 +5031,15 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
return false;
}

// Fixed-length vectors are located in the corresponding scalable-vector
// container types.
if (ValVT.isFixedLengthVector())
LocVT = TLI.getContainerForFixedLengthVector(LocVT);

// Split arguments might be passed indirectly, so keep track of the pending
// values.
if (ArgFlags.isSplit() || !PendingLocs.empty()) {
// values. Split vectors are passed via a mix of registers and indirectly, so
// treat them as we would any other argument.
if (!LocVT.isVector() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
LocVT = XLenVT;
LocInfo = CCValAssign::Indirect;
PendingLocs.push_back(
@@ -5046,7 +5052,7 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,

// If the split argument only had two elements, it should be passed directly
// in registers or on the stack.
if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
if (!LocVT.isVector() && ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
// Apply the normal calling convention rules to the first half of the
// split argument.
@@ -5066,7 +5072,7 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
Reg = State.AllocateReg(ArgFPR32s);
else if (ValVT == MVT::f64 && !UseGPRForF64)
Reg = State.AllocateReg(ArgFPR64s);
else if (ValVT.isScalableVector()) {
else if (ValVT.isVector()) {
const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
if (RC == &RISCV::VRRegClass) {
// Assign the first mask argument to V0.
@@ -5088,6 +5094,12 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
llvm_unreachable("Unhandled class register for ValueType");
}
if (!Reg) {
// For return values, the vector must be passed fully via registers or
// via the stack.
// FIXME: The proposed vector ABI only mandates v8-v15 for return values,
// but we're using all of them.
if (IsRet)
return true;
LocInfo = CCValAssign::Indirect;
// Try using a GPR to pass the address
Reg = State.AllocateReg(ArgGPRs);
@@ -5117,8 +5129,8 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
}

assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
(TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) &&
"Expected an XLenVT or scalable vector types at this stage");
(TLI.getSubtarget().hasStdExtV() && ValVT.isVector())) &&
"Expected an XLenVT or vector types at this stage");

if (Reg) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
@@ -5139,8 +5151,7 @@ template <typename ArgTy>
static Optional<unsigned> preAssignMask(const ArgTy &Args) {
for (const auto &ArgIdx : enumerate(Args)) {
MVT ArgVT = ArgIdx.value().VT;
if (ArgVT.isScalableVector() &&
ArgVT.getVectorElementType().SimpleTy == MVT::i1)
if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
return ArgIdx.index();
}
return None;
@@ -5206,11 +5217,14 @@ void RISCVTargetLowering::analyzeOutputArgs(
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
// values.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
const CCValAssign &VA, const SDLoc &DL) {
const CCValAssign &VA, const SDLoc &DL,
const RISCVSubtarget &Subtarget) {
switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unexpected CCValAssign::LocInfo");
case CCValAssign::Full:
if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
break;
case CCValAssign::BCvt:
if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
@@ -5241,17 +5255,20 @@ static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
if (VA.getLocInfo() == CCValAssign::Indirect)
return Val;

return convertLocVTToValVT(DAG, Val, VA, DL);
return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
}

static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
const CCValAssign &VA, const SDLoc &DL) {
const CCValAssign &VA, const SDLoc &DL,
const RISCVSubtarget &Subtarget) {
EVT LocVT = VA.getLocVT();

switch (VA.getLocInfo()) {
default:
llvm_unreachable("Unexpected CCValAssign::LocInfo");
case CCValAssign::Full:
if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
break;
case CCValAssign::BCvt:
if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
@@ -5512,14 +5529,17 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
if (VA.getLocInfo() == CCValAssign::Indirect) {
// If the original argument was split and passed by reference (e.g. i128
// on RV32), we need to load all parts of it here (using the same
// address).
// address). Vectors may be partly split to registers and partly to the
// stack, in which case the base address is partly offset and subsequent
// stores are relative to that.
InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
MachinePointerInfo()));
unsigned ArgIndex = Ins[i].OrigArgIndex;
assert(Ins[i].PartOffset == 0);
unsigned ArgPartOffset = Ins[i].PartOffset;
assert(VA.getValVT().isVector() || ArgPartOffset == 0);
while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
CCValAssign &PartVA = ArgLocs[i + 1];
unsigned PartOffset = Ins[i + 1].PartOffset;
unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
DAG.getIntPtrConstant(PartOffset, DL));
InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
@@ -5789,12 +5809,16 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
DAG.getStore(Chain, DL, ArgValue, SpillSlot,
MachinePointerInfo::getFixedStack(MF, FI)));
// If the original argument was split (e.g. i128), we need
// to store all parts of it here (and pass just one address).
// to store the required parts of it here (and pass just one address).
// Vectors may be partly split to registers and partly to the stack, in
// which case the base address is partly offset and subsequent stores are
// relative to that.
unsigned ArgIndex = Outs[i].OrigArgIndex;
assert(Outs[i].PartOffset == 0);
unsigned ArgPartOffset = Outs[i].PartOffset;
assert(VA.getValVT().isVector() || ArgPartOffset == 0);
while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
SDValue PartValue = OutVals[i + 1];
unsigned PartOffset = Outs[i + 1].PartOffset;
unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
DAG.getIntPtrConstant(PartOffset, DL));
MemOpChains.push_back(
@@ -5804,7 +5828,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
}
ArgValue = SpillSlot;
} else {
ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
}

// Use local copy if it is a byval arg.
@@ -5940,7 +5964,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
RetValue2);
}

RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);

InVals.push_back(RetValue);
}
@@ -6026,7 +6050,7 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
} else {
// Handle a 'normal' return.
Val = convertValVTToLocVT(DAG, Val, VA, DL);
Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);

if (STI.isRegisterReservedByUser(VA.getLocReg()))
