-
Notifications
You must be signed in to change notification settings - Fork 11k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Use separate CCValAssign for both parts of f64 with ilp32. #69129
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) Changes: Mark any registers as CustomReg. This allows us to more directly emit the register or memory access for the high part. Previously we needed a memory access if the low register was X17 and we assumed the stack offset was 0. If the low part wasn't X17, we assumed the high register was the next register after the low register. This is another part of supporting FP arguments with GISel. This is stacked on #69118. Full diff: https://github.com/llvm/llvm-project/pull/69129.diff 1 File Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d7552317fd8bc69..91abe256ebd7f48 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16452,16 +16452,23 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
// stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
// cases.
Register Reg = State.AllocateReg(ArgGPRs);
- LocVT = MVT::i32;
if (!Reg) {
unsigned StackOffset = State.AllocateStack(8, Align(8));
State.addLoc(
CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
return false;
}
- if (!State.AllocateReg(ArgGPRs))
- State.AllocateStack(4, Align(4));
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ LocVT = MVT::i32;
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ Register HiReg = State.AllocateReg(ArgGPRs);
+ if (HiReg) {
+ State.addLoc(
+ CCValAssign::getCustomReg(ValNo, ValVT, HiReg, LocVT, LocInfo));
+ } else {
+ unsigned StackOffset = State.AllocateStack(4, Align(4));
+ State.addLoc(
+ CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
+ }
return false;
}
@@ -16770,38 +16777,32 @@ static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
}
static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
- const CCValAssign &VA, const SDLoc &DL) {
+ const CCValAssign &VA,
+ const CCValAssign &HiVA,
+ const SDLoc &DL) {
assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
"Unexpected VA");
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- if (VA.isMemLoc()) {
- // f64 is passed on the stack.
- int FI =
- MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*IsImmutable=*/true);
- SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
- return DAG.getLoad(MVT::f64, DL, Chain, FIN,
- MachinePointerInfo::getFixedStack(MF, FI));
- }
-
assert(VA.isRegLoc() && "Expected register VA assignment");
Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
SDValue Hi;
- if (VA.getLocReg() == RISCV::X17) {
+ if (HiVA.isMemLoc()) {
// Second half of f64 is passed on the stack.
- int FI = MFI.CreateFixedObject(4, 0, /*IsImmutable=*/true);
+ int FI = MFI.CreateFixedObject(4, HiVA.getLocMemOffset(),
+ /*IsImmutable=*/true);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
MachinePointerInfo::getFixedStack(MF, FI));
} else {
// Second half of f64 is passed in another GPR.
Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
- RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
+ RegInfo.addLiveIn(HiVA.getLocReg(), HiVReg);
Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
}
return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
@@ -17044,15 +17045,16 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
: RISCV::CC_RISCV);
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ for (unsigned i = 0, e = ArgLocs.size(), InsIdx = 0; i != e; ++i, ++InsIdx) {
CCValAssign &VA = ArgLocs[i];
SDValue ArgValue;
// Passing f64 on RV32D with a soft float ABI must be handled as a special
// case.
- if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
- ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
- else if (VA.isRegLoc())
- ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
+ if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
+ assert(VA.needsCustom());
+ ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, ArgLocs[++i], DL);
+ } else if (VA.isRegLoc())
+ ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[InsIdx], *this);
else
ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
@@ -17064,12 +17066,12 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
// stores are relative to that.
InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
MachinePointerInfo()));
- unsigned ArgIndex = Ins[i].OrigArgIndex;
- unsigned ArgPartOffset = Ins[i].PartOffset;
+ unsigned ArgIndex = Ins[InsIdx].OrigArgIndex;
+ unsigned ArgPartOffset = Ins[InsIdx].PartOffset;
assert(VA.getValVT().isVector() || ArgPartOffset == 0);
- while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
+ while (i + 1 != e && Ins[InsIdx + 1].OrigArgIndex == ArgIndex) {
CCValAssign &PartVA = ArgLocs[i + 1];
- unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
+ unsigned PartOffset = Ins[InsIdx + 1].PartOffset - ArgPartOffset;
SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
if (PartVA.getValVT().isScalableVector())
Offset = DAG.getNode(ISD::VSCALE, DL, XLenVT, Offset);
@@ -17077,6 +17079,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
MachinePointerInfo()));
++i;
+ ++InsIdx;
}
continue;
}
@@ -17292,15 +17295,18 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;
- for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
+ for (unsigned i = 0, j = 0, e = ArgLocs.size(), OutIdx = 0; i != e;
+ ++i, ++OutIdx) {
CCValAssign &VA = ArgLocs[i];
- SDValue ArgValue = OutVals[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ SDValue ArgValue = OutVals[OutIdx];
+ ISD::ArgFlagsTy Flags = Outs[OutIdx].Flags;
// Handle passing f64 on RV32D with a soft float ABI as a special case.
bool IsF64OnRV32DSoftABI =
VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
- if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
+ if (IsF64OnRV32DSoftABI) {
+ assert(VA.isRegLoc() && "Expected register VA assignment");
+ assert(VA.needsCustom());
SDValue SplitF64 = DAG.getNode(
RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
SDValue Lo = SplitF64.getValue(0);
@@ -17309,18 +17315,22 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
Register RegLo = VA.getLocReg();
RegsToPass.push_back(std::make_pair(RegLo, Lo));
- if (RegLo == RISCV::X17) {
+ // Get the CCValAssign for the Hi part.
+ CCValAssign &HiVA = ArgLocs[++i];
+
+ if (HiVA.isMemLoc()) {
// Second half of f64 is passed on the stack.
- // Work out the address of the stack slot.
if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
+ SDValue Address =
+ DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
+ DAG.getIntPtrConstant(HiVA.getLocMemOffset(), DL));
// Emit the store.
MemOpChains.push_back(
- DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
+ DAG.getStore(Chain, DL, Hi, Address, MachinePointerInfo()));
} else {
// Second half of f64 is passed in another GPR.
- assert(RegLo < RISCV::X31 && "Invalid register pair");
- Register RegHigh = RegLo + 1;
+ Register RegHigh = HiVA.getLocReg();
RegsToPass.push_back(std::make_pair(RegHigh, Hi));
}
continue;
@@ -17334,7 +17344,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (VA.getLocInfo() == CCValAssign::Indirect) {
// Store the argument in a stack slot and pass its address.
Align StackAlign =
- std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
+ std::max(getPrefTypeAlign(Outs[OutIdx].ArgVT, DAG),
getPrefTypeAlign(ArgValue.getValueType(), DAG));
TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
// If the original argument was split (e.g. i128), we need
@@ -17342,16 +17352,16 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Vectors may be partly split to registers and partly to the stack, in
// which case the base address is partly offset and subsequent stores are
// relative to that.
- unsigned ArgIndex = Outs[i].OrigArgIndex;
- unsigned ArgPartOffset = Outs[i].PartOffset;
+ unsigned ArgIndex = Outs[OutIdx].OrigArgIndex;
+ unsigned ArgPartOffset = Outs[OutIdx].PartOffset;
assert(VA.getValVT().isVector() || ArgPartOffset == 0);
// Calculate the total size to store. We don't have access to what we're
// actually storing other than performing the loop and collecting the
// info.
SmallVector<std::pair<SDValue, SDValue>> Parts;
- while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
+ while (i + 1 != e && Outs[OutIdx + 1].OrigArgIndex == ArgIndex) {
SDValue PartValue = OutVals[i + 1];
- unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
+ unsigned PartOffset = Outs[OutIdx + 1].PartOffset - ArgPartOffset;
SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
EVT PartVT = PartValue.getValueType();
if (PartVT.isScalableVector())
@@ -17360,6 +17370,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
Parts.push_back(std::make_pair(PartValue, Offset));
++i;
+ ++OutIdx;
}
SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
@@ -17501,7 +17512,8 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
// Copy all of the result registers out of their specified physreg.
- for (auto &VA : RVLocs) {
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ auto &VA = RVLocs[i];
// Copy the value out
SDValue RetValue =
DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
@@ -17510,9 +17522,9 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
Glue = RetValue.getValue(2);
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
- assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
- SDValue RetValue2 =
- DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
+ assert(VA.needsCustom());
+ SDValue RetValue2 = DAG.getCopyFromReg(Chain, DL, RVLocs[++i].getLocReg(),
+ MVT::i32, Glue);
Chain = RetValue2.getValue(1);
Glue = RetValue2.getValue(2);
RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
@@ -17575,21 +17587,21 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
- for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
- SDValue Val = OutVals[i];
+ for (unsigned i = 0, e = RVLocs.size(), OutIdx = 0; i < e; ++i, ++OutIdx) {
+ SDValue Val = OutVals[OutIdx];
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
// Handle returning f64 on RV32D with a soft float ABI.
assert(VA.isRegLoc() && "Expected return via registers");
+ assert(VA.needsCustom());
SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
DAG.getVTList(MVT::i32, MVT::i32), Val);
SDValue Lo = SplitF64.getValue(0);
SDValue Hi = SplitF64.getValue(1);
Register RegLo = VA.getLocReg();
- assert(RegLo < RISCV::X31 && "Invalid register pair");
- Register RegHi = RegLo + 1;
+ Register RegHi = RVLocs[++i].getLocReg();
if (STI.isRegisterReservedByUser(RegLo) ||
STI.isRegisterReservedByUser(RegHi))
|
This seems to introduce some compile failures in the GCC torture suite. e.g. pr44942 for rv32imafdc, ilp32, O1.
Mark any registers as CustomReg. This allows us to more directly emit the register or memory access for the high part. Previously we assumed the stack offset was 0 and needed a memory access if the low register was X17. This is another part of supporting FP arguments with GISel.
7eca044
to
3e90ffa
Compare
Thanks, Alex! I've fixed that issue and rebased the patch on trunk to get rid of the already committed #69118.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
Mark any registers as CustomReg.
This allows us to more directly emit the register or memory access for the high part. Previously we needed a memory access if the low register was X17 and we assumed the stack offset was 0. If the low part wasn't X17, we assumed the high register was the next register after the low register.
This is another part of supporting FP arguments with GISel.