Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Support for byval aggregate arguments

The ABI for passing byval aggregates as arguments isn't
really documented, but this is what gcc seems to do:

If the aggregate fits in the available argument registers
it will be passed in those.
If it doesn't fit, it will be passed (completely) on the
stack and any arguments after the byval argument are
also passed on the stack.
  • Loading branch information...
commit bbcbd8a613ddb08bf0c607951947066b426da79e 1 parent 8822bd8
@skristiansson skristiansson authored sbourdeauducq committed
View
3  lib/Target/LM32/LM32CallingConv.td
@@ -28,6 +28,9 @@ def CC_LM32 : CallingConv<[
// Promote i8/i16 arguments to i32.
CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ // Handle byval arguments
+ CCIfByVal<CCPassByVal<4, 4>>,
+
// Integer and float arguments are passed in integer registers.
CCIfType<[i32, f32], CCAssignToReg<[R1, R2, R3, R4, R5, R6, R7, R8]>>,
View
103 lib/Target/LM32/LM32ISelLowering.cpp
@@ -502,6 +502,25 @@ SDValue LM32TargetLowering::LowerVASTART(SDValue Op,
// For mips eabi see http://www.cygwin.com/ml/binutils/2003-06/msg00436.html
// Elements may have been used from SparcTargetLowering::LowerArguments.
//===----------------------------------------------------------------------===//
+/// HandleByVal - Decide how a byval aggregate parameter is passed.
+/// If the whole aggregate fits in the argument registers still available in
+/// \p State, those registers are allocated here and Size is left at 0
+/// (presumably signalling the generic calling-convention code that nothing
+/// goes on the stack -- TODO confirm against the CCState contract).
+/// Otherwise the whole aggregate is reported as Size bytes (rounded up to a
+/// word multiple) to be passed on the stack, and every remaining argument
+/// register has been allocated ("confiscated") in the process, so arguments
+/// after the byval one also go on the stack.  This mirrors what gcc does,
+/// per the commit description; the ABI itself is undocumented.
+void LM32TargetLowering::HandleByVal(CCState *State, unsigned &Size) const {
+ static const unsigned ArgRegList[] = {
+ LM32::R1, LM32::R2, LM32::R3, LM32::R4, LM32::R5, LM32::R6, LM32::R7,
+ LM32::R8
+ };
+ // Number of 32-bit words needed to hold the aggregate, rounded up.
+ unsigned NumWords = (Size + 3)/4;
+ unsigned NewSize = 0;
+ for (unsigned i = 0; i < NumWords; ++i) {
+ // Claim one register per word.  When AllocateReg fails, all argument
+ // registers are taken, so the aggregate cannot be passed (completely)
+ // in registers: report the full word-aligned size for stack passing.
+ if (!State->AllocateReg(ArgRegList, 8)) {
+ NewSize = NumWords*4;
+ break;
+ }
+ }
+ Size = NewSize;
+}
+
/// Monarch call implementation
/// LowerCall - This hook must be implemented to lower calls into the
/// the specified DAG. The outgoing arguments to the call are described
@@ -557,11 +576,14 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
+ unsigned ArgRegEnd = LM32::R0;
+
// Walk the register/memloc assignments, inserting copies/loads.
// This was based on Sparc but the Sparc code has been updated.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -581,7 +603,39 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
// Arguments that can be passed on register must be kept at
// RegsToPass vector
if (VA.isRegLoc()) {
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ ArgRegEnd = VA.getLocReg();
+ RegsToPass.push_back(std::make_pair(ArgRegEnd, Arg));
+ } else if (Flags.isByVal()) {
+ unsigned NumWords = (Flags.getByValSize() + 3)/4;
+ if (NumWords <= (LM32::R8 - ArgRegEnd)) {
+ // Load byval aggregate into argument registers.
+ for (unsigned i = 0; i < NumWords; ++i) {
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
+ DAG.getConstant(i*4, MVT::i32));
+ SDValue Load = DAG.getLoad(getPointerTy(), dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(++ArgRegEnd, Load));
+ }
+ continue;
+ }
+ // Byval aggregate didn't fit in the argument registers,
+ // pass it on the stack.
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, LM32::RSP,
+ getPointerTy());
+ int Offset = VA.getLocMemOffset();
+ Offset += Subtarget->hasSPBias() ? 4 : 0;
+ SDValue StackOffset = DAG.getIntPtrConstant(Offset);
+ SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+ StackOffset);
+ SDValue SizeNode = DAG.getConstant(NumWords*4, MVT::i32);
+ MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Arg, SizeNode,
+ Flags.getByValAlign(),
+ /*isVolatile=*/false,
+ /*AlwaysInline=*/false,
+ MachinePointerInfo(0),
+ MachinePointerInfo(0)));
} else {
assert(VA.isMemLoc());
@@ -702,6 +756,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
+ SmallVector<SDValue, 8> OutChains;
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
LM32FunctionInfo *LM32FI = MF.getInfo<LM32FunctionInfo>();
@@ -721,6 +776,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
// Arguments stored on registers
if (VA.isRegLoc()) {
@@ -760,6 +816,32 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
}
InVals.push_back(ArgValue);
+ } else if (Flags.isByVal()) {
+ unsigned NumWords = (Flags.getByValSize() + 3)/4;
+ unsigned Size = NumWords*4;
+ unsigned Align = Flags.getByValAlign();
+ int FI = 0;
+ if (NumWords <= (LM32::R8 - ArgRegEnd)) {
+ // Store the argument registers onto the local stack
+ FI = MFI->CreateStackObject(Size, Align, false);
+ for (unsigned i = 0; i < NumWords; ++i) {
+ unsigned LiveReg = MF.addLiveIn(++ArgRegEnd, LM32::GPRRegisterClass);
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ DAG.getFrameIndex(FI, getPointerTy()),
+ DAG.getConstant(i*4, MVT::i32));
+ OutChains.push_back(DAG.getStore(Chain, dl,
+ DAG.getRegister(LiveReg, MVT::i32),
+ AddArg,
+ MachinePointerInfo(),
+ false, false, 0));
+ }
+ } else {
+ // Byval arguments didn't fit in registers, mark all as occupied.
+ ArgRegEnd = LM32::R8;
+ nextLocMemOffset = VA.getLocMemOffset() + Size;
+ FI = MFI->CreateFixedObject(Size, VA.getLocMemOffset(), true);
+ }
+ InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
} else { // VA.isRegLoc()
assert(ArgRegEnd == LM32::R8 &&
"We should have used all argument registers");
@@ -821,9 +903,6 @@ DEBUG((cast<LoadSDNode>(/* SDNode* */lod.getNode()))->dump());
DEBUG(errs() << "All varargs on stack getVarArgsFrameIndex() to:" <<
LM32FI->getVarArgsFrameIndex() << "\n");
} else {
- // Used to acumulate store chains.
- std::vector<SDValue> OutChains;
-
TargetRegisterClass *RC = LM32::GPRRegisterClass;
// We'll save all argument registers not already saved on the stack. Store
@@ -851,16 +930,16 @@ DEBUG((cast<LoadSDNode>(/* SDNode* */lod.getNode()))->dump());
// which is a value necessary to VASTART.
DEBUG(errs() << "setVarArgsFrameIndex to:" << FI << "\n");
LM32FI->setVarArgsFrameIndex(FI);
-
- // All stores are grouped in one node to allow the matching between
- // the size of Ins and InVals. This only happens when on varg functions
- if (!OutChains.empty()) {
- OutChains.push_back(Chain);
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &OutChains[0], OutChains.size());
- }
}
}
+ // All stores are grouped in one node to allow the matching between
+ // the size of Ins and InVals. This only happens when on varg functions and
+ // byval arguments
+ if (!OutChains.empty()) {
+ OutChains.push_back(Chain);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &OutChains[0], OutChains.size());
+ }
return Chain;
}
View
3  lib/Target/LM32/LM32ISelLowering.h
@@ -151,6 +151,9 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
+ /// HandleByVal - Target-specific cleanup for ByVal support.
+ virtual void HandleByVal(CCState *, unsigned &) const;
+
#if 0
virtual MachineBasicBlock*
EmitCustomShift(MachineInstr *MI, MachineBasicBlock *MBB) const;
View
72 test/CodeGen/LM32/byval.ll
@@ -0,0 +1,72 @@
+; RUN: llc -march=lm32 < %s | FileCheck %s
+; NOTE: if the memcpy threshold is adjusted, some of those tests might
+; generate a false negative. Adjust accordingly in such case.
+%struct.s0 = type { [8 x i32] }
+%struct.s1 = type { [9 x i32] }
+%struct.s2 = type { [7 x i32] }
+
+; Test for byval aggregate that fits in arg regs.
+; %struct.s0 is [8 x i32] = 8 words, exactly filling r1-r8, so the whole
+; aggregate travels in registers and no memcpy to the stack is emitted.
+define void @f0_1(%struct.s0* byval %s) {
+entry:
+ call void @f0(%struct.s0* byval %s)
+ ret void
+}
+
+declare void @f0(%struct.s0* byval)
+; CHECK: f0_1:
+; CHECK: addi sp, sp, -36
+; CHECK: sw (sp+4), r1
+; CHECK: calli f0
+; CHECK: addi sp, sp, 36
+
+; Test for byval aggregate that doesn't fit in arg regs.
+; %struct.s1 is [9 x i32] = 9 words, one more than the 8 argument registers,
+; so the whole aggregate must be copied to the outgoing stack area; the
+; checks below pin the memcpy(dst = sp+4, src = caller frame, size = 36).
+define void @f1_1(%struct.s1* byval %s) {
+entry:
+ call void @f1(%struct.s1* byval %s)
+ ret void
+}
+
+declare void @f1(%struct.s1* byval)
+; CHECK: f1_1:
+; CHECK: addi sp, sp, -40
+; CHECK: addi r1, sp, 4
+; CHECK: addi r2, sp, 44
+; CHECK: addi r3, r0, 36
+; CHECK: calli memcpy
+; CHECK: calli f1
+; CHECK: addi sp, sp, 40
+
+
+; Test for byval aggregate with trailing argument that fits in arg regs.
+; %struct.s2 is [7 x i32] = 7 words in r1-r7, leaving r8 free for the
+; trailing i32 (checked by "addi r8, r0, 1").
+define void @f2_1(%struct.s2* byval %s) {
+entry:
+ call void @f2(%struct.s2* byval %s, i32 1)
+ ret void
+}
+
+declare void @f2(%struct.s2* byval, i32)
+; CHECK: f2_1:
+; CHECK: addi sp, sp, -32
+; CHECK: sw (sp+4), r1
+; CHECK: addi r8, r0, 1
+; CHECK: calli f2
+; CHECK: addi sp, sp, 32
+
+; Test for byval aggregate with trailing argument that doesn't fit in arg regs.
+; %struct.s1 (9 words) exceeds the 8 argument registers, so it goes on the
+; stack via memcpy and the trailing i32 is also stack-passed (registers after
+; a stack-passed byval are confiscated).
+define void @f3_1(%struct.s1* byval %s) {
+entry:
+ call void @f3(%struct.s1* byval %s, i32 1)
+ ret void
+}
+
+declare void @f3(%struct.s1* byval, i32)
+; CHECK: f3_1:
+; CHECK: addi sp, sp, -44
+; CHECK: addi r1, sp, 4
+; CHECK: addi r2, sp, 48
+; CHECK: addi r3, r0, 36
+; CHECK: calli memcpy
+; CHECK: addi [[REG:r[0-9]+]], r0, 1
+; CHECK: sw (sp+40), [[REG]]
+; CHECK: calli f3
+; CHECK: addi sp, sp, 44
Please sign in to comment.
Something went wrong with that request. Please try again.