Skip to content

Commit

Permalink
[RISCV] Fix stack slot for argument types (Bug 49500)
Browse files Browse the repository at this point in the history
This is a complementary/alternative fix for D99068. It takes a slightly
different approach by explicitly summing up all of the required split
part type sizes and ensuring we allocate enough space for them. It also
takes the maximum alignment of each part.

Compared with D99068 there are fewer changes to the stack objects in
existing tests. However, @luismarques has shown in that patch that there
are opportunities to reduce our stack usage in the future.

Reviewed By: luismarques

Differential Revision: https://reviews.llvm.org/D99087
  • Loading branch information
frasercrmck committed Apr 29, 2021
1 parent eb56fa9 commit 43ad058
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 43 deletions.
33 changes: 27 additions & 6 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Expand Up @@ -7080,6 +7080,11 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
return true;
}

// Return the target's preferred alignment (per the DataLayout) for the IR
// type corresponding to the given EVT.
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
  Type *Ty = VT.getTypeForEVT(*DAG.getContext());
  return DAG.getDataLayout().getPrefTypeAlign(Ty);
}

// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
// and output parameter nodes.
SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
Expand Down Expand Up @@ -7194,11 +7199,10 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// For now, only handle fully promoted and indirect arguments.
if (VA.getLocInfo() == CCValAssign::Indirect) {
// Store the argument in a stack slot and pass its address.
SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
MemOpChains.push_back(
DAG.getStore(Chain, DL, ArgValue, SpillSlot,
MachinePointerInfo::getFixedStack(MF, FI)));
Align StackAlign =
std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
getPrefTypeAlign(ArgValue.getValueType(), DAG));
TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
// If the original argument was split (e.g. i128), we need
// to store the required parts of it here (and pass just one address).
// Vectors may be partly split to registers and partly to the stack, in
Expand All @@ -7207,15 +7211,32 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
unsigned ArgIndex = Outs[i].OrigArgIndex;
unsigned ArgPartOffset = Outs[i].PartOffset;
assert(VA.getValVT().isVector() || ArgPartOffset == 0);
// Calculate the total size to store. We don't have access to what we're
// actually storing other than performing the loop and collecting the
// info.
SmallVector<std::pair<SDValue, unsigned>> Parts;
while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
SDValue PartValue = OutVals[i + 1];
unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
EVT PartVT = PartValue.getValueType();
StoredSize += PartVT.getStoreSize();
StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
Parts.push_back(std::make_pair(PartValue, PartOffset));
++i;
}
SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
MemOpChains.push_back(
DAG.getStore(Chain, DL, ArgValue, SpillSlot,
MachinePointerInfo::getFixedStack(MF, FI)));
for (const auto &Part : Parts) {
SDValue PartValue = Part.first;
unsigned PartOffset = Part.second;
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
DAG.getIntPtrConstant(PartOffset, DL));
MemOpChains.push_back(
DAG.getStore(Chain, DL, PartValue, Address,
MachinePointerInfo::getFixedStack(MF, FI)));
++i;
}
ArgValue = SpillSlot;
} else {
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
Expand Up @@ -1074,13 +1074,13 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
;
; LMULMAX2-LABEL: call_split_vector_args:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi sp, sp, -256
; LMULMAX2-NEXT: .cfi_def_cfa_offset 256
; LMULMAX2-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
; LMULMAX2-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
; LMULMAX2-NEXT: addi sp, sp, -128
; LMULMAX2-NEXT: .cfi_def_cfa_offset 128
; LMULMAX2-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
; LMULMAX2-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
; LMULMAX2-NEXT: .cfi_offset ra, -8
; LMULMAX2-NEXT: .cfi_offset s0, -16
; LMULMAX2-NEXT: addi s0, sp, 256
; LMULMAX2-NEXT: addi s0, sp, 128
; LMULMAX2-NEXT: .cfi_def_cfa s0, 0
; LMULMAX2-NEXT: andi sp, sp, -128
; LMULMAX2-NEXT: vsetivli a2, 2, e32,m1,ta,mu
Expand All @@ -1105,21 +1105,21 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
; LMULMAX2-NEXT: vmv1r.v v12, v8
; LMULMAX2-NEXT: vmv2r.v v22, v14
; LMULMAX2-NEXT: call split_vector_args@plt
; LMULMAX2-NEXT: addi sp, s0, -256
; LMULMAX2-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
; LMULMAX2-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
; LMULMAX2-NEXT: addi sp, sp, 256
; LMULMAX2-NEXT: addi sp, s0, -128
; LMULMAX2-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; LMULMAX2-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; LMULMAX2-NEXT: addi sp, sp, 128
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: call_split_vector_args:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: addi sp, sp, -256
; LMULMAX1-NEXT: .cfi_def_cfa_offset 256
; LMULMAX1-NEXT: sd ra, 248(sp) # 8-byte Folded Spill
; LMULMAX1-NEXT: sd s0, 240(sp) # 8-byte Folded Spill
; LMULMAX1-NEXT: addi sp, sp, -128
; LMULMAX1-NEXT: .cfi_def_cfa_offset 128
; LMULMAX1-NEXT: sd ra, 120(sp) # 8-byte Folded Spill
; LMULMAX1-NEXT: sd s0, 112(sp) # 8-byte Folded Spill
; LMULMAX1-NEXT: .cfi_offset ra, -8
; LMULMAX1-NEXT: .cfi_offset s0, -16
; LMULMAX1-NEXT: addi s0, sp, 256
; LMULMAX1-NEXT: addi s0, sp, 128
; LMULMAX1-NEXT: .cfi_def_cfa s0, 0
; LMULMAX1-NEXT: andi sp, sp, -128
; LMULMAX1-NEXT: vsetivli a2, 2, e32,m1,ta,mu
Expand Down Expand Up @@ -1158,10 +1158,10 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
; LMULMAX1-NEXT: vmv1r.v v22, v14
; LMULMAX1-NEXT: vmv1r.v v23, v15
; LMULMAX1-NEXT: call split_vector_args@plt
; LMULMAX1-NEXT: addi sp, s0, -256
; LMULMAX1-NEXT: ld s0, 240(sp) # 8-byte Folded Reload
; LMULMAX1-NEXT: ld ra, 248(sp) # 8-byte Folded Reload
; LMULMAX1-NEXT: addi sp, sp, 256
; LMULMAX1-NEXT: addi sp, s0, -128
; LMULMAX1-NEXT: ld s0, 112(sp) # 8-byte Folded Reload
; LMULMAX1-NEXT: ld ra, 120(sp) # 8-byte Folded Reload
; LMULMAX1-NEXT: addi sp, sp, 128
; LMULMAX1-NEXT: ret
%a = load <2 x i32>, <2 x i32>* %pa
%b = load <32 x i32>, <32 x i32>* %pb
Expand Down
26 changes: 12 additions & 14 deletions llvm/test/CodeGen/RISCV/stack-slot-size.ll
Expand Up @@ -13,7 +13,6 @@ declare void @callee129(i129)
declare void @callee160(i160)
declare void @callee161(i161)

; FIXME: Stack write clobbers the spilled value (on RV64).
define i32 @caller129() nounwind {
; RV32I-LABEL: caller129:
; RV32I: # %bb.0:
Expand All @@ -35,18 +34,18 @@ define i32 @caller129() nounwind {
;
; RV64I-LABEL: caller129:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: addi sp, sp, -48
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: addi a0, zero, 42
; RV64I-NEXT: sw a0, 20(sp)
; RV64I-NEXT: sw a0, 36(sp)
; RV64I-NEXT: sd zero, 16(sp)
; RV64I-NEXT: sd zero, 8(sp)
; RV64I-NEXT: mv a0, sp
; RV64I-NEXT: sd zero, 0(sp)
; RV64I-NEXT: call callee129@plt
; RV64I-NEXT: lw a0, 20(sp)
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: lw a0, 36(sp)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
%1 = alloca i32
store i32 42, i32* %1
Expand All @@ -55,7 +54,6 @@ define i32 @caller129() nounwind {
ret i32 %2
}

; FIXME: Stack write clobbers the spilled value (on RV64).
define i32 @caller160() nounwind {
; RV32I-LABEL: caller160:
; RV32I: # %bb.0:
Expand All @@ -77,18 +75,18 @@ define i32 @caller160() nounwind {
;
; RV64I-LABEL: caller160:
; RV64I: # %bb.0:
; RV64I-NEXT: addi sp, sp, -32
; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT: addi sp, sp, -48
; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT: addi a0, zero, 42
; RV64I-NEXT: sw a0, 20(sp)
; RV64I-NEXT: sw a0, 36(sp)
; RV64I-NEXT: sd zero, 16(sp)
; RV64I-NEXT: sd zero, 8(sp)
; RV64I-NEXT: mv a0, sp
; RV64I-NEXT: sd zero, 0(sp)
; RV64I-NEXT: call callee160@plt
; RV64I-NEXT: lw a0, 20(sp)
; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 32
; RV64I-NEXT: lw a0, 36(sp)
; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT: addi sp, sp, 48
; RV64I-NEXT: ret
%1 = alloca i32
store i32 42, i32* %1
Expand Down
7 changes: 2 additions & 5 deletions llvm/test/CodeGen/RISCV/vector-abi.ll
@@ -1,15 +1,12 @@
; RUN: llc -mtriple=riscv32 -stop-after finalize-isel < %s | FileCheck %s -check-prefix=RV32
; RUN: llc -mtriple=riscv64 -stop-after finalize-isel < %s | FileCheck %s -check-prefix=RV64

; FIXME: The stack location used to pass the parameter to the function has the
; incorrect size and alignment for how we use it, and we clobber the stack.

declare void @callee(<4 x i8> %v)

define void @caller() {
; RV32-LABEL: name: caller
; RV32: stack:
; RV32: - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
; RV32: - { id: 0, name: '', type: default, offset: 0, size: 16, alignment: 4,
; RV32-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
; RV32-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
; RV32: bb.0 (%ir-block.0):
Expand All @@ -29,7 +26,7 @@ define void @caller() {
; RV32: PseudoRET
; RV64-LABEL: name: caller
; RV64: stack:
; RV64: - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
; RV64: - { id: 0, name: '', type: default, offset: 0, size: 32, alignment: 8,
; RV64-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
; RV64-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
; RV64: bb.0 (%ir-block.0):
Expand Down

0 comments on commit 43ad058

Please sign in to comment.