Skip to content

Commit

Permalink
[PowerPC][AIX] Enable passing byval formal arguments in multiple regi…
Browse files Browse the repository at this point in the history
…sters.

Any or all of the argument registers can be used to pass a byval formal
argument, with the limitation that the argument must fit in the
available registers (i.e., it is not split between registers and stack).

Differential Revision: https://reviews.llvm.org/D76902
  • Loading branch information
mandlebug committed Apr 8, 2020
1 parent 5f25d22 commit 8abfd2c
Show file tree
Hide file tree
Showing 3 changed files with 192 additions and 34 deletions.
78 changes: 49 additions & 29 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Expand Up @@ -7059,12 +7059,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(

SmallVector<SDValue, 8> MemOps;

for (CCValAssign &VA : ArgLocs) {
EVT ValVT = VA.getValVT();
for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
CCValAssign &VA = ArgLocs[I++];
MVT LocVT = VA.getLocVT();
ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
assert((VA.isRegLoc() || VA.isMemLoc()) &&
"Unexpected location for function call argument.");

// For compatibility with the AIX XL compiler, the float args in the
// parameter save area are initialized even if the argument is available
Expand Down Expand Up @@ -7092,42 +7090,64 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
if (Flags.isByVal()) {
assert(VA.isRegLoc() && "MemLocs should already be handled.");

const unsigned ByValSize = Flags.getByValSize();
if (ByValSize > PtrByteSize)
report_fatal_error("Formal arguments greater then register size not "
"implemented yet.");

const MCPhysReg ArgReg = VA.getLocReg();
const PPCFrameLowering *FL = Subtarget.getFrameLowering();
const unsigned Offset = mapArgRegToOffsetAIX(ArgReg, FL);

const unsigned StackSize = alignTo(ByValSize, PtrByteSize);
if (Flags.getByValAlign() > PtrByteSize)
report_fatal_error("Over aligned byvals not supported yet.");

const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
const int FI = MF.getFrameInfo().CreateFixedObject(
StackSize, Offset, /* IsImmutable */ false, /* IsAliased */ true);
StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
/* IsAliased */ true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

InVals.push_back(FIN);

const unsigned VReg = MF.addLiveIn(ArgReg, IsPPC64 ? &PPC::G8RCRegClass
: &PPC::GPRCRegClass);

// Since the caller's side has left-justified the aggregate in the
// register, we can simply store the entire register into the stack
// slot.
// The store to the fixedstack object is needed because accessing a
// field of the ByVal will use a gep and load. Ideally we will optimize
// to extracting the value from the register directly, and elide the
// stores when the argument's address is not taken, but that will need
// to be future work.
SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
SDValue Store =
DAG.getStore(CopyFrom.getValue(1), dl, CopyFrom, FIN,
MachinePointerInfo::getFixedStack(MF, FI, 0));
// Add live ins for all the RegLocs for the same ByVal.
const TargetRegisterClass *RegClass =
IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;

auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
unsigned Offset) {
const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
// Since the caller's side has left-justified the aggregate in the
// register, we can simply store the entire register into the stack
// slot.
SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
// The store to the fixedstack object is needed because accessing a
// field of the ByVal will use a gep and load. Ideally we will optimize
// to extracting the value from the register directly, and elide the
// stores when the argument's address is not taken, but that will need
// to be future work.
SDValue Store =
DAG.getStore(CopyFrom.getValue(1), dl, CopyFrom,
DAG.getObjectPtrOffset(dl, FIN, Offset),
MachinePointerInfo::getFixedStack(MF, FI, Offset));

MemOps.push_back(Store);
MemOps.push_back(Store);
};

unsigned Offset = 0;
HandleRegLoc(VA.getLocReg(), Offset);
Offset += PtrByteSize;
for (; Offset != StackSize; Offset += PtrByteSize) {
assert(I != End &&
"Expecting enough RegLocs to copy entire ByVal arg.");

if (!ArgLocs[I].isRegLoc())
report_fatal_error("Passing ByVals split between registers and stack "
"not yet implemented.");

assert(ArgLocs[I].getValNo() == VA.getValNo() &&
"Expecting more RegLocs for ByVal argument.");

const CCValAssign RL = ArgLocs[I++];
HandleRegLoc(RL.getLocReg(), Offset);
}
continue;
}

EVT ValVT = VA.getValVT();
if (VA.isRegLoc()) {
MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
unsigned VReg =
Expand Down
20 changes: 20 additions & 0 deletions llvm/test/CodeGen/PowerPC/aix-cc-byval-split.ll
@@ -0,0 +1,20 @@
; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp \
; RUN: -mcpu=pwr4 -mattr=-altivec -verify-machineinstrs 2>&1 < %s | FileCheck %s

; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp \
; RUN: -mcpu=pwr4 -mattr=-altivec -verify-machineinstrs 2>&1 < %s | FileCheck %s

; CHECK: LLVM ERROR: Pass-by-value arguments are only supported in registers.

%struct.Spill = type { [12 x i64 ] }
@GS = external global %struct.Spill, align 4

; Takes a byval %struct.Spill (an array of 12 x i64) and returns the sum of
; array elements 2 and 10. Both llc invocations at the top of this file are
; run with `not --crash`, so lowering this function is expected to hit the
; fatal error matched above rather than produce code.
define i64 @test(%struct.Spill* byval(%struct.Spill) align 4 %s) {
entry:
; Address elements 2 and 10 of the aggregate's single array field.
%arrayidx_a = getelementptr inbounds %struct.Spill, %struct.Spill* %s, i32 0, i32 0, i32 2
%arrayidx_b = getelementptr inbounds %struct.Spill, %struct.Spill* %s, i32 0, i32 0, i32 10
%a = load i64, i64* %arrayidx_a
%b = load i64, i64* %arrayidx_b
%add = add i64 %a, %b
ret i64 %add
}
128 changes: 123 additions & 5 deletions llvm/test/CodeGen/PowerPC/aix-cc-byval.ll
Expand Up @@ -687,8 +687,6 @@ entry:
ret void
}

declare zeroext i8 @test_byval_32Byte(%struct.S32* byval(%struct.S32) align 1 %s)

; CHECK-LABEL: name: call_test_byval_32Byte{{.*}}

; The DAG block permits some invalid inputs for the benefit of allowing more valid orderings.
Expand Down Expand Up @@ -740,18 +738,78 @@ declare zeroext i8 @test_byval_32Byte(%struct.S32* byval(%struct.S32) align 1 %s
; ASM64-NEXT: bl .test_byval_32Byte
; ASM64-NEXT: nop

; Callee side of the 32-byte byval test: returns, zero-extended, the byte at
; index 21 of field 0 of the byval %struct.S32 argument. The expectations
; that follow look for this byte being loaded from offset 21 of the fixed
; stack object.
define zeroext i8 @test_byval_32Byte(%struct.S32* byval(%struct.S32) align 1 %s) {
entry:
%arrayidx = getelementptr inbounds %struct.S32, %struct.S32* %s, i32 0, i32 0, i32 21
%0 = load i8, i8* %arrayidx, align 1
ret i8 %0
}

; The ByVal handling produces dead stores. See `LowerFormalArguments_AIX` for
; details on why.

; CHECK-LABEL: name: test_byval_32Byte

; 32BIT: fixedStack:
; 32BIT-NEXT: - { id: 0, type: default, offset: 24, size: 32, alignment: 8, stack-id: default,
; 32BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,

; 32BIT: bb.0.entry:
; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10
; 32BIT-DAG: STW killed renamable $r3, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0
; 32BIT-DAG: STW killed renamable $r4, 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4
; 32BIT-DAG: STW killed renamable $r5, 8, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 8
; 32BIT-DAG: STW killed renamable $r6, 12, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 12
; 32BIT-DAG: STW killed renamable $r7, 16, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 16
; 32BIT-DAG: STW killed renamable $r8, 20, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 20
; 32BIT-DAG: STW killed renamable $r9, 24, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 24
; 32BIT-DAG: STW killed renamable $r10, 28, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 28
; 32BIT: renamable $r3 = LBZ 21, %fixed-stack.0 :: (dereferenceable load 1
; 32BIT: BLR

; 64BIT: fixedStack:
; 64BIT-NEXT: - { id: 0, type: default, offset: 48, size: 32, alignment: 16, stack-id: default,
; 64BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,

; 64BIT: bb.0.entry:
; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6
; 64BIT-DAG: STD killed renamable $x3, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0
; 64BIT-DAG: STD killed renamable $x4, 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8
; 64BIT-DAG: STD killed renamable $x5, 16, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 16
; 64BIT-DAG: STD killed renamable $x6, 24, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 24
; 64BIT-NEXT: renamable $x3 = LBZ8 21, %fixed-stack.0 :: (dereferenceable load 1
; 64BIT-NEXT: BLR8

; ASM-LABEL: .test_byval_32Byte:

%struct.S31 = type { [31 x i8] }
; ASM32: stw 8, 44(1)
; ASM32: stw 3, 24(1)
; ASM32-DAG: lbz 3, 45(1)
; ASM32-DAG: stw 4, 28(1)
; ASM32-DAG: stw 5, 32(1)
; ASM32-DAG: stw 6, 36(1)
; ASM32-DAG: stw 7, 40(1)
; ASM32-DAG: stw 9, 48(1)
; ASM32-DAG: stw 10, 52(1)
; ASM32-NEXT: blr

; ASM64: std 5, 64(1)
; ASM64: std 3, 48(1)
; ASM64-DAG: lbz 3, 69(1)
; ASM64-DAG: std 4, 56(1)
; ASM64-DAG: std 6, 72(1)
; ASM64-NEXT: blr

%struct.S31 = type <{ float, i32, i64, double, i32, i16, i8 }>

@gS31 = external global %struct.S31, align 1

define void @call_test_byval_31Byte() {
entry:
%call = call zeroext i8 @test_byval_31Byte(%struct.S31* byval(%struct.S31) align 1 @gS31)
%call = call double @test_byval_31Byte(%struct.S31* byval(%struct.S31) align 1 @gS31)
ret void
}

declare zeroext i8 @test_byval_31Byte(%struct.S31* byval(%struct.S31) align 1)

; CHECK-LABEL: name: call_test_byval_31Byte{{.*}}

Expand Down Expand Up @@ -821,6 +879,66 @@ declare zeroext i8 @test_byval_31Byte(%struct.S31* byval(%struct.S31) align 1)
; ASM64-NEXT: nop



; Callee side of the 31-byte byval test: returns field 3 (the double) of the
; packed %struct.S31 argument. The fields before it (float, i32, i64) occupy
; 16 bytes, which matches the offset-16 load in the expectations below. The
; load uses align 1, matching the byval argument's declared alignment.
define double @test_byval_31Byte(%struct.S31* byval(%struct.S31) align 1 %s) {
entry:
%gep = getelementptr inbounds %struct.S31, %struct.S31* %s, i32 0, i32 3
%load = load double, double* %gep, align 1
ret double %load
}

; CHECK-LABEL: name: test_byval_31Byte

; 32BIT: fixedStack:
; 32BIT-NEXT: - { id: 0, type: default, offset: 24, size: 32, alignment: 8, stack-id: default,
; 32BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,

; 32BIT: bb.0.entry:
; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10
; 32BIT-DAG: STW killed renamable $r3, 0, %fixed-stack.0 :: (store 4 into %fixed-stack.0
; 32BIT-DAG: STW killed renamable $r4, 4, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 4
; 32BIT-DAG: STW killed renamable $r5, 8, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 8
; 32BIT-DAG: STW killed renamable $r6, 12, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 12
; 32BIT-DAG: STW killed renamable $r7, 16, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 16
; 32BIT-DAG: STW killed renamable $r8, 20, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 20
; 32BIT-DAG: STW killed renamable $r9, 24, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 24
; 32BIT-DAG: STW killed renamable $r10, 28, %fixed-stack.0 :: (store 4 into %fixed-stack.0 + 28
; 32BIT-NEXT: renamable $f1 = LFD 16, %fixed-stack.0 :: (dereferenceable load 8
; 32BIT-NEXT: BLR

; 64BIT: fixedStack:
; 64BIT-NEXT: - { id: 0, type: default, offset: 48, size: 32, alignment: 16, stack-id: default,
; 64BIT-NEXT: isImmutable: false, isAliased: true, callee-saved-register: '', callee-saved-restored: true,

; 64BIT: bb.0.entry:
; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6
; 64BIT-DAG: STD killed renamable $x3, 0, %fixed-stack.0 :: (store 8 into %fixed-stack.0
; 64BIT-DAG: STD killed renamable $x4, 8, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 8
; 64BIT-DAG: STD killed renamable $x5, 16, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 16
; 64BIT-DAG: STD killed renamable $x6, 24, %fixed-stack.0 :: (store 8 into %fixed-stack.0 + 24
; 64BIT-NEXT: renamable $f1 = LFD 16, %fixed-stack.0 :: (dereferenceable load 8
; 64BIT-NEXT: BLR8

; ASM32-LABEL: .test_byval_31Byte:

; ASM32-DAG: stw 8, 44(1)
; ASM32: stw 7, 40(1)
; ASM32-DAG: lfd 1, 40(1)
; ASM32-DAG: stw 3, 24(1)
; ASM32-DAG: stw 4, 28(1)
; ASM32-DAG: stw 5, 32(1)
; ASM32-DAG: stw 6, 36(1)
; ASM32-DAG: stw 9, 48(1)
; ASM32-DAG: stw 10, 52(1)
; ASM32-NEXT: blr

; ASM64: std 5, 64(1)
; ASM64: lfd 1, 64(1)
; ASM64-DAG: std 3, 48(1)
; ASM64-DAG: std 4, 56(1)
; ASM64-DAG: std 6, 72(1)
; ASM64-NEXT: blr

%struct.F = type { float, float, float }

define i32 @call_test_byval_homogeneous_float_struct() {
Expand Down

0 comments on commit 8abfd2c

Please sign in to comment.