diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index a7337f0f0ca611..1141bddbc1a8b0 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -164,13 +164,14 @@ class CallLowering {
                                       uint64_t Size, MachinePointerInfo &MPO,
                                       CCValAssign &VA) = 0;
 
-    /// An overload which takes an ArgInfo if additional information about
-    /// the arg is needed.
-    virtual void assignValueToAddress(const ArgInfo &Arg, Register Addr,
-                                      uint64_t Size, MachinePointerInfo &MPO,
+    /// An overload which takes an ArgInfo if additional information about the
+    /// arg is needed. \p ValRegIndex is the index in \p Arg.Regs for the value
+    /// to store.
+    virtual void assignValueToAddress(const ArgInfo &Arg, unsigned ValRegIndex,
+                                      Register Addr, uint64_t Size,
+                                      MachinePointerInfo &MPO,
                                       CCValAssign &VA) {
-      assert(Arg.Regs.size() == 1);
-      assignValueToAddress(Arg.Regs[0], Addr, Size, MPO, VA);
+      assignValueToAddress(Arg.Regs[ValRegIndex], Addr, Size, MPO, VA);
     }
 
     /// Handle custom values, which may be passed into one or more of \p VAs.
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index ca2d642c43377d..0c54a090ecc7f1 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -390,14 +390,8 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
       // There should be Regs.size() ArgLocs per argument.
       VA = ArgLocs[j + Part];
       if (VA.isMemLoc()) {
-        // Don't currently support loading/storing a type that needs to be split
-        // to the stack. Should be easy, just not implemented yet.
-        if (NumArgRegs > 1) {
-          LLVM_DEBUG(
-              dbgs()
-              << "Load/store a split arg to/from the stack not implemented yet\n");
-          return false;
-        }
+        // Individual pieces may have been spilled to the stack and others
+        // passed in registers.
 
         // FIXME: Use correct address space for pointer size
         EVT LocVT = VA.getValVT();
@@ -406,8 +400,8 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
         unsigned Offset = VA.getLocMemOffset();
         MachinePointerInfo MPO;
         Register StackAddr = Handler.getStackAddress(MemSize, Offset, MPO);
-        Handler.assignValueToAddress(Args[i], StackAddr,
-                                     MemSize, MPO, VA);
+        Handler.assignValueToAddress(Args[i], Part, StackAddr, MemSize, MPO,
+                                     VA);
         continue;
       }
 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 272857d379c2fa..bb953c447da31c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -211,19 +211,17 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler {
     MIRBuilder.buildStore(ValVReg, Addr, *MMO);
   }
 
-  void assignValueToAddress(const CallLowering::ArgInfo &Arg, Register Addr,
-                            uint64_t Size, MachinePointerInfo &MPO,
-                            CCValAssign &VA) override {
+  void assignValueToAddress(const CallLowering::ArgInfo &Arg, unsigned RegIndex,
+                            Register Addr, uint64_t Size,
+                            MachinePointerInfo &MPO, CCValAssign &VA) override {
     unsigned MaxSize = Size * 8;
     // For varargs, we always want to extend them to 8 bytes, in which case
    // we disable setting a max.
     if (!Arg.IsFixed)
       MaxSize = 0;
 
-    assert(Arg.Regs.size() == 1);
-
     Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
-                           ? extendRegister(Arg.Regs[0], VA, MaxSize)
+                           ? extendRegister(Arg.Regs[RegIndex], VA, MaxSize)
                            : Arg.Regs[0];
 
     // If we extended we might need to adjust the MMO's Size.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 3d0aac4a9a1808..e7a0d7e52f2224 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -242,12 +242,13 @@ struct AMDGPUOutgoingArgHandler : public AMDGPUValueHandler {
     MIRBuilder.buildStore(ValVReg, Addr, *MMO);
   }
 
-  void assignValueToAddress(const CallLowering::ArgInfo &Arg, Register Addr,
+  void assignValueToAddress(const CallLowering::ArgInfo &Arg,
+                            unsigned ValRegIndex, Register Addr,
                             uint64_t MemSize, MachinePointerInfo &MPO,
                             CCValAssign &VA) override {
     Register ValVReg = VA.getLocInfo() != CCValAssign::LocInfo::FPExt
-                           ? extendRegister(Arg.Regs[0], VA)
-                           : Arg.Regs[0];
+                           ? extendRegister(Arg.Regs[ValRegIndex], VA)
+                           : Arg.Regs[ValRegIndex];
 
     // If we extended the value type we might need to adjust the MMO's
     // Size. This happens if ComputeValueVTs widened a small type value to a
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
index 0bbb9955ee1061..0b92477f3913f0 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -126,17 +126,6 @@ define void @nonpow2_vector_add_fewerelements() {
   ret void
 }
 
-; Currently can't handle dealing with a split type (s128 -> 2 x s64) on the stack yet.
-declare void @use_s128(i128 %a, i128 %b)
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to lower arguments: i32 (i32, i128, i32, i32, i32, i128, i32)* (in function: fn1)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for fn1
-; FALLBACK-WITH-REPORT-OUT-LABEL: fn1:
-define i32 @fn1(i32 %p1, i128 %p2, i32 %p3, i32 %p4, i32 %p5, i128 %p6, i32 %p7) {
-entry:
-  call void @use_s128(i128 %p2, i128 %p6)
-  ret i32 0
-}
-
 ; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: RET_ReallyLR implicit $x0 (in function: strict_align_feature)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for strict_align_feature
 ; FALLBACK-WITH-REPORT-OUT-LABEL: strict_align_feature
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll
new file mode 100644
index 00000000000000..f4fcd01814e529
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-arguments.ll
@@ -0,0 +1,43 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
+
+declare void @use_s128(i128 %a, i128 %b)
+
+; Check partially passing a split type on the stack (s128 -> 2 x s64)
+; CHECK-LABEL: name: call_use_s128
+; CHECK: fixedStack:
+; CHECK:   - { id: 0, type: default, offset: 16, size: 4, alignment: 16
+; CHECK:   - { id: 1, type: default, offset: 8, size: 8, alignment: 8, stack-id: default,
+; CHECK:   - { id: 2, type: default, offset: 0, size: 8, alignment: 16, stack-id: default,
+; CHECK: bb.1.entry:
+; CHECK:   liveins: $w0, $w4, $w5, $w6, $x2, $x3
+; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+; CHECK:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
+; CHECK:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x3
+; CHECK:   [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY1]](s64), [[COPY2]](s64)
+; CHECK:   [[COPY3:%[0-9]+]]:_(s32) = COPY $w4
+; CHECK:   [[COPY4:%[0-9]+]]:_(s32) = COPY $w5
+; CHECK:   [[COPY5:%[0-9]+]]:_(s32) = COPY $w6
+; CHECK:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
+; CHECK:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.2, align 16)
+; CHECK:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+; CHECK:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 8 from %fixed-stack.1)
+; CHECK:   [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[LOAD]](s64), [[LOAD1]](s64)
+; CHECK:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+; CHECK:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load 4 from %fixed-stack.0, align 16)
+; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+; CHECK:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+; CHECK:   [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV]](s128)
+; CHECK:   [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV1]](s128)
+; CHECK:   $x0 = COPY [[UV]](s64)
+; CHECK:   $x1 = COPY [[UV1]](s64)
+; CHECK:   $x2 = COPY [[UV2]](s64)
+; CHECK:   $x3 = COPY [[UV3]](s64)
+; CHECK:   BL @use_s128, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $x0, implicit $x1, implicit $x2, implicit $x3
+; CHECK:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+; CHECK:   $w0 = COPY [[C]](s32)
+; CHECK:   RET_ReallyLR implicit $w0
+define i32 @call_use_s128(i32 %p1, i128 %p2, i32 %p3, i32 %p4, i32 %p5, i128 %p6, i32 %p7) {
+entry:
+  call void @use_s128(i128 %p2, i128 %p6)
+  ret i32 0
+}