Skip to content

Commit

Permalink
[AArch64] Fix variadic tail-calls on ARM64EC (#79774)
Browse files Browse the repository at this point in the history
ARM64EC varargs calls expect that x4 = sp at entry. Special handling is
needed to ensure this with tail calls, since they occur after the
epilogue while the x4 write happens before it.

I tried going through AArch64MachineFrameLowering for this, hoping to
avoid creating the dummy object, but this was the best I could do since
the stack info that it uses isn't populated at this stage.
CreateFixedObject also explicitly forbids 0-sized objects.
  • Loading branch information
bylaws authored and dpaoliello committed Mar 11, 2024
1 parent ea6c457 commit d7a9810
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 1 deletion.
10 changes: 9 additions & 1 deletion llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8007,11 +8007,19 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}

if (IsVarArg && Subtarget->isWindowsArm64EC()) {
SDValue ParamPtr = StackPtr;
if (IsTailCall) {
// Create a dummy object at the top of the stack that can be used to get
// the SP after the epilogue
int FI = MF.getFrameInfo().CreateFixedObject(1, FPDiff, true);
ParamPtr = DAG.getFrameIndex(FI, PtrVT);
}

// For vararg calls, the Arm64EC ABI requires values in x4 and x5
// describing the argument list. x4 contains the address of the
// first stack parameter. x5 contains the size in bytes of all parameters
// passed on the stack.
RegsToPass.emplace_back(AArch64::X4, StackPtr);
RegsToPass.emplace_back(AArch64::X4, ParamPtr);
RegsToPass.emplace_back(AArch64::X5,
DAG.getConstant(NumBytes, DL, MVT::i64));
}
Expand Down
37 changes: 37 additions & 0 deletions llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -100,5 +100,42 @@ define void @varargs_many_argscalleer() nounwind {
ret void
}

; Calls the variadic @varargs_callee twice: first with a regular call that
; passes some arguments on the stack, then with a tail call.
; Expected output for the regular call: x4 = sp and x5 = 16.
; Expected output for the tail call: x4 = sp + 48, i.e. the value sp holds
; once the 48-byte frame is released by the `add sp, sp, #48` that follows,
; and x5 = 0, before the final `b` to the callee.
define void @varargs_caller_tail() nounwind {
; CHECK-LABEL: varargs_caller_tail:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #48
; CHECK-NEXT: mov x4, sp
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: mov x9, #4617315517961601024 // =0x4014000000000000
; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
; CHECK-NEXT: mov w1, #2 // =0x2
; CHECK-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
; CHECK-NEXT: mov w3, #4 // =0x4
; CHECK-NEXT: mov w5, #16 // =0x10
; CHECK-NEXT: stp xzr, x30, [sp, #24] // 8-byte Folded Spill
; CHECK-NEXT: stp x9, x8, [sp]
; CHECK-NEXT: str xzr, [sp, #16]
; CHECK-NEXT: .weak_anti_dep varargs_callee
; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
; CHECK-NEXT: bl "#varargs_callee"
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-NEXT: add x4, sp, #48
; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
; CHECK-NEXT: mov w1, #4 // =0x4
; CHECK-NEXT: mov w2, #3 // =0x3
; CHECK-NEXT: mov w3, #2 // =0x2
; CHECK-NEXT: mov x5, xzr
; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: .weak_anti_dep varargs_callee
; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
; CHECK-NEXT: b "#varargs_callee"
call void (double, ...) @varargs_callee(double 1.0, i32 2, double 3.0, i32 4, double 5.0, <2 x double> <double 0.0, double 0.0>)
tail call void (double, ...) @varargs_callee(double 1.0, i32 4, i32 3, i32 2)
ret void
}

declare void @llvm.va_start(ptr)
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/AArch64/vararg-tallcall.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; RUN: llc -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
; RUN: llc -mtriple=arm64ec-windows-msvc %s -o - | FileCheck %s --check-prefixes=CHECK-EC
; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s
; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s

Expand Down Expand Up @@ -32,3 +33,10 @@ attributes #1 = { noinline optnone "thunk" }
; CHECK: ldr x9, [x9]
; CHECK: mov v0.16b, v16.16b
; CHECK: br x9
; CHECK-EC: mov v7.16b, v0.16b
; CHECK-EC: ldr x9, [x0]
; CHECK-EC: ldr x11, [x9]
; CHECK-EC: mov v0.16b, v7.16b
; CHECK-EC: add x4, sp, #64
; CHECK-EC: add sp, sp, #64
; CHECK-EC: br x11

0 comments on commit d7a9810

Please sign in to comment.