diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index 11248bb7aef31..03d641d04413e 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -43,6 +43,8 @@ static cl::opt<bool> GenerateThunks("arm64ec-generate-thunks", cl::Hidden,
 
 namespace {
 
+enum class ThunkType { GuestExit, Entry, Exit };
+
 class AArch64Arm64ECCallLowering : public ModulePass {
 public:
   static char ID;
@@ -69,14 +71,14 @@ class AArch64Arm64ECCallLowering : public ModulePass {
   Type *I64Ty;
   Type *VoidTy;
 
-  void getThunkType(FunctionType *FT, AttributeList AttrList, bool EntryThunk,
+  void getThunkType(FunctionType *FT, AttributeList AttrList, ThunkType TT,
                     raw_ostream &Out, FunctionType *&Arm64Ty,
                     FunctionType *&X64Ty);
   void getThunkRetType(FunctionType *FT, AttributeList AttrList,
                        raw_ostream &Out, Type *&Arm64RetTy, Type *&X64RetTy,
                        SmallVectorImpl<Type *> &Arm64ArgTypes,
                        SmallVectorImpl<Type *> &X64ArgTypes, bool &HasSretPtr);
-  void getThunkArgTypes(FunctionType *FT, AttributeList AttrList,
+  void getThunkArgTypes(FunctionType *FT, AttributeList AttrList, ThunkType TT,
                         raw_ostream &Out,
                         SmallVectorImpl<Type *> &Arm64ArgTypes,
                         SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr);
@@ -89,10 +91,11 @@ class AArch64Arm64ECCallLowering : public ModulePass {
 
 void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
                                               AttributeList AttrList,
-                                              bool EntryThunk, raw_ostream &Out,
+                                              ThunkType TT, raw_ostream &Out,
                                               FunctionType *&Arm64Ty,
                                               FunctionType *&X64Ty) {
-  Out << (EntryThunk ? "$ientry_thunk$cdecl$" : "$iexit_thunk$cdecl$");
+  Out << (TT == ThunkType::Entry ? "$ientry_thunk$cdecl$"
+                                 : "$iexit_thunk$cdecl$");
 
   Type *Arm64RetTy;
   Type *X64RetTy;
@@ -102,8 +105,8 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
 
   // The first argument to a thunk is the called function, stored in x9.
   // For exit thunks, we pass the called function down to the emulator;
-  // for entry thunks, we just call the Arm64 function directly.
-  if (!EntryThunk)
+  // for entry/guest exit thunks, we just call the Arm64 function directly.
+  if (TT == ThunkType::Exit)
     Arm64ArgTypes.push_back(PtrTy);
   X64ArgTypes.push_back(PtrTy);
 
@@ -111,14 +114,16 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
   getThunkRetType(FT, AttrList, Out, Arm64RetTy, X64RetTy, Arm64ArgTypes,
                   X64ArgTypes, HasSretPtr);
 
-  getThunkArgTypes(FT, AttrList, Out, Arm64ArgTypes, X64ArgTypes, HasSretPtr);
+  getThunkArgTypes(FT, AttrList, TT, Out, Arm64ArgTypes, X64ArgTypes,
+                   HasSretPtr);
 
   Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes, false);
+
   X64Ty = FunctionType::get(X64RetTy, X64ArgTypes, false);
 }
 
 void AArch64Arm64ECCallLowering::getThunkArgTypes(
-    FunctionType *FT, AttributeList AttrList, raw_ostream &Out,
+    FunctionType *FT, AttributeList AttrList, ThunkType TT, raw_ostream &Out,
     SmallVectorImpl<Type *> &Arm64ArgTypes,
     SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr) {
 
@@ -156,9 +161,11 @@ void AArch64Arm64ECCallLowering::getThunkArgTypes(
     X64ArgTypes.push_back(PtrTy);
     // x5
     Arm64ArgTypes.push_back(I64Ty);
-    // FIXME: x5 isn't actually passed/used by the x64 side; revisit once we
-    // have proper isel for varargs
-    X64ArgTypes.push_back(I64Ty);
+    if (TT != ThunkType::Entry) {
+      // FIXME: x5 isn't actually used by the x64 side; revisit once we
+      // have proper isel for varargs
+      X64ArgTypes.push_back(I64Ty);
+    }
     return;
   }
 
@@ -339,8 +346,7 @@ Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT,
                                                      AttributeList Attrs) {
   SmallString<256> ExitThunkName;
   llvm::raw_svector_ostream ExitThunkStream(ExitThunkName);
   FunctionType *Arm64Ty, *X64Ty;
-  getThunkType(FT, Attrs, /*EntryThunk*/ false, ExitThunkStream, Arm64Ty,
-               X64Ty);
+  getThunkType(FT, Attrs, ThunkType::Exit, ExitThunkStream, Arm64Ty, X64Ty);
   if (Function *F = M->getFunction(ExitThunkName))
     return F;
@@ -443,7 +449,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
   SmallString<256> EntryThunkName;
   llvm::raw_svector_ostream EntryThunkStream(EntryThunkName);
   FunctionType *Arm64Ty, *X64Ty;
-  getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true,
+  getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::Entry,
                EntryThunkStream, Arm64Ty, X64Ty);
   if (Function *F = M->getFunction(EntryThunkName))
     return F;
@@ -465,10 +471,11 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
 
   bool TransformDirectToSRet = X64RetType->isVoidTy() && !RetTy->isVoidTy();
   unsigned ThunkArgOffset = TransformDirectToSRet ? 2 : 1;
+  unsigned PassthroughArgSize = F->isVarArg() ? 5 : Thunk->arg_size();
 
   // Translate arguments to call.
   SmallVector<Value *> Args;
-  for (unsigned i = ThunkArgOffset, e = Thunk->arg_size(); i != e; ++i) {
+  for (unsigned i = ThunkArgOffset, e = PassthroughArgSize; i != e; ++i) {
     Value *Arg = Thunk->getArg(i);
     Type *ArgTy = Arm64Ty->getParamType(i - ThunkArgOffset);
     if (ArgTy->isArrayTy() || ArgTy->isStructTy() ||
@@ -485,6 +492,22 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
     Args.push_back(Arg);
   }
 
+  if (F->isVarArg()) {
+    // The 5th argument to variadic entry thunks is used to model the x64 sp
+    // which is passed to the thunk in x4; this can be passed to the callee as
+    // the variadic argument start address after skipping over the 32-byte
+    // shadow store.
+
+    // The EC thunk CC will assign any argument marked as InReg to x4.
+    Thunk->addParamAttr(5, Attribute::InReg);
+    Value *Arg = Thunk->getArg(5);
+    Arg = IRB.CreatePtrAdd(Arg, IRB.getInt64(0x20));
+    Args.push_back(Arg);
+
+    // Pass in a zero variadic argument size (in x5).
+    Args.push_back(IRB.getInt64(0));
+  }
+
   // Call the function passed to the thunk.
   Value *Callee = Thunk->getArg(0);
   Callee = IRB.CreateBitCast(Callee, PtrTy);
@@ -518,7 +541,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
 Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) {
   llvm::raw_null_ostream NullThunkName;
   FunctionType *Arm64Ty, *X64Ty;
-  getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true,
+  getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::GuestExit,
                NullThunkName, Arm64Ty, X64Ty);
   auto MangledName = getArm64ECMangledFunctionName(F->getName().str());
   assert(MangledName && "Can't guest exit to function that's already native");
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 78ea4a5180f70..8e67f0f5c8815 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -213,6 +213,9 @@ def CC_AArch64_Arm64EC_VarArg : CallingConv<[
 // address is passed in X9.
 let Entry = 1 in
 def CC_AArch64_Arm64EC_Thunk : CallingConv<[
+  // ARM64EC-specific: the InReg attribute can be used to access the x64 sp passed into entry thunks in x4 from the IR.
+  CCIfInReg<CCIfType<[iPTR], CCAssignToReg<[X4]>>>,
+
   // Byval aggregates are passed by pointer
   CCIfByVal<CCPassIndirect<i64>>,
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0287856560e91..196aa50cf4060 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8007,11 +8007,19 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   }
 
   if (IsVarArg && Subtarget->isWindowsArm64EC()) {
+    SDValue ParamPtr = StackPtr;
+    if (IsTailCall) {
+      // Create a dummy object at the top of the stack that can be used to get
+      // the SP after the epilogue
+      int FI = MF.getFrameInfo().CreateFixedObject(1, FPDiff, true);
+      ParamPtr = DAG.getFrameIndex(FI, PtrVT);
+    }
+
     // For vararg calls, the Arm64EC ABI requires values in x4 and x5
     // describing the argument list. x4 contains the address of the
     // first stack parameter. x5 contains the size in bytes of all parameters
     // passed on the stack.
-    RegsToPass.emplace_back(AArch64::X4, StackPtr);
+    RegsToPass.emplace_back(AArch64::X4, ParamPtr);
     RegsToPass.emplace_back(AArch64::X5,
                             DAG.getConstant(NumBytes, DL, MVT::i64));
   }
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
index 5c56f51e1ca55..bb9ba05f7a272 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
@@ -147,8 +147,8 @@ define void @has_varargs(...) nounwind {
 ; CHECK-NEXT: add x29, sp, #160
 ; CHECK-NEXT: .seh_add_fp 160
 ; CHECK-NEXT: .seh_endprologue
-; CHECK-NEXT: ldp x8, x5, [x4, #32]
-; CHECK-NEXT: mov x4, x8
+; CHECK-NEXT: add x4, x4, #32
+; CHECK-NEXT: mov x5, xzr
 ; CHECK-NEXT: blr x9
 ; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_ret
 ; CHECK-NEXT: ldr x0, [x8, :lo12:__os_arm64x_dispatch_ret]
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
index dc16b3a1a0f27..844fc52ddade6 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
@@ -100,5 +100,42 @@ define void @varargs_many_argscalleer() nounwind {
   ret void
 }
 
+define void @varargs_caller_tail() nounwind {
+; CHECK-LABEL: varargs_caller_tail:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: mov x4, sp
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: mov x9, #4617315517961601024 // =0x4014000000000000
+; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
+; CHECK-NEXT: mov w1, #2 // =0x2
+; CHECK-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
+; CHECK-NEXT: mov w3, #4 // =0x4
+; CHECK-NEXT: mov w5, #16 // =0x10
+; CHECK-NEXT: stp xzr, x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: stp x9, x8, [sp]
+; CHECK-NEXT: str xzr, [sp, #16]
+; CHECK-NEXT: .weak_anti_dep varargs_callee
+; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
+; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
+; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
+; CHECK-NEXT: bl "#varargs_callee"
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: add x4, sp, #48
+; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
+; CHECK-NEXT: mov w1, #4 // =0x4
+; CHECK-NEXT: mov w2, #3 // =0x3
+; CHECK-NEXT: mov w3, #2 // =0x2
+; CHECK-NEXT: mov x5, xzr
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: .weak_anti_dep varargs_callee
+; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
+; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
+; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
+; CHECK-NEXT: b "#varargs_callee"
+  call void (double, ...) @varargs_callee(double 1.0, i32 2, double 3.0, i32 4, double 5.0, <2 x double> <double 0.0, double 0.0>)
+  tail call void (double, ...) @varargs_callee(double 1.0, i32 4, i32 3, i32 2)
+  ret void
+}
 
 declare void @llvm.va_start(ptr)
 
diff --git a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
index 2d6db1642247d..812837639196e 100644
--- a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
+++ b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm64ec-windows-msvc %s -o - | FileCheck %s --check-prefixes=CHECK-EC
 ; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s
 ; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
 
@@ -32,3 +33,10 @@ attributes #1 = { noinline optnone "thunk" }
 ; CHECK: ldr x9, [x9]
 ; CHECK: mov v0.16b, v16.16b
 ; CHECK: br x9
+; CHECK-EC: mov v7.16b, v0.16b
+; CHECK-EC: ldr x9, [x0]
+; CHECK-EC: ldr x11, [x9]
+; CHECK-EC: mov v0.16b, v7.16b
+; CHECK-EC: add x4, sp, #64
+; CHECK-EC: add sp, sp, #64
+; CHECK-EC: br x11
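
For reference, a rough hand-written sketch (not compiler output; the function name, parameter names, and the omitted calling-convention/unwind details are illustrative assumptions) of the entry-thunk IR shape this change aims for when the wrapped function is variadic: the x64 SP reaches the thunk through the pointer parameter marked inreg, which CC_AArch64_Arm64EC_Thunk assigns to x4; the thunk skips the 32-byte shadow store and forwards that address as the vararg start pointer (x4) along with a zero vararg size (x5).

define void @entry_thunk_sketch(ptr %callee, i64 %x0, i64 %x1, i64 %x2, i64 %x3,
                                ptr inreg %x64_sp) {
  ; Skip the 32-byte x64 shadow store to reach the first stack-passed vararg.
  %va_start = getelementptr i8, ptr %x64_sp, i64 32
  ; Forward the fixed register arguments, the vararg start address (x4) and a
  ; zero vararg size (x5) to the native Arm64 function.
  call void %callee(i64 %x0, i64 %x1, i64 %x2, i64 %x3, ptr %va_start, i64 0)
  ret void
}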