Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PR for llvm/llvm-project#80752 #80754

Closed
wants to merge 2 commits into from
Closed

Conversation

llvmbot
Copy link
Collaborator

@llvmbot llvmbot commented Feb 5, 2024

resolves #80752

ARM64EC varargs calls expect that x4 = sp at entry, special handling is
needed to ensure this with tail calls since they occur after the
epilogue and the x4 write happens before.

I tried going through AArch64MachineFrameLowering for this, hoping to
avoid creating the dummy object but this was the best I could do since
the stack info that uses isn't populated at this stage,
CreateFixedObject also explicitly forbids 0 sized objects.

(cherry picked from commit c761b4a)
llvm#80595)

ISel handles filling in x4/x5 when calling variadic functions as they
don't correspond to the 5th/6th X64 arguments but rather to the end of
the shadow space on the stack and the size in bytes of all stack
parameters (ignored and written as 0 for calls from entry thunks).

Will PR a follow up with ISel handling after this is merged.

(cherry picked from commit 8f07014)
@llvmbot llvmbot added this to the LLVM 18.X Release milestone Feb 5, 2024
@llvmbot
Copy link
Collaborator Author

llvmbot commented Feb 5, 2024

@efriedma-quic @cjacek What do you think about merging this PR to the release branch?

@llvmbot
Copy link
Collaborator Author

llvmbot commented Feb 5, 2024

@llvm/pr-subscribers-backend-aarch64

Author: None (llvmbot)

Changes

resolves llvm/llvm-project#80752


Full diff: https://github.com/llvm/llvm-project/pull/80754.diff

5 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp (+27-21)
  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+9-1)
  • (modified) llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll (+2-2)
  • (modified) llvm/test/CodeGen/AArch64/arm64ec-varargs.ll (+37)
  • (modified) llvm/test/CodeGen/AArch64/vararg-tallcall.ll (+8)
diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index 11248bb7aef31..91b4f18c73c93 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -43,6 +43,8 @@ static cl::opt<bool> GenerateThunks("arm64ec-generate-thunks", cl::Hidden,
 
 namespace {
 
+enum class ThunkType { GuestExit, Entry, Exit };
+
 class AArch64Arm64ECCallLowering : public ModulePass {
 public:
   static char ID;
@@ -69,14 +71,14 @@ class AArch64Arm64ECCallLowering : public ModulePass {
   Type *I64Ty;
   Type *VoidTy;
 
-  void getThunkType(FunctionType *FT, AttributeList AttrList, bool EntryThunk,
+  void getThunkType(FunctionType *FT, AttributeList AttrList, ThunkType TT,
                     raw_ostream &Out, FunctionType *&Arm64Ty,
                     FunctionType *&X64Ty);
   void getThunkRetType(FunctionType *FT, AttributeList AttrList,
                        raw_ostream &Out, Type *&Arm64RetTy, Type *&X64RetTy,
                        SmallVectorImpl<Type *> &Arm64ArgTypes,
                        SmallVectorImpl<Type *> &X64ArgTypes, bool &HasSretPtr);
-  void getThunkArgTypes(FunctionType *FT, AttributeList AttrList,
+  void getThunkArgTypes(FunctionType *FT, AttributeList AttrList, ThunkType TT,
                         raw_ostream &Out,
                         SmallVectorImpl<Type *> &Arm64ArgTypes,
                         SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr);
@@ -89,10 +91,11 @@ class AArch64Arm64ECCallLowering : public ModulePass {
 
 void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
                                               AttributeList AttrList,
-                                              bool EntryThunk, raw_ostream &Out,
+                                              ThunkType TT, raw_ostream &Out,
                                               FunctionType *&Arm64Ty,
                                               FunctionType *&X64Ty) {
-  Out << (EntryThunk ? "$ientry_thunk$cdecl$" : "$iexit_thunk$cdecl$");
+  Out << (TT == ThunkType::Entry ? "$ientry_thunk$cdecl$"
+                                 : "$iexit_thunk$cdecl$");
 
   Type *Arm64RetTy;
   Type *X64RetTy;
@@ -102,8 +105,8 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
 
   // The first argument to a thunk is the called function, stored in x9.
   // For exit thunks, we pass the called function down to the emulator;
-  // for entry thunks, we just call the Arm64 function directly.
-  if (!EntryThunk)
+  // for entry/guest exit thunks, we just call the Arm64 function directly.
+  if (TT == ThunkType::Exit)
     Arm64ArgTypes.push_back(PtrTy);
   X64ArgTypes.push_back(PtrTy);
 
@@ -111,14 +114,16 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
   getThunkRetType(FT, AttrList, Out, Arm64RetTy, X64RetTy, Arm64ArgTypes,
                   X64ArgTypes, HasSretPtr);
 
-  getThunkArgTypes(FT, AttrList, Out, Arm64ArgTypes, X64ArgTypes, HasSretPtr);
+  getThunkArgTypes(FT, AttrList, TT, Out, Arm64ArgTypes, X64ArgTypes,
+                   HasSretPtr);
 
-  Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes, false);
+  Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes,
+                              TT == ThunkType::Entry && FT->isVarArg());
   X64Ty = FunctionType::get(X64RetTy, X64ArgTypes, false);
 }
 
 void AArch64Arm64ECCallLowering::getThunkArgTypes(
-    FunctionType *FT, AttributeList AttrList, raw_ostream &Out,
+    FunctionType *FT, AttributeList AttrList, ThunkType TT, raw_ostream &Out,
     SmallVectorImpl<Type *> &Arm64ArgTypes,
     SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr) {
 
@@ -151,14 +156,16 @@ void AArch64Arm64ECCallLowering::getThunkArgTypes(
       X64ArgTypes.push_back(I64Ty);
     }
 
-    // x4
-    Arm64ArgTypes.push_back(PtrTy);
-    X64ArgTypes.push_back(PtrTy);
-    // x5
-    Arm64ArgTypes.push_back(I64Ty);
-    // FIXME: x5 isn't actually passed/used by the x64 side; revisit once we
-    // have proper isel for varargs
-    X64ArgTypes.push_back(I64Ty);
+    if (TT != ThunkType::Entry) {
+      // x4
+      Arm64ArgTypes.push_back(PtrTy);
+      X64ArgTypes.push_back(PtrTy);
+      // x5
+      Arm64ArgTypes.push_back(I64Ty);
+      // FIXME: x5 isn't actually passed/used by the x64 side; revisit once we
+      // have proper isel for varargs
+      X64ArgTypes.push_back(I64Ty);
+    }
     return;
   }
 
@@ -339,8 +346,7 @@ Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT,
   SmallString<256> ExitThunkName;
   llvm::raw_svector_ostream ExitThunkStream(ExitThunkName);
   FunctionType *Arm64Ty, *X64Ty;
-  getThunkType(FT, Attrs, /*EntryThunk*/ false, ExitThunkStream, Arm64Ty,
-               X64Ty);
+  getThunkType(FT, Attrs, ThunkType::Exit, ExitThunkStream, Arm64Ty, X64Ty);
   if (Function *F = M->getFunction(ExitThunkName))
     return F;
 
@@ -443,7 +449,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
   SmallString<256> EntryThunkName;
   llvm::raw_svector_ostream EntryThunkStream(EntryThunkName);
   FunctionType *Arm64Ty, *X64Ty;
-  getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true,
+  getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::Entry,
                EntryThunkStream, Arm64Ty, X64Ty);
   if (Function *F = M->getFunction(EntryThunkName))
     return F;
@@ -518,7 +524,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
 Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) {
   llvm::raw_null_ostream NullThunkName;
   FunctionType *Arm64Ty, *X64Ty;
-  getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true,
+  getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::GuestExit,
                NullThunkName, Arm64Ty, X64Ty);
   auto MangledName = getArm64ECMangledFunctionName(F->getName().str());
   assert(MangledName && "Can't guest exit to function that's already native");
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e97f5e3220148..957b556edaf31 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8007,11 +8007,19 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   }
 
   if (IsVarArg && Subtarget->isWindowsArm64EC()) {
+    SDValue ParamPtr = StackPtr;
+    if (IsTailCall) {
+      // Create a dummy object at the top of the stack that can be used to get
+      // the SP after the epilogue
+      int FI = MF.getFrameInfo().CreateFixedObject(1, FPDiff, true);
+      ParamPtr = DAG.getFrameIndex(FI, PtrVT);
+    }
+
     // For vararg calls, the Arm64EC ABI requires values in x4 and x5
     // describing the argument list.  x4 contains the address of the
     // first stack parameter. x5 contains the size in bytes of all parameters
     // passed on the stack.
-    RegsToPass.emplace_back(AArch64::X4, StackPtr);
+    RegsToPass.emplace_back(AArch64::X4, ParamPtr);
     RegsToPass.emplace_back(AArch64::X5,
                             DAG.getConstant(NumBytes, DL, MVT::i64));
   }
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
index 5c56f51e1ca55..0083818def151 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
@@ -147,8 +147,8 @@ define void @has_varargs(...) nounwind {
 ; CHECK-NEXT:     add     x29, sp, #160
 ; CHECK-NEXT:     .seh_add_fp     160
 ; CHECK-NEXT:     .seh_endprologue
-; CHECK-NEXT:     ldp     x8, x5, [x4, #32]
-; CHECK-NEXT:     mov     x4, x8
+; CHECK-NEXT:     mov     x4, sp
+; CHECK-NEXT:     mov     x5, xzr
 ; CHECK-NEXT:     blr     x9
 ; CHECK-NEXT:     adrp    x8, __os_arm64x_dispatch_ret
 ; CHECK-NEXT:     ldr     x0, [x8, :lo12:__os_arm64x_dispatch_ret]
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
index dc16b3a1a0f27..844fc52ddade6 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
@@ -100,5 +100,42 @@ define void @varargs_many_argscalleer() nounwind {
   ret void
 }
 
+define void @varargs_caller_tail() nounwind {
+; CHECK-LABEL: varargs_caller_tail:
+; CHECK:        // %bb.0:
+; CHECK-NEXT:        sub     sp, sp, #48
+; CHECK-NEXT:        mov     x4, sp
+; CHECK-NEXT:        add     x8, sp, #16
+; CHECK-NEXT:        mov     x9, #4617315517961601024        // =0x4014000000000000
+; CHECK-NEXT:        mov     x0, #4607182418800017408        // =0x3ff0000000000000
+; CHECK-NEXT:        mov     w1, #2                          // =0x2
+; CHECK-NEXT:        mov     x2, #4613937818241073152        // =0x4008000000000000
+; CHECK-NEXT:        mov     w3, #4                          // =0x4
+; CHECK-NEXT:        mov     w5, #16                         // =0x10
+; CHECK-NEXT:        stp     xzr, x30, [sp, #24]             // 8-byte Folded Spill
+; CHECK-NEXT:        stp     x9, x8, [sp]
+; CHECK-NEXT:        str     xzr, [sp, #16]
+; CHECK-NEXT:        .weak_anti_dep  varargs_callee
+; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
+; CHECK-NEXT:        .weak_anti_dep  "#varargs_callee"
+; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
+; CHECK-NEXT:        bl      "#varargs_callee"
+; CHECK-NEXT:        ldr     x30, [sp, #32]                  // 8-byte Folded Reload
+; CHECK-NEXT:        add     x4, sp, #48
+; CHECK-NEXT:        mov     x0, #4607182418800017408        // =0x3ff0000000000000
+; CHECK-NEXT:        mov     w1, #4                          // =0x4
+; CHECK-NEXT:        mov     w2, #3                          // =0x3
+; CHECK-NEXT:        mov     w3, #2                          // =0x2
+; CHECK-NEXT:        mov     x5, xzr
+; CHECK-NEXT:        add     sp, sp, #48
+; CHECK-NEXT:        .weak_anti_dep  varargs_callee
+; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
+; CHECK-NEXT:        .weak_anti_dep  "#varargs_callee"
+; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
+; CHECK-NEXT:        b       "#varargs_callee"
+  call void (double, ...) @varargs_callee(double 1.0, i32 2, double 3.0, i32 4, double 5.0, <2 x double> <double 0.0, double 0.0>)
+  tail call void (double, ...) @varargs_callee(double 1.0, i32 4, i32 3, i32 2)
+  ret void
+}
 
 declare void @llvm.va_start(ptr)
diff --git a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
index 2d6db1642247d..812837639196e 100644
--- a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
+++ b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s
 ; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm64ec-windows-msvc %s -o - | FileCheck %s --check-prefixes=CHECK-EC
 ; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s
 ; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
 
@@ -32,3 +33,10 @@ attributes #1 = { noinline optnone "thunk" }
 ; CHECK: ldr     x9, [x9]
 ; CHECK: mov     v0.16b, v16.16b
 ; CHECK: br      x9
+; CHECK-EC: mov     v7.16b, v0.16b
+; CHECK-EC: ldr     x9, [x0]
+; CHECK-EC: ldr     x11, [x9]
+; CHECK-EC: mov     v0.16b, v7.16b
+; CHECK-EC: add     x4, sp, #64
+; CHECK-EC: add     sp, sp, #64
+; CHECK-EC: br      x11

@dpaoliello
Copy link
Contributor

Superseded by #81800

@dpaoliello dpaoliello closed this Feb 14, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
Development

Successfully merging this pull request may close these issues.

None yet

3 participants