Skip to content

Commit

Permalink
MS ABI: Use musttail for vtable thunks that pass arguments by value
Browse files Browse the repository at this point in the history
This moves some memptr-specific code into the generic thunk emission
codepath.

Fixes PR20053.

Reviewers: majnemer

Differential Revision: http://reviews.llvm.org/D4613

llvm-svn: 214004
  • Loading branch information
rnk committed Jul 26, 2014
1 parent 3f76ac7 commit ab2090d
Show file tree
Hide file tree
Showing 7 changed files with 205 additions and 58 deletions.
15 changes: 2 additions & 13 deletions clang/lib/CodeGen/CGCall.cpp
Expand Up @@ -2045,19 +2045,8 @@ void CodeGenFunction::EmitDelegateCallArg(CallArgList &args,
return args.add(RValue::get(Builder.CreateLoad(local)), type);
}

if (isInAllocaArgument(CGM.getCXXABI(), type)) {
AggValueSlot Slot = createPlaceholderSlot(*this, type);
Slot.setExternallyDestructed();

// FIXME: Either emit a copy constructor call, or figure out how to do
// guaranteed tail calls with perfect forwarding in LLVM.
CGM.ErrorUnsupported(param, "non-trivial argument copy for thunk");
EmitNullInitialization(Slot.getAddr(), type);

RValue RV = Slot.asRValue();
args.add(RV, type);
return;
}
assert(!isInAllocaArgument(CGM.getCXXABI(), type) &&
"cannot emit delegate call arguments for inalloca arguments!");

args.add(convertTempToRValue(local, type, loc), type);
}
Expand Down
73 changes: 71 additions & 2 deletions clang/lib/CodeGen/CGVTables.cpp
Expand Up @@ -236,6 +236,18 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Value *Callee,
*this, LoadCXXThis(), Thunk->This)
: LoadCXXThis();

if (CurFnInfo->usesInAlloca()) {
// We don't handle return adjusting thunks, because they require us to call
// the copy constructor. For now, fall through and pretend the return
// adjustment was empty so we don't crash.
if (Thunk && !Thunk->Return.isEmpty()) {
CGM.ErrorUnsupported(
MD, "non-trivial argument copy for return-adjusting thunk");
}
EmitMustTailThunk(MD, AdjustedThisPtr, Callee);
return;
}

// Start building CallArgs.
CallArgList CallArgs;
QualType ThisType = MD->getThisType(getContext());
Expand Down Expand Up @@ -278,8 +290,9 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Value *Callee,
Slot = ReturnValueSlot(ReturnValue, ResultType.isVolatileQualified());

// Now emit our call.
RValue RV = EmitCall(*CurFnInfo, Callee, Slot, CallArgs, MD);

llvm::Instruction *CallOrInvoke;
RValue RV = EmitCall(*CurFnInfo, Callee, Slot, CallArgs, MD, &CallOrInvoke);

// Consider return adjustment if we have ThunkInfo.
if (Thunk && !Thunk->Return.isEmpty())
RV = PerformReturnAdjustment(*this, ResultType, RV, *Thunk);
Expand All @@ -294,6 +307,62 @@ void CodeGenFunction::EmitCallAndReturnForThunk(llvm::Value *Callee,
FinishFunction();
}

/// Emit the body of a thunk as a single musttail call to \p Callee.
///
/// Every LLVM argument of the current function (CurFn) is forwarded to the
/// callee unchanged, except that 'this' is replaced by \p AdjustedThisPtr.
/// The result of the call (if any) is returned directly, and the function is
/// then finished via FinishFunction().
///
/// \param MD              Method declaration passed to ConstructAttributeList
///                        when building the call-site attributes.
/// \param AdjustedThisPtr Value to use in place of the incoming 'this'; it is
///                        bitcast when its type differs from the slot it
///                        replaces.
/// \param Callee          The value that is called.
void CodeGenFunction::EmitMustTailThunk(const CXXMethodDecl *MD,
llvm::Value *AdjustedThisPtr,
llvm::Value *Callee) {
// Emitting a musttail call thunk doesn't use any of the CGCall.cpp machinery
// to translate AST arguments into LLVM IR arguments. For thunks, we know
// that the caller prototype more or less matches the callee prototype with
// the exception of 'this'.
// Start from an in-order copy of the current function's own LLVM arguments.
SmallVector<llvm::Value *, 8> Args;
for (llvm::Argument &A : CurFn->args())
Args.push_back(&A);

// Set the adjusted 'this' pointer.
// The ABI info of the first argument in CurFnInfo is treated as describing
// 'this'.
const ABIArgInfo &ThisAI = CurFnInfo->arg_begin()->info;
if (ThisAI.isDirect()) {
// 'this' is its own LLVM argument. It sits at index 1 when the return value
// is indirect and sret does not come after 'this' (presumably because the
// sret pointer then occupies index 0 -- confirm against the ABI lowering);
// otherwise it sits at index 0.
const ABIArgInfo &RetAI = CurFnInfo->getReturnInfo();
int ThisArgNo = RetAI.isIndirect() && !RetAI.isSRetAfterThis() ? 1 : 0;
llvm::Type *ThisType = Args[ThisArgNo]->getType();
// Make the adjusted pointer's type match the argument it replaces.
if (ThisType != AdjustedThisPtr->getType())
AdjustedThisPtr = Builder.CreateBitCast(AdjustedThisPtr, ThisType);
Args[ThisArgNo] = AdjustedThisPtr;
} else {
// 'this' is not a separate LLVM argument here, so instead of patching Args
// the adjusted pointer is stored to the address of the 'this' local
// (CXXABIThisDecl). NOTE(review): that address is presumably the 'this'
// field of the incoming inalloca block, which is forwarded as-is -- confirm.
assert(ThisAI.isInAlloca() && "this is passed directly or inalloca");
llvm::Value *ThisAddr = GetAddrOfLocalVar(CXXABIThisDecl);
llvm::Type *ThisType =
cast<llvm::PointerType>(ThisAddr->getType())->getElementType();
// Match the pointee type of the slot before storing.
if (ThisType != AdjustedThisPtr->getType())
AdjustedThisPtr = Builder.CreateBitCast(AdjustedThisPtr, ThisType);
Builder.CreateStore(AdjustedThisPtr, ThisAddr);
}

// Emit the musttail call manually. Even if the prologue pushed cleanups, we
// don't actually want to run them.
llvm::CallInst *Call = Builder.CreateCall(Callee, Args);
Call->setTailCallKind(llvm::CallInst::TCK_MustTail);

// Apply the standard set of call attributes.
// The attribute list and calling convention are derived from the current
// function's own CGFunctionInfo, in call-site mode.
unsigned CallingConv;
CodeGen::AttributeListType AttributeList;
CGM.ConstructAttributeList(*CurFnInfo, MD, AttributeList, CallingConv,
/*AttrOnCallSite=*/true);
llvm::AttributeSet Attrs =
llvm::AttributeSet::get(getLLVMContext(), AttributeList);
Call->setAttributes(Attrs);
Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));

// Return immediately after the call: either nothing, or the call's own
// result.
if (Call->getType()->isVoidTy())
Builder.CreateRetVoid();
else
Builder.CreateRet(Call);

// Finish the function to maintain CodeGenFunction invariants.
// FIXME: Don't emit unreachable code.
// A fresh block is opened after the 'ret' so FinishFunction() has an insertion
// point; whatever it emits there is the unreachable code the FIXME refers to.
EmitBlock(createBasicBlock());
FinishFunction();
}

void CodeGenFunction::GenerateThunk(llvm::Function *Fn,
const CGFunctionInfo &FnInfo,
GlobalDecl GD, const ThunkInfo &Thunk) {
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.h
Expand Up @@ -1210,6 +1210,10 @@ class CodeGenFunction : public CodeGenTypeCache {

void EmitCallAndReturnForThunk(llvm::Value *Callee, const ThunkInfo *Thunk);

/// Emit a musttail call for a thunk with a potentially adjusted this pointer.
///
/// All LLVM arguments of the current function are forwarded to \p Callee,
/// with 'this' replaced by \p AdjustedThisPtr; the call result (if any) is
/// returned and the function is finished.
///
/// \param MD              Method used when constructing the call attributes.
/// \param AdjustedThisPtr Replacement value for the 'this' argument.
/// \param Callee          The value to call.
void EmitMustTailThunk(const CXXMethodDecl *MD, llvm::Value *AdjustedThisPtr,
llvm::Value *Callee);

/// GenerateThunk - Generate a thunk for the given method.
void GenerateThunk(llvm::Function *Fn, const CGFunctionInfo &FnInfo,
GlobalDecl GD, const ThunkInfo &Thunk);
Expand Down
26 changes: 1 addition & 25 deletions clang/lib/CodeGen/MicrosoftCXXABI.cpp
Expand Up @@ -1467,31 +1467,7 @@ llvm::Function *MicrosoftCXXABI::EmitVirtualMemPtrThunk(
CGF.Builder.CreateConstInBoundsGEP1_64(VTable, ML.Index, "vfn");
llvm::Value *Callee = CGF.Builder.CreateLoad(VFuncPtr);

unsigned CallingConv;
CodeGen::AttributeListType AttributeList;
CGM.ConstructAttributeList(FnInfo, MD, AttributeList, CallingConv, true);
llvm::AttributeSet Attrs =
llvm::AttributeSet::get(CGF.getLLVMContext(), AttributeList);

// Do a musttail call with perfect argument forwarding. Any inalloca argument
// will be forwarded in place without any copy.
SmallVector<llvm::Value *, 8> Args;
for (llvm::Argument &A : ThunkFn->args())
Args.push_back(&A);
llvm::CallInst *Call = CGF.Builder.CreateCall(Callee, Args);
Call->setTailCallKind(llvm::CallInst::TCK_MustTail);
Call->setAttributes(Attrs);
Call->setCallingConv(static_cast<llvm::CallingConv::ID>(CallingConv));

if (Call->getType()->isVoidTy())
CGF.Builder.CreateRetVoid();
else
CGF.Builder.CreateRet(Call);

// Finish the function to maintain CodeGenFunction invariants.
// FIXME: Don't emit unreachable code.
CGF.EmitBlock(CGF.createBasicBlock());
CGF.FinishFunction();
CGF.EmitCallAndReturnForThunk(Callee, 0);

return ThunkFn;
}
Expand Down
98 changes: 93 additions & 5 deletions clang/test/CodeGenCXX/microsoft-abi-byval-thunks.cpp
@@ -1,4 +1,4 @@
// RUN: not %clang_cc1 %s -fno-rtti -triple=i686-pc-win32 -emit-llvm -o /dev/null 2>&1 | FileCheck --check-prefix=CHECK32 %s
// RUN: %clang_cc1 %s -fno-rtti -triple=i686-pc-win32 -emit-llvm -o - | FileCheck --check-prefix=CHECK32 %s
// RUN: %clang_cc1 %s -fno-rtti -triple=x86_64-pc-win32 -emit-llvm -o - | FileCheck --check-prefix=CHECK64 %s

namespace byval_thunk {
Expand All @@ -11,15 +11,103 @@ struct Agg {

struct A { virtual void foo(Agg x); };
struct B { virtual void foo(Agg x); };
struct C : A, B { virtual void foo(Agg x); };
C c;
struct C : A, B { C(); virtual void foo(Agg x); };
C::C() {} // force emission

// CHECK32: cannot compile this non-trivial argument copy for thunk yet
// CHECK32-LABEL: define linkonce_odr x86_thiscallcc void @"\01?foo@C@byval_thunk@@W3AEXUAgg@2@@Z"
// CHECK32: (%"struct.byval_thunk::C"* %this, <{ %"struct.byval_thunk::Agg" }>* inalloca)
// CHECK32: %2 = getelementptr i8* %{{.*}}, i32 -4
// CHECK32: musttail call x86_thiscallcc void @"\01?foo@C@byval_thunk@@UAEXUAgg@2@@Z"
// CHECK32: (%"struct.byval_thunk::C"* %{{.*}}, <{ %"struct.byval_thunk::Agg" }>* inalloca %0)
// CHECK32-NEXT: ret void

// CHECK64-LABEL: define linkonce_odr void @"\01?foo@C@byval_thunk@@W7EAAXUAgg@2@@Z"
// CHECK64: (%"struct.byval_thunk::C"* %this, %"struct.byval_thunk::Agg"* %x)
// CHECK64: getelementptr i8* %{{.*}}, i32 -8
// CHECK64: call void @"\01?foo@C@byval_thunk@@UEAAXUAgg@2@@Z"(%"struct.byval_thunk::C"* %{{.*}}, %"struct.byval_thunk::Agg"* %x)
// CHECK64: call void @"\01?foo@C@byval_thunk@@UEAAXUAgg@2@@Z"
// CHECK64: (%"struct.byval_thunk::C"* %{{.*}}, %"struct.byval_thunk::Agg"* %x)
// CHECK64-NOT: call
// CHECK64: ret void
}

// Thunk for a __stdcall virtual method that takes Agg by value, where Agg has
// a user-declared copy constructor and destructor. The 32-bit expectations
// below look for 'this' being loaded from the inalloca struct, adjusted by -4,
// stored back into the same slot, and the whole struct forwarded through a
// musttail call. The 64-bit expectations look for a -8 adjustment followed by
// a single call and 'ret void'.
namespace stdcall_thunk {
struct Agg {
Agg();
Agg(const Agg &);
~Agg();
int x;
};

struct A { virtual void __stdcall foo(Agg x); };
struct B { virtual void __stdcall foo(Agg x); };
struct C : A, B { C(); virtual void __stdcall foo(Agg x); };
C::C() {} // force emission

// CHECK32-LABEL: define linkonce_odr x86_stdcallcc void @"\01?foo@C@stdcall_thunk@@W3AGXUAgg@2@@Z"
// CHECK32: (<{ %"struct.stdcall_thunk::C"*, %"struct.stdcall_thunk::Agg" }>* inalloca)
// CHECK32: %[[this_slot:[^ ]*]] = getelementptr inbounds <{ %"struct.stdcall_thunk::C"*, %"struct.stdcall_thunk::Agg" }>* %0, i32 0, i32 0
// CHECK32: load %"struct.stdcall_thunk::C"** %[[this_slot]]
// CHECK32: getelementptr i8* %{{.*}}, i32 -4
// CHECK32: store %"struct.stdcall_thunk::C"* %{{.*}}, %"struct.stdcall_thunk::C"** %[[this_slot]]
// CHECK32: musttail call x86_stdcallcc void @"\01?foo@C@stdcall_thunk@@UAGXUAgg@2@@Z"
// CHECK32: (<{ %"struct.stdcall_thunk::C"*, %"struct.stdcall_thunk::Agg" }>* inalloca %0)
// CHECK32-NEXT: ret void

// CHECK64-LABEL: define linkonce_odr void @"\01?foo@C@stdcall_thunk@@W7EAAXUAgg@2@@Z"
// CHECK64: (%"struct.stdcall_thunk::C"* %this, %"struct.stdcall_thunk::Agg"* %x)
// CHECK64: getelementptr i8* %{{.*}}, i32 -8
// CHECK64: call void @"\01?foo@C@stdcall_thunk@@UEAAXUAgg@2@@Z"
// CHECK64: (%"struct.stdcall_thunk::C"* %{{.*}}, %"struct.stdcall_thunk::Agg"* %x)
// CHECK64-NOT: call
// CHECK64: ret void
}

// Thunk for a __cdecl virtual method that both takes and returns Agg by value,
// where Agg has a user-declared copy constructor and destructor. The 32-bit
// expectations below look for an inalloca struct holding 'this', an Agg
// pointer and the Agg argument; 'this' is adjusted by -4 in place and the
// pointer produced by the musttail call is returned directly. The 64-bit
// expectations look for an sret parameter, a -8 adjustment, a single call and
// 'ret void'.
namespace sret_thunk {
struct Agg {
Agg();
Agg(const Agg &);
~Agg();
int x;
};

struct A { virtual Agg __cdecl foo(Agg x); };
struct B { virtual Agg __cdecl foo(Agg x); };
struct C : A, B { C(); virtual Agg __cdecl foo(Agg x); };
C::C() {} // force emission

// CHECK32-LABEL: define linkonce_odr %"struct.sret_thunk::Agg"* @"\01?foo@C@sret_thunk@@W3AA?AUAgg@2@U32@@Z"
// CHECK32: (<{ %"struct.sret_thunk::C"*, %"struct.sret_thunk::Agg"*, %"struct.sret_thunk::Agg" }>* inalloca)
// CHECK32: %[[this_slot:[^ ]*]] = getelementptr inbounds <{ %"struct.sret_thunk::C"*, %"struct.sret_thunk::Agg"*, %"struct.sret_thunk::Agg" }>* %0, i32 0, i32 0
// CHECK32: load %"struct.sret_thunk::C"** %[[this_slot]]
// CHECK32: getelementptr i8* %{{.*}}, i32 -4
// CHECK32: store %"struct.sret_thunk::C"* %{{.*}}, %"struct.sret_thunk::C"** %[[this_slot]]
// CHECK32: %[[rv:[^ ]*]] = musttail call %"struct.sret_thunk::Agg"* @"\01?foo@C@sret_thunk@@UAA?AUAgg@2@U32@@Z"
// CHECK32: (<{ %"struct.sret_thunk::C"*, %"struct.sret_thunk::Agg"*, %"struct.sret_thunk::Agg" }>* inalloca %0)
// CHECK32-NEXT: ret %"struct.sret_thunk::Agg"* %[[rv]]

// CHECK64-LABEL: define linkonce_odr void @"\01?foo@C@sret_thunk@@W7EAA?AUAgg@2@U32@@Z"
// CHECK64: (%"struct.sret_thunk::C"* %this, %"struct.sret_thunk::Agg"* noalias sret %agg.result, %"struct.sret_thunk::Agg"* %x)
// CHECK64: getelementptr i8* %{{.*}}, i32 -8
// CHECK64: call void @"\01?foo@C@sret_thunk@@UEAA?AUAgg@2@U32@@Z"
// CHECK64: (%"struct.sret_thunk::C"* %{{.*}}, %"struct.sret_thunk::Agg"* sret %agg.result, %"struct.sret_thunk::Agg"* %x)
// CHECK64-NOT: call
// CHECK64: ret void
}

#if 0
// FIXME: When we extend LLVM IR to allow forwarding of varargs through musttail
// calls, use this test.
// Variadic variant of the cases above: each foo takes Agg by value followed by
// '...'. Agg again has a user-declared copy constructor and destructor. This
// namespace has no FileCheck expectations and is compiled out by the
// enclosing '#if 0'.
namespace variadic_thunk {
struct Agg {
Agg();
Agg(const Agg &);
~Agg();
int x;
};

struct A { virtual void foo(Agg x, ...); };
struct B { virtual void foo(Agg x, ...); };
struct C : A, B { C(); virtual void foo(Agg x, ...); };
C::C() {} // force emission
}
#endif
Expand Up @@ -18,7 +18,7 @@ struct B {
struct C : A, B {
C();
int c;
virtual C *clone(A); // expected-error {{cannot compile this non-trivial argument copy for thunk yet}}
virtual C *clone(A); // expected-error {{cannot compile this non-trivial argument copy for return-adjusting thunk yet}}
};
B::B() {} // force emission
C::C() {} // force emission

0 comments on commit ab2090d

Please sign in to comment.