Skip to content

Commit

Permalink
[clang][RelativeVTablesABI] Use dso_local_equivalent rather than emitting stubs
Browse files Browse the repository at this point in the history

Thanks to D77248, we can bypass the use of stubs altogether and use PLT
relocations if they are available for the target. LLVM and LLD support the
R_AARCH64_PLT32 relocation, so we can also guarantee a static PLT relocation on AArch64.
Not emitting these stubs saves a lot of extra binary size.

Differential Revision: https://reviews.llvm.org/D83812
  • Loading branch information
PiJoules committed Dec 1, 2020
1 parent 8cdf492 commit cf8ff75
Show file tree
Hide file tree
Showing 21 changed files with 51 additions and 357 deletions.
70 changes: 1 addition & 69 deletions clang/lib/CodeGen/CGVTables.cpp
Expand Up @@ -641,7 +641,7 @@ void CodeGenVTables::addRelativeComponent(ConstantArrayBuilder &builder,

llvm::Constant *target;
if (auto *func = dyn_cast<llvm::Function>(globalVal)) {
target = getOrCreateRelativeStub(func, stubLinkage, isCompleteDtor);
target = llvm::DSOLocalEquivalent::get(func);
} else {
llvm::SmallString<16> rttiProxyName(globalVal->getName());
rttiProxyName.append(".rtti_proxy");
Expand Down Expand Up @@ -669,74 +669,6 @@ void CodeGenVTables::addRelativeComponent(ConstantArrayBuilder &builder,
/*position=*/vtableAddressPoint);
}

// Returns the function whose address is placed in a relative vtable slot for
// `func`: either `func` itself, or a dso_local "<name>.stub" function that
// simply tail calls `func`.
//
//   func           - the virtual function the vtable entry refers to.
//   stubLinkage    - linkage applied to a stub created by this call.
//   isCompleteDtor - true if `func` is a complete object destructor; a stub is
//                    then emitted even when `func` is already dso_local.
llvm::Function *CodeGenVTables::getOrCreateRelativeStub(
    llvm::Function *func, llvm::GlobalValue::LinkageTypes stubLinkage,
    bool isCompleteDtor) const {
  // With optimizations enabled, a complete object destructor in the vtable may
  // later be swapped for a suitable base object destructor (this happens for
  // child classes without a destructor of their own). If the parent's virtual
  // destructor is not guaranteed to live in the same linkage unit as the child
  // vtable, that swap could put an external reference into the child vtable
  // and keep it out of rodata. Forcing a stub for complete virtual destructors
  // avoids that; every other dso_local function can be referenced directly.
  const bool needsStub = !func->isDSOLocal() || isCompleteDtor;
  if (!needsStub)
    return func;

  // Rather than taking the offset between the vtable and the virtual function
  // itself, the vtable refers to a dso_local stub whose body is only a tail
  // call to the real function. The real function may not be dso_local, which
  // could require a dynamic relocation and stop the vtable from being
  // read-only. On x86_64 the function-minus-vtable offset is lowered to the
  // offset of the function's PLT entry, giving a PLT32 reloc. On AArch64 only
  // CALL26 and JUMP26 instructions currently produce PLT relocations, so they
  // are materialized through stubs that just jump to the original function.
  llvm::SmallString<16> stubName(func->getName());
  stubName.append(".stub");

  auto &module = CGM.getModule();

  // Reuse a stub that was already emitted into this module under this name.
  if (llvm::Function *existing = module.getFunction(stubName)) {
    assert(existing->isDSOLocal() &&
           "The previous definition of this stub should've been dso_local.");
    return existing;
  }

  llvm::Function *thunk = llvm::Function::Create(
      func->getFunctionType(), stubLinkage, stubName, module);

  // Propagate the function attributes of the wrapped function onto the stub.
  thunk->setAttributes(func->getAttributes());

  thunk->setDSOLocal(true);
  thunk->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);

  // Stubs without local linkage are hidden and placed in a comdat named after
  // the stub itself.
  const bool hasLocalLinkage = thunk->hasLocalLinkage();
  if (!hasLocalLinkage) {
    thunk->setVisibility(llvm::GlobalValue::HiddenVisibility);
    thunk->setComdat(module.getOrInsertComdat(stubName));
  }

  // The stub body is a single entry block that forwards every argument to the
  // original function in a tail call (expected to be optimized) and returns
  // whatever that call produces.
  llvm::BasicBlock *entry =
      llvm::BasicBlock::Create(module.getContext(), "entry", thunk);
  llvm::IRBuilder<> builder(entry);

  llvm::SmallVector<llvm::Value *, 8> forwardedArgs;
  for (auto &param : thunk->args())
    forwardedArgs.push_back(&param);

  llvm::CallInst *tailCall = builder.CreateCall(func, forwardedArgs);
  tailCall->setAttributes(func->getAttributes());
  tailCall->setTailCall();

  const bool returnsValue = !tailCall->getType()->isVoidTy();
  if (returnsValue)
    builder.CreateRet(tailCall);
  else
    builder.CreateRetVoid();

  return thunk;
}

bool CodeGenVTables::useRelativeLayout() const {
return CGM.getTarget().getCXXABI().isItaniumFamily() &&
CGM.getItaniumVTableContext().isRelativeLayout();
Expand Down
Expand Up @@ -3,10 +3,7 @@

// RUN: %clang_cc1 %s -triple=aarch64-unknown-fuchsia -O1 -S -o - -emit-llvm -fexperimental-relative-c++-abi-vtables | FileCheck %s

// CHECK: $_ZN1B3fooEv.stub = comdat any

// The inline function is emitted in each module with the same comdat
// CHECK: $_ZN1A3fooEv.stub = comdat any
// CHECK: $_ZTS1A = comdat any
// CHECK: $_ZTI1A = comdat any
// CHECK: $_ZTI1B.rtti_proxy = comdat any
Expand All @@ -16,26 +13,19 @@
// CHECK: $_ZTI1A.rtti_proxy = comdat any

// The VTable for B is emitted here since it has a key function which is defined in this module
// CHECK: @_ZTV1B.local = private unnamed_addr constant { [3 x i32] } { [3 x i32] [i32 0, i32 trunc (i64 sub (i64 ptrtoint ({ i8*, i8*, i8* }** @_ZTI1B.rtti_proxy to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [3 x i32] }, { [3 x i32] }* @_ZTV1B.local, i32 0, i32 0, i32 2) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (void (%class.B*)* @_ZN1B3fooEv.stub to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [3 x i32] }, { [3 x i32] }* @_ZTV1B.local, i32 0, i32 0, i32 2) to i64)) to i32)] }, align 4
// CHECK: @_ZTV1B.local = private unnamed_addr constant { [3 x i32] } { [3 x i32] [i32 0, i32 trunc (i64 sub (i64 ptrtoint ({ i8*, i8*, i8* }** @_ZTI1B.rtti_proxy to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [3 x i32] }, { [3 x i32] }* @_ZTV1B.local, i32 0, i32 0, i32 2) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (void (%class.B*)* dso_local_equivalent @_ZN1B3fooEv to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [3 x i32] }, { [3 x i32] }* @_ZTV1B.local, i32 0, i32 0, i32 2) to i64)) to i32)] }, align 4

// The VTable for A is emitted here and in a comdat section since it has no key function, and is used in this module when creating an instance of A (in func()).
// CHECK: @_ZTV1A.local = linkonce_odr hidden unnamed_addr constant { [3 x i32] } { [3 x i32] [i32 0, i32 trunc (i64 sub (i64 ptrtoint ({ i8*, i8* }** @_ZTI1A.rtti_proxy to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [3 x i32] }, { [3 x i32] }* @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (void (%class.A*)* @_ZN1A3fooEv.stub to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [3 x i32] }, { [3 x i32] }* @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32)] }, comdat($_ZTV1A), align 4
// CHECK: @_ZTV1A.local = linkonce_odr hidden unnamed_addr constant { [3 x i32] } { [3 x i32] [i32 0, i32 trunc (i64 sub (i64 ptrtoint ({ i8*, i8* }** @_ZTI1A.rtti_proxy to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [3 x i32] }, { [3 x i32] }* @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (void (%class.A*)* dso_local_equivalent @_ZN1A3fooEv to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [3 x i32] }, { [3 x i32] }* @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32)] }, comdat($_ZTV1A), align 4

// CHECK: @_ZTV1B = unnamed_addr alias { [3 x i32] }, { [3 x i32] }* @_ZTV1B.local
// CHECK: @_ZTV1A = linkonce_odr unnamed_addr alias { [3 x i32] }, { [3 x i32] }* @_ZTV1A.local

// CHECK: define void @_ZN1B3fooEv(%class.B* nocapture {{[^,]*}} %this) unnamed_addr
// CHECK-NEXT: entry:
// CHECK-NEXT: ret void
// CHECK-NEXT: }

// CHECK: define hidden void @_ZN1B3fooEv.stub(%class.B* nocapture {{[^,]*}} %0) unnamed_addr #{{[0-9]+}} comdat
// CHECK: define void @_ZN1B3fooEv(%class.B* {{.*}}%this) unnamed_addr
// CHECK-NEXT: entry:
// CHECK-NEXT: ret void
// CHECK-NEXT: }

// CHECK: define hidden void @_ZN1A3fooEv.stub(%class.A* {{.*}}%0) unnamed_addr #{{[0-9]+}} comdat

class A {
public:
inline virtual void foo() {}
Expand Down
Expand Up @@ -3,9 +3,6 @@

// RUN: %clang_cc1 %s -triple=aarch64-unknown-fuchsia -O1 -S -o - -emit-llvm -fexperimental-relative-c++-abi-vtables | FileCheck %s

// CHECK-DAG: $_ZN1B3fooEv.stub = comdat any
// CHECK-DAG: $_ZN1A3fooEv.stub = comdat any

// A comdat is emitted for B but not A
// CHECK-DAG: $_ZTV1B = comdat any
// CHECK-DAG: $_ZTS1B = comdat any
Expand All @@ -14,7 +11,7 @@
// CHECK-DAG: $_ZTI1A.rtti_proxy = comdat any

// VTable for B is emitted here since we access it when creating an instance of B. The VTable is also linkonce_odr and in its own comdat.
// CHECK-DAG: @_ZTV1B.local = linkonce_odr hidden unnamed_addr constant { [3 x i32] } { [3 x i32] [i32 0, i32 trunc (i64 sub (i64 ptrtoint ({ i8*, i8*, i8* }** @_ZTI1B.rtti_proxy to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [3 x i32] }, { [3 x i32] }* @_ZTV1B.local, i32 0, i32 0, i32 2) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (void (%class.B*)* @_ZN1B3fooEv.stub to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [3 x i32] }, { [3 x i32] }* @_ZTV1B.local, i32 0, i32 0, i32 2) to i64)) to i32)] }, comdat($_ZTV1B), align 4
// CHECK-DAG: @_ZTV1B.local = linkonce_odr hidden unnamed_addr constant { [3 x i32] } { [3 x i32] [i32 0, i32 trunc (i64 sub (i64 ptrtoint ({ i8*, i8*, i8* }** @_ZTI1B.rtti_proxy to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [3 x i32] }, { [3 x i32] }* @_ZTV1B.local, i32 0, i32 0, i32 2) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (void (%class.B*)* dso_local_equivalent @_ZN1B3fooEv to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [3 x i32] }, { [3 x i32] }* @_ZTV1B.local, i32 0, i32 0, i32 2) to i64)) to i32)] }, comdat($_ZTV1B), align 4

// The RTTI objects aren’t that important, but it is good to know that they are emitted here since they are used in the vtable for B, and external references are used for RTTI stuff from A.
// CHECK-DAG: @_ZTVN10__cxxabiv120__si_class_type_infoE = external global i8*
Expand All @@ -28,16 +25,6 @@
// CHECK: @_ZTV1B = linkonce_odr unnamed_addr alias { [3 x i32] }, { [3 x i32] }* @_ZTV1B.local
// CHECK-NOT: @_ZTV1A = {{.*}}alias

// CHECK: define hidden void @_ZN1B3fooEv.stub(%class.B* {{.*}}%0) unnamed_addr #{{[0-9]+}} comdat

// CHECK: declare void @_ZN1A3fooEv(%class.A* {{[^,]*}}) unnamed_addr

// CHECK: define hidden void @_ZN1A3fooEv.stub(%class.A* {{[^,]*}} %0) unnamed_addr #{{[0-9]+}} comdat
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @_ZN1A3fooEv(%class.A* {{[^,]*}} %0)
// CHECK-NEXT: ret void
// CHECK-NEXT: }

class A {
public:
virtual void foo();
Expand Down
Expand Up @@ -5,35 +5,11 @@

#include "cross-tu-header.h"

// CHECK: $_ZN1A3fooEv.stub = comdat any
// CHECK: $_ZN1A3barEv.stub = comdat any
// CHECK: $_ZTI1A.rtti_proxy = comdat any

// CHECK: @_ZTV1A.local = private unnamed_addr constant { [4 x i32] } { [4 x i32] [i32 0, i32 trunc (i64 sub (i64 ptrtoint ({ i8*, i8* }** @_ZTI1A.rtti_proxy to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [4 x i32] }, { [4 x i32] }* @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (void (%class.A*)* @_ZN1A3fooEv.stub to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [4 x i32] }, { [4 x i32] }* @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (void (%class.A*)* @_ZN1A3barEv.stub to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [4 x i32] }, { [4 x i32] }* @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32)] }, align 4
// CHECK: @_ZTV1A.local = private unnamed_addr constant { [4 x i32] } { [4 x i32] [i32 0, i32 trunc (i64 sub (i64 ptrtoint ({ i8*, i8* }** @_ZTI1A.rtti_proxy to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [4 x i32] }, { [4 x i32] }* @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (void (%class.A*)* dso_local_equivalent @_ZN1A3fooEv to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [4 x i32] }, { [4 x i32] }* @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (void (%class.A*)* dso_local_equivalent @_ZN1A3barEv to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [4 x i32] }, { [4 x i32] }* @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32)] }, align 4
// @_ZTV1A = unnamed_addr alias { [4 x i32] }, { [4 x i32] }* @_ZTV1A.local

// A::foo() is still available for other modules to use since it is not marked with private or internal linkage.
// CHECK: define void @_ZN1A3fooEv(%class.A* nocapture {{[^,]*}} %this) unnamed_addr
// CHECK-NEXT: entry:
// CHECK-NEXT: ret void
// CHECK-NEXT: }

// The proxy that we take a reference to in the vtable has hidden visibility and external linkage so it can be used only by other modules in the same DSO. A::foo() is inlined into this stub since it is defined in the same module.
// CHECK: define hidden void @_ZN1A3fooEv.stub(%class.A* nocapture {{[^,]*}} %0) unnamed_addr #{{[0-9]+}} comdat
// CHECK-NEXT: entry:
// CHECK-NEXT: ret void
// CHECK-NEXT: }

// A::bar() is called within the module but not defined, even though the VTable for A is emitted here
// CHECK: declare void @_ZN1A3barEv(%class.A* {{[^,]*}}) unnamed_addr

// The stub for A::bar() is emitted with hidden visibility in its own comdat, so it is only visible within this DSO. We tail call here because A::bar() is not defined in the same module.
// CHECK: define hidden void @_ZN1A3barEv.stub(%class.A* {{[^,]*}} %0) unnamed_addr {{#[0-9]+}} comdat {
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @_ZN1A3barEv(%class.A* {{[^,]*}} %0)
// CHECK-NEXT: ret void
// CHECK-NEXT: }

// Out-of-line definition of A::foo(), so this translation unit defines it.
void A::foo() {}
// Calls foo() on the given A.
void A_foo(A *a) { a->foo(); }
// Calls bar() on the given A; A::bar() is used here but not defined in this file.
void A_bar(A *a) { a->bar(); }
Expand Up @@ -5,31 +5,18 @@

#include "cross-tu-header.h"

// CHECK: $_ZN1B3fooEv.stub = comdat any
// CHECK: $_ZN1A3barEv.stub = comdat any
// CHECK: $_ZTI1B.rtti_proxy = comdat any

// CHECK: @_ZTV1B.local = private unnamed_addr constant { [4 x i32] } { [4 x i32] [i32 0, i32 trunc (i64 sub (i64 ptrtoint ({ i8*, i8*, i8* }** @_ZTI1B.rtti_proxy to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [4 x i32] }, { [4 x i32] }* @_ZTV1B.local, i32 0, i32 0, i32 2) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (void (%class.B*)* @_ZN1B3fooEv.stub to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [4 x i32] }, { [4 x i32] }* @_ZTV1B.local, i32 0, i32 0, i32 2) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (void (%class.A*)* @_ZN1A3barEv.stub to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [4 x i32] }, { [4 x i32] }* @_ZTV1B.local, i32 0, i32 0, i32 2) to i64)) to i32)] }, align 4
// CHECK: @_ZTV1B.local = private unnamed_addr constant { [4 x i32] } { [4 x i32] [i32 0, i32 trunc (i64 sub (i64 ptrtoint ({ i8*, i8*, i8* }** @_ZTI1B.rtti_proxy to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [4 x i32] }, { [4 x i32] }* @_ZTV1B.local, i32 0, i32 0, i32 2) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (void (%class.B*)* dso_local_equivalent @_ZN1B3fooEv to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [4 x i32] }, { [4 x i32] }* @_ZTV1B.local, i32 0, i32 0, i32 2) to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (void (%class.A*)* dso_local_equivalent @_ZN1A3barEv to i64), i64 ptrtoint (i32* getelementptr inbounds ({ [4 x i32] }, { [4 x i32] }* @_ZTV1B.local, i32 0, i32 0, i32 2) to i64)) to i32)] }, align 4
// CHECK: @_ZTV1B = unnamed_addr alias { [4 x i32] }, { [4 x i32] }* @_ZTV1B.local

// A::bar() is defined outside of the module that defines the vtable for A
// CHECK: define void @_ZN1A3barEv(%class.A* nocapture {{[^,]*}} %this) unnamed_addr
// CHECK: define void @_ZN1A3barEv(%class.A* {{.*}}%this) unnamed_addr
// CHECK-NEXT: entry:
// CHECK-NEXT: ret void
// CHECK-NEXT: }

// CHECK: define void @_ZN1B3fooEv(%class.B* nocapture {{[^,]*}} %this) unnamed_addr
// CHECK-NEXT: entry:
// CHECK-NEXT: ret void
// CHECK-NEXT: }

// The stubs for B::foo() and A::bar() are hidden
// CHECK: define hidden void @_ZN1B3fooEv.stub(%class.B* nocapture {{[^,]*}} %0) unnamed_addr #{{[0-9]+}} comdat
// CHECK-NEXT: entry:
// CHECK-NEXT: ret void
// CHECK-NEXT: }

// CHECK: define hidden void @_ZN1A3barEv.stub(%class.A* nocapture {{[^,]*}} %0) unnamed_addr #{{[0-9]+}} comdat
// CHECK: define void @_ZN1B3fooEv(%class.B* {{.*}}%this) unnamed_addr
// CHECK-NEXT: entry:
// CHECK-NEXT: ret void
// CHECK-NEXT: }
Expand Down

0 comments on commit cf8ff75

Please sign in to comment.